def removeCDPwatermark(object, path_to_ebook):
    """Strip the "META-INF/cdp.info" watermark file from an ePub.

    "META-INF/cdp.info" is a watermark file used by some Tolino vendors.
    We don't want that in our eBooks, so lets remove that file.

    object:        plugin instance providing temporary_file(ext) for the output.
    path_to_ebook: path to the (possibly watermarked) ePub.

    Returns the path of the cleaned copy, or the original path when no
    watermark is present or anything goes wrong (best-effort behavior).
    """
    try:
        # Close both the raw handle and the ZipFile when done (the previous
        # version leaked the input file handle).
        with open(path_to_ebook, 'rb') as fh, closing(ZipFile(fh)) as infile:
            namelist = infile.namelist()
            if 'META-INF/cdp.info' not in namelist:
                # No watermark present - return the book untouched.
                return path_to_ebook

            # "mimetype" must be the first (and uncompressed) entry of an ePub,
            # so pull it out of the list and re-add it at the front.  Guard
            # against malformed books that lack a mimetype entry entirely
            # (previously this raised ValueError and aborted the cleanup).
            has_mimetype = "mimetype" in namelist
            if has_mimetype:
                namelist.remove("mimetype")
            namelist.remove("META-INF/cdp.info")

            output = object.temporary_file(".epub").name
            kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
            with open(output, 'wb') as outfh, \
                    closing(ZipFile(outfh, 'w', **kwds)) as outf:
                first = ["mimetype"] if has_mimetype else []
                for path in (first + namelist):
                    data = infile.read(path)
                    zi = ZipInfo(path)
                    oldzi = infile.getinfo(path)
                    try:
                        zi.compress_type = oldzi.compress_type
                        if path == "mimetype":
                            # ePub OCF requires mimetype to be stored, not deflated.
                            zi.compress_type = ZIP_STORED
                        # Preserve the original entry's metadata.
                        zi.date_time = oldzi.date_time
                        zi.comment = oldzi.comment
                        zi.extra = oldzi.extra
                        zi.internal_attr = oldzi.internal_attr
                        # External attributes depend on the create system, so copy both.
                        zi.external_attr = oldzi.external_attr
                        zi.create_system = oldzi.create_system
                        if any(ord(c) >= 128 for c in path) or any(
                                ord(c) >= 128 for c in zi.comment):
                            # If the file name or the comment contains any non-ASCII char, set the UTF8-flag
                            zi.flag_bits |= 0x800
                    except Exception:
                        # Metadata is nice-to-have; never fail the copy over it.
                        pass
                    outf.writestr(zi, data)

        print("Watermark: Successfully removed cdp.info watermark")
        return output
    except Exception:
        # Best effort: on any error return the book unmodified.
        traceback.print_exc()
        return path_to_ebook
def test_parseextra(self):
    """parse_extra must split a raw extra blob into {header_id: payload}."""
    # Build a single 0x5455 ("UT" extended-timestamp) extra field:
    # header id, payload length (5), then a flags byte and a 32-bit time.
    timestamp = 978307200
    expected_payload = struct.pack('<Bl', 1, timestamp)
    zip_entry = ZipInfo("foo")
    zip_entry.extra = struct.pack('<HHBl', 0x5455, 5, 1, timestamp)
    parsed = Archive.parse_extra(zip_entry)
    assert_true(0x5455 in parsed)
    assert_equal(parsed[0x5455], expected_payload)
def decryptBook(userkey, inpath, outpath):
    """Decrypt an Adobe Adept ePub (Python 2 code path).

    userkey: RSA private key material accepted by the RSA() wrapper.
    inpath:  path of the encrypted ePub; outpath: path for the decrypted copy.
    Returns 0 on success, 1 when the book is DRM-free or not Adept, 2 on
    decryption failure.
    """
    if AES is None:
        raise ADEPTError(u"PyCrypto or OpenSSL must be installed.")
    rsa = RSA(userkey)
    with closing(ZipFile(open(inpath, 'rb'))) as inf:
        namelist = set(inf.namelist())
        if 'META-INF/rights.xml' not in namelist or \
           'META-INF/encryption.xml' not in namelist:
            print u"{0:s} is DRM-free.".format(os.path.basename(inpath))
            return 1
        # Drop the DRM bookkeeping entries (META_NAMES) from the copy list.
        for name in META_NAMES:
            namelist.remove(name)
        try:
            # The base64 session key lives in rights.xml under adept:encryptedKey.
            rights = etree.fromstring(inf.read('META-INF/rights.xml'))
            adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
            expr = './/%s' % (adept('encryptedKey'),)
            bookkey = ''.join(rights.findtext(expr))
            # 172 base64 chars == 128 raw bytes, i.e. a 1024-bit RSA block.
            if len(bookkey) != 172:
                print u"{0:s} is not a secure Adobe Adept ePub.".format(os.path.basename(inpath))
                return 1
            bookkey = rsa.decrypt(bookkey.decode('base64'))
            # Padded as per RSAES-PKCS1-v1_5: a valid block has a 0x00
            # separator just before the 16-byte AES key at the end.
            if bookkey[-17] != '\x00':
                print u"Could not decrypt {0:s}. Wrong key".format(os.path.basename(inpath))
                return 2
            encryption = inf.read('META-INF/encryption.xml')
            decryptor = Decryptor(bookkey[-16:], encryption)
            kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
            with closing(ZipFile(open(outpath, 'wb'), 'w', **kwds)) as outf:
                # mimetype must be the first, uncompressed entry of an ePub.
                zi = ZipInfo('mimetype')
                zi.compress_type=ZIP_STORED
                try:
                    # if the mimetype is present, get its info, including time-stamp
                    oldzi = inf.getinfo('mimetype')
                    # copy across fields to be preserved
                    zi.date_time = oldzi.date_time
                    zi.comment = oldzi.comment
                    zi.extra = oldzi.extra
                    zi.internal_attr = oldzi.internal_attr
                    # external attributes are dependent on the create system, so copy both.
                    zi.external_attr = oldzi.external_attr
                    zi.create_system = oldzi.create_system
                except:
                    pass
                outf.writestr(zi, inf.read('mimetype'))
                for path in namelist:
                    data = inf.read(path)
                    zi = ZipInfo(path)
                    zi.compress_type=ZIP_DEFLATED
                    try:
                        # get the file info, including time-stamp
                        oldzi = inf.getinfo(path)
                        # copy across useful fields
                        zi.date_time = oldzi.date_time
                        zi.comment = oldzi.comment
                        zi.extra = oldzi.extra
                        zi.internal_attr = oldzi.internal_attr
                        # external attributes are dependent on the create system, so copy both.
                        zi.external_attr = oldzi.external_attr
                        zi.create_system = oldzi.create_system
                    except:
                        pass
                    outf.writestr(zi, decryptor.decrypt(path, data))
        except:
            print u"Could not decrypt {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc())
            return 2
    return 0
def decryptFontsBook(inpath, outpath):
    """De-obfuscate embedded fonts in an ePub (IETF and Adobe algorithms).

    Derives the two possible obfuscation keys from the book's own OPF
    metadata, then rewrites the ePub with fonts deobfuscated by the
    project's Decryptor helper.  Returns 0 on success, 1 when no fonts /
    no OPF can be handled, 2 on error.
    """
    with closing(ZipFile(open(inpath, 'rb'))) as inf:
        namelist = inf.namelist()
        if 'META-INF/encryption.xml' not in namelist:
            # Nothing is obfuscated, nothing to do.
            return 1

        # Font key handling:
        font_master_key = None              # IETF key (SHA-1 of the unique identifier)
        adobe_master_encryption_key = None  # Adobe key (raw UUID bytes)

        contNS = lambda tag: '{%s}%s' % ('urn:oasis:names:tc:opendocument:xmlns:container', tag)
        path = None

        try:
            # Locate the content OPF through META-INF/container.xml.
            container = etree.fromstring(inf.read("META-INF/container.xml"))
            rootfiles = container.find(contNS("rootfiles")).findall(contNS("rootfile"))
            for rootfile in rootfiles:
                path = rootfile.get("full-path", None)
                if (path is not None):
                    break
        except:
            pass

        # If path is None, we didn't find an OPF, so we probably don't have a font key.
        # If path is set, it's the path to the main content OPF file.
        if (path is None):
            print("FontDecrypt: No OPF for font obfuscation found")
            return 1
        else:
            packageNS = lambda tag: '{%s}%s' % ('http://www.idpf.org/2007/opf', tag)
            metadataDCNS = lambda tag: '{%s}%s' % ('http://purl.org/dc/elements/1.1/', tag)

            try:
                container = etree.fromstring(inf.read(path))
            except:
                container = []

            ## IETF font key algorithm:
            # Key is SHA-1 of the package's unique identifier (whitespace removed).
            print("FontDecrypt: Checking {0} for IETF font obfuscation keys ... ".format(path), end='')
            secret_key_name = None
            try:
                secret_key_name = container.get("unique-identifier")
            except:
                pass

            try:
                identify_element = container.find(packageNS("metadata")).find(metadataDCNS("identifier"))
                if (secret_key_name is None or secret_key_name == identify_element.get("id")):
                    font_master_key = identify_element.text
            except:
                pass

            if (font_master_key is not None):
                if (secret_key_name is None):
                    print("found '%s'" % (font_master_key))
                else:
                    print("found '%s' (%s)" % (font_master_key, secret_key_name))

                # Trim / remove forbidden characters from the key, then hash it:
                font_master_key = font_master_key.replace(' ', '')
                font_master_key = font_master_key.replace('\t', '')
                font_master_key = font_master_key.replace('\r', '')
                font_master_key = font_master_key.replace('\n', '')
                font_master_key = font_master_key.encode('utf-8')
                font_master_key = hashlib.sha1(font_master_key).digest()
            else:
                print("not found")

            ## Adobe font key algorithm
            # Key is the book's UUID identifier with dashes/whitespace removed,
            # hex-decoded to 16 raw bytes.
            print("FontDecrypt: Checking {0} for Adobe font obfuscation keys ... ".format(path), end='')
            try:
                metadata = container.find(packageNS("metadata"))
                identifiers = metadata.findall(metadataDCNS("identifier"))
                uid = None
                uidMalformed = False

                for identifier in identifiers:
                    if identifier.get(packageNS("scheme")) == "UUID":
                        if identifier.text[:9] == "urn:uuid:":
                            uid = identifier.text[9:]
                        else:
                            uid = identifier.text
                        break
                    # Fallback: any identifier carrying a urn:uuid prefix.
                    if identifier.text[:9] == "urn:uuid:":
                        uid = identifier.text[9:]
                        break

                if uid is not None:
                    uid = uid.replace(chr(0x20), '').replace(chr(0x09), '')
                    uid = uid.replace(chr(0x0D), '').replace(chr(0x0A), '').replace('-', '')

                    if len(uid) < 16:
                        uidMalformed = True
                    if not all(c in "0123456789abcdefABCDEF" for c in uid):
                        uidMalformed = True

                    if not uidMalformed:
                        print("found '{0}'".format(uid))
                        # Duplicate so short UUID strings still yield 32 hex chars.
                        uid = uid + uid
                        adobe_master_encryption_key = binascii.unhexlify(uid[:32])

                if adobe_master_encryption_key is None:
                    print("not found")
            except:
                print("exception")
                pass

        # Begin decrypting.
        try:
            encryption = inf.read('META-INF/encryption.xml')
            decryptor = Decryptor(font_master_key, adobe_master_encryption_key, encryption)
            kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
            with closing(ZipFile(open(outpath, 'wb'), 'w', **kwds)) as outf:
                # Mimetype needs to be the first entry, so remove it from the list
                # whereever it is, then add it at the beginning.
                namelist.remove("mimetype")
                for path in (["mimetype"] + namelist):
                    data = inf.read(path)
                    zi = ZipInfo(path)
                    zi.compress_type = ZIP_DEFLATED
                    if path == "mimetype":
                        # mimetype must not be compressed
                        zi.compress_type = ZIP_STORED
                    elif path == "META-INF/encryption.xml":
                        # Check if there's still other entries not related to fonts
                        if (decryptor.check_if_remaining()):
                            data = decryptor.get_xml()
                            print("FontDecrypt: There's remaining entries in encryption.xml, adding file ...")
                        else:
                            # No remaining entries, no need for that file.
                            continue
                    try:
                        # get the file info, including time-stamp
                        oldzi = inf.getinfo(path)
                        # copy across useful fields
                        zi.date_time = oldzi.date_time
                        zi.comment = oldzi.comment
                        zi.extra = oldzi.extra
                        zi.internal_attr = oldzi.internal_attr
                        # external attributes are dependent on the create system, so copy both.
                        zi.external_attr = oldzi.external_attr
                        zi.create_system = oldzi.create_system
                        if any(ord(c) >= 128 for c in path) or any(ord(c) >= 128 for c in zi.comment):
                            # If the file name or the comment contains any non-ASCII char, set the UTF8-flag
                            zi.flag_bits |= 0x800
                    except:
                        pass
                    if path == "mimetype":
                        outf.writestr(zi, inf.read('mimetype'))
                    elif path == "META-INF/encryption.xml":
                        outf.writestr(zi, data)
                    else:
                        outf.writestr(zi, decryptor.decrypt(path, data))
        except:
            print("FontDecrypt: Could not decrypt fonts in {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc()))
            traceback.print_exc()
            return 2
    return 0
def decryptBook(keyb64, inpath, outpath):
    """Decrypt a Barnes & Noble (Ignoble) ePub (Python 2 code path).

    keyb64:  base64-encoded B&N user key; only the first 16 decoded bytes
             are used as the AES key.
    Returns 0 on success, 1 when the book is DRM-free or not B&N, 2 on error.
    """
    if AES is None:
        raise IGNOBLEError(u"PyCrypto or OpenSSL must be installed.")
    key = keyb64.decode("base64")[:16]
    aes = AES(key)
    with closing(ZipFile(open(inpath, "rb"))) as inf:
        namelist = set(inf.namelist())
        if "META-INF/rights.xml" not in namelist or "META-INF/encryption.xml" not in namelist:
            print u"{0:s} is DRM-free.".format(os.path.basename(inpath))
            return 1
        # Drop the DRM bookkeeping entries (META_NAMES) from the copy list.
        for name in META_NAMES:
            namelist.remove(name)
        try:
            # The encrypted session key lives in rights.xml (adept namespace).
            rights = etree.fromstring(inf.read("META-INF/rights.xml"))
            adept = lambda tag: "{%s}%s" % (NSMAP["adept"], tag)
            expr = ".//%s" % (adept("encryptedKey"),)
            bookkey = "".join(rights.findtext(expr))
            # B&N keys are 64 base64 chars long.
            if len(bookkey) != 64:
                print u"{0:s} is not a secure Barnes & Noble ePub.".format(os.path.basename(inpath))
                return 1
            bookkey = aes.decrypt(bookkey.decode("base64"))
            # Strip PKCS#7-style padding (last byte is the pad length).
            bookkey = bookkey[: -ord(bookkey[-1])]
            encryption = inf.read("META-INF/encryption.xml")
            decryptor = Decryptor(bookkey[-16:], encryption)
            kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
            with closing(ZipFile(open(outpath, "wb"), "w", **kwds)) as outf:
                # mimetype must be the first, uncompressed entry of an ePub.
                zi = ZipInfo("mimetype")
                zi.compress_type = ZIP_STORED
                try:
                    # if the mimetype is present, get its info, including time-stamp
                    oldzi = inf.getinfo("mimetype")
                    # copy across fields to be preserved
                    zi.date_time = oldzi.date_time
                    zi.comment = oldzi.comment
                    zi.extra = oldzi.extra
                    zi.internal_attr = oldzi.internal_attr
                    # external attributes are dependent on the create system, so copy both.
                    zi.external_attr = oldzi.external_attr
                    zi.create_system = oldzi.create_system
                except:
                    pass
                outf.writestr(zi, inf.read("mimetype"))
                for path in namelist:
                    data = inf.read(path)
                    zi = ZipInfo(path)
                    zi.compress_type = ZIP_DEFLATED
                    try:
                        # get the file info, including time-stamp
                        oldzi = inf.getinfo(path)
                        # copy across useful fields
                        zi.date_time = oldzi.date_time
                        zi.comment = oldzi.comment
                        zi.extra = oldzi.extra
                        zi.internal_attr = oldzi.internal_attr
                        # external attributes are dependent on the create system, so copy both.
                        zi.external_attr = oldzi.external_attr
                        zi.create_system = oldzi.create_system
                    except:
                        pass
                    outf.writestr(zi, decryptor.decrypt(path, data))
        except:
            print u"Could not decrypt {0:s} because of an exception:\n{1:s}".format(
                os.path.basename(inpath), traceback.format_exc()
            )
            return 2
    return 0
def decryptBook(userkey, inpath, outpath):
    """Decrypt an Adobe Adept or PassHash (B&N) ePub using PyCryptodome.

    userkey: DER RSA private key (Adept) or base64 key (PassHash); the key
             kind is inferred from the length of the stored book key.
    Returns 0 on success, 1 for DRM-free / non-Adobe books, 2 on failure.
    """
    with closing(ZipFile(open(inpath, 'rb'))) as inf:
        namelist = inf.namelist()
        if 'META-INF/rights.xml' not in namelist or \
           'META-INF/encryption.xml' not in namelist:
            print("{0:s} is DRM-free.".format(os.path.basename(inpath)))
            return 1
        # Drop the DRM bookkeeping entries (META_NAMES) from the copy list.
        for name in META_NAMES:
            namelist.remove(name)
        try:
            rights = etree.fromstring(inf.read('META-INF/rights.xml'))
            adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
            expr = './/%s' % (adept('encryptedKey'), )
            bookkeyelem = rights.find(expr)
            bookkey = bookkeyelem.text
            # keyType > 2 marks Adobe's "hardened" key wrapping.
            keytype = bookkeyelem.attrib.get('keyType', '0')
            if len(bookkey) >= 172 and int(keytype, 10) > 2:
                print("{0:s} is a secure Adobe Adept ePub with hardening.".format(os.path.basename(inpath)))
            elif len(bookkey) == 172:
                print("{0:s} is a secure Adobe Adept ePub.".format(os.path.basename(inpath)))
            elif len(bookkey) == 64:
                print("{0:s} is a secure Adobe PassHash (B&N) ePub.".format(os.path.basename(inpath)))
            else:
                print("{0:s} is not an Adobe-protected ePub!".format(os.path.basename(inpath)))
                return 1
            if len(bookkey) != 64:
                # Normal or "hardened" Adobe ADEPT
                rsakey = RSA.import_key(userkey)  # parses the ASN1 structure
                bookkey = base64.b64decode(bookkey)
                if int(keytype, 10) > 2:
                    # Undo the extra key-hardening layer first.
                    bookkey = removeHardening(rights, keytype, bookkey)
                try:
                    bookkey = PKCS1_v1_5.new(rsakey).decrypt(bookkey, None)  # automatically unpads
                except ValueError:
                    bookkey = None

                if bookkey is None:
                    print("Could not decrypt {0:s}. Wrong key".format(os.path.basename(inpath)))
                    return 2
            else:
                # Adobe PassHash / B&N
                key = base64.b64decode(userkey)[:16]
                bookkey = base64.b64decode(bookkey)
                bookkey = unpad(AES.new(key, AES.MODE_CBC, b'\x00' * 16).decrypt(bookkey), 16)  # PKCS#7

                if len(bookkey) > 16:
                    # Keep only the trailing 16 bytes (the AES book key).
                    bookkey = bookkey[-16:]

            encryption = inf.read('META-INF/encryption.xml')
            decryptor = Decryptor(bookkey, encryption)
            kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
            with closing(ZipFile(open(outpath, 'wb'), 'w', **kwds)) as outf:
                for path in (["mimetype"] + namelist):
                    data = inf.read(path)
                    zi = ZipInfo(path)
                    zi.compress_type = ZIP_DEFLATED

                    if path == "mimetype":
                        # mimetype must be first and uncompressed (ePub OCF).
                        zi.compress_type = ZIP_STORED
                    elif path == "META-INF/encryption.xml":
                        # Check if there's still something in there
                        if (decryptor.check_if_remaining()):
                            data = decryptor.get_xml()
                            print("Adding encryption.xml for the remaining embedded files.")
                            # We removed DRM, but there's still stuff like obfuscated fonts.
                        else:
                            continue

                    try:
                        # get the file info, including time-stamp
                        oldzi = inf.getinfo(path)
                        # copy across useful fields
                        zi.date_time = oldzi.date_time
                        zi.comment = oldzi.comment
                        zi.extra = oldzi.extra
                        zi.internal_attr = oldzi.internal_attr
                        # external attributes are dependent on the create system, so copy both.
                        zi.external_attr = oldzi.external_attr
                        zi.create_system = oldzi.create_system
                        if any(ord(c) >= 128 for c in path) or any(ord(c) >= 128 for c in zi.comment):
                            # If the file name or the comment contains any non-ASCII char, set the UTF8-flag
                            zi.flag_bits |= 0x800
                    except:
                        pass

                    if path == "META-INF/encryption.xml":
                        outf.writestr(zi, data)
                    else:
                        outf.writestr(zi, decryptor.decrypt(path, data))
        except:
            print("Could not decrypt {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc()))
            return 2
    return 0
def removeOPFwatermarks(object, path_to_ebook):
    """Strip Amazon/DuMont and eLibri/LemonInk watermarks from the OPF.

    object:        plugin instance providing temporary_file(ext).
    path_to_ebook: path of the ePub to clean.
    Returns the path of the cleaned copy, or the original path when no
    watermark was found or an error occurred (best-effort).

    NOTE(review): `inf` (the input ZipFile and its raw file handle) is
    never explicitly closed on any path — relies on GC; confirm intended.
    """
    contNS = lambda tag: '{%s}%s' % ('urn:oasis:names:tc:opendocument:xmlns:container', tag)
    opf_path = None

    try:
        inf = ZipFile(open(path_to_ebook, 'rb'))
        # Locate the content OPF via META-INF/container.xml.
        container = etree.fromstring(inf.read("META-INF/container.xml"))
        rootfiles = container.find(contNS("rootfiles")).findall(contNS("rootfile"))
        for rootfile in rootfiles:
            opf_path = rootfile.get("full-path", None)
            if (opf_path is not None):
                break
    except:
        traceback.print_exc()
        return path_to_ebook

    # If path is None, we didn't find an OPF, so we probably don't have a font key.
    # If path is set, it's the path to the main content OPF file.
    if (opf_path is None):
        # No OPF found - no watermark
        return path_to_ebook
    else:
        try:
            container_str = inf.read(opf_path).decode("utf-8")
            container_str_new = container_str

            had_amazon = False
            had_elibri = False

            # Remove Amazon hex watermarks
            # Match optional newline at the beginning, then spaces, then a "meta" tag with name = "Watermark" or "Watermark_(hex)" and a "content" element.
            # This regex also matches DuMont watermarks with meta name="watermark", with the case-insensitive match on the "w" in watermark.
            pre_remove = container_str_new
            container_str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+name=\"[Ww]atermark(_\(hex\))?\"\s+content=\"[0-9a-fA-F]+\"\s*\/>', '', container_str_new)
            container_str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+content=\"[0-9a-fA-F]+\"\s+name=\"[Ww]atermark(_\(hex\))?\"\s*\/>', '', container_str_new)
            if pre_remove != container_str_new:
                had_amazon = True

            # Remove elibri / lemonink watermark
            # Lemonink replaces all "id" fields in the opf with "idX_Y", with X being the watermark and Y being a number for that particular ID.
            # This regex replaces all "idX_Y" IDs with "id_Y", removing the watermark IDs.
            pre_remove = container_str_new
            container_str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<\!\-\-\s*Wygenerowane przez elibri dla zamówienia numer [0-9a-fA-F]+\s*\-\-\>', '', container_str_new)
            if pre_remove != container_str_new:
                # To prevent this Regex from applying to books without that watermark, only do that if the watermark above was found.
                container_str_new = re.sub(r'\=\"id[0-9]+_([0-9]+)\"', r'="id_\1"', container_str_new)
                if pre_remove != container_str_new:
                    had_elibri = True
        except:
            traceback.print_exc()
            return path_to_ebook

    if (container_str == container_str_new):
        # container didn't change - no watermark
        return path_to_ebook

    # Re-package without watermark
    namelist = inf.namelist()
    namelist.remove("mimetype")

    try:
        output = object.temporary_file(".epub").name
        kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
        with closing(ZipFile(open(output, 'wb'), 'w', **kwds)) as outf:
            for path in (["mimetype"] + namelist):
                data = inf.read(path)
                if path == opf_path:
                    # Found OPF, replacing ...
                    data = container_str_new
                zi = ZipInfo(path)
                oldzi = inf.getinfo(path)
                try:
                    zi.compress_type = oldzi.compress_type
                    if path == "mimetype":
                        # ePub OCF: mimetype must be stored uncompressed.
                        zi.compress_type = ZIP_STORED
                    zi.date_time = oldzi.date_time
                    zi.comment = oldzi.comment
                    zi.extra = oldzi.extra
                    zi.internal_attr = oldzi.internal_attr
                    zi.external_attr = oldzi.external_attr
                    zi.create_system = oldzi.create_system
                    if any(ord(c) >= 128 for c in path) or any(ord(c) >= 128 for c in zi.comment):
                        # If the file name or the comment contains any non-ASCII char, set the UTF8-flag
                        zi.flag_bits |= 0x800
                except:
                    pass
                outf.writestr(zi, data)
    except:
        traceback.print_exc()
        return path_to_ebook

    if had_elibri:
        print("Watermark: Successfully stripped eLibri watermark from OPF file.")
    if had_amazon:
        print("Watermark: Successfully stripped Amazon watermark from OPF file.")

    return output
def decryptBook(userkey, inpath, outpath):
    """Decrypt an Adobe Adept or PassHash (B&N) ePub (custom-crypto path).

    Uses the project's RSA/AES wrappers instead of PyCryptodome.
    Returns 0 on success, 1 for DRM-free / unsupported books, 2 on failure.
    Raises ADEPTNewVersionError for books wrapped with Adobe's new DRM.
    """
    if AES is None:
        raise ADEPTError("PyCrypto or OpenSSL must be installed.")
    with closing(ZipFile(open(inpath, 'rb'))) as inf:
        namelist = inf.namelist()
        if 'META-INF/rights.xml' not in namelist or \
           'META-INF/encryption.xml' not in namelist:
            print("{0:s} is DRM-free.".format(os.path.basename(inpath)))
            return 1
        # Drop the DRM bookkeeping entries (META_NAMES) from the copy list.
        for name in META_NAMES:
            namelist.remove(name)
        try:
            rights = etree.fromstring(inf.read('META-INF/rights.xml'))
            adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
            expr = './/%s' % (adept('encryptedKey'), )
            bookkey = ''.join(rights.findtext(expr))
            # 192 base64 chars indicates Adobe's newer (unsupported) DRM.
            if len(bookkey) == 192:
                print("{0:s} seems to be an Adobe ADEPT ePub with Adobe's new DRM".format(os.path.basename(inpath)))
                print("This DRM cannot be removed yet. ")
                print("Try getting your distributor to give you a new ACSM file, then open that in an old version of ADE (2.0).")
                print("If your book distributor is not enforcing the new DRM yet, this will give you a copy with the old DRM.")
                raise ADEPTNewVersionError("Book uses new ADEPT encryption")

            if len(bookkey) == 172:
                print("{0:s} is a secure Adobe Adept ePub.".format(os.path.basename(inpath)))
            elif len(bookkey) == 64:
                print("{0:s} is a secure Adobe PassHash (B&N) ePub.".format(os.path.basename(inpath)))
            else:
                print("{0:s} is not an Adobe-protected ePub!".format(os.path.basename(inpath)))
                return 1

            if len(bookkey) != 64:
                # Normal Adobe ADEPT
                rsa = RSA(userkey)
                bookkey = rsa.decrypt(base64.b64decode(bookkey.encode('ascii')))
            else:
                # Adobe PassHash / B&N
                key = base64.b64decode(userkey)[:16]
                aes = AES(key)
                bookkey = aes.decrypt(base64.b64decode(bookkey))
                # Strip PKCS#7 padding; bookkey items are str on py2, int on py3.
                if type(bookkey[-1]) != int:
                    pad = ord(bookkey[-1])
                else:
                    pad = bookkey[-1]
                bookkey = bookkey[:-pad]

            # Padded as per RSAES-PKCS1-v1_5: verify the padding, then keep
            # only the trailing 16 bytes (the AES book key).
            if len(bookkey) > 16:
                if verify_book_key(bookkey):
                    bookkey = bookkey[-16:]
                else:
                    print("Could not decrypt {0:s}. Wrong key".format(os.path.basename(inpath)))
                    return 2

            encryption = inf.read('META-INF/encryption.xml')
            decryptor = Decryptor(bookkey, encryption)
            kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
            with closing(ZipFile(open(outpath, 'wb'), 'w', **kwds)) as outf:
                for path in (["mimetype"] + namelist):
                    data = inf.read(path)
                    zi = ZipInfo(path)
                    zi.compress_type = ZIP_DEFLATED

                    if path == "mimetype":
                        # mimetype must be first and uncompressed (ePub OCF).
                        zi.compress_type = ZIP_STORED
                    elif path == "META-INF/encryption.xml":
                        # Check if there's still something in there
                        if (decryptor.check_if_remaining()):
                            data = decryptor.get_xml()
                            print("Adding encryption.xml for the remaining embedded files.")
                            # We removed DRM, but there's still stuff like obfuscated fonts.
                        else:
                            continue

                    try:
                        # get the file info, including time-stamp
                        oldzi = inf.getinfo(path)
                        # copy across useful fields
                        zi.date_time = oldzi.date_time
                        zi.comment = oldzi.comment
                        zi.extra = oldzi.extra
                        zi.internal_attr = oldzi.internal_attr
                        # external attributes are dependent on the create system, so copy both.
                        zi.external_attr = oldzi.external_attr
                        zi.create_system = oldzi.create_system
                        if any(ord(c) >= 128 for c in path) or any(ord(c) >= 128 for c in zi.comment):
                            # If the file name or the comment contains any non-ASCII char, set the UTF8-flag
                            zi.flag_bits |= 0x800
                    except:
                        pass

                    if path == "META-INF/encryption.xml":
                        outf.writestr(zi, data)
                    else:
                        outf.writestr(zi, decryptor.decrypt(path, data))
        except:
            print("Could not decrypt {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc()))
            return 2
    return 0
def fix_zip(self):
    """Rebuild self.filelist / self.NameToInfo for a damaged zip archive.

    Scans the raw bytes for local-file and central-directory records and
    reconstructs a ZipInfo for each surviving entry.  Entries present in
    the central directory are preferred; orphaned local headers are
    reconstructed with guessed metadata carried over from the last
    central-directory entry.  Returns False when self.broken is not set;
    otherwise returns None after repairing in place.
    """
    if not self.broken:
        return False

    def decode_extra(info):
        # ZipInfo._decodeExtra grew a required filename-CRC argument in
        # Python 3.13; support both signatures.
        try:
            # noinspection PyProtectedMember
            info._decodeExtra()
        except TypeError:
            from binascii import crc32
            # noinspection PyProtectedMember
            info._decodeExtra(crc32(info.orig_filename.encode('utf-8')))

    self.fp.seek(0, 2)
    file_len = self.fp.tell()
    mm = mmap.mmap(self.fp.fileno(), 0, access=mmap.ACCESS_READ)
    offset = 0
    file_list = {}
    cd_list = {}
    try:
        # Pass one: parse the zip file, indexing every record signature.
        while offset + 4 < file_len:
            hdr_off = mm.find(b"PK", offset)
            if hdr_off == -1:
                break
            hdr_type = mm[hdr_off:hdr_off + 4]
            if hdr_type == stringFileHeader:
                # local file header
                if hdr_off + sizeFileHeader > file_len:
                    break
                fheader = mm[hdr_off:hdr_off + sizeFileHeader]
                fheader = struct.unpack(structFileHeader, fheader)
                start = hdr_off
                size = sizeFileHeader + fheader[_FH_COMPRESSED_SIZE] + fheader[_FH_FILENAME_LENGTH] + \
                    fheader[_FH_EXTRA_FIELD_LENGTH]
                name = mm[hdr_off + sizeFileHeader:hdr_off + sizeFileHeader + fheader[_FH_FILENAME_LENGTH]]
                file_list[name] = [start, size, fheader]
                offset = hdr_off + size
            elif hdr_type == stringCentralDir:
                # central directory record
                if hdr_off + sizeCentralDir > file_len:
                    break
                centdir = mm[hdr_off:hdr_off + sizeCentralDir]
                centdir = struct.unpack(structCentralDir, centdir)
                start = hdr_off
                size = sizeCentralDir + centdir[_CD_FILENAME_LENGTH] + centdir[_CD_EXTRA_FIELD_LENGTH] + \
                    centdir[_CD_COMMENT_LENGTH]
                name = mm[hdr_off + sizeCentralDir:hdr_off + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]]
                cd_list[name] = [start, size, centdir]
                offset = hdr_off + size
            elif hdr_type == stringEndArchive:
                offset = hdr_off + sizeEndCentDir
            else:
                # Stray "PK" inside file data - keep scanning byte by byte.
                offset = hdr_off + 1

        # Guesses carried into pass two for orphaned local headers.
        last_cv = 20
        last_ea = 0
        last_cs = 0
        last_dt = (0, 0)

        # Pass two, repair: entries with a central-directory record first.
        for filename, (start, end, centdir) in cd_list.items():
            if filename not in file_list:
                continue
            if isinstance(filename, bytes):
                x = ZipInfo(filename.decode('utf-8', 'backslashreplace'))
            else:
                x = ZipInfo(filename)
            # A central-directory record is laid out as: fixed header,
            # file name, extra field, comment.  BUGFIX: the previous code
            # read the extra field at the file-name offset and read the
            # comment using the extra field's length.
            extra_off = start + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
            x.extra = mm[extra_off:extra_off + centdir[_CD_EXTRA_FIELD_LENGTH]]
            comment_off = extra_off + centdir[_CD_EXTRA_FIELD_LENGTH]
            x.comment = mm[comment_off:comment_off + centdir[_CD_COMMENT_LENGTH]]
            x.header_offset = file_list[filename][0]
            (x.create_version, x.create_system,
             x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                           t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)
            last_ea = x.external_attr
            last_cs = x.create_system
            last_cv = x.create_version
            last_dt = (d, t)
            decode_extra(x)
            # x.filename = x._decodeFilename()
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

        # Local headers with no matching central-directory record.
        for filename, (start, end, fheader) in file_list.items():
            if filename in cd_list:
                continue
            x = ZipInfo(filename.decode('utf-8', 'backslashreplace'))
            # BUGFIX: extra/comment are bytes fields; were str "".
            x.extra = b""
            x.comment = b""
            x.header_offset = file_list[filename][0]
            x.create_version = last_cv
            x.create_system = last_cs
            x.extract_version = fheader[_FH_EXTRACT_VERSION]
            x.reserved = 0
            x.flag_bits = fheader[_FH_GENERAL_PURPOSE_FLAG_BITS]
            x.compress_type = fheader[_FH_COMPRESSION_METHOD]
            d, t = last_dt
            x.CRC = fheader[_FH_CRC]
            x.compress_size = fheader[_FH_COMPRESSED_SIZE]
            x.file_size = fheader[_FH_UNCOMPRESSED_SIZE]
            x.volume = 0
            x.internal_attr = 0
            x.external_attr = last_ea
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                           t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)
            decode_extra(x)
            # x.filename = x._decodeFilename()
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x
    finally:
        mm.close()
def decryptLCPbook(inpath, passphrases, parent_object):
    """Decrypt a Readium LCP-protected book (ePub, PDF or generic ZIP).

    inpath:        path to the LCP-protected container.
    passphrases:   iterable of user-supplied candidate passphrases.
    parent_object: plugin instance providing temporary_file(ext).

    Returns the path of the decrypted output file.
    Raises LCPError when the book is not LCP, uses an unknown profile /
    algorithm, or no passphrase matches.
    """
    if not isLCPbook(inpath):
        raise LCPError("This is not an LCP-encrypted book")

    # NOTE(review): `file` shadows the py2 builtin; kept for compatibility.
    file = ZipFile(open(inpath, 'rb'))
    license = json.loads(file.read('META-INF/license.lcpl'))
    print("LCP: Found LCP-encrypted book {0}".format(license["id"]))

    user_info_string1 = returnUserInfoStringForLicense(license, None)
    if (user_info_string1 is not None):
        print("LCP: Account information: " + user_info_string1)

    # Check algorithm: pick the key-transform matching the LCP profile.
    if license["encryption"]["profile"] == "http://readium.org/lcp/basic-profile":
        print("LCP: Book is using lcp/basic-profile encryption.")
        transform_algo = LCPTransform.secret_transform_basic
    elif license["encryption"]["profile"] == "http://readium.org/lcp/profile-1.0":
        print("LCP: Book is using lcp/profile-1.0 encryption")
        transform_algo = LCPTransform.secret_transform_profile10
    else:
        file.close()
        raise LCPError("Book is using an unknown LCP encryption standard: {0}".format(license["encryption"]["profile"]))

    if ("algorithm" in license["encryption"]["content_key"] and
            license["encryption"]["content_key"]["algorithm"] != "http://www.w3.org/2001/04/xmlenc#aes256-cbc"):
        file.close()
        raise LCPError("Book is using an unknown LCP encryption algorithm: {0}".format(license["encryption"]["content_key"]["algorithm"]))

    key_check = license["encryption"]["user_key"]["key_check"]
    encrypted_content_key = license["encryption"]["content_key"]["encrypted_value"]

    # Prepare a list of encryption keys to test:
    password_hashes = []

    # Some providers hard-code the passphrase in the LCPL file. That doesn't happen often,
    # but when it does, these files can be decrypted without knowing any passphrase.
    if "value" in license["encryption"]["user_key"]:
        try:
            password_hashes.append(binascii.hexlify(base64.decodebytes(license["encryption"]["user_key"]["value"].encode())).decode("ascii"))
        except AttributeError:
            # Python 2
            password_hashes.append(binascii.hexlify(base64.decodestring(license["encryption"]["user_key"]["value"].encode())).decode("ascii"))
    if "hex_value" in license["encryption"]["user_key"]:
        password_hashes.append(binascii.hexlify(bytearray.fromhex(license["encryption"]["user_key"]["hex_value"])).decode("ascii"))

    # Hash all the passwords provided by the user:
    for possible_passphrase in passphrases:
        algo = "http://www.w3.org/2001/04/xmlenc#sha256"
        if "algorithm" in license["encryption"]["user_key"]:
            algo = license["encryption"]["user_key"]["algorithm"]
        algo, tmp_pw = LCPTransform.userpass_to_hash(possible_passphrase.encode('utf-8'), algo)
        if tmp_pw is not None:
            password_hashes.append(tmp_pw)

    # For all the password hashes, check if one of them decrypts the book:
    correct_password_hash = None
    for possible_hash in password_hashes:
        transformed_hash = transform_algo(possible_hash)
        try:
            decrypted = None
            decrypted = dataDecryptLCP(key_check, transformed_hash)
        except:
            pass
        # A matching key decrypts the key_check back into the license id.
        if (decrypted is not None and decrypted.decode("ascii", errors="ignore") == license["id"]):
            # Found correct password hash, hooray!
            correct_password_hash = transformed_hash
            break

    # Print an error message if none of the passwords worked
    if (correct_password_hash is None):
        print("LCP: Tried {0} passphrases, but none of them could decrypt the book ...".format(len(password_hashes)))
        # Print password hint, if available
        if ("text_hint" in license["encryption"]["user_key"] and license["encryption"]["user_key"]["text_hint"] != ""):
            print("LCP: The book distributor has given you the following passphrase hint: \"{0}\"".format(license["encryption"]["user_key"]["text_hint"]))
        print("LCP: Enter the correct passphrase in the DeDRM plugin settings, then try again.")
        # Print password reset instructions, if available
        for link in license["links"]:
            if ("rel" in link and link["rel"] == "hint"):
                print("LCP: You may be able to find or reset your LCP passphrase on the following webpage: {0}".format(link["href"]))
                break
        file.close()
        raise LCPError("No correct passphrase found")

    print("LCP: Found correct passphrase, decrypting book ...")
    user_info_string2 = returnUserInfoStringForLicense(license, correct_password_hash)
    if (user_info_string2 is not None):
        if (user_info_string1 != user_info_string2):
            print("LCP: Account information: " + user_info_string2)

    # Take the key we found and decrypt the content key:
    decrypted_content_key = dataDecryptLCP(encrypted_content_key, correct_password_hash)
    if decrypted_content_key is None:
        raise LCPError("Decrypted content key is None")

    # Begin decrypting
    encryption = file.read('META-INF/encryption.xml')
    decryptor = Decryptor(decrypted_content_key, encryption)
    kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)

    mimetype = file.read("mimetype").decode("latin-1")
    if mimetype == "application/pdf":
        # Check how many PDF files there are.
        # Usually, an LCP-protected PDF/ZIP is only supposed to contain one
        # PDF file, but if there are multiple, return a ZIP that contains them all.
        pdf_files = []
        for filename in file.namelist():
            if filename.endswith(".pdf"):
                pdf_files.append(filename)

        if len(pdf_files) == 0:
            file.close()
            raise LCPError("Error: Book is an LCP-protected PDF, but doesn't contain any PDF files ...")
        elif len(pdf_files) == 1:
            # One PDF file found - extract and return that.
            pdfdata = file.read(pdf_files[0])
            outputname = parent_object.temporary_file(".pdf").name
            print("LCP: Successfully decrypted, exporting to {0}".format(outputname))
            with open(outputname, 'wb') as f:
                f.write(decryptor.decrypt(pdf_files[0], pdfdata))
            file.close()
            return outputname
        else:
            # Multiple PDFs found
            outputname = parent_object.temporary_file(".zip").name
            with closing(ZipFile(open(outputname, 'wb'), 'w', **kwds)) as outfile:
                for path in pdf_files:
                    data = file.read(path)
                    outfile.writestr(path, decryptor.decrypt(path, data))
            print("LCP: Successfully decrypted a multi-PDF ZIP file, exporting to {0}".format(outputname))
            file.close()
            return outputname
    else:
        # Not a PDF -> EPUB
        if mimetype == "application/epub+zip":
            outputname = parent_object.temporary_file(".epub").name
        else:
            outputname = parent_object.temporary_file(".zip").name

        with closing(ZipFile(open(outputname, 'wb'), 'w', **kwds)) as outfile:
            # mimetype must be 1st file. Remove from list and manually add at the beginning
            namelist = file.namelist()
            namelist.remove("mimetype")
            namelist.remove("META-INF/license.lcpl")
            for path in (["mimetype"] + namelist):
                data = file.read(path)
                zi = ZipInfo(path)
                if path == "META-INF/encryption.xml":
                    # Check if that's still needed
                    if (decryptor.check_if_remaining()):
                        data = decryptor.get_xml()
                        print("LCP: Adding encryption.xml for the remaining files.")
                    else:
                        continue
                try:
                    oldzi = file.getinfo(path)
                    if path == "mimetype":
                        # ePub OCF: mimetype must be stored uncompressed.
                        zi.compress_type = ZIP_STORED
                    else:
                        zi.compress_type = ZIP_DEFLATED
                    zi.date_time = oldzi.date_time
                    zi.comment = oldzi.comment
                    zi.extra = oldzi.extra
                    zi.internal_attr = oldzi.internal_attr
                    zi.external_attr = oldzi.external_attr
                    zi.create_system = oldzi.create_system
                    if any(ord(c) >= 128 for c in path) or any(ord(c) >= 128 for c in zi.comment):
                        # If the file name or the comment contains any non-ASCII char, set the UTF8-flag
                        zi.flag_bits |= 0x800
                except:
                    pass
                if path == "META-INF/encryption.xml":
                    outfile.writestr(zi, data)
                else:
                    outfile.writestr(zi, decryptor.decrypt(path, data))

        print("LCP: Successfully decrypted, exporting to {0}".format(outputname))
        file.close()
        return outputname
def DecryptBook(bookId):
    """Decrypt one downloaded ePub identified by *bookId*.

    Reads the encrypted book from ``gOutDir/ENC_BOOKS_DIR/<bookId>.epub``
    and writes the decrypted copy to ``gOutDir/DEC_BOOKS_DIR/<bookId>.epub``.
    The per-book AES key is recovered by RSA-decrypting the CipherValue
    found in META-INF/encryption.xml with each key in ``gRsaKeys`` until
    one succeeds.

    Returns:
        EResult.OKAY on success (also for DRM-free books, which are just
        copied), EResult.NO_GOOD on any failure.

    Side effects: updates the global ``gCurBook`` (id + hex AES key),
    removes a stale output file on failure, and on success calls
    RenameBook()/SaveBookInfo().
    """
    global gCurBook
    print("[I] Decrypt book: " + bookId + " [" + gBookData[bookId][0] + "]")
    # Input/output paths are derived purely from the book id.
    encFile = os.path.join(gOutDir, ENC_BOOKS_DIR, bookId + EXT_EPUB)
    decFile = os.path.join(gOutDir, DEC_BOOKS_DIR, bookId + EXT_EPUB)
    if not os.path.isfile(encFile):
        print("[E] File not found: " + encFile)
        return EResult.NO_GOOD
    if not CheckEpubIntegrity(bookId):
        print("[E] Corrupted ePub file! (Re-download)")
        return EResult.NO_GOOD
    with closing(ZipFile(open(encFile, "rb"))) as inf:
        namelist = set(inf.namelist())
        if ENCRYPTION_XML not in namelist:
            # No encryption manifest: treat the book as DRM-free and copy it through.
            print("[W] Can't find " + ENCRYPTION_XML + ". Assume it's DRM-free book")
            if os.path.isfile(decFile):
                os.remove(decFile)
            shutil.copyfile(encFile, decFile)
            return EResult.OKAY
        # DRM bookkeeping files must not be copied into the decrypted ePub.
        for name in META_NAMES:
            namelist.remove(name)
        try:
            # get book AES key from META-INF/encryption.xml
            encryption = etree.fromstring(inf.read(ENCRYPTION_XML))
            aesKeyB64 = encryption.findtext('.//enc:CipherValue', None, NSMAP)
            if aesKeyB64 is None:
                print("[E] Can't find encrypted AES key!")
                return EResult.NO_GOOD
            # Try every known RSA key; the first that yields a non-None
            # plaintext wins.  NOTE(review): if gRsaKeys is empty, `bookkey`
            # is unbound here and the NameError is swallowed by the outer
            # `except Exception` — confirm gRsaKeys is always non-empty.
            for k in gRsaKeys:
                bookkey = k.decrypt(base64.b64decode(aesKeyB64))
                if bookkey is not None:
                    break
            if bookkey is None:
                print("[E] Can't decrypt AES key!")
                return EResult.NO_GOOD
            # Record the recovered key (upper-case hex) for later reporting.
            gCurBook._id = bookId
            gCurBook._aeskey = ''.join(hex(x)[2:].zfill(2) for x in bookkey).upper()
            print(" AES KEY = {0}".format(gCurBook._aeskey))
            decryptor = Decryptor(bookkey, encryption)
            if len(decryptor._encFontIdpf) > 0:
                # Obfuscated fonts need the book UID for de-obfuscation.
                decryptor.SetBookUid(GetBookUid(decryptor, inf))
            opfs = GetOpfNamesFromEpub(inf)
            if len(opfs) > 1:
                print("[W] Num of rootfile = " + str(len(opfs)))
            kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
            with closing(ZipFile(open(decFile, 'wb'), 'w', **kwds)) as outf:
                # Per the ePub spec, "mimetype" must be the first entry and stored uncompressed.
                zi = ZipInfo(MIMETYPE)
                zi.compress_type = ZIP_STORED
                try:
                    # if the mimetype is present, get its info, including time-stamp
                    oldzi = inf.getinfo(MIMETYPE)
                    # copy across fields to be preserved
                    zi.date_time = oldzi.date_time
                    zi.comment = oldzi.comment
                    zi.extra = oldzi.extra
                    zi.internal_attr = oldzi.internal_attr
                    # external attributes are dependent on the create system, so copy both.
                    zi.external_attr = oldzi.external_attr
                    zi.create_system = oldzi.create_system
                except:
                    # Best-effort metadata copy; a missing mimetype entry is tolerated.
                    pass
                outf.writestr(zi, inf.read(MIMETYPE))
                # process files in ePub
                for path in namelist:
                    data = inf.read(path)
                    zi = ZipInfo(path)
                    zi.compress_type = ZIP_DEFLATED
                    try:
                        # get the file info, including time-stamp
                        oldzi = inf.getinfo(path)
                        # copy across useful fields
                        zi.date_time = oldzi.date_time
                        zi.comment = oldzi.comment
                        zi.extra = oldzi.extra
                        zi.internal_attr = oldzi.internal_attr
                        # external attributes are dependent on the create system, so copy both.
                        zi.external_attr = oldzi.external_attr
                        zi.create_system = oldzi.create_system
                    except:
                        pass
                    data = decryptor.decrypt(path, data)
                    if path in opfs:
                        # This entry is a package (OPF) document: optionally rewrite
                        # its title/author from the user-supplied maps, then report.
                        if bookId in gTitleMap:
                            data = ChangeTitle(data, gTitleMap[bookId])
                        if bookId in gAuthorMap:
                            data = ChangeAuthor(data, gAuthorMap[bookId])
                        ShowBookInfo(data)
                    outf.writestr(zi, data)
        except Exception as e:
            # Any failure: report, remove the partial output, and give up on this book.
            print("[E] Can't decrypt book! (" + str(e) + ")")
            if os.path.isfile(decFile):
                os.remove(decFile)
            return EResult.NO_GOOD
    RenameBook(bookId)
    SaveBookInfo()
    return EResult.OKAY
def get_zip_infos(self, *filenames):
    """Yield a ZipInfo for each of *filenames* found in the archive.

    Scans the ZIP central directory via ``self.fp`` (the logic mirrors
    CPython's ``ZipFile._RealGetContents``) and yields a ``ZipInfo`` for
    every entry whose name is in *filenames*, in central-directory order.
    Returns early once all requested names have been yielded, so callers
    asking for a few members of a large archive avoid a full scan.

    Raises:
        RuntimeError: the underlying file object was already closed.
        BadZipFile: missing/invalid end record, truncated or corrupt
            central directory.
        NotImplementedError: an entry requires a ZIP feature version
            newer than MAX_EXTRACT_VERSION.
        TooManyFiles: more than ``self.max_file_count`` entries were
            scanned (guards against zip-bomb style archives).
    """
    fp = self.fp
    max_file_count = self.max_file_count
    if not fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")
    filenames = set(filenames)
    if len(filenames) == 0:
        # Nothing requested: this is an empty generator.
        return
    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
    # start_dir: Position of start of central directory
    start_dir = offset_cd + concat
    fp.seek(start_dir, 0)
    # Read the whole central directory into memory and parse it from there.
    data = fp.read(size_cd)
    fp = BytesIO(data)
    total = 0
    file_count = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # NOTE(review): index 5 of the unpacked central-directory record is
        # the general-purpose flag bits field (CPython calls it _CD_FLAG_BITS).
        flags = centdir[5]
        if flags & _UTF8_EXTENSION_FLAG:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)
        x._decodeExtra()
        # header_offset was relative to the nominal archive start; shift it
        # by any prepended data.
        x.header_offset = x.header_offset + concat
        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])
        file_count += 1
        if max_file_count is not None and file_count > max_file_count:
            raise TooManyFiles('Too many files in egg')
        if x.filename in filenames:
            filenames.discard(x.filename)
            yield x
            if len(filenames) == 0:
                # All requested members found; stop scanning early.
                return
def removeHTMLwatermarks(object, path_to_ebook):
    """Strip known HTML/XHTML watermarks from an ePub.

    Scans every .html/.xhtml/.xml entry for three watermark schemes and
    removes them:
      * Adobe ADEPT ``<meta name="Adept[.expected].resource" ...>`` tags,
      * Pocketbook invisible ``<div>`` watermarks,
      * eLibri / LemonInk tracking IDs and their visible ``<div>`` blocks
        (applied in a loop, since a file may be watermarked twice).

    Parameters:
        object        -- plugin host; only ``temporary_file(ext)`` is used
                         (returns an object with a ``.name`` path).
        path_to_ebook -- path to the source ePub.

    Returns the path of a cleaned temporary ePub, or *path_to_ebook*
    unchanged when nothing was removed or an error occurred.
    """
    try:
        # Fix: hold the archive in closing() so the ZipFile (and the file
        # handle passed to it) is released on every exit path.  The original
        # code leaked the handle, notably when no watermark was found.
        with closing(ZipFile(open(path_to_ebook, 'rb'))) as inf:
            namelist = inf.namelist()

            # Parallel lists: names of modified entries and their new contents.
            modded_names = []
            modded_contents = []

            count_adept = 0
            count_pocketbook = 0
            count_lemonink_invisible = 0
            count_lemonink_visible = 0
            lemonink_trackingID = None

            for file in namelist:
                if not (file.endswith('.html') or file.endswith('.xhtml') or file.endswith('.xml')):
                    continue
                try:
                    file_str = inf.read(file).decode("utf-8")
                    str_new = file_str

                    # Remove Adobe ADEPT watermarks
                    # Match optional newline at the beginning, then a "meta" tag with
                    # name = "Adept.expected.resource" or "Adept.resource"
                    # and either a "value" or a "content" element with an Adobe UUID
                    pre_remove = str_new
                    str_new = re.sub(
                        r'((\r\n|\r|\n)\s*)?\<meta\s+name=\"(Adept\.resource|Adept\.expected\.resource)\"\s+(content|value)=\"urn:uuid:[0-9a-fA-F\-]+\"\s*\/>',
                        '', str_new)
                    str_new = re.sub(
                        r'((\r\n|\r|\n)\s*)?\<meta\s+(content|value)=\"urn:uuid:[0-9a-fA-F\-]+\"\s+name=\"(Adept\.resource|Adept\.expected\.resource)\"\s*\/>',
                        '', str_new)
                    if (str_new != pre_remove):
                        count_adept += 1

                    # Remove Pocketbook watermarks
                    pre_remove = str_new
                    str_new = re.sub(
                        r'\<div style\=\"padding\:0\;border\:0\;text\-indent\:0\;line\-height\:normal\;margin\:0 1cm 0.5cm 1cm\;[^\"]*opacity:0.0\;[^\"]*text\-decoration\:none\;[^\"]*background\:none\;[^\"]*\"\>(.*?)\<\/div\>',
                        '', str_new)
                    if (str_new != pre_remove):
                        count_pocketbook += 1

                    # Remove eLibri / LemonInk watermark
                    # Run this in a loop, as it is possible a file has been watermarked twice ...
                    while True:
                        pre_remove = str_new
                        unique_id = re.search(
                            r'<body[^>]+class="[^"]*(t0x[0-9a-fA-F]{25})[^"]*"[^>]*>',
                            str_new)
                        if (unique_id):
                            # Invisible part: the tracking id inside the body class.
                            lemonink_trackingID = unique_id.groups()[0]
                            count_lemonink_invisible += 1
                            str_new = re.sub(lemonink_trackingID, '', str_new)
                            pre_remove = str_new
                            # Visible part: two consecutive styled <div> blocks
                            # right after the <body> tag.
                            pm = r'(<body[^>]+class="[^"]*"[^>]*>)'
                            pm += r'\<div style\=\'padding\:0\;border\:0\;text\-indent\:0\;line\-height\:normal\;margin\:0 1cm 0.5cm 1cm\;[^\']*text\-decoration\:none\;[^\']*background\:none\;[^\']*\'\>(.*?)</div>'
                            pm += r'\<div style\=\'padding\:0\;border\:0\;text\-indent\:0\;line\-height\:normal\;margin\:0 1cm 0.5cm 1cm\;[^\']*text\-decoration\:none\;[^\']*background\:none\;[^\']*\'\>(.*?)</div>'
                            str_new = re.sub(pm, r'\1', str_new)
                            if (str_new != pre_remove):
                                count_lemonink_visible += 1
                        else:
                            break
                except:
                    # Undecodable or otherwise broken entry: skip it, keep going.
                    traceback.print_exc()
                    continue

                if (file_str == str_new):
                    continue
                modded_names.append(file)
                modded_contents.append(str_new)

            if len(modded_names) == 0:
                # No file modified, return original
                return path_to_ebook

            if len(modded_names) != len(modded_contents):
                # Something went terribly wrong, return original
                print("Watermark: Error during watermark removal")
                return path_to_ebook

            # Re-package with modified files.  "mimetype" must be the first
            # entry, so pull it out of the list and write it manually.
            namelist.remove("mimetype")

            try:
                output = object.temporary_file(".epub").name
                kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
                with closing(ZipFile(open(output, 'wb'), 'w', **kwds)) as outf:
                    for path in (["mimetype"] + namelist):
                        data = inf.read(path)
                        try:
                            modded_index = None
                            modded_index = modded_names.index(path)
                        except:
                            pass
                        if modded_index is not None:
                            # Found modified file - replace contents
                            data = modded_contents[modded_index]
                        zi = ZipInfo(path)
                        oldzi = inf.getinfo(path)
                        try:
                            zi.compress_type = oldzi.compress_type
                            if path == "mimetype":
                                # ePub spec: mimetype must be stored uncompressed.
                                zi.compress_type = ZIP_STORED
                            zi.date_time = oldzi.date_time
                            zi.comment = oldzi.comment
                            zi.extra = oldzi.extra
                            zi.internal_attr = oldzi.internal_attr
                            zi.external_attr = oldzi.external_attr
                            zi.create_system = oldzi.create_system
                            # NOTE(review): zi.comment is bytes in Python 3, so
                            # ord(c) raises for a non-empty comment and the
                            # swallowing except skips only the flag line —
                            # confirm whether the UTF-8 flag matters here.
                            if any(ord(c) >= 128 for c in path) or any(
                                    ord(c) >= 128 for c in zi.comment):
                                # If the file name or the comment contains any non-ASCII char, set the UTF8-flag
                                zi.flag_bits |= 0x800
                        except:
                            pass
                        outf.writestr(zi, data)
            except:
                traceback.print_exc()
                return path_to_ebook

            if (count_adept > 0):
                print(
                    "Watermark: Successfully stripped {0} ADEPT watermark(s) from ebook."
                    .format(count_adept))
            if (count_lemonink_invisible > 0 or count_lemonink_visible > 0):
                print(
                    "Watermark: Successfully stripped {0} visible and {1} invisible LemonInk watermark(s) (\"{2}\") from ebook."
                    .format(count_lemonink_visible, count_lemonink_invisible, lemonink_trackingID))
            if (count_pocketbook > 0):
                print(
                    "Watermark: Successfully stripped {0} Pocketbook watermark(s) from ebook."
                    .format(count_pocketbook))
            return output
    except:
        traceback.print_exc()
        return path_to_ebook
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Python 2 implementation that locates the end-of-central-directory
    record at ``self.url``, fetches only the central directory bytes
    (``_http_get_partial_data`` — presumably an HTTP Range request;
    confirm against its definition), and populates ``self.filelist`` /
    ``self.NameToInfo`` without downloading the archive body.

    Raises BadZipfile when the end record or central directory is
    missing or corrupt.
    """
    try:
        endrec = _EndRecData(self.url)
    except IOError:
        raise BadZipfile("File is not a zip file")
    if not endrec:
        raise BadZipfile, "File is not a zip file"
    if self.debug > 1:
        print endrec
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment
    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    # Zip64 adjustment deliberately disabled in this variant:
    # if endrec[_ECD_SIGNATURE] == stringEndArchive64:
    #     # If Zip64 extension structures are present, account for them
    #     concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
    if self.debug > 2:
        inferred = concat + offset_cd
        print "given, inferred, offset", offset_cd, inferred, concat
    # self.start_dir: Position of start of central directory
    self.start_dir = offset_cd + concat
    # Fetch exactly the central directory byte range, then parse it locally.
    ECD = _http_get_partial_data(self.url, self.start_dir,
                                 self.start_dir + size_cd - 1)
    data = ECD.read()
    ECD.close()
    fp = cStringIO.StringIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile, "Bad magic number for central directory"
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print centdir
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (
            x.create_version,
            x.create_system,
            x.extract_version,
            x.reserved,
            x.flag_bits,
            x.compress_type,
            t,
            d,
            x.CRC,
            x.compress_size,
            x.file_size,
        ) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)
        x._decodeExtra()
        # header_offset was relative to the nominal archive start; shift it
        # by any prepended data.
        x.header_offset = x.header_offset + concat
        x.filename = x._decodeFilename()
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x
        # update total bytes read from central directory
        total = (
            total
            + sizeCentralDir
            + centdir[_CD_FILENAME_LENGTH]
            + centdir[_CD_EXTRA_FIELD_LENGTH]
            + centdir[_CD_COMMENT_LENGTH]
        )
        if self.debug > 2:
            print "total", total
def next(self):
    """Return the next member of the archive as a ZipInfo object. Returns
    None if there is no more available. This method is analogous to
    TarFile.next().

    We construct a ZipInfo object using the information stored in the next
    file header. The logic here is based on the implementation of
    ZipFile._RealGetContents(), which constructs a ZipInfo object from
    information in a central directory file header, but modified to work
    with the file-header-specific struct (for the implementation of
    ZipFile._RealGetContents(), see
    https://github.com/python/cpython/blob/048f54dc75d51e8a1c5822ab7b2828295192aaa5/Lib/zipfile.py#L1316).

    Side effects: appends the ZipInfo to self.filelist, records it in
    self.NameToInfo, advances self._next_header_pos, and sets self._loaded
    once the central directory is reached.
    """
    fp = self.fp

    # First, advance to the next header, if needed.  A forward read() is
    # used instead of seek() so non-seekable streams also work.
    fp.read(self._next_header_pos - fp.tell())

    # Read the next header.
    fheader = fp.read(sizeFileHeader)
    if len(fheader) != sizeFileHeader:
        raise BadZipFile("Truncated file header")
    fheader = struct.unpack(structFileHeader, fheader)
    if fheader[_FH_SIGNATURE] == stringCentralDir:
        # We've reached the central directory. This means that we've finished iterating through
        # all entries in the zip file. We can do this check because the file header signature
        # and central directory signature are stored in the same spot (index 0) and with the same format.
        self._loaded = True
        return None
    if fheader[_FH_SIGNATURE] != stringFileHeader:
        raise BadZipFile("Bad magic number for file header")

    filename = fp.read(fheader[_FH_FILENAME_LENGTH])
    flags = fheader[_FH_GENERAL_PURPOSE_FLAG_BITS]
    if flags & 0x800:
        # UTF-8 file names extension
        filename = filename.decode('utf-8')
    else:
        # Historical ZIP filename encoding
        filename = filename.decode('cp437')

    # Create ZipInfo instance to store file information
    x = ZipInfo(filename)
    x.extra = fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])
    x.header_offset = self._next_header_pos

    # The file header stores nearly all the same information needed for ZipInfo as what the
    # central directory file header stores, except for a couple of missing fields.
    # Fill those with neutral defaults.  Bug fix: ZipInfo.comment is a
    # bytes attribute (default b""), so use b'' rather than the integer 0,
    # which broke consumers calling x.comment.decode().
    x.comment = b''
    x.create_version, x.create_system = 0, 0
    x.volume, x.internal_attr, x.external_attr = 0, 0, 0

    (x.extract_version, x.reserved, x.flag_bits, x.compress_type,
     t, d, x.CRC, x.compress_size, x.file_size) = fheader[1:10]
    if x.extract_version > MAX_EXTRACT_VERSION:
        raise NotImplementedError("zip file version %.1f" %
                                  (x.extract_version / 10))

    # Convert date/time code to (year, month, day, hour, min, sec)
    # This comes from the original cpython code.
    x._raw_time = t
    x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                   t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)
    x._decodeExtra()

    self.filelist.append(x)
    self.NameToInfo[x.filename] = x
    # NOTE(review): compress_size read from the local header is 0 for
    # entries written with a data descriptor (general-purpose flag bit 3);
    # this iterator assumes such entries do not occur — confirm against
    # the archives being read.
    self._next_header_pos = (fp.tell() + x.compress_size
                             )  # Beginning of the next file's header.
    return x