def test_testzip_missing_hash(wheel_path):
    """testzip() raises WheelError for a member that RECORD does not list."""
    member_name = native('hello/héllö.py')
    member_data = as_bytes('print("Héllö, world!")\n')

    # Build an archive whose RECORD exists but contains no rows.
    with ZipFile(wheel_path, 'w') as archive:
        archive.writestr(member_name, member_data)
        archive.writestr('test-1.0.dist-info/RECORD', '')

    with WheelFile(wheel_path) as wheel:
        error = pytest.raises(WheelError, wheel.testzip)
        error.match(native("^No hash found for file 'hello/héllö.py'$"))
def test_testzip_bad_hash(wheel_path):
    """testzip() raises WheelError when the RECORD digest does not match."""
    member_name = native('hello/héllö.py')
    member_data = as_bytes('print("Héllö, w0rld!")\n')
    record_row = as_bytes(
        'hello/héllö.py,sha256=bv-QV3RciQC2v3zL8Uvhd_arp40J5A9xmyubN34OVwo,25')

    with ZipFile(wheel_path, 'w') as archive:
        archive.writestr(member_name, member_data)
        archive.writestr('test-1.0.dist-info/RECORD', record_row)

    with WheelFile(wheel_path) as wheel:
        error = pytest.raises(WheelError, wheel.testzip)
        error.match(native("^Hash mismatch for file 'hello/héllö.py'$"))
def test_write_str(wheel_path):
    """A wheel written via writestr() holds the member plus a RECORD file."""
    with WheelFile(wheel_path, 'w') as wheel:
        wheel.writestr(native('hello/héllö.py'),
                       as_bytes('print("Héllö, world!")\n'))

    expected_record = as_bytes(
        'hello/héllö.py,sha256=bv-QV3RciQC2v3zL8Uvhd_arp40J5A9xmyubN34OVwo,25\n'
        'test-1.0.dist-info/RECORD,,\n')

    with ZipFile(wheel_path, 'r') as archive:
        members = archive.infolist()
        assert len(members) == 2

        payload_info = members[0]
        assert payload_info.filename == native('hello/héllö.py')
        assert payload_info.file_size == 25

        record_info = members[1]
        assert record_info.filename == 'test-1.0.dist-info/RECORD'

        stored_record = archive.read('test-1.0.dist-info/RECORD')
        assert stored_record == expected_record
def test_missing_record(wheel_path):
    """Opening an archive that has no RECORD file raises WheelError."""
    member_name = native('hello/héllö.py')
    member_data = as_bytes('print("Héllö, w0rld!")\n')
    with ZipFile(wheel_path, 'w') as archive:
        archive.writestr(member_name, member_data)

    error = pytest.raises(WheelError, WheelFile, wheel_path)
    error.match("^Missing test-1.0.dist-info/RECORD file$")
def write_record(self, bdist_dir, distinfo_dir):
    """Write the ``RECORD`` CSV listing every file found under *bdist_dir*.

    Each row is ``(path, hash, size)`` where *path* is relative to
    *bdist_dir* with ``/`` separators, *hash* is ``sha256=<urlsafe base64>``
    and *size* is the file length in bytes.  The row for ``RECORD`` itself
    has empty hash and size fields.

    :param bdist_dir: root directory that is walked.
    :param distinfo_dir: directory in which ``RECORD`` is created.
    """
    from wheel.util import urlsafe_b64encode

    record_file_path = os.path.join(distinfo_dir, "RECORD")
    record_rel = os.path.relpath(record_file_path, bdist_dir)

    with open_for_csv(record_file_path, "w+") as out:
        rows = csv.writer(out)
        for root, subdirs, filenames in os.walk(bdist_dir):
            # Sorting in place steers the order in which os.walk descends.
            subdirs.sort()
            for filename in sorted(filenames):
                full_path = os.path.join(root, filename)
                rel = os.path.relpath(full_path, bdist_dir)
                if rel == record_rel:
                    # RECORD cannot contain its own hash.
                    hash_field = ""
                    size_field = ""
                else:
                    with open(full_path, "rb") as src:
                        content = src.read()
                    raw_digest = hashlib.sha256(content).digest()
                    hash_field = "sha256=" + native(urlsafe_b64encode(raw_digest))
                    size_field = len(content)
                rows.writerow(
                    (rel.replace(os.path.sep, "/"), hash_field, size_field))
def write_record(bdist_dir, distinfo_dir):
    """Write the ``RECORD`` CSV listing every file found under *bdist_dir*.

    Rows are ``(path, hash, size)``; *path* is relative to *bdist_dir* with
    ``/`` separators and *hash* is ``sha256=<urlsafe base64 digest>``.  The
    row describing ``RECORD`` itself carries empty hash and size fields.

    :param bdist_dir: root directory that is walked.
    :param distinfo_dir: directory in which ``RECORD`` is created.
    """
    target = os.path.join(distinfo_dir, 'RECORD')
    target_rel = os.path.relpath(target, bdist_dir)

    def iter_file_paths():
        """Yield the path of every file below bdist_dir, names sorted."""
        for root, subdirs, names in os.walk(bdist_dir):
            # In-place sort controls the order os.walk visits subdirectories.
            subdirs.sort()
            for name in sorted(names):
                yield os.path.join(root, name)

    with open_for_csv(target, 'w+') as out:
        rows = csv.writer(out)
        for file_path in iter_file_paths():
            rel = os.path.relpath(file_path, bdist_dir)
            if rel == target_rel:
                # RECORD cannot contain its own hash.
                hash_field, size_field = '', ''
            else:
                with open(file_path, 'rb') as src:
                    content = src.read()
                encoded = urlsafe_b64encode(hashlib.sha256(content).digest())
                hash_field = 'sha256=' + native(encoded)
                size_field = len(content)
            rows.writerow((rel.replace(os.path.sep, '/'), hash_field, size_field))
def open(self, name_or_info, mode="r", pwd=None):
    """Open an archive member, verifying its content against RECORD on read.

    :param name_or_info: member name or ``ZipInfo`` describing the member.
    :param mode: mode passed through to the parent class ``open()``.
    :param pwd: password passed through to the parent class ``open()``.
    :returns: the file object returned by the parent class.  For ``'r'``
        mode on a non-directory member with a recorded hash, its
        ``_update_crc`` hook is wrapped so the hash is checked as data is read.
    :raises WheelError: if the member has no entry in ``self._file_hashes``,
        or (while reading) if the computed digest differs from the recorded one.
    """
    def _update_crc(newdata, eof=None):
        if eof is None:
            eof = ef._eof
            update_crc_orig(newdata)
        else:  # Python 2
            update_crc_orig(newdata, eof)

        running_hash.update(newdata)
        if eof and running_hash.digest() != expected_hash:
            raise WheelError("Hash mismatch for file '{}'".format(
                native(ef_name)))

    ef = super(WheelFile, self).open(name_or_info, mode, pwd)
    ef_name = as_unicode(name_or_info.filename if isinstance(
        name_or_info, ZipInfo) else name_or_info)
    if mode == 'r' and not ef_name.endswith('/'):
        if ef_name not in self._file_hashes:
            # Release the member handle before bailing out; otherwise it
            # stays open for as long as the exception/traceback is alive.
            ef.close()
            raise WheelError("No hash found for file '{}'".format(
                native(ef_name)))

        algorithm, expected_hash = self._file_hashes[ef_name]
        if expected_hash is not None:
            # Monkey patch the _update_crc method to also check for the hash from RECORD
            running_hash = hashlib.new(algorithm)
            update_crc_orig, ef._update_crc = ef._update_crc, _update_crc

    return ef
def save(self):
    """Serialize ``self.data`` as indented JSON into the 'wheel' config dir.

    :returns: ``self``, so calls can be chained.
    """
    # Try not to call this a very long time after load()
    config_dir = native(save_config_path('wheel'))
    target = os.path.join(config_dir, self.CONFIG_NAME)
    with open(target, 'w+') as handle:
        json.dump(self.data, handle, indent=2)
    return self
def open(self, name_or_info, mode="r", pwd=None):
    """Open an archive member, verifying its content against RECORD on read.

    :param name_or_info: member name or ``ZipInfo`` describing the member.
    :param mode: mode passed through to the parent class ``open()``.
    :param pwd: password passed through to the parent class ``open()``.
    :returns: the file object returned by the parent class.  For ``'r'``
        mode on a non-directory member with a recorded hash, its
        ``_update_crc`` hook is wrapped so the hash is checked as data is read.
    :raises WheelError: if the member has no entry in ``self._file_hashes``,
        or (while reading) if the computed digest differs from the recorded one.
    """
    def _update_crc(newdata, eof=None):
        if eof is None:
            eof = ef._eof
            update_crc_orig(newdata)
        else:  # Python 2
            update_crc_orig(newdata, eof)

        running_hash.update(newdata)
        if eof and running_hash.digest() != expected_hash:
            raise WheelError("Hash mismatch for file '{}'".format(native(ef_name)))

    ef = super(WheelFile, self).open(name_or_info, mode, pwd)
    ef_name = as_unicode(name_or_info.filename if isinstance(name_or_info, ZipInfo)
                         else name_or_info)
    if mode == 'r' and not ef_name.endswith('/'):
        if ef_name not in self._file_hashes:
            # Release the member handle before bailing out; otherwise it
            # stays open for as long as the exception/traceback is alive.
            ef.close()
            raise WheelError("No hash found for file '{}'".format(native(ef_name)))

        algorithm, expected_hash = self._file_hashes[ef_name]
        if expected_hash is not None:
            # Monkey patch the _update_crc method to also check for the hash from RECORD
            running_hash = hashlib.new(algorithm)
            update_crc_orig, ef._update_crc = ef._update_crc, _update_crc

    return ef
def write_record(self, bdist_dir, distinfo_dir):
    """Write the ``RECORD`` CSV listing every file found under *bdist_dir*.

    Each row is ``(path, hash, size)``: *path* is relative to *bdist_dir*
    using ``/`` separators, *hash* is ``sha256=<urlsafe base64 digest>`` and
    *size* is the byte length.  ``RECORD``'s own row has empty hash and size.

    :param bdist_dir: root directory that is walked.
    :param distinfo_dir: directory in which ``RECORD`` is created.
    """
    from wheel.util import urlsafe_b64encode

    record_file_path = os.path.join(distinfo_dir, 'RECORD')
    record_rel = os.path.relpath(record_file_path, bdist_dir)

    def describe(file_path):
        """Return the (hash, size) fields for the file at *file_path*."""
        with open(file_path, 'rb') as src:
            content = src.read()
        raw_digest = hashlib.sha256(content).digest()
        return 'sha256=' + native(urlsafe_b64encode(raw_digest)), len(content)

    with open_for_csv(record_file_path, 'w+') as out:
        rows = csv.writer(out)
        for root, subdirs, filenames in os.walk(bdist_dir):
            # Sorting in place steers the order in which os.walk descends.
            subdirs.sort()
            for filename in sorted(filenames):
                full_path = os.path.join(root, filename)
                rel = os.path.relpath(full_path, bdist_dir)
                if rel == record_rel:
                    # RECORD cannot contain its own hash.
                    hash_field, size_field = '', ''
                else:
                    hash_field, size_field = describe(full_path)
                rows.writerow(
                    (rel.replace(os.path.sep, '/'), hash_field, size_field))
def test_timestamp(tmpdir_factory, wheel_path, monkeypatch):
    """SOURCE_DATE_EPOCH determines the date stamped on archive entries."""
    # An environment variable can be used to influence the timestamp on
    # the entries inside the zip.  See issue #143.
    source_dir = tmpdir_factory.mktemp('build')
    for name in ('one', 'two', 'three'):
        source_dir.join(name).write(name + '\n')

    # The test expects this epoch value to map to 1980-01-01 (see asserts).
    monkeypatch.setenv(native('SOURCE_DATE_EPOCH'), native('315576060'))

    with WheelFile(wheel_path, 'w') as wheel:
        wheel.write_files(str(source_dir))

    with ZipFile(wheel_path, 'r') as archive:
        for entry in archive.infolist():
            year_month_day = entry.date_time[:3]
            assert year_month_day == (1980, 1, 1)
            assert entry.compress_type == ZIP_DEFLATED
def test_weak_hash_algorithm(wheel_path, algorithm, digest):
    """Opening a wheel whose RECORD uses a weak hash algorithm must fail."""
    hash_field = '{}={}'.format(algorithm, digest)
    record_row = as_bytes('hello/héllö.py,{},25'.format(hash_field))
    member_name = native('hello/héllö.py')
    member_data = as_bytes('print("Héllö, w0rld!")\n')

    with ZipFile(wheel_path, 'w') as archive:
        archive.writestr(member_name, member_data)
        archive.writestr('test-1.0.dist-info/RECORD', record_row)

    error = pytest.raises(WheelError, WheelFile, wheel_path)
    expected_message = (
        r"^Weak hash algorithm \({}\) is not permitted by PEP 427$".format(algorithm))
    error.match(expected_message)
def test_testzip(wheel_path, algorithm, digest):
    """testzip() completes without raising for the given algorithm/digest."""
    hash_field = '{}={}'.format(algorithm, digest)
    record_row = as_bytes('hello/héllö.py,{},25'.format(hash_field))
    member_name = native('hello/héllö.py')
    member_data = as_bytes('print("Héllö, world!")\n')

    with ZipFile(wheel_path, 'w') as archive:
        archive.writestr(member_name, member_data)
        archive.writestr('test-1.0.dist-info/RECORD', record_row)

    with WheelFile(wheel_path) as wheel:
        wheel.testzip()
def test_unsupported_hash_algorithm(wheel_path):
    """Opening a wheel whose RECORD names an unknown hash algorithm fails."""
    member_name = native('hello/héllö.py')
    member_data = as_bytes('print("Héllö, w0rld!")\n')
    record_row = as_bytes(
        'hello/héllö.py,sha000=bv-QV3RciQC2v3zL8Uvhd_arp40J5A9xmyubN34OVwo,25')

    with ZipFile(wheel_path, 'w') as archive:
        archive.writestr(member_name, member_data)
        archive.writestr('test-1.0.dist-info/RECORD', record_row)

    error = pytest.raises(WheelError, WheelFile, wheel_path)
    error.match("^Unsupported hash algorithm: sha000$")
def _record_digest(self, data):
    """Return ``(hash_text, size, digest)`` for *data*.

    *hash_text* is ``'sha256='`` followed by the urlsafe-base64 form of the
    SHA-256 digest, *size* is ``len(data)`` and *digest* is the raw digest.
    """
    from wheel.util import urlsafe_b64encode

    raw_digest = hashlib.sha256(data).digest()
    encoded = native(urlsafe_b64encode(raw_digest))
    return ('sha256=' + encoded, len(data), raw_digest)
def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
    """Write *bytes* into the archive and remember its hash and size.

    The hash and size are stored in ``self._file_hashes`` and
    ``self._file_sizes`` under the member name.  Nothing is recorded for the
    RECORD file itself or for names ending in ``/``.

    :param zinfo_or_arcname: member name or ``ZipInfo`` for the new entry.
    :param bytes: content to store.
    :param compress_type: forwarded to ``ZipFile.writestr``.
    """
    ZipFile.writestr(self, zinfo_or_arcname, bytes, compress_type)

    if isinstance(zinfo_or_arcname, ZipInfo):
        member_name = zinfo_or_arcname.filename
    else:
        member_name = zinfo_or_arcname
    logger.info("adding '%s'", member_name)

    if member_name == self.record_path or member_name.endswith('/'):
        return

    hasher = self._default_algorithm(bytes)
    encoded_digest = native(urlsafe_b64encode(hasher.digest()))
    self._file_hashes[member_name] = (hasher.name, encoded_digest)
    self._file_sizes[member_name] = len(bytes)
def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
    """Write *bytes* into the archive and remember its hash and size.

    The hash and size are stored in ``self._file_hashes`` and
    ``self._file_sizes`` under the member name, except for the RECORD file
    itself.

    :param zinfo_or_arcname: member name or ``ZipInfo`` for the new entry.
    :param bytes: content to store.
    :param compress_type: forwarded to the parent class ``writestr``.
    """
    super(WheelFile, self).writestr(zinfo_or_arcname, bytes, compress_type)

    if isinstance(zinfo_or_arcname, ZipInfo):
        member_name = zinfo_or_arcname.filename
    else:
        member_name = zinfo_or_arcname
    logger.info("adding '%s'", member_name)

    if member_name == self.record_path:
        return

    hasher = self._default_algorithm(bytes)
    encoded_digest = native(urlsafe_b64encode(hasher.digest()))
    self._file_hashes[member_name] = (hasher.name, encoded_digest)
    self._file_sizes[member_name] = len(bytes)
def verify(self, zipfile=None):
    """Configure the VerifyingZipFile `zipfile` by verifying its signature
    and setting expected hashes for every hash in RECORD.

    Caller must complete the verification process by completely reading
    every file in the archive (e.g. with extractall).

    :param zipfile: archive to configure; ``self.zipfile`` is used when None.
    :raises BadWheelFile: if a signature is present and the RECORD hash it
        claims differs from the hash computed here.
    :raises AssertionError: if a RECORD row uses an algorithm other than
        sha256.
    """
    sig = None
    if zipfile is None:
        zipfile = self.zipfile
    zipfile.strict = True

    record_name = '/'.join((self.distinfo_name, 'RECORD'))
    sig_name = '/'.join((self.distinfo_name, 'RECORD.jws'))
    # tolerate s/mime signatures:
    smime_sig_name = '/'.join((self.distinfo_name, 'RECORD.p7s'))
    # RECORD and its signature files have no expected hash of their own.
    zipfile.set_expected_hash(record_name, None)
    zipfile.set_expected_hash(sig_name, None)
    zipfile.set_expected_hash(smime_sig_name, None)
    record = zipfile.read(record_name)

    record_digest = urlsafe_b64encode(hashlib.sha256(record).digest())
    try:
        sig = from_json(native(zipfile.read(sig_name)))
    except KeyError:  # no signature
        pass
    if sig:
        headers, payload = signatures.verify(sig)
        if payload['hash'] != "sha256=" + native(record_digest):
            msg = "RECORD.sig claimed RECORD hash {0} != computed hash {1}."
            raise BadWheelFile(
                msg.format(payload['hash'], native(record_digest)))

    reader = csv.reader((native(r) for r in record.splitlines()))

    for row in reader:
        filename = row[0]
        hash_field = row[1]
        if not hash_field:
            if filename not in (record_name, sig_name):
                sys.stderr.write("%s has no hash!\n" % filename)
            continue
        algo, data = hash_field.split('=', 1)
        if algo != "sha256":
            # Raised explicitly rather than via ``assert`` so the check is
            # not stripped under ``python -O``; AssertionError is kept so
            # existing callers see the same exception type.
            raise AssertionError("Unsupported hash algorithm")
        zipfile.set_expected_hash(filename, urlsafe_b64decode(binary(data)))
def verify(self, zipfile=None):
    """Configure the VerifyingZipFile `zipfile` by verifying its signature
    and setting expected hashes for every hash in RECORD.

    Caller must complete the verification process by completely reading
    every file in the archive (e.g. with extractall).

    :param zipfile: archive to configure; ``self.zipfile`` is used when None.
    :raises BadWheelFile: if a signature is present and the RECORD hash it
        claims differs from the hash computed here.
    :raises AssertionError: if a RECORD row uses an algorithm other than
        sha256.
    """
    sig = None
    if zipfile is None:
        zipfile = self.zipfile
    zipfile.strict = True

    record_name = '/'.join((self.distinfo_name, 'RECORD'))
    sig_name = '/'.join((self.distinfo_name, 'RECORD.jws'))
    # tolerate s/mime signatures:
    smime_sig_name = '/'.join((self.distinfo_name, 'RECORD.p7s'))
    # RECORD and its signature files have no expected hash of their own.
    zipfile.set_expected_hash(record_name, None)
    zipfile.set_expected_hash(sig_name, None)
    zipfile.set_expected_hash(smime_sig_name, None)
    record = zipfile.read(record_name)

    record_digest = urlsafe_b64encode(hashlib.sha256(record).digest())
    try:
        sig = from_json(native(zipfile.read(sig_name)))
    except KeyError:  # no signature
        pass
    if sig:
        headers, payload = signatures.verify(sig)
        if payload['hash'] != "sha256=" + native(record_digest):
            msg = "RECORD.sig claimed RECORD hash {0} != computed hash {1}."
            raise BadWheelFile(msg.format(payload['hash'],
                                          native(record_digest)))

    reader = csv.reader((native(r) for r in record.splitlines()))

    for row in reader:
        filename = row[0]
        hash_field = row[1]
        if not hash_field:
            if filename not in (record_name, sig_name):
                sys.stderr.write("%s has no hash!\n" % filename)
            continue
        algo, data = hash_field.split('=', 1)
        if algo != "sha256":
            # Raised explicitly rather than via ``assert`` so the check is
            # not stripped under ``python -O``; AssertionError is kept so
            # existing callers see the same exception type.
            raise AssertionError("Unsupported hash algorithm")
        zipfile.set_expected_hash(filename, urlsafe_b64decode(binary(data)))
def _update_crc(newdata, eof=None):
    """Feed *newdata* to the original CRC hook and to the running hash.

    Relies on ``ef``, ``update_crc_orig``, ``running_hash``,
    ``expected_hash`` and ``ef_name`` from a surrounding scope that is not
    visible in this block.

    :raises WheelError: once end-of-file is flagged and the accumulated
        digest differs from ``expected_hash``.
    """
    if eof is not None:
        # Python 2: the end-of-file flag is passed in by the caller.
        update_crc_orig(newdata, eof)
    else:
        # No flag supplied: read it from the file object first.
        eof = ef._eof
        update_crc_orig(newdata)

    running_hash.update(newdata)
    if not eof:
        return
    if running_hash.digest() != expected_hash:
        raise WheelError("Hash mismatch for file '{}'".format(native(ef_name)))
def iter_files(self):
    """Yield the first column of every row of the single RECORD file.

    The RECORD file is located with the glob ``*.dist-info/RECORD`` below
    ``self.path``.

    :raises ValueError: if the glob does not match exactly one file.
    """
    pattern = os.path.join(self.path, '*.dist-info/RECORD')
    matches = glob.glob(pattern)
    if len(matches) != 1:
        raise ValueError("Should be exactly one `*.dist_info` directory")

    with open(matches[0]) as handle:
        lines = handle.read().splitlines()

    for fields in csv.reader(native(line) for line in lines):
        yield fields[0]
def close(self):
    """Append the RECORD file (when writing) and close the archive.

    RECORD is written only if the archive is still open, was opened in
    ``'w'`` mode and at least one file hash has been collected.
    """
    should_write_record = (
        self.fp is not None and self.mode == 'w' and self._file_hashes)
    if should_write_record:
        # NOTE(review): rows are built with plain string formatting, so a
        # file name containing a comma or quote is not CSV-escaped - confirm
        # whether that matters for the names this archive can hold.
        lines = [
            '{},{}={},{}'.format(name, algo, digest, self._file_sizes[name])
            for name, (algo, digest) in self._file_hashes.items()
        ]
        # RECORD lists itself with empty hash and size fields.
        lines.append('{},,'.format(self.record_path))
        text = '\n'.join(lines) + '\n'

        record_info = ZipInfo(native(self.record_path),
                              date_time=get_zipinfo_datetime())
        record_info.compress_type = ZIP_DEFLATED
        self.writestr(record_info, as_bytes(text))

    super(WheelFile, self).close()
def close(self):
    """Append the RECORD file (when writing) and close the archive.

    RECORD is written only if the archive is still open, was opened in
    ``'w'`` mode and at least one file hash has been collected.
    """
    should_write_record = (
        self.fp is not None and self.mode == 'w' and self._file_hashes)
    if should_write_record:
        # NOTE(review): rows are built with plain string formatting, so a
        # file name containing a comma or quote is not CSV-escaped - confirm
        # whether that matters for the names this archive can hold.
        lines = []
        for name, (algo, digest) in self._file_hashes.items():
            lines.append(
                '{},{}={},{}'.format(name, algo, digest, self._file_sizes[name]))
        # RECORD lists itself with empty hash and size fields.
        lines.append('{},,'.format(self.record_path))
        text = '\n'.join(lines) + '\n'

        record_info = ZipInfo(native(self.record_path),
                              date_time=get_zipinfo_datetime())
        record_info.compress_type = ZIP_DEFLATED
        self.writestr(record_info, as_bytes(text))

    super(WheelFile, self).close()
def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
    """Write *bytes* into the archive and remember its hash and size.

    Backslashes in a plain archive name are converted to forward slashes
    before writing.  The hash and size are stored in ``self._file_hashes``
    and ``self._file_sizes`` under the member name, except for the RECORD
    file itself.

    :param zinfo_or_arcname: member name or ``ZipInfo`` for the new entry.
    :param bytes: content to store.
    :param compress_type: forwarded to ``ZipFile.writestr``.
    """
    try:
        zinfo_or_arcname = zinfo_or_arcname.replace('\\', '/')
    except (AttributeError, TypeError):
        # Not a text name (e.g. a ZipInfo, which has no ``replace``): leave
        # it untouched.  Narrowed from a bare ``except`` so unrelated errors
        # such as KeyboardInterrupt are no longer swallowed.
        pass

    ZipFile.writestr(self, zinfo_or_arcname, bytes, compress_type)
    fname = (zinfo_or_arcname.filename if isinstance(zinfo_or_arcname, ZipInfo)
             else zinfo_or_arcname)
    logger.info("adding '%s'", fname)
    print("adding: '%s'" % fname)
    if fname != self.record_path:
        hash_ = self._default_algorithm(bytes)
        self._file_hashes[fname] = hash_.name, native(
            urlsafe_b64encode(hash_.digest()))
        self._file_sizes[fname] = len(bytes)
def close(self):
    """Append the RECORD file (when writing) and close the archive.

    RECORD is written only if the archive is still open, was opened in
    ``"w"`` mode and at least one file hash has been collected.
    """
    should_write_record = (
        self.fp is not None and self.mode == "w" and self._file_hashes)
    if should_write_record:
        # NOTE(review): rows are built with plain string formatting, so a
        # file name containing a comma or quote is not CSV-escaped - confirm
        # whether that matters for the names this archive can hold.
        lines = [
            "{},{}={},{}".format(name, algo, digest, self._file_sizes[name])
            for name, (algo, digest) in self._file_hashes.items()
        ]
        # RECORD lists itself with empty hash and size fields.
        lines.append("{},,".format(self.record_path))
        text = "\n".join(lines) + "\n"

        record_info = ZipInfo(native(self.record_path),
                              date_time=get_zipinfo_datetime())
        record_info.compress_type = ZIP_DEFLATED
        # Permission bits 0o664 go in the high 16 bits of external_attr.
        record_info.external_attr = 0o664 << 16
        self.writestr(record_info, as_bytes(text))

    ZipFile.close(self)
def rewrite_record(bdist_dir):
    """ Rewrite RECORD file with hashes for all files in `bdist_dir`

    Copied from :method:`wheel.bdist_wheel.bdist_wheel.write_record`

    Will also unsign wheel (an existing ``RECORD.jws`` is deleted)

    Parameters
    ----------
    bdist_dir : str
        Path of unpacked wheel file

    Raises
    ------
    WheelToolsError
        If `bdist_dir` does not contain exactly one ``*.dist-info`` entry
    """
    matches = glob.glob(pjoin(bdist_dir, '*.dist-info'))
    if len(matches) != 1:
        raise WheelToolsError("Should be exactly one `*.dist_info` directory")
    info_dir = matches[0]
    record_file = pjoin(info_dir, 'RECORD')
    record_rel = relpath(record_file, bdist_dir)

    # Unsign wheel - because we're invalidating the record hash
    signature_file = pjoin(info_dir, 'RECORD.jws')
    if exists(signature_file):
        os.unlink(signature_file)

    with _open_for_csv(record_file, 'w+') as out:
        rows = csv.writer(out)
        for root, _subdirs, names in os.walk(bdist_dir):
            for name in names:
                full_path = pjoin(root, name)
                rel = relpath(full_path, bdist_dir)
                if rel == record_rel:
                    # RECORD cannot contain its own hash.
                    hash_field, size_field = '', ''
                else:
                    with open(full_path, 'rb') as src:
                        content = src.read()
                    encoded = urlsafe_b64encode(hashlib.sha256(content).digest())
                    hash_field = 'sha256=' + native(encoded)
                    size_field = len(content)
                rows.writerow((rel.replace(psep, '/'), hash_field, size_field))
def load(self):
    """Load ``self.data`` from the first existing 'wheel' config file.

    Missing ``signers``/``verifiers`` keys are filled with empty lists and a
    missing ``schema`` key is set to ``self.SCHEMA``.

    :returns: ``self``, so calls can be chained.
    :raises ValueError: if the stored schema differs from ``self.SCHEMA``.
    """
    # XXX JSON is not a great database
    for config_dir in load_config_paths('wheel'):
        candidate = os.path.join(native(config_dir), self.CONFIG_NAME)
        if not os.path.exists(candidate):
            continue

        with open(candidate, 'r') as source:
            self.data = json.load(source)

        for key in ('signers', 'verifiers'):
            if key not in self.data:
                self.data[key] = []

        if 'schema' not in self.data:
            self.data['schema'] = self.SCHEMA
        elif self.data['schema'] != self.SCHEMA:
            message = "Bad wheel.json version {0}, expected {1}".format(
                self.data['schema'], self.SCHEMA)
            raise ValueError(message)

        # Only the first config file found is used.
        break
    return self
def close(self):
    """Append the RECORD file (when writing) and close the archive.

    RECORD is written as CSV only if the archive is still open, was opened
    in ``"w"`` mode and at least one file hash has been collected.
    """
    should_write_record = (
        self.fp is not None and self.mode == "w" and self._file_hashes)
    if should_write_record:
        buffer = StringIO()
        rows = csv.writer(
            buffer, delimiter=",", quotechar='"', lineterminator="\n")
        for name, (algo, digest) in self._file_hashes.items():
            rows.writerow((name, algo + "=" + digest, self._file_sizes[name]))
        # RECORD lists itself with empty hash and size fields.
        rows.writerow((format(self.record_path), "", ""))

        record_info = ZipInfo(native(self.record_path),
                              date_time=get_zipinfo_datetime())
        record_info.compress_type = self.compression
        # Permission bits 0o664 go in the high 16 bits of external_attr.
        record_info.external_attr = 0o664 << 16
        self.writestr(record_info, as_bytes(buffer.getvalue()))

    ZipFile.close(self)
def rewrite_record(bdist_dir):
    """ Rewrite RECORD file with hashes for all files in `bdist_dir`

    Copied from :method:`wheel.bdist_wheel.bdist_wheel.write_record`

    Will also unsign wheel (an existing ``RECORD.jws`` is deleted)

    Parameters
    ----------
    bdist_dir : str
        Path of unpacked wheel file
    """
    info_dir = _dist_info_dir(bdist_dir)
    record_file = pjoin(info_dir, "RECORD")
    record_rel = relpath(record_file, bdist_dir)

    # Unsign wheel - because we're invalidating the record hash
    signature_file = pjoin(info_dir, "RECORD.jws")
    if exists(signature_file):
        os.unlink(signature_file)

    with open_for_csv(record_file, "w+") as out:
        rows = csv.writer(out)
        for root, _subdirs, names in os.walk(bdist_dir):
            for name in names:
                full_path = pjoin(root, name)
                rel = relpath(full_path, bdist_dir)
                if rel == record_rel:
                    # RECORD cannot contain its own hash.
                    hash_field, size_field = "", ""
                else:
                    with open(full_path, "rb") as src:
                        content = src.read()
                    encoded = urlsafe_b64encode(hashlib.sha256(content).digest())
                    hash_field = "sha256=" + native(encoded)
                    size_field = len(content)
                rows.writerow((rel.replace(psep, "/"), hash_field, size_field))
def rewrite_record(bdist_dir):
    """ Rewrite RECORD file with hashes for all files in `bdist_dir`

    Copied from :method:`wheel.bdist_wheel.bdist_wheel.write_record`

    Will also unsign wheel (an existing ``RECORD.jws`` is deleted)

    Parameters
    ----------
    bdist_dir : str
        Path of unpacked wheel file
    """
    info_dir = _dist_info_dir(bdist_dir)
    record_file = pjoin(info_dir, 'RECORD')
    record_rel = relpath(record_file, bdist_dir)

    # Unsign wheel - because we're invalidating the record hash
    signature_file = pjoin(info_dir, 'RECORD.jws')
    if exists(signature_file):
        os.unlink(signature_file)

    def describe(file_path):
        """Return the (hash, size) RECORD fields for the file at file_path."""
        with open(file_path, 'rb') as src:
            content = src.read()
        raw_digest = hashlib.sha256(content).digest()
        return 'sha256=' + native(urlsafe_b64encode(raw_digest)), len(content)

    with open_for_csv(record_file, 'w+') as out:
        rows = csv.writer(out)
        for root, _subdirs, names in os.walk(bdist_dir):
            for name in names:
                full_path = pjoin(root, name)
                rel = relpath(full_path, bdist_dir)
                if rel == record_rel:
                    # RECORD cannot contain its own hash.
                    hash_field, size_field = '', ''
                else:
                    hash_field, size_field = describe(full_path)
                rows.writerow((rel.replace(psep, '/'), hash_field, size_field))
def close(self):
    """Append the RECORD file (when writing) and close the archive.

    RECORD is written as CSV only if the archive is still open, was opened
    in ``'w'`` mode and at least one file hash has been collected.
    """
    should_write_record = (
        self.fp is not None and self.mode == 'w' and self._file_hashes)
    if should_write_record:
        buffer = StringIO()
        rows = csv.writer(
            buffer, delimiter=',', quotechar='"', lineterminator='\n')
        for name, (algo, digest) in self._file_hashes.items():
            rows.writerow((name, algo + "=" + digest, self._file_sizes[name]))
        # RECORD lists itself with empty hash and size fields.
        rows.writerow((format(self.record_path), "", ""))

        record_info = ZipInfo(native(self.record_path),
                              date_time=get_zipinfo_datetime())
        record_info.compress_type = ZIP_DEFLATED
        # Permission bits 0o664 go in the high 16 bits of external_attr.
        record_info.external_attr = 0o664 << 16
        self.writestr(record_info, as_bytes(buffer.getvalue()))

    ZipFile.close(self)
def verify_wheel(wheelfile):
    """Validate the signature of *wheelfile* against the configured user.

    Reads ``RECORD.jws`` from the wheel, verifies it, checks that the
    signing key equals ``settings.WHEEL_USER`` and that the keyring password
    stored for that user equals ``settings.WHEEL_PASSWORD``.

    :raises WheelValidationFailed: if the wheel is unsigned, the verified
        signature lacks the expected key entry, or either comparison fails.
    """
    wheel = WheelFile(wheelfile)
    signature_member = wheel.distinfo_name + '/RECORD.jws'
    try:
        raw_signature = wheel.zipfile.open(signature_member).read()
        signature = json.loads(native(raw_signature))
    except KeyError:
        raise WheelValidationFailed("This wheel is not signed")

    verification = signatures.verify(signature)
    try:
        verifying_key = verification[0][0]['jwk']['vk']
    except (KeyError, IndexError, ValueError):
        raise WheelValidationFailed("Invalid signature")

    if verifying_key != settings.WHEEL_USER:
        raise WheelValidationFailed("Wheel validation failed")

    backend = keyring.get_keyring()
    stored_password = backend.get_password("wheel", settings.WHEEL_USER)
    if stored_password != settings.WHEEL_PASSWORD:
        raise WheelValidationFailed("Wheel validation failed")
def iter_files(self):
    """Return the stripped lines of ``<self.path>/info/files`` as a list."""
    listing_path = os.path.join(self.path, 'info', 'files')
    entries = []
    with open(listing_path) as handle:
        for line in handle:
            entries.append(native(line.strip()))
    return entries