def test_replace_metadata(self):
    old_zip_file = get_test_zipfile('LotsOfFiles')
    metadata = models.parse_zipfile_metadata(old_zip_file)
    old_zip_file.seek(0)

    extension = models.Extension.objects.create_from_metadata(metadata, creator=self.user)
    version = models.ExtensionVersion(extension=extension, source=File(old_zip_file))
    version.parse_metadata_json(metadata)

    new_zip = version.get_zipfile('r')
    old_zip = ZipFile(File(old_zip_file), 'r')

    self.assertEqual(len(old_zip.infolist()), len(new_zip.infolist()))
    self.assertEqual(new_zip.read("metadata.json"), version.make_metadata_json_string())

    for old_info in old_zip.infolist():
        if old_info.filename == "metadata.json":
            continue
        new_info = new_zip.getinfo(old_info.filename)
        self.assertEqual(old_zip.read(old_info), new_zip.read(new_info))
        self.assertEqual(old_info.date_time, new_info.date_time)

    old_zip.close()
    new_zip.close()
class RemoteZipFile(object):
    """A potentially remote ZIP file."""

    def __init__(self, name_or_url):
        if name_or_url.startswith("http:"):
            self._f = httpopen(name_or_url)
            self._use_read_buffer = True
            # Prefetch the last MB to capture most of the central directory index.
            self._f.prefetch([(self._f.size - 1024 * 1024, self._f.size)])
        else:
            self._f = open(name_or_url, "rb")
            self._use_read_buffer = False
        self._zf = ZipFile(self._f)
        if self._use_read_buffer:
            self._sinfo = sorted((i.header_offset, i) for i in self._zf.infolist())
            self._dict = dict((i.filename, i) for i in self._zf.infolist())

    def keys(self):
        return self._zf.namelist()

    def require(self, required):
        if self._use_read_buffer:
            def get_block_range(block_id):
                s = self._sinfo[block_id][1].header_offset
                if block_id != len(self._sinfo) - 1:
                    e = self._sinfo[block_id + 1][1].header_offset
                else:
                    e = self._f.size - 1
                return (s, e)

            blocks = [j for j, (_, i) in enumerate(self._sinfo)
                      if i.filename in required]
            # Coalesce ranges: holes smaller than MIN_HOLE_SIZE (5 MB) will be
            # read anyway rather than split into separate requests.
            read_blocks = []
            for i in blocks:
                if not read_blocks:
                    read_blocks.append(get_block_range(i))
                else:
                    start, end = read_blocks[-1]
                    b_start, b_end = get_block_range(i)
                    if b_start > end + MIN_HOLE_SIZE:
                        read_blocks.append((b_start, b_end))
                    else:
                        read_blocks[-1] = (start, b_end)
            self._f.prefetch(read_blocks)

        rset = set(required)
        for i in self._zf.infolist():
            if i.filename in rset:
                rset.remove(i.filename)
                x = self._zf.open(i)
                write(1, struct.pack("i", len(i.filename)))
                write(1, i.filename)
                write(1, struct.pack("i", i.file_size))
                write(1, x.read())
        assert not rset, rset
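A minimal usage sketch for the class above, assuming the surrounding module supplies httpopen (returning a file-like object with a size attribute and a prefetch(ranges) method), MIN_HOLE_SIZE, and the write(fd, data) helper; the URL and member names are hypothetical:

# Hypothetical usage: only the byte ranges covering the requested members
# are fetched; holes between them smaller than MIN_HOLE_SIZE are read
# anyway rather than split into separate HTTP range requests.
rzf = RemoteZipFile("http://example.com/big-archive.zip")
print(rzf.keys())                          # names from the central directory
rzf.require(["data/a.bin", "data/b.bin"])  # streams both members to fd 1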
class ExtractFile(object):
    def __init__(self, fp, work_path, regex_util=None, show_info=False,
                 progress=None, eu_text=None):
        self.fp = fp
        self.work_path = work_path
        self.zf = ZipFile(self.fp, mode="r")
        self.regex_util = regex_util
        self.progress = progress
        self.uncompress_size = sum(f.file_size for f in self.zf.infolist())
        self.show_info = show_info
        self.eu_text = eu_text

    def clean_work_path(self):
        if os.path.exists(self.work_path):
            shutil.rmtree(self.work_path)
        os.mkdir(self.work_path)

    def extract(self):
        members = self.zf.infolist()
        if self.show_info:
            self.start_extract()
        total = 0
        for zip_info in members:
            total += zip_info.file_size
            if not self.regex_util or not self.regex_util.do_match(spit_filename(zip_info.filename, True)):
                if len(zip_info.filename) + len(self.work_path) + 1 < 255:
                    self.zf.extract(zip_info.filename, self.work_path)
                    if self.eu_text and spit_filename(zip_info.filename, True) == "important.properties":
                        file_path = os.path.join(self.work_path, zip_info.filename)
                        try:
                            cf = MyConfigParser(file_path, file_error=True)
                            regexes = [r"\$\{" + key + r"\}" for key in cf.keys()]
                            self.eu_text.add_regex(regexes)
                        except Exception as exc:
                            print exc
                            print self.work_path + "/" + zip_info.filename
                    if self.show_info:
                        self.update_extract(total)
                else:
                    print "path len > 255 ", self.work_path, zip_info.filename
        if self.show_info:
            self.finish_extract()
        self.zf.close()

    def start_extract(self):
        if self.progress:
            self.progress.start_extract(fp=self.fp, uncompress_size=self.uncompress_size)

    def finish_extract(self):
        if self.progress:
            self.progress.finish_extract(fp=self.fp)

    def update_extract(self, extract_size):
        if self.progress:
            self.progress.update_extract(extract_size=extract_size)
def _build_trek_ressources(self, trek, language, force):
    logger.info("Build %s resources file for trek '%s'..." % (language, trek.properties.name))
    output_folder = os.path.join(settings.INPUT_DATA_ROOT, language, 'api/trek')
    if not os.path.exists(output_folder):
        logger.info("Create folder %s" % output_folder)
        os.makedirs(output_folder)

    zipfilename = os.path.join(output_folder, 'trek-%u.zip' % trek.id)
    zipfile = ZipFile(zipfilename + '.new', 'w')
    media = set()
    missing_media = set()
    trek_dest = 'trek/{trek.pk}'.format(trek=trek)
    # All pictures
    for picture in trek.properties.pictures:
        media.add((picture['url'], trek_dest))
    # Information desk pictures
    for desk in trek.properties.information_desks:
        if desk['photo_url']:
            media.add((desk['photo_url'], trek_dest))
    # Only one picture per POI
    for poi in trek.pois.all():
        poi_dest = 'poi/{poi.pk}'.format(poi=poi)
        if poi.properties.pictures:
            media.add((poi.properties.pictures[0]['url'], poi_dest))

    if missing_media:
        logger.warning('Missing media: ' + ', '.join(missing_media))

    for url, dest in media:
        url = unquote(url).lstrip('/')
        fullpath = os.path.join(settings.INPUT_DATA_ROOT, url)
        arcname = os.path.join(dest, os.path.basename(url))
        zipfile.write(fullpath, arcname)

    # Compare member CRCs against the previous archive to detect changes.
    try:
        oldzipfile = ZipFile(zipfilename, 'r')
    except IOError:
        uptodate = False
    else:
        old = set((zi.filename, zi.CRC) for zi in oldzipfile.infolist())
        new = set((zi.filename, zi.CRC) for zi in zipfile.infolist())
        uptodate = (old == new) and not force
        oldzipfile.close()

    zipfile.close()
    if uptodate:
        os.unlink(zipfilename + '.new')
        logger.info('%s was up to date.' % zipfilename)
    else:
        os.rename(zipfilename + '.new', zipfilename)
        logger.info('%s done.' % zipfilename)
def read_words_from_anki_pkg(self, anki_pkg_file_path):
    """Reads words from an apkg file into a set.

    arguments:
        anki_pkg_file_path (str): path to the anki pkg file
    returns:
        (set) of words
    """
    anki_deck_db_url = ''
    # Extract the collection database from the .apkg file.
    if not os.path.exists(anki_pkg_file_path):
        raise ValueError('Path to apkg file is not valid')
    try:
        zf = ZipFile(anki_pkg_file_path, 'r')
        # Scan the zipfile looking for the collection.anki2 file.
        for info in zf.infolist():
            if info.filename == 'collection.anki2':
                zf.extract(info.filename)
                anki_deck_db_url = "sqlite:///{0}".format(info.filename)
                break
    except BadZipfile:
        raise ValueError('apkg file is corrupt or not valid')
    if len(anki_deck_db_url) == 0:
        raise ValueError('No collection database file found in apkg')
    return Anki().distill_words(anki_deck_db_url)
def do(zip_file_name, include):
    print('zip_file_name:', zip_file_name)
    print('Include files:', include)

    # The modified zip
    out_zip_file_name = '_' + zip_file_name
    zin = zout = None
    try:
        print('open {} and {} zip archives'.format(zip_file_name, out_zip_file_name))
        zin = ZipFile(zip_file_name, 'r')
        zout = ZipFile(out_zip_file_name, 'w')

        print('start filling {} zip archive'.format(out_zip_file_name))
        for item in zin.infolist():
            buffer = zin.read(item.filename)
            if any(fnmatch.fnmatch(item.filename, pattern) for pattern in include):
                zout.writestr(item, buffer)
            else:
                print('Delete', item.filename)
        print('finish filling {} zip archive'.format(out_zip_file_name))
    finally:
        if zout is not None:
            zout.close()
        if zin is not None:
            zin.close()

    # Remove the original zip
    print('remove original {} zip file'.format(zip_file_name))
    os.remove(zip_file_name)

    # Rename the modified zip to the original name
    print('rename {} zip file as original {}'.format(out_zip_file_name, zip_file_name))
    os.rename(out_zip_file_name, zip_file_name)
class ApkParser:
    def __init__(self, file):
        self._file = ZipFile(file)

    def getManifest(self):
        return AXML(self._file.read('AndroidManifest.xml')).get_xml_obj()

    def getPackageName(self):
        return self.getManifest().documentElement.getAttribute('package')

    def getVersionCode(self):
        return int(self.getManifest().documentElement.getAttribute('android:versionCode'))

    def getVersionName(self):
        return self.getManifest().documentElement.getAttribute('android:versionName')

    def getMinSdkVersion(self):
        return int(self.getManifest().documentElement
                   .getElementsByTagName('uses-sdk')[0]
                   .getAttribute('android:minSdkVersion'))

    def _getCerts(self):
        for info in self._file.infolist():
            if info.filename.startswith('META-INF/') and info.filename.endswith('.RSA'):
                for cert in ContentInfo.load(self._file.read(info))['content']['certificates']:
                    yield cert.dump()

    def getCert(self):
        certs = list(self._getCerts())
        if len(certs) != 1:
            raise Exception('Cannot read certificate')
        return certs[0]
def getTranslations(type, localesDir, defaultLocale, projectName, key):
    result = urllib2.urlopen('http://api.crowdin.net/api/project/%s/export?key=%s' % (projectName, key)).read()
    if result.find('<success') < 0:
        raise Exception('Server indicated that the operation was not successful\n' + result)

    result = urllib2.urlopen('http://api.crowdin.net/api/project/%s/download/all.zip?key=%s' % (projectName, key)).read()
    zip = ZipFile(StringIO(result))
    dirs = {}
    for info in zip.infolist():
        if not info.filename.endswith('.json'):
            continue

        dir, file = os.path.split(info.filename)
        if not re.match(r'^[\w\-]+$', dir) or dir == defaultLocale:
            continue

        if type == 'chrome' and file.count('.') == 1:
            origFile = file
        else:
            origFile = re.sub(r'\.json$', '', file)
        if type == 'gecko' and not origFile.endswith('.dtd') and not origFile.endswith('.properties'):
            continue

        mapping = langMappingChrome if type == 'chrome' else langMappingGecko
        for mapped, value in mapping.iteritems():
            if value == dir:
                dir = mapped
        if type == 'chrome':
            dir = dir.replace('-', '_')

        data = zip.open(info.filename).read()
        if data == '[]':
            continue

        if not dir in dirs:
            dirs[dir] = set()
        dirs[dir].add(origFile)

        path = os.path.join(localesDir, dir, origFile)
        if not os.path.exists(os.path.dirname(path)):
            os.makedirs(os.path.dirname(path))
        if type == 'chrome' and origFile.endswith('.json'):
            postprocessChromeLocale(path, data)
        elif type == 'chrome':
            data = json.loads(data)
            if origFile in data:
                fileHandle = codecs.open(path, 'wb', encoding='utf-8')
                fileHandle.write(data[origFile]['message'])
                fileHandle.close()
        else:
            fromJSON(path, data)

    # Remove any extra files
    for dir, files in dirs.iteritems():
        baseDir = os.path.join(localesDir, dir)
        if not os.path.exists(baseDir):
            continue
        for file in os.listdir(baseDir):
            path = os.path.join(baseDir, file)
            if os.path.isfile(path) and (file.endswith('.json') or file.endswith('.properties') or file.endswith('.dtd')) and not file in files:
                os.remove(path)
def save_file_data(settle_date, data, temp_path, merchant_id, temp_prefix='unionpay_'):
    '''
    @settle_date: like 1216, used to generate the filename
    @data: fileContent from request
    @temp_path: save data to a temp path
    '''
    timeRandomString = datetime.now().strftime("%Y%m%d%H%M%S")
    path = os.path.join(
        temp_path,
        "%s%s%s" % (temp_prefix, datetime.now().year, settle_date))
    if not os.path.exists(path):
        os.mkdir(path)
    fileWholePath = "%s/SMT_%s.zip" % (path, timeRandomString)
    with open(fileWholePath, 'wb') as f:
        f.write(data)
    logger.debug("temp file <%s> created!" % fileWholePath)

    zfile = ZipFile(fileWholePath, 'r')
    zfile.extractall(path)
    files_list = zfile.infolist()
    logger.debug("file <%s> unzipped!" % ','.join(zfile.namelist()))
    zfile.close()
    logger.debug("balance file <%s> saved!" % path)

    os.unlink(fileWholePath)
    logger.debug("temp file deleted")

    balance_files = []
    for item in files_list:
        if Signer.accept_filetype(item.filename, merchant_id):
            balance_files.append(os.path.join(path, item.filename))
    return balance_files
def assertInZip(self, expected_files, filename, equal=True):
    """
    Check if the given `expected_files` exist in the Zip archive.
    """
    new_filename = None
    try:
        # If a stream is provided, dump it to a file. ZipFile doesn't read from a stream.
        if not isinstance(filename, str):
            f = filename
            filename = new_filename = tempfile.mktemp(prefix='rdiffweb_test_archiver_', suffix='.zip')
            with io.open(new_filename, 'wb') as out:
                byte = f.read(4096)
                while byte:
                    out.write(byte)
                    byte = f.read(4096)
            f.close()
        # Get data from zip.
        actual = {}
        a = ZipFile(filename)
        for m in a.infolist():
            name = m.filename
            if isinstance(name, bytes):
                name = name.decode('utf8')
            actual[name] = m.file_size
        a.close()
        # Compare.
        if equal:
            self.assertEqual(expected_files, actual)
        else:
            for expected_file in expected_files:
                self.assertIn(expected_file, actual)
    finally:
        if new_filename:
            os.remove(new_filename)
def test(self):
    """
    Check the server response and file name, and that the order hash and
    timestamp have been applied.
    """
    order_hash = uuid.uuid4().hex
    url = 'https://s3.eu-central-1.amazonaws.com/saxo-static/ebooks/line-vindernovelle-i-krimidysten.epub'
    response = client.get(reverse('add_mark') + '?url=%s&order_hash=%s' % (url, order_hash))
    origin_name = url.split('/')[-1]
    self.assertEqual(status.HTTP_200_OK, response.status_code)
    self.assertIn('filename=%s' % origin_name, response.get('Content-Disposition'))

    zipped_file = ZipFile(StringIO(response.content), 'r')
    file_to_check = 'META-INF/container.xml'
    # Check only the date, because the file may have been received in the previous hour.
    timestamp = datetime.now().strftime('%Y-%m-%d')
    self.assertIn(file_to_check, zipped_file.namelist())
    for item in zipped_file.infolist():
        if item.filename == file_to_check:
            data = zipped_file.read(item.filename)
            self.assertIn(order_hash, data)
            self.assertIn(timestamp, data)
    zipped_file.close()
def _unpack(self, filepath):
    """Determine the vim script's extension and unpack it.

    Sets the files variable. Removes the archive file and temp dir.
    """
    import os.path
    root, ext = os.path.splitext(filepath)
    if ext == u'.zip':
        from zipfile import ZipFile
        archive = ZipFile(filepath, 'r')
        self.files = [member.filename for member in archive.infolist()
                      if not member.filename.endswith('/')]
        archive.extractall('/home/chris/.vim')
    elif ext in (u'.tar', u'.tgz', u'.bz2') or (ext == u'.gz' and os.path.splitext(root)[1] == u'.tar'):
        # tarfile transparently handles .tar, .tgz, .tar.gz and .tar.bz2.
        import tarfile
        archive = tarfile.open(filepath)
        self.files = [member.name for member in archive.getmembers()
                      if member.isfile()]
        archive.extractall('/home/chris/.vim')
    elif ext == u'.vba':
        vimExecute(':so %\n:q\n')
        self.files = [filepath]

    # Cleanup
    print "Deleting {0} and {1}".format(filepath, os.path.dirname(filepath))
    os.unlink(filepath)
    os.rmdir(os.path.dirname(filepath))
def test_pydist():
    """Make sure pydist.json exists and validates against our schema."""
    # XXX this test may need manual cleanup of older wheels
    import jsonschema

    def open_json(filename):
        with open(filename, 'rb') as json_file:
            return json.loads(json_file.read().decode('utf-8'))

    pymeta_schema = open_json(resource_filename('wheel.test', 'pydist-schema.json'))
    valid = 0
    for dist in ("simple.dist", "complex-dist"):
        basedir = pkg_resources.resource_filename('wheel.test', dist)
        for (dirname, subdirs, filenames) in os.walk(basedir):
            for filename in filenames:
                if filename.endswith('.whl'):
                    whl = ZipFile(os.path.join(dirname, filename))
                    for entry in whl.infolist():
                        if entry.filename.endswith('/metadata.json'):
                            pymeta = json.loads(whl.read(entry).decode('utf-8'))
                            jsonschema.validate(pymeta, pymeta_schema)
                            valid += 1
    assert valid > 0, "No metadata.json found"
def zip_data(source, modified_files):
    file = StringIO()
    outzip = ZipFile(file, 'w')
    zip = ZipFile(StringIO(source))
    for info in zip.infolist():
        # Replace the data from the map
        if info.filename in modified_files:
            data = modified_files[info.filename]
            if data is None:
                continue
        else:
            data = zip.read(info.filename)
        # Section 17.4 says the mimetype file shall not include an extra
        # field, so we remove it even if present in the source.
        if info.filename == 'mimetype':
            info.extra = ''
        outzip.writestr(info, data)
    outzip.close()
    content = file.getvalue()
    file.close()
    return content
def mass_upload(request, form):
    # It seems like default storage can't read from a zip file
    # built on an uploaded file. Try saving it first.
    import tempfile
    tmpfile = tempfile.TemporaryFile()
    tmpfile.write(request.FILES['file'].read())
    tmpfile.seek(0)
    zipfile = ZipFile(tmpfile, mode="r")
    for info in zipfile.infolist():
        # And it seems like we need a tmp file per file in the zip too.
        tmpfile2 = tempfile.TemporaryFile()
        f = zipfile.open(info, "r")
        tmpfile2.write(f.read())
        f.close()
        tmpfile2.seek(0)
        django_file = DjangoFile(tmpfile2)
        setattr(django_file, 'size', info.file_size)
        chorus_file = ChorusFile.objects.create(
            name=info.filename,
            user=request.user,
            comments=form.cleaned_data['comments'] or info.comment,
            timestamp=datetime.datetime.now(),
            purpose=form.cleaned_data['purpose'],
            season=form.cleaned_data['season'],
            voice=form.cleaned_data['voice'],
            size=info.file_size)
        chorus_file.save_uploaded_file(uploaded_file=django_file)
        chorus_file.save()
def unzip(filename, match_dir=False, destdir=None):
    """
    Extract all files from a zip archive.

    filename: The path to the zip file
    match_dir: If True all files in the zip must be contained in a
        subdirectory named after the archive file with extension removed
    destdir: Extract the zip into this directory, default current directory

    return: If match_dir is True then returns the subdirectory (including
        destdir), otherwise returns destdir or '.'
    """
    if not destdir:
        destdir = '.'

    z = ZipFile(filename)
    unzipped = '.'

    if match_dir:
        if not filename.endswith('.zip'):
            raise FileException('Expected .zip file extension', filename)
        unzipped = os.path.basename(filename)[:-4]
        check_extracted_paths(z.namelist(), unzipped)
    else:
        check_extracted_paths(z.namelist())

    # File permissions, see
    # http://stackoverflow.com/a/6297838
    # http://stackoverflow.com/a/3015466
    for info in z.infolist():
        log.debug('Extracting %s to %s', info.filename, destdir)
        z.extract(info, destdir)
        # The upper 16 bits of external_attr hold the Unix mode; masking
        # with 4095 (0o7777) keeps only the permission bits.
        os.chmod(os.path.join(destdir, info.filename),
                 info.external_attr >> 16 & 4095)
    return os.path.join(destdir, unzipped)
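unzip() above restores modes from the upper 16 bits of external_attr; a complementary sketch, using only the plain zipfile API (the helper name is hypothetical), shows how those bits get there when writing an archive:

import os
import zipfile

def write_with_mode(zf: zipfile.ZipFile, path: str, arcname: str) -> None:
    # Store the Unix mode in the top 16 bits of external_attr so that
    # extractors like unzip() above can restore it with >> 16 & 4095.
    info = zipfile.ZipInfo(arcname)
    info.external_attr = (os.stat(path).st_mode & 0xFFFF) << 16
    with open(path, 'rb') as f:
        zf.writestr(info, f.read())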
def extractAll(self):
    logger.info('extracting now.')
    # Open database and check existing files.
    optimizer = ExtractionOptimizer(self.optimizeFile)
    try:
        optimizer.scanDir(self.unpackDir)
        zipFile = ZipFile(self.zip, 'r')
        try:
            # Register new files.
            for zipInfo in zipFile.infolist():
                if Extractor.__isFile(zipInfo):
                    fileInfo = FileInfo.fromZipInfo(zipInfo, 1)
                    optimizer.registerFile(fileInfo)
            # Update file storage.
            extractor = RawExtractor(self.unpackDir, zipFile)
            for op in optimizer.operations():
                extractor.extract(op)
        except IOError:
            return Extractor.RESULT_ERROR
        finally:
            zipFile.close()
    finally:
        optimizer.close()
    logger.info('extract completed.')
    return Extractor.RESULT_EXTRACTED
def _download_biogrid_data(url):
    """Downloads zipped, tab-separated Biogrid data in .tab2 format.

    Parameters
    ----------
    url : str
        URL of the BioGrid zip file.

    Returns
    -------
    csv.reader
        A csv.reader object for iterating over the rows (header has already
        been skipped).
    """
    res = requests.get(url)
    if res.status_code != 200:
        raise Exception('Unable to download Biogrid data: status code %s'
                        % res.status_code)
    zip_bytes = BytesIO(res.content)
    zip_file = ZipFile(zip_bytes)
    zip_info_list = zip_file.infolist()
    # There should be only one file in this zip archive
    if len(zip_info_list) != 1:
        raise Exception('There should be exactly one file in the BioGrid zip '
                        'archive: %s' % str(zip_info_list))
    unzipped_bytes = zip_file.read(zip_info_list[0])  # Unzip the file
    biogrid_str = StringIO(unzipped_bytes.decode('utf8'))  # Make file-like obj
    csv_reader = csv.reader(biogrid_str, delimiter='\t')  # Get csv reader
    next(csv_reader)  # Skip the header
    return csv_reader
def springer():
    DIR = 'JHEP/'
    EXT = ('.xml.Meta', '.xml.scoap')
    BASE_DIR = '/eos/project/s/scoap3repo/BETA/harvesting/Springer/download/' + DIR

    zip_list = listdir(BASE_DIR)
    needed_dois = json.loads(open('/tmp/repo_diff_result2', 'r').read())['only_in_old']
    extracted_dois = {}

    for file in zip_list:
        full_path = BASE_DIR + file
        if isfile(full_path) and full_path.endswith('.zip'):
            try:
                zip = ZipFile(full_path)
                for zip_element in zip.infolist():
                    fn = zip_element.filename
                    if fn.endswith(EXT):
                        xml = parseString(zip.read(zip_element))
                        doi = xml.getElementsByTagName('ArticleDOI')[0].firstChild.nodeValue
                        if doi in needed_dois:
                            if full_path not in extracted_dois:
                                extracted_dois[full_path] = []
                            extracted_dois[full_path].append(doi)
            except BadZipfile as e:
                error('file %s: %s' % (file, e))

    info('%s' % json.dumps(extracted_dois, indent=2))
def iterate_runs_points(runs):
    ''' Iterate over all the points, skipping share-alike sources.
    '''
    for result in iterate_local_processed_files(runs, sort_on='source_path'):
        if result.run_state.share_alike == 'true':
            continue

        _L.info('Indexing points from {}'.format(result.source_base))
        _L.debug('filename: {}'.format(result.filename))
        _L.debug('run_state: {}'.format(result.run_state))
        _L.debug('code_version: {}'.format(result.code_version))

        with open(result.filename, 'rb') as file:
            result_zip = ZipFile(file)
            csv_infos = [zipinfo for zipinfo in result_zip.infolist()
                         if splitext(zipinfo.filename)[1] == '.csv']

            if not csv_infos:
                break

            zipped_file = result_zip.open(csv_infos[0].filename)
            point_rows = DictReader(TextIOWrapper(zipped_file))

            for row in point_rows:
                try:
                    lat, lon = float(row['LAT']), float(row['LON'])
                except ValueError:
                    # Skip this point if the lat/lon don't parse
                    continue

                # Include this point if it's on Earth
                if -180 <= lon <= 180 and -90 <= lat <= 90:
                    yield Point(lon, lat, result, row)
def preloadFont(cls, font, directory=DEFAULT_DIR):
    """
    Load font file into memory. This can be overridden in a subclass
    to create different font sources.
    """
    fontPath = os.path.join(directory, font + ".flf")
    if not os.path.exists(fontPath):
        fontPath = os.path.join(directory, font + ".tlf")
        if not os.path.exists(fontPath):
            raise pyfiglet.FontNotFound("%s doesn't exist" % font)

    if is_zipfile(fontPath):
        z = None
        try:
            z = ZipFile(fontPath, "r")
            data = z.read(z.infolist()[0])
            z.close()
            return data.decode("utf-8", "replace") if ST3 else data
        except Exception as e:
            if z is not None:
                z.close()
            raise pyfiglet.FontError("couldn't read %s: %s" % (fontPath, e))
    else:
        try:
            with open(fontPath, "rb") as f:
                data = f.read()
            return data.decode("utf-8", "replace") if ST3 else data
        except Exception as e:
            raise pyfiglet.FontError("couldn't open %s: %s" % (fontPath, e))
def parse_zipfile_metadata(uploaded_file):
    """
    Given a file, extract out the metadata.json, parse, and return it.
    """
    try:
        zipfile = ZipFile(uploaded_file, 'r')
    except (BadZipfile, zlib.error):
        raise InvalidExtensionData("Invalid zip file")

    if zipfile.testzip() is not None:
        raise InvalidExtensionData("Invalid zip file")

    total_uncompressed = sum(i.file_size for i in zipfile.infolist())
    if total_uncompressed > 5 * 1024 * 1024:  # 5 MB
        raise InvalidExtensionData("Zip file is too large")

    try:
        metadata = json.load(zipfile.open('metadata.json', 'r'))
    except KeyError:
        # no metadata.json in archive, raise error
        raise InvalidExtensionData("Missing metadata.json")
    except ValueError:
        # invalid JSON file, raise error
        raise InvalidExtensionData("Invalid JSON data")

    zipfile.close()
    return metadata
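A minimal usage sketch for parse_zipfile_metadata, assuming the InvalidExtensionData exception from the same module; the bundle filename is hypothetical:

# Hypothetical caller: validate an uploaded extension bundle and pull out
# its metadata, rejecting bad zips, oversized archives, and broken JSON.
with open("myextension@example.com.zip", "rb") as uploaded:
    try:
        metadata = parse_zipfile_metadata(uploaded)
        print(metadata.get("uuid"), metadata.get("name"))
    except InvalidExtensionData as err:
        print("rejected:", err)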
def parseZip(fn):
    date_time = ''
    members = dict()
    removemembers = False
    zipfile = ZipFile(fn)
    cache.invalidate(recordlist.output, 'list_output')
    files_of_interest = ['infolog.txt', 'ext.txt', 'platform.txt', 'script.txt',
                         'settings.txt', 'unitsync.log', 'client.txt',
                         'information.txt', 'demo.sdf']
    for info in zipfile.infolist():
        if info.filename in files_of_interest and info.file_size < 5 * 1024 * 1024:
            members[info.filename] = zipfile.read(info.filename)
            if info.filename == 'infolog.txt':
                date_time = info.date_time
        else:
            removemembers = True

    if removemembers:
        # Rewrite the archive keeping only the files of interest.
        newzipfile = ZipFile(fn + '.new', 'w')
        tmpfilename = '/tmp/' + os.path.basename(fn) + '.tmp'
        for file in members.keys():
            tmpfile = open(tmpfilename, 'w')
            tmpfile.write(zipfile.read(file))
            tmpfile.close()
            newzipfile.write(tmpfilename, file)
            os.remove(tmpfilename)
        newzipfile.close()
        zipfile.close()
        os.rename(fn, fn + '.orig')
        os.rename(fn + '.new', fn)
    else:
        zipfile.close()

    return db.parseZipMembers(fn, members, date_time)
def getTranslations(localesDir, defaultLocale, projectName, key):
    result = urllib2.urlopen('http://api.crowdin.net/api/project/%s/export?key=%s' % (projectName, key)).read()
    if result.find('<success') < 0:
        raise Exception('Server indicated that the operation was not successful\n' + result)

    result = urllib2.urlopen('http://api.crowdin.net/api/project/%s/download/all.zip?key=%s' % (projectName, key)).read()
    zip = ZipFile(StringIO(result))
    dirs = {}
    for info in zip.infolist():
        if not info.filename.endswith('.dtd.json') and not info.filename.endswith('.properties.json'):
            continue

        dir, file = os.path.split(info.filename)
        origFile = re.sub(r'\.json$', '', file)
        if not re.match(r'^[\w\-]+$', dir) or dir == defaultLocale:
            continue

        if not dir in dirs:
            dirs[dir] = set()
        dirs[dir].add(origFile)

        data = zip.open(info.filename).read()
        fromJSON(os.path.join(localesDir, dir, origFile), data)

    # Remove any extra files
    for dir, files in dirs.iteritems():
        baseDir = os.path.join(localesDir, dir)
        if not os.path.exists(baseDir):
            continue
        for file in os.listdir(baseDir):
            path = os.path.join(baseDir, file)
            if os.path.isfile(path) and (file.endswith('.properties') or file.endswith('.dtd')) and not file in files:
                os.remove(path)
def update(restart=True, test=False):
    worker_dir = os.path.dirname(os.path.realpath(__file__))
    update_dir = os.path.join(worker_dir, 'update')
    if not os.path.exists(update_dir):
        os.makedirs(update_dir)

    worker_zip = os.path.join(update_dir, 'wk.zip')
    with open(worker_zip, 'wb+') as f:
        f.write(requests.get(WORKER_URL).content)

    zip_file = ZipFile(worker_zip)
    zip_file.extractall(update_dir)
    prefix = os.path.commonprefix([n.filename for n in zip_file.infolist()])
    zip_file.close()

    fishtest_src = os.path.join(update_dir, prefix)
    # fishtest_dir is assumed to be the parent of worker_dir
    fishtest_dir = os.path.dirname(worker_dir)
    if not test:
        copy_tree(fishtest_src, fishtest_dir)
    else:
        file_list = os.listdir(fishtest_src)

    shutil.rmtree(update_dir)
    print("start_dir: " + start_dir)
    if restart:
        do_restart()
    if test:
        return file_list
def dump_files(zipfile: ZipFile):
    """Dump packed files to a location."""
    dump_folder = CONF['packfile_dump', '']
    if not dump_folder:
        return

    dump_folder = os.path.abspath(dump_folder)

    # Delete files in the folder, but don't delete the folder itself.
    try:
        files = os.listdir(dump_folder)
    except FileNotFoundError:
        return

    for name in files:
        name = os.path.join(dump_folder, name)
        if os.path.isdir(name):
            try:
                shutil.rmtree(name)
            except OSError:
                # It's possible to fail here, if the window is open elsewhere.
                # If so, just skip removal and fill the folder.
                pass
        else:
            os.remove(name)

    for zipinfo in zipfile.infolist():
        zipfile.extract(zipinfo, dump_folder)
def main(opts):
    for filename in opts.files:
        zip_file = ZipFile(filename)
        info_list = zip_file.infolist()
        for entry in info_list:
            entry.filename = decode_filename(entry.filename, opts.codepage)
            zip_file.extract(entry, path=opts.dest)
def openZip(cls, fo):
    z = ZipFile(fo)
    zfiles = z.infolist()
    if len(zfiles) == 1:
        return z.open(zfiles[0].filename)
    raise NotAFBZException()
def extractAll(self):
    call(self.progress, 'begin_extract')
    # Open database and check existing files.
    optimizer = ExtractionOptimizer(self.optimizeFile)
    optimizer.scanDir(self.unpackDir)
    # Open and read zip file.
    zipFile = ZipFile(self.zip, 'r')
    success = True
    try:
        # Register new files.
        for zipInfo in zipFile.infolist():
            if Extractor.__isFile(zipInfo):
                fileInfo = FileInfo.fromZipInfo(zipInfo, 1)
                optimizer.registerFile(fileInfo)
        # Update files.
        extractor = RawExtractor(self.unpackDir, zipFile)
        for op in optimizer.operations():
            success &= extractor.extract(op)
            call(self.progress, 'do_extract', optimizer.currentIndex, optimizer.maxSize)
        # Commit new fileset.
        if success:
            optimizer.commit()
    finally:
        zipFile.close()
    call(self.progress, 'end_extract')
    return success
def extract_subfiles(source_path, dest_path, verbose=False):
    if os.path.isdir(source_path):
        for dirpath, dirnames, filenames in os.walk(source_path):
            relpath = os.path.relpath(dirpath, source_path)
            new_dir_path = os.path.join(dest_path, relpath)
            if not os.path.isdir(new_dir_path):
                os.mkdir(new_dir_path)
            for filename in filenames:
                try:
                    source_file_path = os.path.join(dirpath, filename)
                    relpath = os.path.relpath(source_file_path, source_path)
                    dest_file_path = os.path.join(dest_path, relpath)
                    print dest_file_path
                    if dest_file_path.endswith('.cod'):
                        zip = ZipFile(source_file_path)
                        for info in zip.infolist():
                            if verbose:
                                print '  %s (extracted)' % info.filename
                            dest_unzip_path = os.path.split(dest_file_path)[0]
                            # Guard against zip-slip: the target must stay inside dest_unzip_path.
                            if not os.path.realpath(os.path.join(dest_unzip_path, info.filename)).startswith(os.path.realpath(dest_unzip_path)):
                                raise Exception('Security exception: zip file %s attempted to extract to a non-local location' % info.filename)
                            zip.extract(info, path=dest_unzip_path)
                    else:
                        shutil.copyfile(source_file_path, dest_file_path)
                except Exception, e:
                    if str(e) == 'File is not a zip file':
                        # This is a cod file or some other non-zip file.
                        shutil.copyfile(source_file_path, dest_file_path)
                    else:
                        if verbose:
                            print >>sys.stderr, 'Error:',
                            print >>sys.stderr, str(e)
                        raise
def parse(self, response):
    if response.status == 200:
        if response.request.meta['type'] == 'meta':
            data = json.loads(response.body_as_unicode())
            for resource in data['result']['resources']:
                if resource['format'].upper() == 'JSON':
                    yield scrapy.Request(
                        url=resource['url'],
                        meta={'type': 'data'}
                    )
        else:
            zip_file = ZipFile(BytesIO(response.body))
            for finfo in zip_file.infolist():
                data = zip_file.open(finfo.filename).read()
                yield self.save_data_to_disk(data, finfo.filename,
                                             data_type='release_package',
                                             url=response.request.url)
    else:
        yield {
            'success': False,
            'file_name': hashlib.md5(response.request.url.encode('utf-8')).hexdigest() + '.json',
            'url': response.request.url,
            'errors': {'http_code': response.status}
        }
def get_text_docs(corpus_zipfile: ZipFile) -> ImmutableDict[str, str]:
    print(f"Reading .ltf documents in {corpus_zipfile.filename}")
    prefix = get_root_dir_name(corpus_zipfile) or ""
    parent_children_path = _find_name_in_zip(
        corpus_zipfile, re.compile(f"{prefix}docs/parent_children.tab"))
    if not parent_children_path:
        raise RuntimeError("Archive lacks parent_children.tab")
    parent_children_tab = _read_tab_file(
        CharSource.from_file_in_zip(corpus_zipfile, parent_children_path))
    child_to_parent_map = _create_child_to_parent_map(parent_children_tab)

    text_docs = {}
    text_dir = ZipPath(corpus_zipfile, at="data/ltf/")
    for source_doc_path in text_dir.iterdir():
        source_doc_zip = ZipFile(io.BytesIO(source_doc_path.read_bytes()))
        for source_info in tqdm(
                source_doc_zip.infolist(),
                desc=f"Extracting {source_doc_path.name}",
                bar_format="{l_bar}{bar:20}{r_bar}",
        ):
            doc = ZipPath(source_doc_zip, at=source_info.filename)
            try:
                doceid = doc.name.split(".")[0]
                doc_id = child_to_parent_map[doceid]
                text_docs[doc_id] = convert_ltf_to_raw_text(
                    doc.read_text(encoding="utf-8"))
            except AttributeError:
                raise FileNotFoundError(f"Could not read from {doc}.")
    return immutabledict(text_docs)
def push_libs(self):
    if self.options.local_apk:
        with mozfile.TemporaryDirectory() as tmpdir:
            apk_contents = ZipFile(self.options.local_apk)
            szip = os.path.join(self.options.local_bin, '..', 'host', 'bin', 'szip')
            if not os.path.exists(szip):
                # Tinderbox builds must run szip from the test package
                szip = os.path.join(self.options.local_bin, 'host', 'szip')
            if not os.path.exists(szip):
                # If the test package doesn't contain szip, it means files
                # are not szipped in the test package.
                szip = None
            for info in apk_contents.infolist():
                if info.filename.endswith(".so"):
                    print >> sys.stderr, "Pushing %s.." % info.filename
                    remote_file = posixpath.join(self.remote_bin_dir,
                                                 os.path.basename(info.filename))
                    apk_contents.extract(info, tmpdir)
                    file = os.path.join(tmpdir, info.filename)
                    if szip:
                        out = subprocess.check_output([szip, '-d', file],
                                                      stderr=subprocess.STDOUT)
                    self.device.pushFile(os.path.join(tmpdir, info.filename), remote_file)
        return
    elif self.options.local_lib:
        for file in os.listdir(self.options.local_lib):
            if file.endswith(".so"):
                print >> sys.stderr, "Pushing %s.." % file
                remote_file = posixpath.join(self.remote_bin_dir, file)
                self.device.pushFile(os.path.join(self.options.local_lib, file), remote_file)
        # Additional libraries may be found in a sub-directory such as "lib/armeabi-v7a"
        local_arm_lib = os.path.join(self.options.local_lib, "lib")
        if os.path.isdir(local_arm_lib):
            for root, dirs, files in os.walk(local_arm_lib):
                for file in files:
                    if file.endswith(".so"):
                        remote_file = posixpath.join(self.remote_bin_dir, file)
                        self.device.pushFile(os.path.join(root, file), remote_file)
def push_libs(self):
    if self.options.local_apk:
        with mozfile.TemporaryDirectory() as tmpdir:
            apk_contents = ZipFile(self.options.local_apk)
            for info in apk_contents.infolist():
                if info.filename.endswith(".so"):
                    print >> sys.stderr, "Pushing %s.." % info.filename
                    remote_file = posixpath.join(self.remote_bin_dir,
                                                 os.path.basename(info.filename))
                    apk_contents.extract(info, tmpdir)
                    local_file = os.path.join(tmpdir, info.filename)
                    with open(local_file) as f:
                        # Decompress xz-compressed file.
                        if f.read(5)[1:] == '7zXZ':
                            cmd = ['xz', '-df', '--suffix', '.so', local_file]
                            subprocess.check_output(cmd)
                            # xz strips the ".so" file suffix.
                            os.rename(local_file[:-3], local_file)
                    self.device.pushFile(local_file, remote_file)
    elif self.options.local_lib:
        for file in os.listdir(self.options.local_lib):
            if file.endswith(".so"):
                print >> sys.stderr, "Pushing %s.." % file
                remote_file = posixpath.join(self.remote_bin_dir, file)
                local_file = os.path.join(self.options.local_lib, file)
                self.device.pushFile(local_file, remote_file)
        # Additional libraries may be found in a sub-directory such as "lib/armeabi-v7a"
        for subdir in ["assets", "lib"]:
            local_arm_lib = os.path.join(self.options.local_lib, subdir)
            if os.path.isdir(local_arm_lib):
                for root, dirs, files in os.walk(local_arm_lib):
                    for file in files:
                        if file.endswith(".so"):
                            print >> sys.stderr, "Pushing %s.." % file
                            remote_file = posixpath.join(self.remote_bin_dir, file)
                            local_file = os.path.join(root, file)
                            self.device.pushFile(local_file, remote_file)
def _extract_zip(archivefile: zipfile.ZipFile, name: str, rep: str) -> bool:
    """Extracts a zip file."""
    mkdir(rep, mode=0o711)
    try:
        for member in archivefile.infolist():
            # zipfile member names are sanitized
            archivefile.extract(member, rep)
            member_location = joinpath(rep, member.filename)
            # python has no option to use umask while extracting, so…
            if isdir(member_location):
                chmod(member_location, 0o711)
            else:
                chmod(member_location, 0o644)
    except Exception:
        # extraction failed, remove leftover files
        log.info('Extraction of %s failed, falling back to single-file upload',
                 name, exc_info=True)
        rmtree(rep)
        return False
    else:
        remove(name)
        log.info('Successfully extracted zipfile %s into %s', name, rep)
        return True
def find_single_file_in_zip(zf: zipfile.ZipFile, path: str) -> typing.Optional[zipfile.ZipInfo]:
    infos = zf.infolist()
    if not infos:
        print(f"Warning: ZIP data for path {path!r} doesn't contain any files.",
              file=sys.stderr)
        return None
    elif len(infos) > 1:
        print(f"Warning: ZIP data for path {path!r} contains more than one file.",
              file=sys.stderr)
        return None

    (info,) = infos
    split_path = path.split("\\")
    if info.filename != split_path[-1]:
        print(f"Warning: ZIP data for path {path!r} contains a differently named file {info.filename!r}.",
              file=sys.stderr)
        return None

    return info
def close_zip(self, zipfile, name):
    oldzipfilename = os.path.join(self.dst_root, name)
    zipfilename = os.path.join(self.tmp_root, name)
    try:
        oldzipfile = ZipFile(oldzipfilename, 'r')
    except IOError:
        uptodate = False
    else:
        old = set((zi.filename, zi.CRC) for zi in oldzipfile.infolist())
        new = set((zi.filename, zi.CRC) for zi in zipfile.infolist())
        uptodate = (old == new)
        oldzipfile.close()
    zipfile.close()
    if uptodate:
        stat = os.stat(oldzipfilename)
        os.utime(zipfilename, (stat.st_atime, stat.st_mtime))
    if self.verbosity == '2':
        if uptodate:
            self.stdout.write(u"\x1b[3D\x1b[32munchanged\x1b[0m")
        else:
            self.stdout.write(u"\x1b[3D\x1b[32mzipped\x1b[0m")
def clean(self, zip_filename):
    temp_filename = zip_filename + self.TEMP_EXTENSION
    removed = False
    zip_in = ZipFile(zip_filename)
    zip_out = ZipFile(temp_filename, 'w')
    for item in zip_in.infolist():
        is_file = not item.is_dir()
        if is_file and item.filename != self.remove_filename:
            buffer = zip_in.read(item.filename)
            zip_out.writestr(item.filename, buffer)
        elif is_file:
            removed = True
    zip_out.close()
    zip_in.close()
    if removed:
        os.remove(zip_filename)
        os.rename(temp_filename, zip_filename)
    else:
        os.remove(temp_filename)
    return removed
def _unzip_file(self, file_):
    results = []
    if not zipfile.is_zipfile(file_):
        return results
    zf = ZipFile(file_, 'r')
    try:
        for fileentry in zf.infolist():
            unzipped_file = zf.open(fileentry, pwd=self._PASSWORD).read()
            results.append({
                'filename': fileentry.filename,
                'data': unzipped_file,
            })
    finally:
        zf.close()
        file_.close()
    return results
def load(self) -> None:
    if path.isdir(self.zip_or_dir_path):
        print(f'Directories are not supported by {self.__class__.__name__}')
        return None

    zip_file = ZipFile(self.zip_or_dir_path)
    files_by_loader = dict()
    for file_info in zip_file.infolist():
        if file_info.filename.endswith('/'):
            continue
        for prefix, LoaderClass in DictionaryLoader.dictionary_loader_by_file_prefix.items():
            if file_info.filename.startswith(prefix):
                files = files_by_loader.get(LoaderClass)
                if not files:
                    files = list()
                    files_by_loader[LoaderClass] = files
                files.append(file_info)
                break

    for LoaderClass, files in files_by_loader.items():
        loader = LoaderClass(zip_file=zip_file, files=files)
        loader.load_once()
def _crc32(self, path):
    if self._infodict:
        return self._infodict[path]

    try:
        archive = ZipFile(self.src)
    except BadZipfile:
        e = get_exception()
        if e.args[0].lower().startswith('bad magic number'):
            # Python 2.4 can't handle zipfiles with > 64K files. Try using
            # /usr/bin/unzip instead.
            self._legacy_file_list()
        else:
            raise
    else:
        try:
            for item in archive.infolist():
                self._infodict[item.filename] = long(item.CRC)
        except Exception:
            archive.close()
            raise UnarchiveError('Unable to list files in the archive')

    return self._infodict[path]
async def download_and_extract_ppt(
        ppt_zip_link: str,
        index: int,
        download_basepath: Optional[Path] = None) -> None:
    if download_basepath is None:
        download_basepath = Path(FLAGS.download_basedir)

    log.info(f"processing {ppt_zip_link}")
    t = urlparse(ppt_zip_link)
    assert t.path.endswith(f"hymn-{index:03}.zip")
    ppt_zip_path = download_basepath / Path(t.path).name
    if ppt_zip_path.exists():
        log.info(f"{ppt_zip_path} exists. use it as cache.")
        with ppt_zip_path.open("rb") as f:
            content = f.read()
    else:
        ppt_missing_path = download_basepath / f"{Path(t.path).name}.missing"
        if ppt_missing_path.exists():
            log.warn(f"{ppt_missing_path} exists. stop.")
            return
        status, content = await fetch(ppt_zip_link)
        if status == 200:
            with ppt_zip_path.open("wb") as out:
                out.write(content)
        elif status in (404, 503):
            log.warn(f"write {ppt_missing_path}. stop.")
            ppt_missing_path.open("wb").close()
            return
        else:
            log.error(f"status={status}")
            return

    log.info(f"extract {ppt_zip_path}")
    zf = ZipFile(BytesIO(content))
    infolist = zf.infolist()
    assert len(infolist) == 1
def handle(self, *args, **options):
    # Maybe this should be replaced with a CLI option:
    do_upload = os.environ.get('AGREEMENTS_S3_UPLOAD_ENABLED', False)
    Agreement.objects.all().delete()
    Issuer.objects.all().delete()
    agreements_zip = ZipFile(options['path'])
    # Zip files default to IBM Code Page 437 encoding unless a specific bit
    # is set. See Appendix D in the zip file spec:
    # https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
    all_pdfs = [
        info.filename if (info.flag_bits & 0x800) == 0
        else force_str(info.filename, 'cp437')
        for info in agreements_zip.infolist()
        if info.filename.upper().endswith('.PDF')
    ]
    blanks = empty_folder_test(agreements_zip, all_pdfs)
    if blanks:
        error_msg = ("Processing error: Blank folders were found "
                     "in the source zip file:\n{}".format(", ".join(blanks)))
        raise CommandError(error_msg)
    if options['verbosity'] >= 1:
        output_file = self.stdout
    else:
        output_file = open(os.devnull, 'a')
    for pdf_path in all_pdfs:
        _util.save_agreement(agreements_zip, pdf_path, output_file,
                             upload=do_upload)
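The list comprehension above applies the rule from Appendix D of the zip spec: member names are CP437 unless general-purpose bit 11 (0x800) flags them as UTF-8. A self-contained sketch of the same rule for Python 3, where ZipFile has already CP437-decoded unflagged names, so a round-trip recovers the raw bytes before re-decoding (the helper name and fallback encoding are assumptions, not part of the management command above):

from zipfile import ZipFile

ZIP_FILENAME_UTF8_FLAG = 0x800  # general-purpose bit 11

def member_names(zf: ZipFile, fallback_encoding: str = 'cp437') -> list:
    names = []
    for info in zf.infolist():
        if info.flag_bits & ZIP_FILENAME_UTF8_FLAG:
            # Stored as UTF-8; ZipFile already decoded it correctly.
            names.append(info.filename)
        else:
            # ZipFile decoded the raw bytes as CP437; round-trip them and
            # re-decode with whatever encoding the archive really used.
            raw = info.filename.encode('cp437')
            names.append(raw.decode(fallback_encoding, errors='replace'))
    return names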
def process_zip_file(path_zip_file, filename, formato):
    global total_registros_procesados, total_registros_insertados, \
        total_registros_excluidos, msg_error_column, good_files, bad_files, duplicados
    formato_excel = set_formato_excel(formato)
    archivo_zip = ZipFile(path_zip_file, 'r')
    content_of_zip = archivo_zip.infolist()
    good_files = []
    bad_files = []
    duplicados = []
    extension = (".xls", ".xlsx")
    for s in content_of_zip:
        duplicados = []
        if s.filename.endswith(extension):
            print(s.filename)
            try:
                df = pd.read_excel(archivo_zip.open(s.filename, 'r'),
                                   converters=formato_excel)
                process_df = df[df.FECHA.notnull()]
                df_final = process_df.fillna(0)
                reg_procesados, reg_insertados, reg_excluidos = save_registers_in_database(
                    df_final, s.filename, formato, duplicados)
                good_files.append({
                    'filename': s.filename,
                    'status': status_indiv_file,
                    'registros_procesados': reg_procesados,
                    'registros_insertados': reg_insertados,
                    'registros_excluidos': reg_excluidos,
                    'registros_duplicados_detalle': duplicados
                })
                total_registros_procesados += reg_procesados
                total_registros_insertados += reg_insertados
                total_registros_excluidos += reg_excluidos
            except AttributeError as e:
                indice = str(e).find('attribute')
                error = msg_error_column + str(e)[indice + 9:]
                bad_files.append({'file': s.filename, 'problema': error})
                save_file_upload_error(s.filename, error)
def load_swig():
    if not is_windows:
        return
    if which('swig'):
        return

    if not os.path.exists(SWIG_LOCAL_FILENAME):
        print('[+] Downloading file from %s' % SIWG_ZIP_URL)
        response = urlopen(SIWG_ZIP_URL)
        data = response.read()
        with open(SWIG_LOCAL_FILENAME, 'wb') as fp:
            fp.write(data)
    else:
        print('[+] Use %s from local' % SWIG_LOCAL_FILENAME)
        with open(SWIG_LOCAL_FILENAME, 'rb') as fp:
            data = fp.read()

    print('[*] Check if file hash matches %s' % SWIG_ZIP_HASH)
    assert sha256(data).hexdigest().lower() == SWIG_ZIP_HASH

    print('[*] Read zip file')
    zfile = ZipFile(BytesIO(data))
    pathname = zfile.infolist()[0].filename
    if os.path.exists(pathname):
        print('[+] Zip file already extracted')
    else:
        print('[+] Extracting files')
        zfile.extractall('.')

    path = os.getenv('PATH')
    swig_path = os.path.join(os.path.abspath('.'), 'swigwin-4.0.1')
    new_path = swig_path + os.path.pathsep + path
    os.putenv('PATH', new_path)
    print('New $PATH:')
    pprint(new_path.split(os.path.pathsep))
def _update_version1(self, filepath):
    logging.debug('Updating from "version 1"')
    oldzip = ZipFile(filepath, 'r')
    newzip = ZipFile(filepath + ".new", 'w')

    # Update stats.cfg
    config = ConfigParser()
    config.read(oldzip.open('stats.cfg', 'r'))

    limit = config.stats.convergence_limit
    del config.stats.convergence_limit
    config.stats.convergor = '<CompositionConvergor(limit=%s)>' % limit

    fp = StringIO()
    config.write(fp)
    newzip.writestr('stats.cfg', fp.getvalue())

    # Add other files to new zip
    for zipinfo in oldzip.infolist():
        if zipinfo.filename == 'stats.cfg':
            continue
        data = oldzip.read(zipinfo)
        newzip.writestr(zipinfo, data)

    # Add version
    newzip.comment = 'version=%s' % VERSION

    oldzip.close()
    newzip.close()

    # Remove old zip and replace with new one
    os.remove(filepath)
    os.rename(filepath + ".new", filepath)

    return filepath
def compare_site_backups(self, backup1, backup2):
    """
    Compares 2 site backups

    Compares critical site backup files and ensures they are the same:
    - site database
    - site floorplan

    Variables
    *backup1* - first site backup file
    *backup2* - second site backup file

    .. code:: robotframework

        *** Test Cases ***
        Sample
            Compare site backups    .//artifacts//site_backup1.zip    .//artifacts//site_backup2.zip
    """
    checklist = ['.sqlite', '.egf.gz']
    assert os.path.exists(backup1), 'Unable to find file {0}'.format(backup1)
    assert os.path.exists(backup2), 'Unable to find file {0}'.format(backup2)
    _backup1 = ZipFile(backup1)
    _backup2 = ZipFile(backup2)
    for item in _backup1.infolist():
        for check in checklist:
            if check in item.filename:
                assert item.file_size == _backup2.getinfo(item.filename).file_size, \
                    'File {0} size does not match!'.format(item.filename)
def remove_meta_inf(p_apk):
    if isinstance(p_apk, str):
        raise Exception("p_apk must be a Path, not a str")

    zip = ZipFile(p_apk)
    for l in zip.namelist():
        if l.startswith('META-INF/'):
            is_signed = True
            break
    else:
        is_signed = False

    if is_signed:
        unsigned_apk_path = str(p_apk.resolve())[:-4] + "-unsigned.apk"
        zout = ZipFile(unsigned_apk_path, 'w')
        for item in zip.infolist():
            buffer = zip.read(item.filename)
            if not item.filename.startswith('META-INF/'):
                zout.writestr(item, buffer)
        zout.close()
        zip.close()
        return Path(unsigned_apk_path)

    return p_apk
def clean_xml_in_zip(zip_name):
    # Construct a list (name, data) for all XML files in the ZIP.
    xml_list = []
    xml_names = []
    zf = ZipFile(zip_name, 'r')
    zl = zf.infolist()
    word_re = re.compile(ur'<w:lang( w:[a-zA-Z]{1,16}="[a-zA-Z\-]{1,10}"){1,5}/>', re.UNICODE)
    ppt_re = re.compile(ur' lang="[a-zA-Z\-]{1,10}"', re.UNICODE)
    for x in zl:
        if x.filename[-4:] == '.xml':
            xml_names.append(x.filename)
            xml = zf.open(x, 'rU').read().decode('utf-8')
            # MS Word: completely remove tags with language codes.
            xml = word_re.sub('', xml)
            # MS PowerPoint: strip language attribute from tags with language codes.
            xml = ppt_re.sub('', xml)
            xml_list.append((x.filename, xml.encode('utf-8')))
    zf.close()

    # Create a temporary file.
    tmp_fd, tmp_name = mkstemp(dir=os.path.dirname(zip_name))
    os.close(tmp_fd)

    # Create a copy of the archive without XML files.
    with ZipFile(zip_name, 'r') as src_zip:
        with ZipFile(tmp_name, 'w') as dst_zip:
            dst_zip.comment = src_zip.comment  # preserve the comment (if any)
            for item in src_zip.infolist():
                if not (item.filename in xml_names):
                    dst_zip.writestr(item, src_zip.read(item.filename))

    # Replace the original with the temporary archive.
    os.remove(zip_name)
    os.rename(tmp_name, zip_name)

    # Add XML files with their new data.
    with ZipFile(zip_name, mode='a', compression=ZIP_DEFLATED) as zf:
        for name, data in xml_list:
            zf.writestr(name, data)
def extract_files(input_file, output_folder):
    '''Extract files embedded in the given MS Office document.'''
    # This code was taken from:
    # https://bitdrop.st0w.com/2010/07/23/python-extracting-a-file-from-a-zip-file-with-a-different-name/
    # It allows extracting a file and renaming it at the same time.
    if not zipfile.is_zipfile(input_file):
        print(bcolors.WARNING + "\n{} is not a zipfile.".format(input_file))
        print("Please remove this file from the watchfolder.")
        input("Press ENTER to continue:" + bcolors.ENDC)
        return 1

    zipdata = ZipFile(input_file)
    zipinfos = zipdata.infolist()
    filecount = 0  # Counter for the number of extracted files
    print("\nStart extraction of embedded files...")
    for zipinfo in zipinfos:
        # Check if the file is in a dedicated folder or of a requested filetype.
        if re.search(r"(embeddings|media).+(bin|jpg|jpeg|doc|docx|xls|xlsx|ppt|pptx|pdf)",
                     zipinfo.filename):
            # Rename files with extension .bin to .pdf.
            zipinfo.filename = re.sub(r'\.bin$', '.pdf', zipinfo.filename)
            # Extract the file.
            zipdata.extract(zipinfo, output_folder)
            filecount += 1
            print("\t=> {}".format(zipinfo.filename))

    if not filecount:
        print("\tNo embedded files found to extract.")
    print("Extracted {} of {} files.".format(filecount, len(zipinfos)))
    return 0
def _parse(self, f, options={}):
    print "Parsing ZIP"
    information = []
    zipfile = ZipFile(f)
    for x in zipfile.infolist():
        # Add file to index
        data = self.createData("strings", "FILE", filename=x.filename)
        data["CRC"] = x.CRC
        information.append(data)

        # Process file
        parser = P.instance().get_parser(x.filename)
        if parser is None:
            continue
        try:
            before = datetime.datetime.now()
            results = parser.parse(zipfile.open(x.filename), parent=self.filename_w)
            after = datetime.datetime.now()
            if (after - before) > datetime.timedelta(seconds=1):
                print "Parsed %s in %s" % (x.filename, str(after - before))
            if results is not None:
                information += results
        except Exception as e:
            print "Cannot process %s" % x.filename
            print e
    return information
def unzip(zfilename):
    '''Unzip a zip archive to the working directory.

    Place that archive's contents in the working dir if it is a root
    archive. Else, place them in a subdirectory.

    Arguments
    ---------
    zfilename : string, name of zip file. Must contain the .zip extension.
    '''
    assert type(zfilename) is str, "zfilename is not a string: %r" % zfilename
    assert zfilename[-3:] == 'zip', "zfilename is not a .zip: %r" % zfilename

    try:
        zdir = zfilename.split('.')[-2]
        file = ZipFile(zfilename, "r")
    except:
        print("no such file or unable to open " + zfilename)
        raise

    # Pull out the filenames from the archive.
    filez = []
    for info in file.infolist():
        filez.append(info.filename)

    # If all files are in subdirectories, unzip to the current dir.
    if all(["/" in s for s in filez]):
        with ZipFile(zfilename) as myzip:
            myzip.extractall(".")
        print("successfully unzipped to current directory")
    # Else, at least one file is at the root of the archive; unzip to a subdir.
    else:
        with ZipFile(zfilename) as myzip:
            myzip.extractall(zdir)
        print("successfully unzipped to " + zdir + "/")
def getData():
    dfs = []
    years = []
    for zip_filename in zips:
        zip_file = ZipFile(zip_filename)
        new = {
            text_file.filename: pd.read_csv(
                zip_file.open(text_file.filename),
                header=8,
                sep=";",
                quotechar='"',
                encoding="ISO-8859-1",
            )
            for text_file in zip_file.infolist()
            if text_file.filename.upper().endswith(".CSV")
            and search.upper().strip() in Path(text_file.filename.upper()).stem.strip()
        }
        year = Path(zip_filename).stem
        if len(new) == 0:
            continue
        _, new = list(new.items())[0]
        try:
            new["time"] = new["Data"].apply(
                lambda x: datetime.datetime.strptime(x, "%Y/%m/%d"))
        except KeyError:
            new["time"] = new["DATA (YYYY-MM-DD)"].apply(
                lambda x: datetime.datetime.strptime(
                    str(datetime.datetime.strptime(x, "%Y-%m-%d"))[:-9], "%Y-%m-%d"))
        new[column] = new[column].str.replace("-9999", "0")
        new[column] = new[column].str.replace(",", ".").astype(float)
        dfs += [new]
        years.append(year)
    return years, dfs
def download(url, destination, compression=None):
    try:
        os.makedirs(destination)
    except OSError:
        pass  # destination already exists

    print('Downloading', url)
    data = urlopen(url).read()
    if compression == 'zip':
        data = ZipFile(StringIO(data))
        for f in data.infolist():
            data.extract(f, path=destination)
    else:
        filename = os.path.split(url)[-1]
        if compression == 'gz':
            filename = os.path.splitext(filename)[0]
            data = zlib.decompress(data, 16 + zlib.MAX_WBITS)
        with open(os.path.join(destination, filename), 'wb') as f:
            f.write(data)
def convert_doc(document_file):
    '''Given a filename or a file object of an ODT file (a zip file really),
    returns a converted file object.'''
    file_in = ZipFile(document_file)
    styles = etree.parse(file_in.open('styles.xml'))
    content = etree.parse(file_in.open('content.xml'))
    style_mapping = {}
    convert_styles(styles, style_mapping)
    convert_styles(content, style_mapping)
    convert_content(content, style_mapping)

    # Build a new odt file in memory.
    fp = BytesIO()
    file_out = ZipFile(fp, mode='w', compression=file_in.compression)
    for zinfo in file_in.infolist():
        name = zinfo.filename
        if name not in ('styles.xml', 'content.xml'):
            file_out.writestr(zinfo, file_in.read(zinfo))
    file_out.writestr('styles.xml', etree.tostring(styles, encoding='utf8'))
    file_out.writestr('content.xml', etree.tostring(content, encoding='utf8'))
    file_out.close()
    return fp
def open_file(self):
    '''Open a ROM.'''
    rom_fn, _ = QFileDialog.getOpenFileName(
        self, "Open ROM", os.getcwd(),
        "Sega Genesis ROMs (*.bin *.gen *.zip)")
    if not rom_fn:
        return
    self.rom_fn = rom_fn
    if is_zipfile(rom_fn):
        # If the file is a ZIP, try to open the largest file inside.
        zipfile = ZipFile(rom_fn, 'r')
        contents = [(f.file_size, f.filename) for f in zipfile.infolist()]
        contents.sort(reverse=True)
        rom = zipfile.read(contents[0][1])
    else:
        rom = open(rom_fn, 'rb').read()
    md.set_rom(c_char_p(rom), len(rom))
    self.reset_emulation()
    self.activateWindow()