def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(
        description="Insert music in MML format into a Donkey Kong ROM.")
    parser.add_argument('mmlname')
    parser.add_argument('inzipname')
    parser.add_argument('outzipname')
    args = parser.parse_args(argv)
    with open(args.mmlname, 'r') as mmlfile:
        try:
            data = read_mml(mmlfile)
        except MMLError as e:
            print(e, file=sys.stderr)
            return 1
    with zipfile.ZipFile(args.inzipname, 'r') as inzip:
        # read the sound ROM into a mutable buffer so it can be patched in place
        snd_rom = bytearray(inzip.read(SNDROM_NAME))
    patch_rom(snd_rom, PATTERN_DATA_OFFSET, data)
    # Move routine at 0x4f8 to 0x4fe, giving six more bytes to pattern table
    patch_rom(snd_rom, 0x4fe, b'\xA3\x83')
    # Patch call to the routine formerly known as 0x4f8
    snd_rom[0x637] = 0xfe
    # Insert music data @XXX@
    with zipfile.ZipFile(args.outzipname, 'w') as outzip:
        outzip.writestr(SNDROM_NAME, bytes(snd_rom))
def get_extension_id(zipfile):
    contents = zipfile.namelist()
    if 'install.rdf' in contents:
        return get_extension_id_rdf(zipfile.open('install.rdf'))
    elif 'manifest.json' in contents:
        return get_extension_id_json(zipfile.open('manifest.json'))
    raise ValueError("Extension is missing a manifest")
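# Usage sketch (assumption, not part of the original snippet): get_extension_id
# expects an already-opened zipfile.ZipFile; 'my_extension.xpi' is a
# hypothetical archive name used only for illustration.
#
#     with zipfile.ZipFile('my_extension.xpi') as zf:
#         ext_id = get_extension_id(zf)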
def EpubToTxt(file):
    import os
    import zipfile
    import re
    from bs4 import BeautifulSoup

    def PrettifyTxt(text):
        lines = text.split('\n')
        text = ''
        for line in lines:
            if line.split():
                text = text + ' ' + line.strip() + '\n'
        return text

    filename = os.path.basename(file)
    filebasename = os.path.splitext(filename)[0]
    zf = zipfile.ZipFile(file)
    namelist = zf.namelist()
    opflist = []
    text = ''
    for subfile in namelist:
        if subfile.endswith('.opf'):
            opflist.append(subfile)
    opffile = min(opflist, key=len)
    # keep the directory prefix of the OPF file (rstrip strips a character set, not a suffix)
    folder = opffile[:len(opffile) - len(os.path.basename(opffile))]
    opfs = zf.open(opffile)
    opf = ''
    for line in opfs:
        opf = opf + str(line, 'utf-8')
    spine = re.search('(?s)<spine.*toc.*=.*"ncx".*>(.*?)</spine>', opf, re.M).group()
    manifest = re.search('(?s)<manifest.*>(.*?)</manifest>', opf, re.M).group()
    ids = re.findall(' id="(.*?)"', manifest)
    hrefs = re.findall('href="(.*?)"', manifest)
    idrefs = re.findall('<itemref.*idref="(.*?)"', spine)
    key = dict(zip(ids, hrefs))
    for idref in idrefs:
        htmpath = folder + key[idref]
        htmopen = zf.open(htmpath)
        soup = BeautifulSoup(htmopen, 'lxml')
        text = text + soup.get_text()
    zf.close()
    return PrettifyTxt(text)
def read(zipfile, name):
    try:
        with zipfile.open(name, "r") as f:
            return f.read().decode('utf-8')
    except KeyError:
        print("[installer] Unable to find " + name)
        return None
def extract_file(name, to_dir=None):
    """
    Parameters
    ----------
    name : Path to the compressed file.
    to_dir : Folder to which the file is extracted.

    Returns
    -------
    to_dir : Folder in which the file is extracted.
    """
    import zipfile
    import tarfile
    if to_dir is None:
        to_dir = os.path.dirname(name)
    if ".tar" in name:
        with tarfile.open(name, 'r') as tar:
            tar.extractall(path=to_dir)
    if ".zip" in name:
        with zipfile.ZipFile(name, 'r') as zipf:
            zipf.extractall(path=to_dir)
    print("archive extracted under dir : {}".format(to_dir))
    return to_dir
def read_zipfile(file, options={}):
    """
    Read text from 'file', where the file has been zipped up and has a name
    which is 'file' minus the extension

    Parameters
    ----------
    file : str

    Options
    -------
    'one str' : if True, will return a single str
                (By default, each line is a separate str)

    Returns
    -------
    str or array of str
    """
    zf = zipfile.ZipFile(file)
    innerfile = re.sub(r'\.zip$', '', file)
    innerfile = re.sub(r'\.gz$', '', innerfile)
    final = []
    with zf.open(innerfile) as resource:
        for line in resource:
            # zip members are read as bytes; decode to text
            final.append(line.decode('utf-8').rstrip())
    if isTrue(options, 'one str'):
        return "\n".join(final)
    return final
def dataType(self, filename):
    # determine if the file is compressed
    comp = self.compressed(filename)
    # determine if the file is fasta or fastq
    self.info['compressed'] = comp
    x = ""
    if comp == "gz":
        try:
            x = gzip.open(filename)
        except:
            x = gzip.open(filename + ".gz")
        x = x.read(10)
    elif comp == "bz2":
        x = bz2.open(filename)
        x = x.read(10)
    elif comp == "zip":
        # a zip archive must be opened as a ZipFile; peek at its first member
        zf = zipfile.ZipFile(filename)
        x = zf.open(zf.namelist()[0])
        x = x.read(10)
    elif comp == "raw":
        x = open(filename)
        x = x.read(10)
    else:
        self.info['compressed'] = "undefined"
    if self.isFasta(x):
        self.info['fileType'] = "fasta"
        return self.info
    elif self.isFastq(x):
        self.info['fileType'] = "fastq"
        return self.info
    else:
        self.info['fileType'] = "undefined"
        return self.info
def read_archive(input_archive, archive_type, mode, sample2cat, input_dir):
    """read archive"""
    if not os.path.exists(input_dir):
        os.mkdir(input_dir)
    if archive_type == "zip":
        archive = zipfile.ZipFile(input_archive)
        namelist = archive.namelist()
    if archive_type == "tar.gz":
        archive = tarfile.open(input_archive, "r:gz")
        namelist = archive.getnames()
    sample_file_names, sample_names = get_sample_names(namelist)
    for tf, sfn in zip(namelist, sample_file_names):
        # ZipFile and TarFile expose different member-extraction APIs
        if archive_type == "zip":
            extracted = archive.open(tf)
        else:
            extracted = archive.extractfile(tf)
        with open("%s/%s" % (input_dir, sfn), 'wb') as sample_file_out:
            for line in extracted:
                sample_file_out.write(line)
        extracted.close()
    # create sample table
    if sample2cat is not None:
        sample_cat = pd.read_csv(sample2cat, index_col=0, sep="\t")
        # replace index with cleaned file names
        sample_cat = sample_cat.rename(
            index=dict([(tf, sfn) for sfn, tf in zip(sample_file_names, namelist)]))
        # assumes the first column of the sample2cat table holds the category
        sample_table = pd.DataFrame({
            "sample_file_name": sample_file_names,
            "category": sample_cat.loc[sample_file_names, ].iloc[:, 0].tolist(),
        })
    else:
        sample_table = pd.DataFrame(sample_file_names)
        sample_table.columns = ["sample_file_name"]
    sample_table.index = sample_names
    sample_table.index.name = "sample_name"
    sample_table.to_csv("%s/sample_table.txt" % input_dir, sep="\t")
def importzip(conn, filename, zipfile):
    print 'Import ' + filename
    files = filelist(zipfile)
    cur = conn.cursor()
    meta = metadata(zipfile.read(files['OPERDAY']))
    if datetime.strptime(meta['ValidThru'].replace('-', ''), '%Y%m%d') < (datetime.now() - timedelta(days=1)):
        return meta
    header = (zipfile.read(files['DEST']).split('\r\n')[0].split('|')[1] in versionheaders)
    encoding = encodingof(meta['DataOwnerCode'])
    for table in importorder:
        if table in files:
            f = zipfile.open(files[table])
            table = table + '_delta'
            if header:
                cur.copy_expert("COPY %s FROM STDIN WITH DELIMITER AS '|' NULL AS '' CSV HEADER ENCODING '%s'" % (table, encoding), f)
            else:
                cur.copy_expert("COPY %s FROM STDIN WITH DELIMITER AS '|' NULL AS '' CSV ENCODING '%s'" % (table, encoding), f)
    conn.commit()
    cur.close()
    return meta
def run(file, n=1, pulse_properties=True):
    """Retrieve pax Event objects for all events"""
    events = []
    zipfile = ReadZipped()
    zipfile.open(file)
    event_numbers = zipfile.get_event_numbers_in_current_file()
    properties = PulseProperties()
    for ev in tqdm(event_numbers):
        event = get_event(zipfile, ev)
        if pulse_properties:
            events.append(properties.transform_event(event))
        else:
            events.append(event)
    return events
def _write_data_to_folder(zipfile, filenames, labels, folder, start_index, isTest):
    print("Writing data\n")
    sorted_labels = [None] * len(labels)
    if isTest == 1:
        # drop the known-bad test image from the archive listing
        for i in range(len(zipfile.infolist())):
            entry = zipfile.infolist()[i]
            if "IRHT_P_009793.tif" in entry.filename:
                zipfile.infolist().remove(entry)
                break
    zip_infolist = zipfile.infolist()[1:]
    for i in range(len(zip_infolist)):
        entry = zip_infolist[i]
        entry_index_infilenames = filenames.index(entry.filename[start_index:])
        sorted_labels[i] = labels[entry_index_infilenames]
    for i, (entry, label) in enumerate(zip(zipfile.infolist()[1:], sorted_labels)):
        with zipfile.open(entry) as file:
            img = Image.open(file)
            dest = os.path.join(folder, str(label))
            make_folder_if_not_exists(dest)
            img.save(os.path.join(dest, str(i) + '.png'), "PNG", quality=100)
def archive_to_repo(archive_path, repo, archive_type="tar"):
    """Downloads an archive from the specified path, extracts it into the
    repo's directory, commits any changes from the previous version and
    pushes it to GitHub."""
    # Download the tarball and stick it in a tempfile
    r = requests.get(archive_path)
    tmp = SpooledTemporaryFile()
    tmp.write(r.content)
    tmp.seek(0)
    # Open the tempfile contents as an actual archive
    if archive_type == "tar":
        archive = tarfile.open(fileobj=tmp)
    elif archive_type == "zip":
        archive = zipfile.ZipFile(tmp)
    else:
        raise ValueError("Unrecognized Archive Type")
    # Clear working files
    clear_working_dir(repo.working_dir)
    # Extract to the repo path
    archive.extractall(repo.working_dir)
    # Add and commit everything!
    try:
        repo.git.add(".", A=True)
        repo.git.commit(m="New archive version")
    except:
        pass  # May be that there was nothing new to commit
    # Cleanup, outta here!
    archive.close()
    tmp.close()
def add_interesting_events(self, node_dictionary, zipfile, zipfilename):
    t = time.clock()
    self.filename = zipfilename
    # Figure out sizes for progress reporting
    for key in self.events.keys():
        for name in zipfile.namelist():
            if name.endswith('/' + key):
                self.total_size += zipfile.getinfo(name).file_size
    mortimer.update_progress_so_far(self.progress_queue, self.total_size, self.progress_size)
    for key in self.events.keys():
        for name in zipfile.namelist():
            if name.endswith('/' + key):
                tf = tempfile.TemporaryFile()
                tf.write(zipfile.open(name).read())
                tf.seek(0)
                # some logs don't have a year in the timestamp, assume log file year is the one
                self.year = int((datetime(zipfile.getinfo(name).date_time[0], 1, 1) - datetime(1970, 1, 1)).total_seconds())
                self.process_log(node_dictionary, key, tf)
                tf.close()
    self.process_time = time.clock() - t
    mortimer.update_progress_so_far(self.progress_queue, self.total_size, self.total_size)
    print "{}: Processing of node events took {} seconds".format(self.filename, self.process_time)
def unpack(self):
    if not self.archive or not os.path.exists(self.archive):
        self.download()

    if self.url.startswith('git:'):
        return

    logging.info('unpacking {}'.format(self.archive))
    target = self.root

    content, encoding = mimetypes.guess_type(self.archive)
    if content == 'application/x-tar':
        archive = tarfile.open(self.archive)
        prefix = os.path.commonprefix(archive.getnames())
    elif content == 'application/zip':
        archive = zipfile.ZipFile(self.archive)
        prefix = os.path.commonprefix(archive.namelist())
    else:
        raise RuntimeError('unknown archive type: {}'.format(content))

    # if no prefix, create one
    if len(prefix) == 0:
        prefix = self.package
        target = os.path.join(target, self.package)

    # extract files
    self._setstate('source_dir', os.path.join(target, prefix))
    os.makedirs(target, exist_ok=True)
    archive.extractall(target)
    logging.info('  successfully unpacked {}'.format(self.archive))
def examinezip(request, tarpath, fpath=None, document_root=None, show_indexes=False):
    if fpath:
        # Got a valid fpath (inside the zipfile)
        zippath = document_root + tarpath + "/" + fpath
    else:
        # No valid fpath (zipfile itself)
        zippath = document_root + tarpath
    if fpath == None:
        try:
            t = loader.get_template(
                '/u/savagev/valtest/realtest/templates/data/static/directory_index.html'
            )
        except TemplateDoesNotExist:
            t = loader.get_template(
                '/u/kamwoods/Sudoc/svp/realtest/templates/data/static/directory_index.html'
            )
        zip = zipfile.ZipFile(zippath, 'r')
        files = []
        for zipinfo in zip.infolist():
            f = zippath.split('/')[-1] + '/' + zipinfo.filename
            filefp = os.path.join(zippath, f)
            mimetype = mimefinder.find_type(filefp)
            fmodtime = zipinfo.date_time
            if not zipinfo.file_size == 0:
                fgsize = static.greek(zipinfo.file_size)
            else:
                fgsize = "-"
            files.append(["-", "", mimetype, f, fmodtime, fgsize])
        zip.close()
        c = Context({
            'directory': zippath + '/',
            'file_list': files,
            'hinst': request.session['hinst'],
            'hexe': request.session['hexe'],
            'hdriver': request.session['hdriver'],
            'hlib': request.session['hlib'],
            'hetc': request.session['hetc'],
            'user': request.user.username,
        })
        return HttpResponse(t.render(c))
def getzflo(zipfile, member_path):
    # GET a Zipfile File-Like Object for passing to an XML parser
    try:
        return zipfile.open(member_path)  # CPython 2.6 onwards
    except AttributeError:
        # old way
        return BYTES_IO(zipfile.read(member_path))
def parse(name, cur, zipfile):
    """Extract a csv from the zip and read it into a table"""
    csvfile = io.TextIOWrapper(zipfile.open(name + '.csv'), encoding='utf-8')
    dr = csv.DictReader(csvfile)
    cur.execute('CREATE TABLE {0} ({1});'.format(name, ', '.join(dr.fieldnames)))
    for row in dr:
        columns = ', '.join([':' + key for key in filter(None, row.keys())])
        cur.execute('INSERT INTO {0} ({1}) VALUES ({2});'.format(
            name, ', '.join(filter(None, row.keys())), columns), row)
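# Usage sketch (assumption, not part of the original snippet): parse() expects
# an sqlite3 cursor and an already-opened zipfile.ZipFile; 'feed.zip' and the
# table name 'stops' are hypothetical.
#
#     with zipfile.ZipFile('feed.zip') as zf:
#         conn = sqlite3.connect(':memory:')
#         parse('stops', conn.cursor(), zf)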
def zipfile_copy(zipfile, src_path, dest_path):
    file_src = zipfile.open(src_path)
    file_target = open(dest_path, 'wb')
    shutil.copyfileobj(file_src, file_target)
    file_src.close()
    file_target.close()
def filename2json(filename):
    """Load the contents of a JSON file (given its name in the archive) into a variable"""
    with zipfile.open(filename) as f:
        data = json.load(f)
    print(data)
    return data
def _iter_dataset_zip(zipfile, prefixes, parts):
    """Iterate over selected event files inside a zip archive.
    """
    for p in prefixes:
        files = [zipfile.open('{}-{}.csv'.format(p, _), mode='r') for _ in parts]
        dtypes = [DTYPES[_] for _ in parts]
        data = tuple(pandas.read_csv(f, header=0, index_col=False, dtype=d)
                     for f, d in zip(files, dtypes))
        yield (_extract_event_id(p),) + data
def extract_zip(zipfile, output_dir):
    """Extracts a zipfile without the uppermost folder."""
    output_dir = Path(output_dir)
    if zipfile.testzip() is None:
        for m in zipfile.namelist():
            fldr, name = re.split('/', m, maxsplit=1)
            if name:
                content = zipfile.open(m, 'r').read()
                with open(output_dir / name, 'wb') as out:
                    out.write(content)
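# Usage sketch (assumption, not part of the original snippet): extract_zip()
# expects an already-opened zipfile.ZipFile; 'bundle.zip' and 'out' are
# hypothetical names.
#
#     with zipfile.ZipFile('bundle.zip') as zf:
#         extract_zip(zf, 'out')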
def load_fn(key):
    import trimesh
    from shape_tfds.core.resolver import ZipSubdirResolver
    subdir = os.path.join(synset_id, key)
    resolver = ZipSubdirResolver(zipfile, subdir)
    obj = os.path.join(subdir, "model.obj")
    with zipfile.open(obj) as fp:
        return trimesh.load(fp, file_type="obj", resolver=resolver)
def unpack(path):
    if tarfile.is_tarfile(path):
        archive = tarfile.open(path)
    elif zipfile.is_zipfile(path):
        archive = zipfile.ZipFile(path)
    else:
        raise TypeError('Unknown file-type: {0}'.format(path))
    tempdir = tempfile.mkdtemp()
    archive.extractall(tempdir)
    return os.path.join(tempdir, os.listdir(tempdir)[0])
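# Usage sketch (assumption, not part of the original snippet): unpack() returns
# the first entry inside a fresh temporary directory; 'release.tar.gz' is a
# hypothetical archive path.
#
#     extracted_root = unpack('release.tar.gz')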
def unzip(url, output_file):
    """ Get a zip file content from a link and unzip """
    content = requests.get(url)
    archive = ZipFile(BytesIO(content.content))
    # read the first archive member; ZipFile.open() yields bytes
    output_content = archive.open(archive.namelist()[0]).read()
    with open(output_file, "wb") as output:
        output.write(output_content)
def load_data(path="trainingandtestdata.zip", nb_words=None, skip_top=0, maxlen=None, test_split=0.2, seed=113, start_char=1, oov_char=2, index_from=3): path = get_file(path, origin="http://cs.stanford.edu/people/alecmgo/trainingandtestdata.zip") if path.endswith(".gz"): f = gzip.open(path, 'rb') elif path.endswith(".zip"): f = zipfile.open(path, 'rb') else: f = open(path, 'rb')
def extract_from_zip_and_write(name_in_zip, path, name_on_disk):
    try:
        bron = zipfile.open(name_in_zip.encode('cp437'))  # zip uses cp437 encoding
    except KeyError:
        failed_files.append(name_in_zip)
        return False
    doel = open(str(path / name_on_disk), "wb")
    with bron, doel:
        shutil.copyfileobj(bron, doel)
    return True
def open_file(input):
    """Returns opened input file"""
    # checks to see if the file extension is gzip/zip
    extension = check_ext(input)
    # print extension
    if extension == '.gz':
        datafile = gzip.open(input, 'rb')
    elif extension == '.zip':
        # open the zip archive and return a handle on its first member
        archive = zipfile.ZipFile(input, 'r')
        datafile = archive.open(archive.namelist()[0])
    else:
        datafile = open(input, 'rb')
    return datafile
def _write_data_to_folder(zipfile, labels, folder, isTrainingset):
    print("Writing data to folder\n")
    for i, (entry, label) in enumerate(zip(zipfile.infolist()[1:], labels)):
        with zipfile.open(entry) as file:
            img = Image.open(file)
            dest = os.path.join(folder, str(label))
            make_folder_if_not_exists(dest)
            if isTrainingset == 1:
                img.save(os.path.join(dest, str(i) + '.png'))
            else:
                img.save(os.path.join(dest, str(i) + '.jpg'))
def extract_zip(self, path):
    """ Extract contents of zip file """
    fileMembers = None
    try:
        if zipfile.is_zipfile(path):
            zipArchive = zipfile.ZipFile(path)
            fileMembers = zipArchive.namelist()
            zipArchive.extractall(self.tempDownloadFolder)
            zipArchive.close()
    except zipfile.BadZipFile:
        print("Error while reading zip archive")  # TODO: translate
    return fileMembers
def unpack(src_path, dst_path, filename=""):
    dst_path = dst_path + ("" if len(filename) == 0 else "/" + filename)
    if ".zip" in src_path:
        with zipfile.ZipFile(src_path, "r") as archive:
            archive.extractall(dst_path)
    elif any(ext in src_path for ext in (".tgz", ".bz2", ".tar.gz", ".tbz2", ".tar.bz2", ".tar")):
        with tarfile.open(src_path, "r:*") as archive:
            archive.extractall(dst_path)
    else:
        raise Exception("File format not supported")
def unpack(filepath, target_dir):
    """Unpack the file to the target_dir."""
    print("Unpacking %s ..." % filepath)
    if filepath.endswith('.zip'):
        zip = zipfile.ZipFile(filepath, 'r')
        zip.extractall(target_dir)
        zip.close()
    elif filepath.endswith('.tar') or filepath.endswith('.tar.gz'):
        tar = tarfile.open(filepath)
        tar.extractall(target_dir)
        tar.close()
    else:
        raise ValueError("File format is not supported for unpacking.")
def unpack_package(filepath: 'Path', target_dir: 'Path'):
    """Unpack the file to the target_dir.

    :param filepath: the path of given file
    :param target_dir: the path of target folder
    """
    if filepath.suffix == '.zip':
        with zipfile.ZipFile(filepath, 'r') as zip:
            zip.extractall(target_dir)
    elif filepath.suffix in ['.tar', '.gz']:
        with tarfile.open(filepath) as tar:
            tar.extractall(target_dir)
    else:
        raise ValueError('File format is not supported for unpacking.')
def validate_project(data):
    if max_size is not None and data.size > max_size:
        params = {
            'max_size': filesizeformat(max_size),
            'size': filesizeformat(data.size),
        }
        raise ValidationError(error_messages['max_size'], 'max_size', params)

    try:
        print("Validating file {}".format(data.path))
        mzip = zipfile.ZipFile(data.path)

        # Retrieve metadata from the json file.
        if 'meta.json' not in mzip.namelist():
            print("No metadata")
            raise Exception()
        with mzip.open('meta.json') as file:
            meta = json.load(file)
        if not isinstance(meta['title'], str):
            print("No title")
            raise Exception()
        if not isinstance(meta['description'], str):
            print("No description")
            raise Exception()
        figurelist = meta['figures']

        # Walk over all figures and the captured image and verify them.
        figurelist.append('image.jpg')
        for image in figurelist:
            with mzip.open(image) as image_file:
                im = Image.open(image_file)
                try:
                    # verify() raises an exception if the image data is broken
                    im.verify()
                except Exception:
                    print("PIL says image is invalid")
                    raise
                im.close()
    except Exception:
        raise ValidationError(error_messages['invalid'], 'invalid')
def run(self): v_logger(h.LOG_MODE.INFO, 'Parsing IDEs..') ide_settings = h.parse_json(self.config_location) custom_base = custom['custom_base'] install_base = custom['install_base'] for ide in ide_settings['ides']: # get application locations v_logger(h.LOG_MODE.INFO, 'Parsing {} config.'.format(ide['name'])) archive_location = custom_base if ide['archive_location'].startswith('~') else '' + arch['archive_location'] install_location = install_base if ide['install_location'].startswith('~') else '' + ide['install_location'] # get application config locations config_origin = ide['config_origin'] if 'config_origin' in ide.keys() else None config_save = ide['config_save'] if 'config_save' in ide.keys() else None # check how to install it archivetype = h.filetype(archive_location) v_logger(h.LOG_MODE.INFO, '{}: Extracting {} to {}..'.format(ide['name'], archive_location, install_location)) if archivetype in FILETYPES.TGZ + FILETYPES.TXZ: c_logger(Sn(stringify=lambda: ''' gzipped = tarfile.open(archive_location) gzipped.extractall(path=install_location) gzipped.close() '''.strip(' '), run=lambda: ( gzipped = tarfile.open(archive_location) gzipped.extractall(path=install_location) gzipped.close() ))) gzipped = tarfile.open(archive_location) gzipped.extractall(path=install_location) gzipped.close() elif archivetype in FILETYPES.ZIP: c_logger(Sn(stringify=lambda: , run=lambda: )) zipped = zipfile.open(archive_location) zipped.extractall(path=install_location) zipped.close() elif archivetype in FILETYPES.DEB: c_logger(Sn(stringify=lambda: ' '.join(['dpkg', '-i', archive_location]), run=lambda: Popen(['dpkg', '-i', archive_location], stdout=PIPE).wait()), 0) # config copying/installation v_logger(h.LOG_MODE.INFO, '{}: Copying config files'.format(ide['name'])) if os.path.exists(config_origin) and config_save: c_logger(Sn(stringify=lambda: 'shutil.copytree(config_origin, config_save)', run=lambda: shutil.copytree(config_origin, config_save)))
def __init__(self, zipfile: zipfile.ZipFile, entry: zipfile.ZipInfo, compiler: Compiler) -> None:
    self.reader = zipfile.open(entry, "r")
    self.entry = entry
    self.compiler = compiler
    if isinstance(zipfile.filename, str):
        origin = os.path.join(zipfile.filename, entry.filename)
        has_location = True
    else:
        origin = f"<zip entry '{entry.filename}'>"
        has_location = False
    self.spec = ModuleSpec("__main__", ZIPLoader("__main__", zipfile, entry),
                           origin=origin, is_package=False)
    self.spec.has_location = has_location
def read_packs(zipfile):
    all_packs = collections.defaultdict(list)
    packs_by_gameid = collections.defaultdict(
        lambda: collections.defaultdict(set))
    for info in zipfile.infolist():
        packname = info.filename.split('/', 1)[0]
        all_packs[packname].append(info)
        if info.filename.endswith('/rules.txt') and info.filename.count('/') == 1:
            with zipfile.open(info) as f:
                pack_game_ids, res = _parse_rules_txt(f)
            if pack_game_ids is not None:
                for _id in pack_game_ids:
                    packs_by_gameid[_id][res].add(packname)
    logger.debug('%d packs found in zip', len(all_packs))
    return packs_by_gameid, all_packs
def read_archive(input_archive, archive_type, mode, sample2cat, input_dir, input_names): """read archive""" if not os.path.exists(input_dir): os.mkdir(input_dir) if archive_type == "zip" or archive_type == "tar.gz": if archive_type == "zip": archive = zipfile.open(input_archive) namelist = archive.namelist() if archive_type == "tar.gz": archive = tarfile.open(input_archive, "r") namelist = archive.getnames() sample_file_names, sample_names = get_sample_names(namelist) for tf, sfn in zip(namelist, sample_file_names): extracted = archive.extractfile(tf) with open("%s/%s" % (input_dir, sfn), 'w') as sample_file_out: for line in extracted: sample_file_out.write(line) extracted.close() elif archive_type == "directory": sample_names = input_names.split(',') sample_file_names = [] for input_part in input_archive.split(','): input_dir_part=os.path.basename(input_part) sample_file_names.append(input_dir_part) os.symlink(input_part, input_dir+"/"+input_dir_part) #create sample table if sample2cat is not None: sample_cat = pd.read_csv(sample2cat, index_col = 0, sep = "\t") #replace index with cleaned file names if archive_type != "directory": sample_cat.index.rename(str, dict([(tf, sfn) for sfn, tf in zip(sample_file_names, namelist)])) sample_table = pd.DataFrame(sample_names) categories = pd.Series(sample_cat.loc[sample_file_names, ]['category'].tolist()) else: sample_table = pd.DataFrame(sample_file_names) categories = pd.Series(sample_cat.loc[sample_names, ]['category'].tolist()) sample_table['category'] = categories sample_table.columns = ["sample_file_name", "category"] else: sample_table = pd.DataFrame(sample_file_names) sample_table.columns = ["sample_file_name"] sample_table.index = sample_names sample_table.index.name = "sample_name" sample_table.to_csv("%s/sample_table.txt" % input_dir, sep = "\t")
def read(self):
    """
    A generator that reads the in memory zip files, opens them and
    returns the content, one file at a time.
    """
    file_content = {}
    with self.zipfile as zipfile:
        for files in zipfile.namelist():
            if files == 'serverID.txt':
                continue
            self.file_name = files.replace('.json.txt', '')
            file_pointer = zipfile.open(files)
            file_bytes = file_pointer.read()
            # Could be a more useful class if it didn't assume that
            # content of txt files are JSON
            file_content = json.loads(file_bytes.decode('utf-8'))
            yield file_content
def archiveextract(resource, path='.'):
    """
    extracts archives (tar/zip)

    :param resource: list/str of archive files (if netCDF files are in list,
                     they are passed and returned as well in the return)
    :param path: define a directory to store the results (default='.')

    :return list: [list of extracted files]
    """
    from tarfile import open
    import zipfile
    from os.path import basename, join

    files = []
    try:
        if isinstance(resource, six.string_types):
            resource = [resource]
        for archive in resource:
            try:
                LOGGER.debug("archive=%s", archive)
                # if mime_type == 'application/x-netcdf':
                if basename(archive).split('.')[-1] == 'nc':
                    files.append(join(path, archive))
                # elif mime_type == 'application/x-tar':
                elif basename(archive).split('.')[-1] == 'tar':
                    tar = open(archive, mode='r')
                    tar.extractall()
                    files.extend([join(path, nc) for nc in tar.getnames()])
                    tar.close()
                # elif mime_type == 'application/zip':
                elif basename(archive).split('.')[-1] == 'zip':
                    zf = zipfile.ZipFile(archive, mode='r')
                    zf.extractall()
                    files.extend([join(path, nc) for nc in zf.namelist()])
                    zf.close()
                else:
                    LOGGER.warn('file extension unknown')
            except Exception as e:
                LOGGER.exception('failed to extract sub archive')
    except Exception as e:
        LOGGER.exception('failed to extract archive resource')
    return files
def stats_parse(bucketDictionary, zipfile, stats_file, filename, progress_queue): try: data = zipfile.open(stats_file, 'rU') except KeyError: logging.error('Error: Cannot find ns_server.stats.log in' + stats_file + \ 'See stats_parse(bucketDictionary, zipfile, stats_file, filename).') os._exit(1) else: current_bytes = 0 endsize = watched_stream_getendsize(zipfile, stats_file, filename) update_progress_so_far(progress_queue, endsize, current_bytes) data = TextIOWrapper(data) bucket = None statsDictionary = dict() byte_count = 0 t = time.clock() for line in data: byte_count += len(line) line = line.rstrip() if line != "": (possibleBucket, epoch) = isStatsForBucket(line) if epoch != 0: bucket = possibleBucket statsDictionary = dict() statsDictionary['localtime'] = epoch # check if have previous stats for this bucket if bucket not in bucketDictionary.keys(): bucketDictionary[bucket] = [] else: # Add to statsDictionary stats_kv(line, statsDictionary) else: # reached an empty line current_bytes = current_bytes + byte_count update_progress_so_far(progress_queue, endsize, current_bytes) byte_count = 0 if bucket: bucketDictionary.get(bucket).append(statsDictionary) bucket = None process_time = time.clock() - t update_progress_so_far(progress_queue, endsize, endsize) print "{}: Processing of ns_server.stats took {} seconds".format(filename, process_time)
def importzip(conn, filename, zipfile):
    print 'Import ' + filename
    files = filelist(zipfile)
    cur = conn.cursor()
    meta = metadata(zipfile.read(files['OPERDAY']))
    if datetime.strptime(meta['ValidThru'].replace('-', ''), '%Y%m%d') < (datetime.now() - timedelta(days=1)):
        return meta
    header = (zipfile.read(files['DEST']).split('\n')[0].split('|')[1] in versionheaders)
    encoding = encodingof(meta['DataOwnerCode'])
    for table in importorder:
        if table in files:
            f = zipfile.open(files[table])
            table = table + '_delta'
            if header:
                cur.copy_expert("COPY %s FROM STDIN WITH DELIMITER AS '|' NULL AS '' CSV HEADER ENCODING '%s'" % (table, encoding), f)
            else:
                cur.copy_expert("COPY %s FROM STDIN WITH DELIMITER AS '|' NULL AS '' CSV ENCODING '%s'" % (table, encoding), f)
    conn.commit()
    cur.close()
    return meta
def importzip(conn, zipfile):
    files = filelist(zipfile)
    cur = conn.cursor()
    if 'OPERDAY' in files:
        meta = metadata(zipfile.read(files['OPERDAY']))
    elif 'PUJO' in files:
        meta = metadata(zipfile.read(files['PUJO']))
    else:
        raise Exception('OPERDAY missing')
    header = (zipfile.read(files['DEST']).split('\r\n')[0].split('|')[1] in versionheaders)
    encoding = encodingof(meta['dataownercode'])
    del(meta['dataownercode'])
    for table in importorder:
        if table in files:
            f = zipfile.open(files[table])
            if header:
                cur.copy_expert("COPY %s FROM STDIN WITH DELIMITER AS '|' NULL AS '' CSV HEADER ENCODING '%s'" % (table, encoding), f)
            else:
                cur.copy_expert("COPY %s FROM STDIN WITH DELIMITER AS '|' NULL AS '' CSV ENCODING '%s'" % (table, encoding), f)
    cur.close()
    return meta
def get_files(self):
    filepath = self.get_filepath()
    if not os.path.exists(filepath):
        yield None
        return
    fn, extension = os.path.splitext(filepath)
    if tarfile.is_tarfile(filepath):
        f = tarfile.open(filepath, 'r')
        finf = f.next()
        while finf:
            yield f.extractfile(finf.name)
            finf = f.next()
    elif zipfile.is_zipfile(filepath):
        f = zipfile.ZipFile(filepath)
        for fname in f.namelist():
            yield f.open(fname)
    elif extension == ".gz":
        f = gzip.open(filepath, 'rb')
        yield f
    else:
        yield open(filepath, 'rb')
def stats_parse_ns_doctor(bucketDictionary, zipfile, stats_file, filename, progress_queue): try: data = zipfile.open(stats_file, 'rU') except KeyError: logging.error('Error: Cannot find ns_server.stats.log in' + stats_file + \ 'See stats_parse(bucketDictionary, zipfile, stats_file, filename).') os._exit(1) else: current_bytes = 0 scale = 4 # ns_doctor is slow, so scale the progress endsize = watched_stream_getendsize(zipfile, stats_file, filename) * scale update_progress_so_far(progress_queue, endsize, current_bytes) data = TextIOWrapper(data) t = time.clock() for line in data: (node, epoch) = isNsDoctorStats(line) if epoch != 0: statsDictionary = dict() statsDictionary['localtime'] = epoch (doctor_stats, bytes_read) = processNsDoctorStats(node, data, epoch) # doctor_stats contains stats for each thing, the key being a thing like xdcr # e.g. xdcr_XDCRName = {'latency':100, 'docs_sent':22} for key in doctor_stats: if key not in bucketDictionary.keys(): bucketDictionary[key] = [] doctor_stats[key]['localtime'] = epoch bucketDictionary.get(key).append(doctor_stats[key]) current_bytes = current_bytes + (bytes_read * scale) update_progress_so_far(progress_queue, endsize, current_bytes) update_progress_so_far(progress_queue, endsize, endsize) process_time = time.clock() - t print "{}: Processing of ns_server.stats (ns_doctor) took {} seconds".format(filename, process_time)
def unpack_zipped_image(self, uri): return self.unpack_image(zipfile.open(uri), uri)
def make_zip_file(self, zipfile_path):
    zip_file = zipfile.ZipFile(zipfile_path, "w")
    self.write_to_zip(zip_file)
    zip_file.close()
def main(win, archive, dir=''):
    win.erase()
    try:
        dir_list = tar_file_list(archive, dir)
    except ValueError:
        dir_list = ["Not a valid directory"]
    x = 0
    for item in dir_list:
        x += 1
        win.move(x, 6)
        win.addstr(str(item))
    y = 1
    win.move(y, 1)
    win.addstr("-->")
    win.refresh()
    user_interaction(win, y, dir_list, archive, dir)


### GUI ####
if __name__ == '__main__':
    script, file = sys.argv
    archive = determine_filetype(file)
    if archive == "is_tar":
        with tarfile.open(file) as f:
            curses.wrapper(main, f)
    elif archive == "is_zip":
        with zipfile.ZipFile(file) as f:
            curses.wrapper(main, f)
def run_scans(target): print("creating directories needed for the scanners to work") """ Checks to see if the paths we need are there If they are not, it creates them (the archive path is meant for destruction, so it is assumed that it should not be there unless something went wrong) """ if not os.path.isdir(paths.SCANNER_OUTPUT_PATH): subprocess.call(["mkdir", paths.SCANNER_OUTPUT_PATH]) if not os.path.isdir(paths.TEMP_ARCHIVE_UNPACK_PATH): subprocess.call(["mkdir", paths.TEMP_ARCHIVE_UNPACK_PATH]) else: subprocess.call(["rm", paths.TEMP_ARCHIVE_UNPACK_PATH + "/*.*"]) # To ensure the absolute path is not a part of the output file name out_name = get_file_from_absolute_path(target) ninka_out = paths.SCANNER_OUTPUT_PATH + "/" ninka_out += out_name + ".N_out.txt" check_file(ninka_out) foss_out = paths.SCANNER_OUTPUT_PATH + "/" foss_out += out_name + ".F_out.txt" check_file(foss_out) print("checking file format") if tarfile.is_tarfile(target): print(target + " identified as TAR file") archive = tarfile.open(target) print("extracting data") archive.extractall(paths.TEMP_ARCHIVE_UNPACK_PATH) print("starting ninka scan") archive_scan(archive, "n", ninka_out) print("ninka scan finished") print("starting fossology scan") archive_scan(archive, "f", foss_out) print("fossology scan finished") archive.close() # This is the same as the tarfile method but with zipfile elif zipfile.is_zipfile(target): print(target + " identified as ZIP file") archive = zipfile.open(target) print("extracting data") archive.extractall(paths.TEMP_ARCHIVE_UNPACK_PATH) print("starting ninka scan") archive.scan(archive, "n", ninka_out) print("ninka scan finished") print("starting fossology scan") archive.scan(archive, "f", foss_out) print("fossology scan finished") archive.close() else: # assumes a single file print("file is either not an archive or an unrecognized format") path = paths.TEMP_ARCHIVE_UNPACK_PATH + "/" + target subprocess.call(["cp", target, paths.TEMP_ARCHIVE_UNPACK_PATH]) # first with ninka n_file = open(ninka_out, "w") print("starting ninka scan") n_file.write(ninka_scan(path)) print("ninka scan fnished") n_file.close() # next with fossology f_file = open(foss_out, "w") print("starting fossology scan") f_file.write(foss_scan(path)) print("fossology scan finished") f_file.close() # clean() """
def declare_build_external( self, name, prefix = None, # <name>-build-ext tmp_dir= None, # <prefix>/tmp stamp_dir= None, # <prefix>/<name>-stamp download_dir= None, # <prefix>/src source_dir= None, # <prefix>/src/<name> build_dir= None, # <prefix>/src/<name>-build install_dir= None, # env.PREFIX # --- switches --- build_in_source = False, # --- input location(s) --- url = None, # ex: file:///foo or http://bar url_md5 = None, svn_repository = None, svn_revision = None, cvs_repository = None, cvs_revision = None, # --- build steps --- patch_cmd = None, # ex: 'patch -p0 foo.patch' configure_cmd = None, # ex: 'configure --prefix=${PREFIX}' build_cmd = None, # ex: 'make' install_cmd = None, # ex: 'make install DESTDIR=${BUILD_INSTALL_AREA} # --- build environment --- env = None, os_env_keys = None, shell = False, ): # set defaults if prefix is None: prefix = self.bldnode.make_node('%s-build-ext' % name) else: prefix = self.bldnode.make_node(prefix) if tmp_dir is None: tmp_dir = prefix.make_node('tmp') else: tmp_dir = self.bldnode.make_node(tmp_dir) if stamp_dir is None: stamp_dir = prefix.make_node('%s-stamp' % name) else: stamp_dir = self.bldnode.make_node(stamp_dir) if download_dir is None: download_dir = prefix.make_node('src') else: download_dir = self.bldnode.make_node(download_dir) if source_dir is None: source_dir = prefix.make_node(name) else: source_dir = self.bldnode.make_node(source_dir) if build_dir is None: if build_in_source: build_dir = source_dir else: build_dir = prefix.make_node('%s-build' % name) else: build_dir = self.bldnode.make_node(build_dir) if install_dir is None: install_dir = prefix.make_node('%s-install' % name) #install_dir = self.env.INSTALL_AREA else: install_dir = prefix.make_node(install_dir) #install_dir = self.root.make_node(install_dir) if os_env_keys is None: os_env_keys = [] os_env_keys += self.env.HEPWAF_RUNTIME_ENVVARS[:] for d in (tmp_dir, stamp_dir, download_dir, source_dir, build_dir, install_dir, ): d.mkdir() pass if (url is None and svn_repository is None and cvs_repository is None): self.fatal('You need to give "declare_build_external" an url') pass ## stamp files unpack_stamp = stamp_dir.make_node('000-unpack.stamp') patch_stamp = stamp_dir.make_node('001-patch.stamp') configure_stamp = stamp_dir.make_node('002-configure.stamp') make_stamp = stamp_dir.make_node('003-make.stamp') make_install_stamp = stamp_dir.make_node('004-make_install.stamp') ## log-files unpack_log = tmp_dir.make_node('000-unpack.log') patch_log = tmp_dir.make_node('001-patch.log') configure_log = tmp_dir.make_node('002-configure.log') make_log = tmp_dir.make_node('003-make.log') make_install_log = tmp_dir.make_node('004-make_install.log') ## env... self.env['BUNDLED_%s_ROOT'%name.upper()] = install_dir.abspath() if env is None: env = waffle_utils._get_env_for_subproc(self, os_env_keys) else: # do not modify user's env... env = dict(env) senv = waffle_utils._get_env_for_subproc(self, os_env_keys) for k in ('CXXFLAGS', 'CCFLAGS', 'CFLAGS', 'CPPFLAGS', 'LINKFLAGS', 'CC', 'CXX', ): env[k] = senv[k] pass pass for k in env.keys(): if not k in os_env_keys: del env[k] env['BUNDLED_%s_ROOT'%name.upper()] = install_dir.abspath() for k,v in env.iteritems(): if not isinstance(v, str): raise ValueError("invalid env.var: ${%s} = %r" % (k,v)) ## retrieve the sources... pkg_src = None if not (url is None): url = Utils.subst_vars(url, self.env) pkg_src = download_dir.make_node(os.path.basename(url)) if not os.path.exists(pkg_src.abspath()): Logs.info("[%s] retrieving sources..." 
% name) self.download_archive(src=url, dst=pkg_src.abspath()) if url_md5: import hashlib hasher = hashlib.md5() hasher.update(pkg_src.read('rb')) md5 = hasher.hexdigest() if url_md5 != md5: self.fatal("[%s] invalid MD5 checksum:\nref: %s\nnew: %s" % (name, url_md5, md5)) pass ## unpack the sources... # find the correct unpacker... if not os.path.exists(unpack_stamp.abspath()): Logs.info('[%s] unpacking...' % name) unpack_dir = tmp_dir.make_node("unpack-dir") unpack_dir.mkdir() import tarfile import zipfile if tarfile.is_tarfile(pkg_src.abspath()): o = tarfile.open(pkg_src.abspath()) o.extractall(unpack_dir.abspath()) o.close() elif zipfile.is_zipfile(pkg_src.abspath()): o = zipfile.open(pkg_src.abspath()) o.extractall(unpack_dir.abspath()) o.close() else: Logs.info('[%s] file [%s] is not a recognized archive format' % (name, pkg_src.abspath())) pass unpack_content = unpack_dir.ant_glob("*", dir=True) import shutil shutil.rmtree(path=source_dir.abspath(), ignore_errors=True) if (len(unpack_content) == 1 and os.path.isdir(unpack_content[0].abspath())): shutil.move(src=unpack_content[0].abspath(), dst=source_dir.abspath()) else: shutil.move(src=unpack_dir.abspath(), dst=source_dir.abspath()) shutil.rmtree(path=unpack_dir.abspath(), ignore_errors=True) unpack_stamp.write('') def _get_cmd(cmd): if isinstance(cmd, str): cmd = shlex.split(cmd) return [Utils.subst_vars(c, self.env) for c in cmd] ## real build... if patch_cmd: cmd = _get_cmd(patch_cmd) if not os.path.exists(patch_stamp.abspath()): cwd=build_dir.abspath() Logs.info('[%s] patching...' % name) fout = open(patch_log.abspath(), 'w') fout.write('++ cd %s\n' % cwd) fout.write('++ %s\n' % cmd) fout.flush() sc = subprocess.call( Utils.to_list(cmd), env=env, stdout=fout, stderr=fout, cwd=cwd ) if sc != 0: self.fatal("failed to patch [%s]\nlook into [%s]" % (name, fout.name)) patch_stamp.write('') if not os.path.exists(configure_stamp.abspath()): Logs.info('[%s] configuring...' % name) cmd = _get_cmd(configure_cmd) cwd=build_dir.abspath() fout = open(configure_log.abspath(), 'w') fout.write('++ cd %s\n' % cwd) fout.write('++ %s\n' % cmd) fout.flush() if isinstance(cmd, str): cmd = Utils.to_list(cmd) pass sc = subprocess.call( cmd, env=env, stdout=fout, stderr=fout, cwd=cwd ) if sc != 0: self.fatal("failed to configure [%s]\nlook into [%s]" % (name, fout.name)) configure_stamp.write('') if not os.path.exists(make_stamp.abspath()): Logs.info('[%s] building...' % name) cmd = _get_cmd(build_cmd) cwd=build_dir.abspath() fout = open(make_log.abspath(), 'w') fout.write('++ cd %s\n' % cwd) fout.write('++ %s\n' % cmd) fout.flush() sc = subprocess.call( Utils.to_list(cmd), env=env, stdout=fout, stderr=fout, cwd=cwd, shell=shell ) if sc != 0: self.fatal("failed to build [%s]\nlook into [%s]" % (name, fout.name)) make_stamp.write('') if not os.path.exists(make_install_stamp.abspath()): Logs.info('[%s] installing...' % name) cmd = _get_cmd(install_cmd) cwd=build_dir.abspath() fout = open(make_install_log.abspath(), 'w') fout.write('++ cd %s\n' % cwd) fout.write('++ %s\n' % cmd) fout.flush() sc = subprocess.call( Utils.to_list(cmd), env=env, stdout=fout, stderr=fout, cwd=cwd ) if sc != 0: self.fatal("failed to install [%s]\nlook into [%s]" % (name, fout.name)) make_install_stamp.write('') # create signatures for all nodes under ${BUNDLED_<name>_ROOT} outputs = install_dir.ant_glob('**/*') for o in outputs: #print("-- %s" % o.abspath()) o.sig = Utils.h_file(o.abspath()) return
def do_folder(folder, path): #print "DEBUG: entering do_folder(). old path=", path title = removeDisallowedFilenameChars(unicode(folder[0].text)) new_path = path / title # add subfolder to path if not new_path.exists(): if (verbose): print 'creating directory: ', str(new_path) new_path.mkdir() # create directory else: if (verbose): print 'chdir into existing directory:', str(new_path) new_path.resolve() # change dir files = folder[1:] # files is list of files and subfolders in this folder for f in files: # is this file a folder? # if it is the identifier contains '_folder_' id = f.get('identifier') if '_folder_' in id: # item is subfolder! branch into subfolder = f.getchildren() do_folder(subfolder,new_path) if '_folderfile_' in id: # item is file. Extract # identifiers zien er zo uit: 'I_rYTieTdHa_folderfile_42508' # we hebben alleen het getal nodig idval = id.split('_folderfile_')[1] bestandsnaam = removeDisallowedFilenameChars(unicode(resdict[idval].split('/')[1])) if (verbose): print 'extracting file: ',bestandsnaam extract_from_zip_and_write(resdict[idval], new_path, bestandsnaam) if '_weblink_' in id: # item is weblink. Extract idval = id.split('_weblink_')[1] url = resdict[idval] # get url from resource dict title = f[0].text # get title from <items> bestandsnaam = removeDisallowedFilenameChars(unicode(title+'.url')) if (verbose): print 'extracting weblink: ',bestandsnaam # .url file just a txt file with [Internet Shortcut]. Clickable in windows try: doel = open(str(new_path / bestandsnaam), "wb") doel.write('[InternetShortcut]\nURL=') doel.write(url) doel.write('\n') doel.close() except IOError: print "Cannot create:", str(new_path / bestandsnaam) failed_files.append(str(new_path/ bestandsnaam)) if '_note_' in id: # item is note. Extract html contents idval = id.split('_note_')[1] title = f[0].text # get title from <items> bestandsnaam = removeDisallowedFilenameChars(unicode(title+'.html')) if (verbose): print 'extracting note: ',bestandsnaam extract_from_zip_and_write(resdict[idval], new_path, bestandsnaam) if '_picture_' in id: # item is image. Extract idval = id.split('_picture_')[1] bestandsnaam = resdict[idval][1].split('/')[1] folder_in_zip = resdict[idval][0].split('/')[0] if (verbose): print 'extracting image: ',bestandsnaam # The correct imagefile is NOT in the <rescources> dict. # Images are renamed and an .html container is used # get .html and recover imagefilename (sigh!) htmlfile = zipfile.open(resdict[idval][0]) lines = htmlfile.readlines() for line in lines: x = line.find('src=') if (x != -1): imagefilename = line[x:x+20].split('\'')[1] print "reconstructed imagefilename (in zip): ", imagefilename bestandsnaam_in_zip = folder_in_zip + '/' + imagefilename extract_from_zip_and_write(bestandsnaam_in_zip, new_path, bestandsnaam)
else:
    local_box['updated_at'] = provider_info['updated_at']
    local_box['last_check'] = last_check

if local_updated_at == remote_updated_at and os.path.isdir(os.path.join(args.destination, args.box_name, args.provider)):
    actually_print("You already have the latest '{}' for provider '{}' in directory '{}'".format(args.box_name, args.provider, args.destination))
    local_box_info.save_box(local_box)
    sys.exit(0)

actually_print("Downloading latest version of '{}' for provider '{}'...".format(args.box_name, args.provider))
urlretrieve(provider_info['download_url'], './tmp.box')

actually_print('Extracting box...')
if zipfile.is_zipfile('./tmp.box'):
    archive = zipfile.ZipFile('./tmp.box')
elif tarfile.is_tarfile('./tmp.box'):
    archive = tarfile.open('./tmp.box')
else:
    actually_print('ERROR: Unknown archive type')
    sys.exit(1)
archive.extractall(os.path.join(args.destination, args.box_name, args.provider))
archive.close()
os.remove('./tmp.box')
local_box_info.save_box(local_box)
actually_print('Finished')