def output_archive(self, imagefile=None, tarname=None, zipname=None):
    """Write the changed and/or new files to a tarfile or a ZIP file."""
    import zipfile, tarfile, StringIO, datetime
    tfile = None
    zfile = None

    to_archive = self.new_files.copy()
    to_archive = to_archive.union(
        set([val[1] for val in self.changed_content]))
    to_archive = to_archive.union(
        set([val[1] for val in self.changed_properties]))

    # Make sure we are just writing out inodes that have file contents
    to_archive = filter(
        lambda fi: fi.allocated() and fi.has_tag("inode") and
        fi.has_contents() and (fi.name_type() == '' or fi.name_type() == 'r'),
        to_archive)

    if len(to_archive) == 0:
        print("No archive created, as no allocated files created or modified")
        return

    if tarname:
        print(">>> Creating tar file: %s" % tarname)
        tfile = tarfile.TarFile(tarname, mode="w")
    if zipname:
        print(">>> Creating zip file: %s" % zipname)
        zfile = zipfile.ZipFile(zipname, mode="w", allowZip64=True)

    files_written = set()
    content_error_log = []
    for fi in to_archive:
        # Disambiguate duplicate names by appending a counter
        filename = fi.filename()
        fncount = 1
        while filename in files_written:
            filename = "%s.%d" % (fi.filename(), fncount)
            fncount += 1
        files_written.add(filename)

        contents = None
        try:
            contents = fi.contents(imagefile)
        except ValueError as ve:
            if ve.message.startswith("icat error"):
                # Some files cannot be recovered, even from images that do not
                # seem corrupted; log the icat command that failed.
                content_error_log.append(ve.message)
            else:
                # This is a more interesting error, so have process die to
                # report immediately.
                raise

        if contents:
            if tfile:
                info = tarfile.TarInfo(name=filename)
                info.mtime = fi.mtime()
                info.atime = fi.atime()
                info.ctime = fi.ctime()
                info.uid = fi.uid()
                info.gid = fi.gid()
                info.size = fi.filesize()
                # addfile requires a 'file', so let's make one
                string = StringIO.StringIO()
                string.write(contents)
                string.seek(0)
                tfile.addfile(tarinfo=info, fileobj=string)
            if zfile:
                mtimestamp = fi.mtime().timestamp()
                info = zipfile.ZipInfo(filename)
                if mtimestamp:
                    # mtime might be null
                    info.date_time = datetime.datetime.fromtimestamp(
                        mtimestamp).utctimetuple()
                info.internal_attr = 1
                info.external_attr = 2175008768  # specifies mode 0644
                zfile.writestr(info, contents)

    if tfile:
        tfile.close()
    if zfile:
        zfile.close()
    if len(content_error_log) > 0:
        sys.stderr.write("Errors retrieving file contents:\n")
        sys.stderr.write("\n".join(content_error_log))
        sys.stderr.write("\n")
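# A minimal stand-alone sketch of the core pattern above: writing one in-memory
# payload into both a tar archive and a zip archive (Python 3 stdlib only).
# The payload, member name and output paths are illustrative, not taken from
# the function above.
import datetime
import io
import tarfile
import time
import zipfile

payload = b"recovered file contents"
member_name = "evidence/file1.bin"

with tarfile.open("out.tar", mode="w") as tf:
    info = tarfile.TarInfo(name=member_name)
    info.size = len(payload)        # addfile() copies exactly info.size bytes
    info.mtime = int(time.time())
    tf.addfile(info, io.BytesIO(payload))

with zipfile.ZipFile("out.zip", mode="w", allowZip64=True) as zf:
    zinfo = zipfile.ZipInfo(member_name)
    zinfo.date_time = datetime.datetime.now().timetuple()[:6]
    zinfo.external_attr = 0o100644 << 16  # regular file, mode 0644
    zf.writestr(zinfo, payload)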
def create_tar_info(name, size):
    info = tarfile.TarInfo(name)
    info.size = size
    info.mtime = time.time()
    return info
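# Hedged usage sketch for create_tar_info(): the helper only fills in the
# header, so the caller still has to supply a file object holding exactly
# `size` bytes. The archive name and payload below are made up.
import io
import tarfile
import time

payload = b"hello world\n"
with tarfile.open("example.tar", mode="w") as tar:
    tar.addfile(create_tar_info("hello.txt", len(payload)), io.BytesIO(payload))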
def build( self, client, image_spec, memory_limit, build_args, cache_from, extra_build_kwargs, safe_mode, ): tarf = io.BytesIO() tar = tarfile.open(fileobj=tarf, mode="w") dockerfile_tarinfo = tarfile.TarInfo("Dockerfile") dockerfile = self.render().encode("utf-8") dockerfile_tarinfo.size = len(dockerfile) tar.addfile(dockerfile_tarinfo, io.BytesIO(dockerfile)) def _filter_tar(tar): # We need to unset these for build_script_files we copy into tar # Otherwise they seem to vary each time, preventing effective use # of the cache! # https://github.com/docker/docker-py/pull/1582 is related tar.uname = "" tar.gname = "" tar.uid = int(build_args.get("NB_UID", 1000)) tar.gid = int(build_args.get("NB_UID", 1000)) return tar for src in sorted(self.get_build_script_files()): dest_path, src_path = self.generate_build_context_filename(src) tar.add(src_path, dest_path, filter=_filter_tar) tar.add(ENTRYPOINT_FILE, "repo2docker-entrypoint", filter=_filter_tar) tar.add(PACKAGE_JSON, "package.json", filter=_filter_tar) tar.add(".", "src/", filter=_filter_tar) tar.close() tarf.seek(0) # If you work on this bit of code check the corresponding code in # buildpacks/docker.py where it is duplicated if not isinstance(memory_limit, int): raise ValueError("The memory limit has to be specified as an" "integer but is '{}'".format(type(memory_limit))) limits = {} if memory_limit: # We want to always disable swap. Docker expects `memswap` to # be total allowable memory, *including* swap - while `memory` # points to non-swap memory. We set both values to the same so # we use no swap. limits = {"memory": memory_limit, "memswap": memory_limit} build_kwargs = dict( fileobj=tarf, tag=image_spec, custom_context=True, buildargs=build_args, decode=True, forcerm=True, rm=True, container_limits=limits, cache_from=cache_from, ) build_kwargs.update(extra_build_kwargs) for line in client.build(**build_kwargs): yield line
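# Minimal sketch of the in-memory build-context pattern used above: a generated
# Dockerfile string is added through TarInfo/BytesIO, and on-disk files are
# added with a filter that normalizes ownership so layer caching stays stable.
# The paths and UID/GID values are assumptions for illustration; the returned
# buffer is the kind of object a Docker client's build(fileobj=...,
# custom_context=True) style API consumes.
import io
import tarfile

def make_build_context(dockerfile_text, extra_files=()):
    context = io.BytesIO()
    with tarfile.open(fileobj=context, mode="w") as tar:
        data = dockerfile_text.encode("utf-8")
        info = tarfile.TarInfo("Dockerfile")
        info.size = len(data)
        tar.addfile(info, io.BytesIO(data))

        def _normalize(member):
            # Fixed owner fields keep the context tar reproducible
            member.uname = ""
            member.gname = ""
            member.uid = 1000
            member.gid = 1000
            return member

        for src_path, dest_path in extra_files:
            tar.add(src_path, dest_path, filter=_normalize)
    context.seek(0)
    return context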
def export(self, file_name):
    """Exports workflow for use on DTV."""
    exported = [u for u in self if hasattr(u, "export")]
    if len(exported) == 0:
        raise ValueError("No units support export. Implement export() "
                         "method in at least one.")
    obj = {
        "workflow": self.name,
        "checksum": self.checksum,
        "units": [{
            "class": {
                "name": unit.__class__.__name__,
                "uuid": unit.__class__.__id__
            },
            "data": unit.export()
        } for unit in exported]
    }
    for index, unit in enumerate(exported):
        obj["units"][index]["links"] = [
            exported.index(u) for u in sorted(unit.links_to.keys())
            if u in exported
        ]
    # TODO(v.markovtsev): check the resulting graph's connectivity
    # TODO(v.markovtsev): check for single entry and exit points
    import json
    arrays = []

    def array_file_name(arr, index):
        return "%04d_%s" % (index, "x".join(map(str, arr.shape)))

    def export_numpy_array(arr):
        if isinstance(arr, numpy.ndarray):
            arrays.append(arr)
            return array_file_name(arr, len(arrays) - 1)
        raise TypeError("Objects of class other than numpy.ndarray are "
                        "not supported")

    try:
        with tarfile.open(file_name, "w:gz") as tar:
            io = six.BytesIO()
            json.dump(obj, io, indent=4, sort_keys=True,
                      default=export_numpy_array)
            ti = tarfile.TarInfo("contents.json")
            ti.size = io.tell()
            ti.mode = int("666", 8)
            io.seek(0)
            tar.addfile(ti, fileobj=io)
            for index, arr in enumerate(arrays):
                io = six.BytesIO()
                numpy.save(io, arr)
                ti = tarfile.TarInfo(array_file_name(arr, index) + ".npy")
                ti.size = io.tell()
                ti.mode = int("666", 8)
                io.seek(0)
                tar.addfile(ti, fileobj=io)
    except:
        self.exception("Failed to export to %s", file_name)
def debug_download(self, job): """ Job to stream debug file. This method is meant to be used in conjuntion with `core.download` to get the debug downloaded via HTTP. """ job.set_progress(0, 'Generating debug file') debug_job = self.middleware.call_sync('system.debug') standby_debug = None is_freenas = self.middleware.call_sync('system.is_freenas') if not is_freenas and self.middleware.call_sync('failover.licensed'): try: standby_debug = self.middleware.call_sync( 'failover.call_remote', 'system.debug', [], {'job': True} ) except Exception: self.logger.warn('Failed to get debug from standby node', exc_info=True) else: remote_ip = self.middleware.call_sync('failover.remote_ip') url = self.middleware.call_sync( 'failover.call_remote', 'core.download', ['filesystem.get', [standby_debug], 'debug.txz'], )[1] url = f'http://{remote_ip}:6000{url}' standby_debug = io.BytesIO() with requests.get(url, stream=True) as r: for i in r.iter_content(chunk_size=1048576): if standby_debug.tell() > 20971520: raise CallError(f'Standby debug file is bigger than 20MiB.') standby_debug.write(i) debug_job.wait_sync() if debug_job.error: raise CallError(debug_job.error) job.set_progress(90, 'Preparing debug file for streaming') if standby_debug: # Debug file cannot be big on HA because we put both debugs in memory # so they can be downloaded at once. try: if os.stat(debug_job.result).st_size > 20971520: raise CallError(f'Debug file is bigger than 20MiB.') except FileNotFoundError: raise CallError('Debug file was not found, try again.') network = self.middleware.call_sync('network.configuration.config') node = self.middleware.call_sync('failover.node') tario = io.BytesIO() with tarfile.open(fileobj=tario, mode='w') as tar: if node == 'A': my_hostname = network['hostname'] remote_hostname = network['hostname_b'] else: my_hostname = network['hostname_b'] remote_hostname = network['hostname'] tar.add(debug_job.result, f'{my_hostname}.txz') tarinfo = tarfile.TarInfo(f'{remote_hostname}.txz') tarinfo.size = standby_debug.tell() standby_debug.seek(0) tar.addfile(tarinfo, fileobj=standby_debug) tario.seek(0) shutil.copyfileobj(tario, job.pipes.output.w) else: with open(debug_job.result, 'rb') as f: shutil.copyfileobj(f, job.pipes.output.w) job.pipes.output.w.close()
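# Stand-alone sketch of the in-memory tar streaming done above: two byte blobs
# are combined into one tar held in a BytesIO, then copied to an output file
# object with shutil.copyfileobj. Names are illustrative.
import io
import shutil
import tarfile

def stream_two_blobs_as_tar(blob_a, blob_b, out_fileobj):
    tario = io.BytesIO()
    with tarfile.open(fileobj=tario, mode="w") as tar:
        for name, blob in (("node_a.txz", blob_a), ("node_b.txz", blob_b)):
            info = tarfile.TarInfo(name)
            info.size = len(blob)
            tar.addfile(info, io.BytesIO(blob))
    tario.seek(0)
    shutil.copyfileobj(tario, out_fileobj)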
def write_tar(src_fs, file, compression=None, encoding="utf-8", walker=None):
    """
    Write the contents of a filesystem to a tar file.

    :param file: Destination file, may be a file name or an open file object.
    :type file: str or file-like.
    :param compression: Compression to use.
    :type compression: str
    :param encoding: The encoding to use for filenames. The default is
        ``"utf-8"``.
    :type encoding: str
    :param walker: A :class:`~fs.walk.Walker` instance, or None to use the
        default walker. You can use this to specify which files you want to
        compress.
    :type walker: Walker or None
    """
    type_map = {
        ResourceType.block_special_file: tarfile.BLKTYPE,
        ResourceType.character: tarfile.CHRTYPE,
        ResourceType.directory: tarfile.DIRTYPE,
        ResourceType.fifo: tarfile.FIFOTYPE,
        ResourceType.file: tarfile.REGTYPE,
        ResourceType.socket: tarfile.AREGTYPE,   # no type for socket
        ResourceType.symlink: tarfile.SYMTYPE,
        ResourceType.unknown: tarfile.AREGTYPE,  # no type for unknown
    }

    mode = 'w:{}'.format(compression or '')
    try:
        _tar = tarfile.open(fileobj=file, mode=mode)
    except (TypeError, AttributeError):
        _tar = tarfile.open(file, mode=mode)

    walker = walker or Walker()
    with _tar:
        gen_walk = walker.info(src_fs, namespaces=["details", "stat", "access"])
        for path, info in gen_walk:
            # Tar names must be relative
            tar_name = relpath(path)
            if not six.PY3:
                # Python2 expects bytes filenames
                tar_name = tar_name.encode(encoding, 'replace')

            tar_info = tarfile.TarInfo(tar_name)

            if info.has_namespace('stat'):
                mtime = info.get('stat', 'st_mtime', None) or time.time()
            else:
                mtime = info.modified or time.time()

            if isinstance(mtime, datetime):
                mtime = datetime_to_epoch(mtime)
            if isinstance(mtime, float):
                mtime = int(mtime)
            tar_info.mtime = mtime

            for tarattr, infoattr in {
                'uid': 'uid',
                'gid': 'gid',
                'uname': 'user',
                'gname': 'group'
            }.items():
                if getattr(info, infoattr) is not None:
                    setattr(tar_info, tarattr, getattr(info, infoattr))

            tar_info.mode = getattr(info.permissions, 'mode', 420)

            if info.is_dir:
                tar_info.type = tarfile.DIRTYPE
                _tar.addfile(tar_info)
            else:
                tar_info.type = type_map.get(info.type, tarfile.REGTYPE)
                tar_info.size = info.size
                with src_fs.openbin(path) as bin_file:
                    _tar.addfile(tar_info, bin_file)
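# Hedged usage sketch for write_tar(), assuming the pyfilesystem2 package
# (`fs`) that this helper is built on; MemoryFS and writebytes() are part of
# that library, but treat the exact calls as an assumption about its API.
import tarfile
from fs.memoryfs import MemoryFS

mem_fs = MemoryFS()
mem_fs.makedirs("docs")
mem_fs.writebytes("docs/readme.txt", b"hello from a memory filesystem\n")

with open("snapshot.tar.gz", "wb") as dest:
    write_tar(mem_fs, dest, compression="gz")

with tarfile.open("snapshot.tar.gz", "r:gz") as tar:
    print(tar.getnames())  # expect something like ['docs', 'docs/readme.txt']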
class UiRequestPlugin(object): def formatTableRow(self, row, class_name=""): back = [] for format, val in row: if val is None: formatted = "n/a" elif format == "since": if val: formatted = "%.0f" % (time.time() - val) else: formatted = "n/a" else: formatted = format % val back.append("<td>%s</td>" % formatted) return "<tr class='%s'>%s</tr>" % (class_name.encode("utf8"), "".join(back).encode("utf8")) def getObjSize(self, obj, hpy=None): if hpy: return float(hpy.iso(obj).domisize) / 1024 else: return 0 # /Stats entry point def actionStats(self): import gc import sys from Ui import UiRequest from Db import Db from Crypt import CryptConnection hpy = None if self.get.get("size") == "1": # Calc obj size try: import guppy hpy = guppy.hpy() except: pass self.sendHeader() if "Multiuser" in PluginManager.plugin_manager.plugin_names and not config.multiuser_local: yield "This function is disabled on this proxy" raise StopIteration s = time.time() main = sys.modules["main"] # Style yield """ <style> * { font-family: monospace } table td, table th { text-align: right; padding: 0px 10px } .connections td { white-space: nowrap } .serving-False { opacity: 0.3 } </style> """ # Memory yield "rev%s | " % config.rev yield "%s | " % main.file_server.ip_external_list yield "Port: %s | " % main.file_server.port yield "IP Network: %s | " % main.file_server.supported_ip_types yield "Opened: %s | " % main.file_server.port_opened yield "Crypt: %s | " % CryptConnection.manager.crypt_supported yield "In: %.2fMB, Out: %.2fMB | " % ( float(main.file_server.bytes_recv) / 1024 / 1024, float(main.file_server.bytes_sent) / 1024 / 1024) yield "Peerid: %s | " % main.file_server.peer_id yield "Time correction: %.2fs" % main.file_server.getTimecorrection() try: import psutil process = psutil.Process(os.getpid()) mem = process.get_memory_info()[0] / float(2**20) yield "Mem: %.2fMB | " % mem yield "Threads: %s | " % len(process.threads()) yield "CPU: usr %.2fs sys %.2fs | " % process.cpu_times() yield "Files: %s | " % len(process.open_files()) yield "Sockets: %s | " % len(process.connections()) yield "Calc size <a href='?size=1'>on</a> <a href='?size=0'>off</a>" except Exception: pass yield "<br>" # Connections yield "<b>Connections</b> (%s, total made: %s, in: %s, out: %s):<br>" % ( len(main.file_server.connections), main.file_server.last_connection_id, main.file_server.num_incoming, main.file_server.num_outgoing) yield "<table class='connections'><tr> <th>id</th> <th>type</th> <th>ip</th> <th>open</th> <th>crypt</th> <th>ping</th>" yield "<th>buff</th> <th>bad</th> <th>idle</th> <th>open</th> <th>delay</th> <th>cpu</th> <th>out</th> <th>in</th> <th>last sent</th>" yield "<th>wait</th> <th>version</th> <th>time</th> <th>sites</th> </tr>" for connection in main.file_server.connections: if "cipher" in dir(connection.sock): cipher = connection.sock.cipher()[0] tls_version = connection.sock.version() else: cipher = connection.crypt tls_version = "" if "time" in connection.handshake and connection.last_ping_delay: time_correction = connection.handshake[ "time"] - connection.handshake_time - connection.last_ping_delay else: time_correction = 0.0 yield self.formatTableRow([ ("%3d", connection.id), ("%s", connection.type), ("%s:%s", (connection.ip, connection.port)), ("%s", connection.handshake.get("port_opened")), ("<span title='%s %s'>%s</span>", (cipher, tls_version, connection.crypt)), ("%6.3f", connection.last_ping_delay), ("%s", connection.incomplete_buff_recv), ("%s", connection.bad_actions), ("since", 
max(connection.last_send_time, connection.last_recv_time)), ("since", connection.start_time), ("%.3f", max(-1, connection.last_sent_time - connection.last_send_time)), ("%.3f", connection.cpu_time), ("%.0fkB", connection.bytes_sent / 1024), ("%.0fkB", connection.bytes_recv / 1024), ("<span title='Recv: %s'>%s</span>", (connection.last_cmd_recv, connection.last_cmd_sent)), ("%s", connection.waiting_requests.keys()), ("%s r%s", (connection.handshake.get("version"), connection.handshake.get("rev", "?"))), ("%.2fs", time_correction), ("%s", connection.sites) ]) yield "</table>" # Trackers yield "<br><br><b>Trackers:</b><br>" yield "<table class='trackers'><tr> <th>address</th> <th>request</th> <th>successive errors</th> <th>last_request</th></tr>" for tracker_address, tracker_stat in sorted( sys.modules["Site.SiteAnnouncer"].global_stats.iteritems()): yield self.formatTableRow([ ("%s", tracker_address), ("%s", tracker_stat["num_request"]), ("%s", tracker_stat["num_error"]), ("%.0f min ago", min(999, (time.time() - tracker_stat["time_request"]) / 60)) ]) yield "</table>" if "AnnounceShare" in PluginManager.plugin_manager.plugin_names: yield "<br><br><b>Shared trackers:</b><br>" yield "<table class='trackers'><tr> <th>address</th> <th>added</th> <th>found</th> <th>latency</th> <th>successive errors</th> <th>last_success</th></tr>" from AnnounceShare import AnnounceSharePlugin for tracker_address, tracker_stat in sorted( AnnounceSharePlugin.tracker_storage.getTrackers( ).iteritems()): yield self.formatTableRow([ ("%s", tracker_address), ("%.0f min ago", min(999, (time.time() - tracker_stat["time_added"]) / 60)), ("%.0f min ago", min(999, (time.time() - tracker_stat.get("time_found", 0)) / 60)), ("%.3fs", tracker_stat["latency"]), ("%s", tracker_stat["num_error"]), ("%.0f min ago", min(999, (time.time() - tracker_stat["time_success"]) / 60)), ]) yield "</table>" # Tor hidden services yield "<br><br><b>Tor hidden services (status: %s):</b><br>" % main.file_server.tor_manager.status.encode( "utf8") for site_address, onion in main.file_server.tor_manager.site_onions.items( ): yield "- %-34s: %s<br>" % (site_address, onion.encode("utf8")) # Db yield "<br><br><b>Db</b>:<br>" for db in sys.modules["Db.Db"].opened_dbs: tables = [ row["name"] for row in db.execute( "SELECT name FROM sqlite_master WHERE type = 'table'"). 
fetchall() ] table_rows = {} for table in tables: table_rows[table] = db.execute("SELECT COUNT(*) AS c FROM %s" % table).fetchone()["c"] db_size = os.path.getsize(db.db_path) / 1024.0 / 1024.0 yield "- %.3fs: %s %.3fMB, table rows: %s<br>" % ( time.time() - db.last_query_time, db.db_path.encode("utf8"), db_size, json.dumps(table_rows, sort_keys=True)) # Sites yield "<br><br><b>Sites</b>:" yield "<table>" yield "<tr><th>address</th> <th>connected</th> <th title='connected/good/total'>peers</th> <th>content.json</th> <th>out</th> <th>in</th> </tr>" for site in sorted(self.server.sites.values(), lambda a, b: cmp(a.address, b.address)): yield self.formatTableRow([ ("""<a href='#' onclick='document.getElementById("peers_%s").style.display="initial"; return false'>%s</a>""", (site.address, site.address)), ("%s", [ peer.connection.id for peer in site.peers.values() if peer.connection and peer.connection.connected ]), ("%s/%s/%s", (len([ peer for peer in site.peers.values() if peer.connection and peer.connection.connected ]), len(site.getConnectablePeers(100)), len(site.peers))), ("%s (loaded: %s)", (len(site.content_manager.contents), len([ key for key, val in dict( site.content_manager.contents).iteritems() if val ]))), ("%.0fkB", site.settings.get("bytes_sent", 0) / 1024), ("%.0fkB", site.settings.get("bytes_recv", 0) / 1024), ], "serving-%s" % site.settings["serving"]) yield "<tr><td id='peers_%s' style='display: none; white-space: pre' colspan=6>" % site.address for key, peer in site.peers.items(): if peer.time_found: time_found = int(time.time() - peer.time_found) / 60 else: time_found = "--" if peer.connection: connection_id = peer.connection.id else: connection_id = None if site.content_manager.has_optional_files: yield "Optional files: %4s " % len(peer.hashfield) time_added = (time.time() - peer.time_added) / (60 * 60 * 24) yield "(#%4s, rep: %2s, err: %s, found: %3s min, add: %.1f day) %30s -<br>" % ( connection_id, peer.reputation, peer.connection_error, time_found, time_added, key) yield "<br></td></tr>" yield "</table>" # Big files yield "<br><br><b>Big files</b>:<br>" for site in self.server.sites.values(): if not site.settings.get("has_bigfile"): continue bigfiles = {} yield """<a href="#" onclick='document.getElementById("bigfiles_%s").style.display="initial"; return false'>%s</a><br>""" % ( site.address, site.address) for peer in site.peers.values(): if not peer.time_piecefields_updated: continue for sha512, piecefield in peer.piecefields.iteritems(): if sha512 not in bigfiles: bigfiles[sha512] = [] bigfiles[sha512].append(peer) yield "<div id='bigfiles_%s' style='display: none'>" % site.address for sha512, peers in bigfiles.iteritems(): yield "<br> - " + sha512 + " (hash id: %s)<br>" % site.content_manager.hashfield.getHashId( sha512) yield "<table>" for peer in peers: yield "<tr><td>" + peer.key + "</td><td>" + peer.piecefields[ sha512].tostring() + "</td></tr>" yield "</table>" yield "</div>" # Cmd stats yield "<div style='float: left'>" yield "<br><br><b>Sent commands</b>:<br>" yield "<table>" for stat_key, stat in sorted( main.file_server.stat_sent.items(), lambda a, b: cmp(a[1]["bytes"], b[1]["bytes"]), reverse=True): yield "<tr><td>%s</td><td style='white-space: nowrap'>x %s =</td><td>%.0fkB</td></tr>" % ( stat_key, stat["num"], stat["bytes"] / 1024) yield "</table>" yield "</div>" yield "<div style='float: left; margin-left: 20%; max-width: 50%'>" yield "<br><br><b>Received commands</b>:<br>" yield "<table>" for stat_key, stat in sorted( main.file_server.stat_recv.items(), 
lambda a, b: cmp(a[1]["bytes"], b[1]["bytes"]), reverse=True): yield "<tr><td>%s</td><td style='white-space: nowrap'>x %s =</td><td>%.0fkB</td></tr>" % ( stat_key, stat["num"], stat["bytes"] / 1024) yield "</table>" yield "</div>" yield "<div style='clear: both'></div>" # No more if not in debug mode if not config.debug: raise StopIteration # Object types obj_count = {} for obj in gc.get_objects(): obj_type = str(type(obj)) if obj_type not in obj_count: obj_count[obj_type] = [0, 0] obj_count[obj_type][0] += 1 # Count obj_count[obj_type][1] += float(sys.getsizeof(obj)) / 1024 # Size yield "<br><br><b>Objects in memory (types: %s, total: %s, %.2fkb):</b><br>" % ( len(obj_count), sum([stat[0] for stat in obj_count.values()]), sum([stat[1] for stat in obj_count.values()])) for obj, stat in sorted(obj_count.items(), key=lambda x: x[1][0], reverse=True): # Sorted by count yield " - %.1fkb = %s x <a href=\"/Listobj?type=%s\">%s</a><br>" % ( stat[1], stat[0], obj, cgi.escape(obj)) # Classes class_count = {} for obj in gc.get_objects(): obj_type = str(type(obj)) if obj_type != "<type 'instance'>": continue class_name = obj.__class__.__name__ if class_name not in class_count: class_count[class_name] = [0, 0] class_count[class_name][0] += 1 # Count class_count[class_name][1] += float( sys.getsizeof(obj)) / 1024 # Size yield "<br><br><b>Classes in memory (types: %s, total: %s, %.2fkb):</b><br>" % ( len(class_count), sum([stat[0] for stat in class_count.values()]), sum([stat[1] for stat in class_count.values()])) for obj, stat in sorted(class_count.items(), key=lambda x: x[1][0], reverse=True): # Sorted by count yield " - %.1fkb = %s x <a href=\"/Dumpobj?class=%s\">%s</a><br>" % ( stat[1], stat[0], obj, cgi.escape(obj)) from greenlet import greenlet objs = [obj for obj in gc.get_objects() if isinstance(obj, greenlet)] yield "<br>Greenlets (%s):<br>" % len(objs) for obj in objs: yield " - %.1fkb: %s<br>" % (self.getObjSize( obj, hpy), cgi.escape(repr(obj).encode("utf8"))) from Worker import Worker objs = [obj for obj in gc.get_objects() if isinstance(obj, Worker)] yield "<br>Workers (%s):<br>" % len(objs) for obj in objs: yield " - %.1fkb: %s<br>" % (self.getObjSize( obj, hpy), cgi.escape(repr(obj))) from Connection import Connection objs = [obj for obj in gc.get_objects() if isinstance(obj, Connection)] yield "<br>Connections (%s):<br>" % len(objs) for obj in objs: yield " - %.1fkb: %s<br>" % (self.getObjSize( obj, hpy), cgi.escape(repr(obj))) from socket import socket objs = [obj for obj in gc.get_objects() if isinstance(obj, socket)] yield "<br>Sockets (%s):<br>" % len(objs) for obj in objs: yield " - %.1fkb: %s<br>" % (self.getObjSize( obj, hpy), cgi.escape(repr(obj))) from msgpack import Unpacker objs = [obj for obj in gc.get_objects() if isinstance(obj, Unpacker)] yield "<br>Msgpack unpacker (%s):<br>" % len(objs) for obj in objs: yield " - %.1fkb: %s<br>" % (self.getObjSize( obj, hpy), cgi.escape(repr(obj))) from Site import Site objs = [obj for obj in gc.get_objects() if isinstance(obj, Site)] yield "<br>Sites (%s):<br>" % len(objs) for obj in objs: yield " - %.1fkb: %s<br>" % (self.getObjSize( obj, hpy), cgi.escape(repr(obj))) objs = [ obj for obj in gc.get_objects() if isinstance(obj, self.server.log.__class__) ] yield "<br>Loggers (%s):<br>" % len(objs) for obj in objs: yield " - %.1fkb: %s<br>" % (self.getObjSize( obj, hpy), cgi.escape(repr(obj.name))) objs = [obj for obj in gc.get_objects() if isinstance(obj, UiRequest)] yield "<br>UiRequests (%s):<br>" % len(objs) for obj in objs: yield " 
- %.1fkb: %s<br>" % (self.getObjSize( obj, hpy), cgi.escape(repr(obj))) from Peer import Peer objs = [obj for obj in gc.get_objects() if isinstance(obj, Peer)] yield "<br>Peers (%s):<br>" % len(objs) for obj in objs: yield " - %.1fkb: %s<br>" % (self.getObjSize( obj, hpy), cgi.escape(repr(obj))) objs = [(key, val) for key, val in sys.modules.iteritems() if val is not None] objs.sort() yield "<br>Modules (%s):<br>" % len(objs) for module_name, module in objs: yield " - %.3fkb: %s %s<br>" % (self.getObjSize( module, hpy), module_name, cgi.escape(repr(module))) gc.collect() # Implicit grabage collection yield "Done in %.1f" % (time.time() - s) def actionDumpobj(self): import gc import sys self.sendHeader() if "Multiuser" in PluginManager.plugin_manager.plugin_names and not config.multiuser_local: yield "This function is disabled on this proxy" raise StopIteration # No more if not in debug mode if not config.debug: yield "Not in debug mode" raise StopIteration class_filter = self.get.get("class") yield """ <style> * { font-family: monospace; white-space: pre } table * { text-align: right; padding: 0px 10px } </style> """ objs = gc.get_objects() for obj in objs: obj_type = str(type(obj)) if obj_type != "<type 'instance'>" or obj.__class__.__name__ != class_filter: continue yield "%.1fkb %s... " % (float(sys.getsizeof(obj)) / 1024, cgi.escape(str(obj))) for attr in dir(obj): yield "- %s: %s<br>" % (attr, cgi.escape(str(getattr(obj, attr)))) yield "<br>" gc.collect() # Implicit grabage collection def actionListobj(self): import gc import sys self.sendHeader() if "Multiuser" in PluginManager.plugin_manager.plugin_names and not config.multiuser_local: yield "This function is disabled on this proxy" raise StopIteration # No more if not in debug mode if not config.debug: yield "Not in debug mode" raise StopIteration type_filter = self.get.get("type") yield """ <style> * { font-family: monospace; white-space: pre } table * { text-align: right; padding: 0px 10px } </style> """ yield "Listing all %s objects in memory...<br>" % cgi.escape( type_filter) ref_count = {} objs = gc.get_objects() for obj in objs: obj_type = str(type(obj)) if obj_type != type_filter: continue refs = [ ref for ref in gc.get_referrers(obj) if hasattr(ref, "__class__") and ref.__class__.__name__ not in [ "list", "dict", "function", "type", "frame", "WeakSet", "tuple" ] ] if not refs: continue try: yield "%.1fkb <span title=\"%s\">%s</span>... 
" % ( float(sys.getsizeof(obj)) / 1024, cgi.escape( str(obj)), cgi.escape(str(obj)[0:100].ljust(100))) except: continue for ref in refs: yield " [" if "object at" in str(ref) or len(str(ref)) > 100: yield str(ref.__class__.__name__) else: yield str(ref.__class__.__name__) + ":" + cgi.escape( str(ref)) yield "] " ref_type = ref.__class__.__name__ if ref_type not in ref_count: ref_count[ref_type] = [0, 0] ref_count[ref_type][0] += 1 # Count ref_count[ref_type][1] += float( sys.getsizeof(obj)) / 1024 # Size yield "<br>" yield "<br>Object referrer (total: %s, %.2fkb):<br>" % ( len(ref_count), sum([stat[1] for stat in ref_count.values()])) for obj, stat in sorted(ref_count.items(), key=lambda x: x[1][0], reverse=True)[0:30]: # Sorted by count yield " - %.1fkb = %s x %s<br>" % (stat[1], stat[0], cgi.escape(str(obj))) gc.collect() # Implicit grabage collection def actionBenchmark(self): import sys import gc from contextlib import contextmanager output = self.sendHeader() if "Multiuser" in PluginManager.plugin_manager.plugin_names and not config.multiuser_local: yield "This function is disabled on this proxy" raise StopIteration @contextmanager def benchmark(name, standard): s = time.time() output("- %s" % name) try: yield 1 except Exception, err: output("<br><b>! Error: %s</b><br>" % err) taken = time.time() - s if taken > 0: multipler = standard / taken else: multipler = 99 if multipler < 0.3: speed = "Sloooow" elif multipler < 0.5: speed = "Ehh" elif multipler < 0.8: speed = "Goodish" elif multipler < 1.2: speed = "OK" elif multipler < 1.7: speed = "Fine" elif multipler < 2.5: speed = "Fast" elif multipler < 3.5: speed = "WOW" else: speed = "Insane!!" output("%.3fs [x%.2f: %s]<br>" % (taken, multipler, speed)) time.sleep(0.01) yield """ <style> * { font-family: monospace } table * { text-align: right; padding: 0px 10px } </style> """ yield "Benchmarking Ainkuraddo %s (rev%s) Python %s on: %s...<br>" % ( config.version, config.rev, sys.version, sys.platform) t = time.time() # CryptBitcoin yield "<br>CryptBitcoin:<br>" from Crypt import CryptBitcoin # seed = CryptBitcoin.newSeed() # yield "- Seed: %s<br>" % seed seed = "e180efa477c63b0f2757eac7b1cce781877177fe0966be62754ffd4c8592ce38" with benchmark("hdPrivatekey x 10", 0.7): for i in range(10): privatekey = CryptBitcoin.hdPrivatekey(seed, i * 10) yield "." valid = "5JsunC55XGVqFQj5kPGK4MWgTL26jKbnPhjnmchSNPo75XXCwtk" assert privatekey == valid, "%s != %s" % (privatekey, valid) data = "Hello" * 1024 # 5k with benchmark("sign x 10", 0.35): for i in range(10): yield "." sign = CryptBitcoin.sign(data, privatekey) valid = "G1GXaDauZ8vX/N9Jn+MRiGm9h+I94zUhDnNYFaqMGuOiBHB+kp4cRPZOL7l1yqK5BHa6J+W97bMjvTXtxzljp6w=" assert sign == valid, "%s != %s" % (sign, valid) address = CryptBitcoin.privatekeyToAddress(privatekey) if CryptBitcoin.opensslVerify: # Openssl avalible with benchmark("openssl verify x 100", 0.37): for i in range(100): if i % 10 == 0: yield "." ok = CryptBitcoin.verify(data, address, sign) assert ok, "does not verify from %s" % address else: yield " - openssl verify x 100...not avalible :(<br>" openssl_verify_bk = CryptBitcoin.opensslVerify # Emulate openssl not found in any way CryptBitcoin.opensslVerify = None with benchmark("pure-python verify x 10", 1.6): for i in range(10): yield "." 
ok = CryptBitcoin.verify(data, address, sign) assert ok, "does not verify from %s" % address CryptBitcoin.opensslVerify = openssl_verify_bk # CryptHash yield "<br>CryptHash:<br>" from Crypt import CryptHash from cStringIO import StringIO data = StringIO("Hello" * 1024 * 1024) # 5m with benchmark("sha256 5M x 10", 0.6): for i in range(10): data.seek(0) hash = CryptHash.sha256sum(data) yield "." valid = "8cd629d9d6aff6590da8b80782a5046d2673d5917b99d5603c3dcb4005c45ffa" assert hash == valid, "%s != %s" % (hash, valid) data = StringIO("Hello" * 1024 * 1024) # 5m with benchmark("sha512 5M x 10", 0.6): for i in range(10): data.seek(0) hash = CryptHash.sha512sum(data) yield "." valid = "9ca7e855d430964d5b55b114e95c6bbb114a6d478f6485df93044d87b108904d" assert hash == valid, "%s != %s" % (hash, valid) with benchmark("os.urandom(256) x 1000", 0.0065): for i in range(10): for y in range(100): data = os.urandom(256) yield "." # Msgpack import msgpack yield "<br>Msgpack: (version: %s)<br>" % ".".join( map(str, msgpack.version)) binary = 'fqv\xf0\x1a"e\x10,\xbe\x9cT\x9e(\xa5]u\x072C\x8c\x15\xa2\xa8\x93Sw)\x19\x02\xdd\t\xfb\xf67\x88\xd9\xee\x86\xa1\xe4\xb6,\xc6\x14\xbb\xd7$z\x1d\xb2\xda\x85\xf5\xa0\x97^\x01*\xaf\xd3\xb0!\xb7\x9d\xea\x89\xbbh8\xa1"\xa7]e(@\xa2\xa5g\xb7[\xae\x8eE\xc2\x9fL\xb6s\x19\x19\r\xc8\x04S\xd0N\xe4]?/\x01\xea\xf6\xec\xd1\xb3\xc2\x91\x86\xd7\xf4K\xdf\xc2lV\xf4\xe8\x80\xfc\x8ep\xbb\x82\xb3\x86\x98F\x1c\xecS\xc8\x15\xcf\xdc\xf1\xed\xfc\xd8\x18r\xf9\x80\x0f\xfa\x8cO\x97(\x0b]\xf1\xdd\r\xe7\xbf\xed\x06\xbd\x1b?\xc5\xa0\xd7a\x82\xf3\xa8\xe6@\xf3\ri\xa1\xb10\xf6\xd4W\xbc\x86\x1a\xbb\xfd\x94!bS\xdb\xaeM\x92\x00#\x0b\xf7\xad\xe9\xc2\x8e\x86\xbfi![%\xd31]\xc6\xfc2\xc9\xda\xc6v\x82P\xcc\xa9\xea\xb9\xff\xf6\xc8\x17iD\xcf\xf3\xeeI\x04\xe9\xa1\x19\xbb\x01\x92\xf5nn4K\xf8\xbb\xc6\x17e>\xa7 \xbbv' data = { "int": 1024 * 1024 * 1024, "float": 12345.67890, "text": "hello" * 1024, "binary": binary } with benchmark("pack 5K x 10 000", 0.78): for i in range(10): for y in range(1000): data_packed = msgpack.packb(data) yield "." 
valid = """\x84\xa3int\xce@\x00\x00\x00\xa4text\xda\x14\x00hellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohelloh
ellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohello\xa5float\xcb@\xc8\x1c\xd6\xe61\xf8\xa1\xa6binary\xda\x01\x00fqv\xf0\x1a"e\x10,\xbe\x9cT\x9e(\xa5]u\x072C\x8c\x15\xa2\xa8\x93Sw)\x19\x02\xdd\t\xfb\xf67\x88\xd9\xee\x86\xa1\xe4\xb6,\xc6\x14\xbb\xd7$z\x1d\xb2\xda\x85\xf5\xa0\x97^\x01*\xaf\xd3\xb0!\xb7\x9d\xea\x89\xbbh8\xa1"\xa7]e(@\xa2\xa5g\xb7[\xae\x8eE\xc2\x9fL\xb6s\x19\x19\r\xc8\x04S\xd0N\xe4]?/\x01\xea\xf6\xec\xd1\xb3\xc2\x91\x86\xd7\xf4K\xdf\xc2lV\xf4\xe8\x80\xfc\x8ep\xbb\x82\xb3\x86\x98F\x1c\xecS\xc8\x15\xcf\xdc\xf1\xed\xfc\xd8\x18r\xf9\x80\x0f\xfa\x8cO\x97(\x0b]\xf1\xdd\r\xe7\xbf\xed\x06\xbd\x1b?\xc5\xa0\xd7a\x82\xf3\xa8\xe6@\xf3\ri\xa1\xb10\xf6\xd4W\xbc\x86\x1a\xbb\xfd\x94!bS\xdb\xaeM\x92\x00#\x0b\xf7\xad\xe9\xc2\x8e\x86\xbfi![%\xd31]\xc6\xfc2\xc9\xda\xc6v\x82P\xcc\xa9\xea\xb9\xff\xf6\xc8\x17iD\xcf\xf3\xeeI\x04\xe9\xa1\x19\xbb\x01\x92\xf5nn4K\xf8\xbb\xc6\x17e>\xa7 \xbbv""" assert data_packed == valid, "%s<br>!=<br>%s" % (repr(data_packed), repr(valid)) with benchmark("unpack 5K x 10 000", 1.2): for i in range(10): for y in range(1000): data_unpacked = msgpack.unpackb(data_packed) yield "." assert data == data_unpacked, "%s != %s" % (data_unpacked, data) with benchmark("streaming unpack 5K x 10 000", 1.4): for i in range(10): unpacker = msgpack.Unpacker() for y in range(1000): unpacker.feed(data_packed) for data_unpacked in unpacker: pass yield "." 
assert data == data_unpacked, "%s != %s" % (data_unpacked, data) # Db from Db import Db import sqlite3 yield "<br>Db: (version: %s, API: %s)<br>" % (sqlite3.sqlite_version, sqlite3.version) schema = { "db_name": "TestDb", "db_file": "%s/benchmark.db" % config.data_dir, "maps": { ".*": { "to_table": { "test": "test" } } }, "tables": { "test": { "cols": [["test_id", "INTEGER"], ["title", "TEXT"], ["json_id", "INTEGER REFERENCES json (json_id)"]], "indexes": ["CREATE UNIQUE INDEX test_key ON test(test_id, json_id)"], "schema_changed": 1426195822 } } } if os.path.isfile("%s/benchmark.db" % config.data_dir): os.unlink("%s/benchmark.db" % config.data_dir) with benchmark("Open x 10", 0.13): for i in range(10): db = Db(schema, "%s/benchmark.db" % config.data_dir) db.checkTables() db.close() yield "." db = Db(schema, "%s/benchmark.db" % config.data_dir) db.checkTables() import json with benchmark("Insert x 10 x 1000", 1.0): for u in range(10): # 10 user data = {"test": []} for i in range(1000): # 1000 line of data data["test"].append({ "test_id": i, "title": "Testdata for %s message %s" % (u, i) }) json.dump(data, open("%s/test_%s.json" % (config.data_dir, u), "w")) db.updateJson("%s/test_%s.json" % (config.data_dir, u)) os.unlink("%s/test_%s.json" % (config.data_dir, u)) yield "." with benchmark("Buffered insert x 100 x 100", 1.3): cur = db.getCursor() cur.execute("BEGIN") cur.logging = False for u in range(100, 200): # 100 user data = {"test": []} for i in range(100): # 1000 line of data data["test"].append({ "test_id": i, "title": "Testdata for %s message %s" % (u, i) }) json.dump(data, open("%s/test_%s.json" % (config.data_dir, u), "w")) db.updateJson("%s/test_%s.json" % (config.data_dir, u), cur=cur) os.unlink("%s/test_%s.json" % (config.data_dir, u)) if u % 10 == 0: yield "." cur.execute("COMMIT") yield " - Total rows in db: %s<br>" % db.execute( "SELECT COUNT(*) AS num FROM test").fetchone()[0] with benchmark("Indexed query x 1000", 0.25): found = 0 cur = db.getCursor() cur.logging = False for i in range(1000): # 1000x by test_id res = cur.execute("SELECT * FROM test WHERE test_id = %s" % i) for row in res: found += 1 if i % 100 == 0: yield "." assert found == 20000, "Found: %s != 20000" % found with benchmark("Not indexed query x 100", 0.6): found = 0 cur = db.getCursor() cur.logging = False for i in range(100): # 1000x by test_id res = cur.execute("SELECT * FROM test WHERE json_id = %s" % i) for row in res: found += 1 if i % 10 == 0: yield "." assert found == 18900, "Found: %s != 18900" % found with benchmark("Like query x 100", 1.8): found = 0 cur = db.getCursor() cur.logging = False for i in range(100): # 1000x by test_id res = cur.execute( "SELECT * FROM test WHERE title LIKE '%%message %s%%'" % i) for row in res: found += 1 if i % 10 == 0: yield "." assert found == 38900, "Found: %s != 11000" % found db.close() if os.path.isfile("%s/benchmark.db" % config.data_dir): os.unlink("%s/benchmark.db" % config.data_dir) gc.collect() # Implicit grabage collection # Zip yield "<br>Compression:<br>" import zipfile test_data = "Test" * 1024 file_name = "\xc3\x81rv\xc3\xadzt\xc5\xb0r\xc5\x91t\xc3\xbck\xc3\xb6r\xc3\xb3g\xc3\xa9p\xe4\xb8\xad\xe5\x8d\x8e%s.txt" with benchmark("Zip pack x 10", 0.12): for i in range(10): with zipfile.ZipFile('%s/test.zip' % config.data_dir, 'w') as archive: for y in range(100): zip_info = zipfile.ZipInfo(file_name % y, (1980, 1, 1, 0, 0, 0)) zip_info.compress_type = zipfile.ZIP_DEFLATED zip_info.create_system = 3 archive.writestr(zip_info, test_data) yield "." 
hash = CryptHash.sha512sum( open("%s/test.zip" % config.data_dir, "rb")) valid = "f6ef623e6653883a1758db14aa593350e26c9dc53a8406d6e6defd6029dbd483" assert hash == valid, "Invalid hash: %s != %s<br>" % (hash, valid) with benchmark("Zip unpack x 10", 0.2): for i in range(10): with zipfile.ZipFile('%s/test.zip' % config.data_dir) as archive: for y in range(100): assert archive.read(file_name % y) == test_data yield "." if os.path.isfile("%s/test.zip" % config.data_dir): os.unlink("%s/test.zip" % config.data_dir) # Tar.gz import tarfile import struct # Monkey patch _init_write_gz to use fixed date in order to keep the hash independent from datetime def nodate_write_gzip_header(self): self.mtime = 0 original_write_gzip_header(self) import gzip original_write_gzip_header = gzip.GzipFile._write_gzip_header gzip.GzipFile._write_gzip_header = nodate_write_gzip_header test_data_io = StringIO("Test" * 1024) with benchmark("Tar.gz pack x 10", 0.3): for i in range(10): with tarfile.open('%s/test.tar.gz' % config.data_dir, 'w:gz') as archive: for y in range(100): test_data_io.seek(0) tar_info = tarfile.TarInfo(file_name % y) tar_info.size = 4 * 1024 archive.addfile(tar_info, test_data_io) yield "." hash = CryptHash.sha512sum( open("%s/test.tar.gz" % config.data_dir, "rb")) valid = "4704ebd8c987ed6f833059f1de9c475d443b0539b8d4c4cb8b49b26f7bbf2d19" assert hash == valid, "Invalid hash: %s != %s<br>" % (hash, valid) with benchmark("Tar.gz unpack x 10", 0.2): for i in range(10): with tarfile.open('%s/test.tar.gz' % config.data_dir, 'r:gz') as archive: for y in range(100): assert archive.extractfile(file_name % y).read() == test_data yield "." if os.path.isfile("%s/test.tar.gz" % config.data_dir): os.unlink("%s/test.tar.gz" % config.data_dir) # Tar.bz2 import tarfile test_data_io = StringIO("Test" * 1024) with benchmark("Tar.bz2 pack x 10", 2.0): for i in range(10): with tarfile.open('%s/test.tar.bz2' % config.data_dir, 'w:bz2') as archive: for y in range(100): test_data_io.seek(0) tar_info = tarfile.TarInfo(file_name % y) tar_info.size = 4 * 1024 archive.addfile(tar_info, test_data_io) yield "." hash = CryptHash.sha512sum( open("%s/test.tar.bz2" % config.data_dir, "rb")) valid = "90cba0b4d9abaa37b830bf37e4adba93bfd183e095b489ebee62aaa94339f3b5" assert hash == valid, "Invalid hash: %s != %s<br>" % (hash, valid) with benchmark("Tar.bz2 unpack x 10", 0.5): for i in range(10): with tarfile.open('%s/test.tar.bz2' % config.data_dir, 'r:bz2') as archive: for y in range(100): assert archive.extractfile(file_name % y).read() == test_data yield "." if os.path.isfile("%s/test.tar.bz2" % config.data_dir): os.unlink("%s/test.tar.bz2" % config.data_dir) yield "<br>Done. Total: %.2fs" % (time.time() - t)
def add_file(self, file_object, filename):
    self._archive.addfile(tarfile.TarInfo(name=filename), fileobj=file_object)
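# The helper above still stores a zero-byte member, because TarInfo.size
# defaults to 0 and addfile() copies exactly that many bytes. A hedged sketch
# of a size-aware variant (the helper name is made up):
import tarfile

def add_file_with_size(archive, file_object, filename):
    info = tarfile.TarInfo(name=filename)
    file_object.seek(0, 2)          # seek to the end to measure the payload
    info.size = file_object.tell()
    file_object.seek(0)
    archive.addfile(info, fileobj=file_object)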
def create_tar(files, output_folder=None): '''create_memory_tar will take a list of files (each a dictionary with name, permission, and content) and write the tarfile (a sha256 sum name is used) to the output_folder. If there is no output folde specified, the tar is written to a temporary folder. ''' if output_folder is None: output_folder = tempfile.mkdtemp() finished_tar = None additions = [] contents = [] for entity in files: info = tarfile.TarInfo(name=entity['name']) info.mode = entity['mode'] info.mtime = int(datetime.datetime.now().strftime('%s')) info.uid = entity["uid"] info.gid = entity["gid"] info.uname = entity["uname"] info.gname = entity["gname"] # Get size from stringIO write filey = io.StringIO() content = None try: # python3 info.size = filey.write(entity['content']) content = io.BytesIO(entity['content'].encode('utf8')) except Exception: # python2 info.size = int(filey.write(entity['content'].decode('utf-8'))) content = io.BytesIO(entity['content'].encode('utf8')) pass if content is not None: addition = {'content': content, 'info': info} additions.append(addition) contents.append(content) # Now generate the sha256 name based on content if len(additions) > 0: hashy = get_content_hash(contents) finished_tar = "%s/sha256:%s.tar.gz" % (output_folder, hashy) # Warn the user if it already exists if os.path.exists(finished_tar): msg = "metadata file %s already exists " % finished_tar msg += "will over-write." bot.debug(msg) # Add all content objects to file tar = tarfile.open(finished_tar, "w:gz") for a in additions: tar.addfile(a["info"], a["content"]) tar.close() else: msg = "No contents, environment or labels" msg += " for tarfile, will not generate." bot.debug(msg) return finished_tar
def GetAndUploadStateDeltaDiff(blockNum, lastBlockNum): global start # check if there is diff and buffer the diff_output bashCommand = "aws s3 sync --dryrun --delete temp/persistence/stateDelta " + getBucketString( PERSISTENCE_SNAPSHOT_NAME) + "/persistence/stateDelta" process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE) diff_output, error = process.communicate() str_diff_output = diff_output.decode("utf-8") if re.match(r'^\s*$', str_diff_output): logging.warning("No state delta diff, interesting...") tf = tarfile.open("stateDelta_" + str(blockNum) + ".tar.gz", mode="w:gz") t = tarfile.TarInfo("stateDelta_" + str(blockNum)) t.type = tarfile.DIRTYPE tf.addfile(t) tf.close() bashCommand = "aws s3 cp stateDelta_" + str( blockNum) + ".tar.gz " + getBucketString( STATEDELTA_DIFF_NAME) + "/stateDelta_" + str( blockNum) + ".tar.gz" process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE) output, error = process.communicate() logging.info("DUMMY upload: State-delta Diff for new txBlk :" + str(blockNum) + ") in Remote S3 bucket: " + getBucketString(STATEDELTA_DIFF_NAME) + " is Synced") os.remove("stateDelta_" + str(blockNum) + ".tar.gz") start = (int)( time.time()) # reset inactive start time - delta was uploaded return 1 if (blockNum % NUM_FINAL_BLOCK_PER_POW == 0 or (lastBlockNum == 0)): # we dont need to upload diff here. Instead complete stateDelta tf = tarfile.open("stateDelta_" + str(blockNum) + ".tar.gz", mode="w:gz") tf.add("temp/persistence/stateDelta", arcname=os.path.basename("persistence/stateDelta_" + str(blockNum))) tf.close() bashCommand = "aws s3 cp stateDelta_" + str( blockNum) + ".tar.gz " + getBucketString( STATEDELTA_DIFF_NAME) + "/stateDelta_" + str( blockNum) + ".tar.gz" process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE) output, error = process.communicate() logging.info("New state-delta snapshot for new ds epoch (TXBLK:" + str(blockNum) + ") in Remote S3 bucket: " + getBucketString(STATEDELTA_DIFF_NAME) + " is Synced") os.remove("stateDelta_" + str(blockNum) + ".tar.gz") start = (int)( time.time()) # reset inactive start time - delta was uploaded return 0 str_diff_output = str_diff_output.strip() splitted = str_diff_output.split('\n') result = [] if (len(splitted) > 0): for x in splitted: tok = x.split(' ') # skip deleted files if (len(tok) >= 3 and tok[1] == "upload:"): result.append(tok[2]) tf = tarfile.open("stateDelta_" + str(blockNum) + ".tar.gz", mode="w:gz") for x in result: tf.add(x, arcname="stateDelta_" + str(blockNum) + "/" + path_leaf(x)) tf.close() bashCommand = "aws s3 cp stateDelta_" + str( blockNum) + ".tar.gz " + getBucketString( STATEDELTA_DIFF_NAME) + "/stateDelta_" + str( blockNum) + ".tar.gz" process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE) output, error = process.communicate() logging.info("State-delta Diff for new txBlk :" + str(blockNum) + ") in Remote S3 bucket: " + getBucketString(STATEDELTA_DIFF_NAME) + " is Synced") os.remove("stateDelta_" + str(blockNum) + ".tar.gz") start = (int)( time.time()) # reset inactive start time - delta was uploaded return 0 #success return 1
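# Stand-alone sketch of the "dummy" archive created above when there is no
# state-delta diff: a tar.gz whose only member is an empty directory entry.
# The block number is illustrative.
import tarfile

def make_empty_statedelta_archive(block_num):
    name = "stateDelta_%d.tar.gz" % block_num
    with tarfile.open(name, mode="w:gz") as tf:
        placeholder = tarfile.TarInfo("stateDelta_%d" % block_num)
        placeholder.type = tarfile.DIRTYPE
        tf.addfile(placeholder)  # directory entries carry no payload
    return name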
def _add_file(f, fname, buf):
    info = tarfile.TarInfo(fname)
    info.size = len(buf)
    f.addfile(info, BytesIO(buf))
def SyncLocalToS3Persistence(blockNum, lastBlockNum): # Try uploading stateDelta diff to S3 result = GetAndUploadStateDeltaDiff(blockNum, lastBlockNum) # Try syncing S3 with latest persistence only if NUM_DSBLOCK blocks have crossed. if ((blockNum + 1) % (NUM_DSBLOCK * NUM_FINAL_BLOCK_PER_POW) == 0 or lastBlockNum == 0): bashCommand = "aws s3 sync --delete temp/persistence " + getBucketString( PERSISTENCE_SNAPSHOT_NAME ) + "/persistence --exclude 'diagnosticNodes/*' --exclude 'diagnosticCoinb/*' " process = subprocess.Popen(bashCommand, universal_newlines=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) output, error = process.communicate() if re.match(r'^\s*$', output): logging.warning("No entire persistence diff, interesting...") else: logging.info("Remote S3 bucket: " + getBucketString(PERSISTENCE_SNAPSHOT_NAME) + "/persistence is entirely Synced") # clear the state-delta bucket now. if (lastBlockNum != 0): CleanS3StateDeltas() CleanS3PersistenceDiffs() elif (result == 0): # we still need to sync persistence except for state, stateroot, contractCode, contractStateData, contractStateIndex so that next time for next blocknum we can get statedelta diff and persistence diff correctly bashCommand = "aws s3 sync --delete temp/persistence " + getBucketString( PERSISTENCE_SNAPSHOT_NAME ) + "/persistence --exclude '*' --include 'microBlockKeys/*' --include 'microBlocks*' --include 'dsBlocks/*' --include 'minerInfoDSComm/*' --include 'minerInfoShards/*' --include 'dsCommittee/*' --include 'shardStructure/*' --include 'txBlocks/*' --include 'VCBlocks/*' --include 'blockLinks/*' --include 'metaData/*' --include 'stateDelta/*' --include 'txEpochs/*' --include 'txBodies*' --include 'extSeedPubKeys/*' " process = subprocess.Popen(bashCommand, universal_newlines=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) str_diff_output, error = process.communicate() logging.info( "Remote S3 bucket: " + getBucketString(PERSISTENCE_SNAPSHOT_NAME) + "/persistence is Synced without state/stateRoot/contractCode/contractStateData/contractStateIndex" ) if re.match( r'^\s*$', str_diff_output ): # if output of sync command is either empty or just whitespaces print("No persistence diff, interesting...") tf = tarfile.open("diff_persistence_" + str(blockNum) + ".tar.gz", mode="w:gz") t = tarfile.TarInfo("diff_persistence_" + str(blockNum)) t.type = tarfile.DIRTYPE tf.addfile(t) tf.close() bashCommand = "aws s3 cp diff_persistence_" + str( blockNum) + ".tar.gz " + getBucketString( PERSISTENCE_SNAPSHOT_NAME) + "/diff_persistence_" + str( blockNum) + ".tar.gz" process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE) output, error = process.communicate() logging.info("DUMMY upload: persistence Diff for new txBlk :" + str(blockNum) + ") in Remote S3 bucket: " + getBucketString(PERSISTENCE_SNAPSHOT_NAME) + " is Synced") os.remove("diff_persistence_" + str(blockNum) + ".tar.gz") else: str_diff_output = str_diff_output.strip() splitted = str_diff_output.split('\n') result = [] if (len(splitted) > 0): for x in splitted: tok = x.split(' ') # skip deleted files if (len(tok) >= 3 and tok[0] == "upload:"): result.append(tok[1]) tf = tarfile.open("diff_persistence_" + str(blockNum) + ".tar.gz", mode="w:gz") for x in result: print(x) tf.add(x, arcname="diff_persistence_" + str(blockNum) + "/" + x.split("persistence/", 1)[1]) tf.close() bashCommand = "aws s3 cp diff_persistence_" + str( blockNum) + ".tar.gz " + getBucketString( PERSISTENCE_SNAPSHOT_NAME) + "/diff_persistence_" + str( 
blockNum) + ".tar.gz" process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE) output, error = process.communicate() logging.info( "Persistence Diff for new txBlk :" + str(blockNum) + ") in Remote S3 bucket: " + getBucketString(PERSISTENCE_SNAPSHOT_NAME) + " is Synced without state/stateroot/contractCode/contractStateData/contractStateIndex" ) os.remove("diff_persistence_" + str(blockNum) + ".tar.gz") else: logging.info("Not supposed to upload state now!")
def export_emails_archive(data_set_id, email_ids=["f9c9c59a-7fe8-11e5-bb05-08002705cb99"]): cherrypy.log("email.get_attachments_sender(index=%s, attachment_id=%s)" % (data_set_id, email_ids)) if not data_set_id: return tangelo.HTTPStatusCode(400, "invalid service call - missing index") # if not email: # return tangelo.HTTPStatusCode(400, "invalid service call - missing attachment_id") # elasticsearch.exceptions.ConnectionTimeout: ConnectionTimeout caused by - ReadTimeoutError(HTTPConnectionPool(host='10.1.70.143', port=9200): Read timed out. (read timeout=10)) es = Elasticsearch([{ "host": "10.1.70.143", "port": 9200 }], request_timeout=60) # TODO can implement with multiple doc_types and combine attachments in emails = es.mget(index=data_set_id, doc_type="emails", body={"docs": [{ "_id": id } for id in email_ids]}) # TODO filename filename = "export.tar.gz" tangelo.content_type("application/x-gzip") header("Content-Disposition", 'attachment; filename="{}"'.format(filename)) string_buffer = cStringIO.StringIO() tar = tarfile.open(mode='w:gz', fileobj=string_buffer) # Add each email to the tar for email_source in emails["docs"]: email = email_source["_source"] tarinfo_parent = tarfile.TarInfo(name=email["id"]) tarinfo_parent.type = tarfile.DIRTYPE tarinfo_parent.mode = 0755 tarinfo_parent.mtime = time.time() tar.addfile(tarinfo_parent) tarinfo = tarfile.TarInfo(email["id"] + "/" + email["id"] + ".json") # TODO -- email transformation data_string = json.dumps(email) fobj = cStringIO.StringIO(data_string) tarinfo.size = len(data_string) tarinfo.mode = 0644 tarinfo.mtime = time.time() tar.addfile(tarinfo, fobj) # Get the attachments if email["attachments"]: attachments = es.mget(index=data_set_id, doc_type="attachments", body={ "docs": [{ "_id": attch["guid"] } for attch in email["attachments"]] }) for attachment_source in attachments["docs"]: attachment = attachment_source["_source"] filename = attachment["filename"] attch_data = str(base64.b64decode(attachment["contents64"])) tarinfo_attch = tarfile.TarInfo(email["id"] + "/" + filename) tarinfo_attch.size = len(attch_data) tarinfo_attch.mode = 0644 tarinfo_attch.mtime = time.time() tar.addfile(tarinfo_attch, cStringIO.StringIO(attch_data)) tar.close() return string_buffer.getvalue()
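# The function above targets Python 2 (cStringIO, octal literals such as 0644).
# A hedged Python 3 stdlib sketch of the same pattern: one directory entry per
# email plus a JSON member inside it, all built in an in-memory tar.gz.
import io
import json
import tarfile
import time

def email_to_targz_bytes(email):
    buf = io.BytesIO()
    with tarfile.open(fileobj=buf, mode="w:gz") as tar:
        dir_info = tarfile.TarInfo(name=email["id"])
        dir_info.type = tarfile.DIRTYPE
        dir_info.mode = 0o755
        dir_info.mtime = int(time.time())
        tar.addfile(dir_info)

        payload = json.dumps(email).encode("utf-8")
        info = tarfile.TarInfo(name=email["id"] + "/" + email["id"] + ".json")
        info.size = len(payload)
        info.mode = 0o644
        info.mtime = int(time.time())
        tar.addfile(info, io.BytesIO(payload))
    return buf.getvalue()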
def export_attachments(data_set_id, outfile, sender='', attachment_extension='jpg', date_bounds=None): print( "email.get_attachments_sender(index=%s, sender=%s, attachment_type=%s, date_bounds=%s)" % (data_set_id, sender, attachment_extension, date_bounds)) if not data_set_id: print "invalid service call - missing index" return 1 # elasticsearch.exceptions.ConnectionTimeout: ConnectionTimeout caused by - ReadTimeoutError(HTTPConnectionPool(host='10.1.70.143', port=9200): Read timed out. (read timeout=10)) es = Elasticsearch([{"host": "10.1.70.143", "port": 9200}], timeout=60) # TODO get accurate count -- this is not strictly needed as attachments will be accessed as inner docs on the email_address max_inner_attachments_returned = 100000 # Get all attachments by extension rows = [] body = _attch_nested__ext_query( sender, attachment_extension, date_bounds, max_inner_attachments_returned=max_inner_attachments_returned) print body addresses_count = es.count(index=data_set_id, doc_type="email_address", body=body)["count"] print "total addresses: " + str(addresses_count) addresses = es.search(index=data_set_id, doc_type="email_address", body=body, size=addresses_count) for address in addresses["hits"]["hits"]: rows += [[ address["_source"]["addr"], attachment["_source"]["guid"], attachment["_source"]["filename"], attachment["_source"]["datetime"] ] for attachment in address["inner_hits"]["sender_attachments"]["hits"] ["hits"]] print "total attachments: " + str(len(rows)) # start tar.gz # tar = tarfile.open(mode='w:gz', name="big-export.tar.gz") # Start tar tar = tarfile.open(mode='w', name=outfile) csv_string_buffer = cStringIO.StringIO() csv_file = csv.writer(csv_string_buffer) # Add all rows to attachment csv csv_file.writerows(rows) tarinfo = tarfile.TarInfo("attachments.csv") tarinfo.size = csv_string_buffer.tell() tarinfo.mode = 0644 tarinfo.mtime = time.time() csv_string_buffer.seek(0) tar.addfile(tarinfo, csv_string_buffer) # This is the buffer size of how many attachments to pull from ES at each iteration num_returned = 3 index = 0 # Paging while index < len(rows): # Get num_returned attachments from ES attachments = es.mget(index=data_set_id, doc_type="attachments", body={ "docs": [{ "_id": row[1] } for row in rows[index:index + num_returned]] }) index += num_returned # Add all attachments to the archive for attachment_source in attachments["docs"]: attachment = attachment_source["_source"] filename = attachment["filename"] attch_data = str(base64.b64decode(attachment["contents64"])) tarinfo_attch = tarfile.TarInfo(attachment["guid"] + "/" + filename) tarinfo_attch.size = len(attch_data) tarinfo_attch.mode = 0644 tarinfo_attch.mtime = time.time() tar.addfile(tarinfo_attch, cStringIO.StringIO(attch_data)) tar.close()
def make_bin(basename, filelist, type, kver, sign=0, jailbreak=0): fd, tgz_fname = tempfile.mkstemp() os.close(fd) tar = tarfile.open(tgz_fname, "w:gz") dat_list = "" if sign: fd, keyfile = tempfile.mkstemp() fs = os.fdopen(fd, "wb") fs.write(SIGN_KEY) fs.close() if jailbreak: random.seed() # Create fake symlink namedir = '__dir' + str(random.randint(1000, 9999)) tarinfo = tarfile.TarInfo(namedir) tarinfo.type = tarfile.SYMTYPE tarinfo.linkname = KINDLE_HACK_DIR tar.addfile(tarinfo) # Create new key fd, tmpfile = tempfile.mkstemp() fs = os.fdopen(fd, "wb") fs.write(NEW_KEY) fs.close() tarinfo = tar.gettarinfo(tmpfile, arcname=namedir + '/' + KINDLE_HACK_KEYNAME) add_tarfile(tarinfo, tmpfile, tar) os.remove(tmpfile) # Create additional install script nameinstall = '_install' + str(random.randint(1000, 9999)) + '.sh' fd, tmpinstall = tempfile.mkstemp() fs = os.fdopen(fd, "wb") fs.write(INSTALL_SCRIPT) fs.close() tarinfo = tar.gettarinfo(tmpinstall, arcname=nameinstall) add_tarfile(tarinfo, tmpinstall, tar) if sign: create_sig(keyfile, tmpinstall, tar, finalname=nameinstall) # Creating extra script signature if sign: for name in filelist: print "calculating signature for %s" % name create_sig(keyfile, name, tar) for name in filelist: print "adding %s" % name tarinfo = tar.gettarinfo(name) if name.endswith(".sh"): fid = 129 else: fid = 128 add_tarfile(tarinfo, name, tar) fsize = os.path.getsize(name) / 64 inf = open(name, "rb") dat_list += "%d %s %s %d %s\n" % (fid, s_md5( inf.read()), name, fsize, name + "_file") inf.close() if jailbreak: fsize = os.path.getsize(tmpinstall) / 64 inf = open(tmpinstall, "rb") dat_list += "%d %s %s %d %s\n" % (129, s_md5( inf.read()), nameinstall, fsize, nameinstall + "_file") inf.close() os.remove(tmpinstall) fd, tmpdat = tempfile.mkstemp() fs = os.fdopen(fd, "wb") fs.write(dat_list) fs.close() tarinfo = tar.gettarinfo(tmpdat, arcname=basename + '.dat') add_tarfile(tarinfo, tmpdat, tar) # Sign the bundle file, too (needed since fw 3.x) if sign: print "calculating signature for bundle file" create_sig(keyfile, tmpdat, tar, basename + '.dat') os.remove(keyfile) os.remove(tmpdat) tar.close() convert_bin(basename, tgz_fname, type, kver) os.remove(tgz_fname)
def add_file(filename, contents): info = tarfile.TarInfo(filename) info.size = len(contents) tar.addfile(tarinfo=info, fileobj=cStringIO.StringIO(contents))
def _add_file(tar, filename, content): tar_info = tarfile.TarInfo(name=filename) tar_info.size = len(content) content = io.BytesIO(content) content.seek(0) tar.addfile(tar_info, content)
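# A minimal roundtrip sketch (not from the original sources) for the _add_file
# helper above: write one in-memory member, then read it back to check the
# TarInfo size bookkeeping. Assumes _add_file is importable in this module.
import io
import tarfile


def _example_add_file_roundtrip():
    payload = b"hello world\n"
    buf = io.BytesIO()
    with tarfile.open(fileobj=buf, mode="w") as tar:
        _add_file(tar, "hello.txt", payload)
    buf.seek(0)
    with tarfile.open(fileobj=buf, mode="r") as tar:
        member = tar.getmember("hello.txt")
        assert member.size == len(payload)
        assert tar.extractfile(member).read() == payload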
def create(info, verbose=False): tmp_dir_base_path = join(dirname(info['_outpath']), "tmp") try: os.makedirs(tmp_dir_base_path) except Exception: pass tmp_dir = tempfile.mkdtemp(dir=tmp_dir_base_path) preconda_write_files(info, tmp_dir) preconda_tarball = join(tmp_dir, 'preconda.tar.bz2') postconda_tarball = join(tmp_dir, 'postconda.tar.bz2') pre_t = tarfile.open(preconda_tarball, 'w:bz2') post_t = tarfile.open(postconda_tarball, 'w:bz2') for dist in preconda_files: fn = filename_dist(dist) pre_t.add(join(tmp_dir, fn), 'pkgs/' + fn) for key in 'pre_install', 'post_install': if key in info: pre_t.add( info[key], 'pkgs/%s.sh' % key, filter=make_executable if has_shebang(info[key]) else None) cache_dir = join(tmp_dir, 'cache') if isdir(cache_dir): for cf in os.listdir(cache_dir): if cf.endswith(".json"): pre_t.add(join(cache_dir, cf), 'pkgs/cache/' + cf) for dist in info['_dists']: if filename_dist(dist).endswith(".conda"): _dist = filename_dist(dist)[:-6] elif filename_dist(dist).endswith(".tar.bz2"): _dist = filename_dist(dist)[:-8] record_file = join(_dist, 'info', 'repodata_record.json') record_file_src = join(tmp_dir, record_file) record_file_dest = join('pkgs', record_file) pre_t.add(record_file_src, record_file_dest) pre_t.addfile(tarinfo=tarfile.TarInfo("conda-meta/history")) post_t.add(join(tmp_dir, 'conda-meta', 'history'), 'conda-meta/history') pre_t.close() post_t.close() tarball = join(tmp_dir, 'tmp.tar') t = tarfile.open(tarball, 'w') t.add(preconda_tarball, basename(preconda_tarball)) t.add(postconda_tarball, basename(postconda_tarball)) if 'license_file' in info: t.add(info['license_file'], 'LICENSE.txt') for dist in info['_dists']: fn = filename_dist(dist) t.add(join(info['_download_dir'], fn), 'pkgs/' + fn) t.close() conda_exec = info["_conda_exe"] header = get_header(conda_exec, tarball, info) shar_path = info['_outpath'] with open(shar_path, 'wb') as fo: fo.write(header.encode('utf-8')) for payload in [conda_exec, tarball]: with open(payload, 'rb') as fi: while True: chunk = fi.read(262144) if not chunk: break fo.write(chunk) os.unlink(tarball) os.chmod(shar_path, 0o755) shutil.rmtree(tmp_dir)
def add(self, path): self.tar.addfile(tarfile.TarInfo(name=path))
def tar(self, uncompressed_size=65536, num_files=1, min_file_size=4096, compression=None):
    """Generate a bytes object containing a random valid tar file.

    The number and sizes of files contained inside the resulting archive can be controlled
    using the following arguments:

    - ``uncompressed_size`` - the total size of files before compression, 16 KiB by default
    - ``num_files`` - the number of files archived in the resulting tar file, 1 by default
    - ``min_file_size`` - the minimum size of each file before compression, 4 KiB by default

    No compression is used by default, but setting ``compression`` to one of the values listed
    below will use the corresponding compression type.

    - ``'bzip2'`` or ``'bz2'`` for BZIP2
    - ``'lzma'`` or ``'xz'`` for LZMA
    - ``'gzip'`` or ``'gz'`` for GZIP

    :sample: uncompressed_size=256, num_files=4, min_file_size=32
    :sample: uncompressed_size=256, num_files=32, min_file_size=4, compression='bz2'
    """
    if any([
        not isinstance(num_files, int) or num_files <= 0,
        not isinstance(min_file_size, int) or min_file_size <= 0,
        not isinstance(uncompressed_size, int) or uncompressed_size <= 0,
    ]):
        raise ValueError(
            '`num_files`, `min_file_size`, and `uncompressed_size` must be positive integers',
        )
    if min_file_size * num_files > uncompressed_size:
        raise AssertionError(
            '`uncompressed_size` is smaller than the calculated minimum required size',
        )
    if compression in ['gzip', 'gz']:
        mode = 'w:gz'
    elif compression in ['bzip2', 'bz2']:
        mode = 'w:bz2'
    elif compression in ['lzma', 'xz']:
        mode = 'w:xz'
    else:
        mode = 'w'

    tar_buffer = io.BytesIO()
    remaining_size = uncompressed_size
    with tarfile.open(mode=mode, fileobj=tar_buffer) as tar_handle:
        for file_number in range(1, num_files + 1):
            file_buffer = io.BytesIO()
            filename = self.generator.pystr() + str(file_number)

            max_allowed_size = remaining_size - (num_files - file_number) * min_file_size
            if file_number < num_files:
                file_size = self.generator.random.randint(min_file_size, max_allowed_size)
                remaining_size = remaining_size - file_size
            else:
                file_size = remaining_size

            tarinfo = tarfile.TarInfo(name=filename)
            data = self.generator.binary(file_size)
            file_buffer.write(data)
            tarinfo.size = len(file_buffer.getvalue())
            file_buffer.seek(0)
            tar_handle.addfile(tarinfo, file_buffer)
            file_buffer.close()
    return tar_buffer.getvalue()
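# A small inspection sketch (an assumption, not part of the provider above):
# the tar() method returns raw bytes, so wrap them in io.BytesIO before handing
# them to tarfile. Registering the provider on a Faker instance is assumed to
# follow the usual add_provider() pattern.
import io
import tarfile


def _inspect_tar_payload(payload, expected_num_files):
    with tarfile.open(fileobj=io.BytesIO(payload), mode="r") as handle:
        members = handle.getmembers()
        assert len(members) == expected_num_files
        # The total uncompressed size is the sum of the member sizes.
        return sum(member.size for member in members)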
def write_to_tar(url_file, out_file, makevocab=False):
    """Reads the tokenized .story files corresponding to the urls listed in
    the url_file and writes them to out_file.
    """
    print("Making bin file for URLs listed in {}...".format(url_file))
    url_list = [line.strip() for line in open(url_file)]
    url_hashes = get_url_hashes(url_list)
    story_fnames = [s + ".story" for s in url_hashes]
    num_stories = len(story_fnames)
    if makevocab:
        vocab_counter = collections.Counter()

    idx_to_story_dict = {}
    with tarfile.open(out_file, 'w') as writer:
        for idx, s in enumerate(story_fnames):
            if idx % 1000 == 0:
                print("Writing story {} of {}; {:.2f} percent done".format(
                    idx, num_stories, float(idx) * 100.0 / float(num_stories)))

            # Look in the tokenized story dirs to find the .story file
            # corresponding to this url
            if os.path.isfile(os.path.join(cnn_tokenized_stories_dir, s)):
                story_file = os.path.join(cnn_tokenized_stories_dir, s)
            elif os.path.isfile(os.path.join(dm_tokenized_stories_dir, s)):
                story_file = os.path.join(dm_tokenized_stories_dir, s)
            else:
                print("Error: Couldn't find tokenized story file {} in either"
                      " tokenized story directories {} and {}. Was there an"
                      " error during tokenization?".format(
                          s, cnn_tokenized_stories_dir,
                          dm_tokenized_stories_dir))
                # Abort here rather than falling through with an unset or
                # stale story_file path.
                raise Exception(
                    "Tokenized story file {} not found.".format(s))

            # Get the strings to write to .bin file
            article_sents, abstract_sents = get_art_abs(story_file)

            # Write to JSON file
            js_example = {}
            js_example['id'] = s.replace('.story', '')
            js_example['article'] = article_sents
            js_example['abstract'] = abstract_sents
            js_serialized = json.dumps(js_example, indent=4).encode()
            save_file = io.BytesIO(js_serialized)
            tar_info = tarfile.TarInfo('{}/{}.json'.format(
                os.path.basename(out_file).replace('.tar', ''), idx))
            tar_info.size = len(js_serialized)
            writer.addfile(tar_info, save_file)
            idx_to_story_dict[idx] = s

            # Update the vocab counter, if applicable
            if makevocab:
                art_tokens = ' '.join(article_sents).split()
                abs_tokens = ' '.join(abstract_sents).split()
                tokens = art_tokens + abs_tokens
                tokens = [t.strip() for t in tokens]  # strip
                tokens = [t for t in tokens if t != ""]  # remove empty
                vocab_counter.update(tokens)

    print("Finished writing file {}\n".format(out_file))
    with open(os.path.join(finished_files_dir, "id_to_story_dict.pkl"),
              'wb') as story_dict_f:
        pkl.dump(idx_to_story_dict, story_dict_f)

    # write vocab to file
    if makevocab:
        print("Writing vocab file...")
        with open(os.path.join(finished_files_dir, "vocab_cnt.pkl"),
                  'wb') as vocab_file:
            pkl.dump(vocab_counter, vocab_file)
        print("Finished writing vocab file")
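# A reader-side sketch (illustrative only) for archives produced by
# write_to_tar above: each member is '<archive-name>/<idx>.json', so the
# stories can be recovered with json.loads. The argument is assumed to be the
# same out_file path used above.
import json
import tarfile


def _read_stories_from_tar(out_file):
    stories = []
    with tarfile.open(out_file, 'r') as reader:
        for member in reader.getmembers():
            fobj = reader.extractfile(member)
            stories.append(json.loads(fobj.read().decode('utf-8')))
    return stories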
def get_submission_archive(self, submissions, sub_folders, aggregations, archive_file=None): """ :param submissions: a list of submissions :param sub_folders: possible values: []: put all submissions in / ['taskid']: put all submissions for each task in a different directory /taskid/ ['username']: put all submissions for each user in a different directory /username/ ['taskid','username']: /taskid/username/ ['username','taskid']: /username/taskid/ :return: a file-like object containing a tgz archive of all the submissions """ tmpfile = archive_file if archive_file is not None else tempfile.TemporaryFile( ) tar = tarfile.open(fileobj=tmpfile, mode='w:gz') for submission in submissions: submission = self.get_input_from_submission(submission) submission_yaml = io.BytesIO( inginious.common.custom_yaml.dump(submission).encode('utf-8')) # Considering multiple single submissions for each user for username in submission["username"]: # Compute base path in the tar file base_path = "/" for sub_folder in sub_folders: if sub_folder == 'taskid': base_path = submission['taskid'] + base_path elif sub_folder == 'username': base_path = '_' + '-'.join( submission['username']) + base_path base_path = base_path[1:] elif sub_folder == 'aggregation': if aggregations[username] is None: # If classrooms are not used, and user is not grouped, his classroom is replaced by None base_path = '_' + '-'.join( submission['username']) + base_path base_path = base_path[1:] else: base_path = ( aggregations[username]["description"] + " (" + str(aggregations[username]["_id"]) + ")").replace(" ", "_") + base_path base_path = '/' + base_path base_path = base_path[1:] submission_yaml_fname = base_path + str( submission["_id"]) + '/submission.test' # Avoid putting two times the same submission on the same place if submission_yaml_fname not in tar.getnames(): info = tarfile.TarInfo(name=submission_yaml_fname) info.size = submission_yaml.getbuffer().nbytes info.mtime = time.mktime( submission["submitted_on"].timetuple()) # Add file in tar archive tar.addfile(info, fileobj=submission_yaml) # If there is an archive, add it too if 'archive' in submission and submission[ 'archive'] is not None and submission[ 'archive'] != "": subfile = self._gridfs.get(submission['archive']) subtar = tarfile.open(fileobj=subfile, mode="r:gz") for member in subtar.getmembers(): subtarfile = subtar.extractfile(member) member.name = base_path + str( submission["_id"]) + "/archive/" + member.name tar.addfile(member, subtarfile) subtar.close() subfile.close() # If there files that were uploaded by the student, add them if submission['input'] is not None: for pid, problem in submission['input'].items(): # If problem is a dict, it is a file (from the specification of the problems) if isinstance(problem, dict): # Get the extension (match extensions with more than one dot too) DOUBLE_EXTENSIONS = [ '.tar.gz', '.tar.bz2', '.tar.bz', '.tar.xz' ] ext = "" if not problem['filename'].endswith( tuple(DOUBLE_EXTENSIONS)): _, ext = os.path.splitext( problem['filename']) else: for t_ext in DOUBLE_EXTENSIONS: if problem['filename'].endswith(t_ext): ext = t_ext subfile = io.BytesIO( base64.b64decode(problem['value'])) taskfname = base_path + str( submission["_id"] ) + '/uploaded_files/' + pid + ext # Generate file info info = tarfile.TarInfo(name=taskfname) info.size = subfile.getbuffer().nbytes info.mtime = time.mktime( submission["submitted_on"].timetuple()) # Add file in tar archive tar.addfile(info, fileobj=subfile) # Close tarfile and put tempfile cursor at 0 
tar.close() tmpfile.seek(0) return tmpfile
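# A standalone sketch of the re-parenting trick used above for the student
# 'archive' field: members of an inner tar are copied into the outer tar under
# a new prefix by rewriting member.name before addfile(). Names here are
# illustrative, not taken from the original code.
import tarfile


def _copy_inner_tar(outer_tar, inner_fileobj, prefix):
    with tarfile.open(fileobj=inner_fileobj, mode="r:gz") as subtar:
        for member in subtar.getmembers():
            data = subtar.extractfile(member) if member.isfile() else None
            member.name = prefix + "/" + member.name
            outer_tar.addfile(member, data)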
def main(configuration, viewer_dir, viewer_exes, libs_suffix, dump_syms_tool, viewer_symbol_file): print "generate_breakpad_symbols run with args: %s" % str( (configuration, viewer_dir, viewer_exes, libs_suffix, dump_syms_tool, viewer_symbol_file)) if not re.match("release", configuration, re.IGNORECASE): print "skipping breakpad symbol generation for non-release build." return 0 # split up list of viewer_exes # "'Second Life' SLPlugin" becomes ['Second Life', 'SLPlugin'] viewer_exes = shlex.split(viewer_exes) found_required = dict([(module, False) for module in viewer_exes]) def matches(f): if f in viewer_exes: found_required[f] = True return True return fnmatch.fnmatch(f, libs_suffix) def list_files(): for (dirname, subdirs, filenames) in os.walk(viewer_dir): #print "scanning '%s' for modules..." % dirname for f in itertools.ifilter(matches, filenames): yield os.path.join(dirname, f) def dump_module(m): print "dumping module '%s' with '%s'..." % (m, dump_syms_tool) child = subprocess.Popen([dump_syms_tool, m], stdout=subprocess.PIPE) out, err = child.communicate() return (m, child.returncode, out, err) out = tarfile.open(viewer_symbol_file, 'w:bz2') for (filename, status, symbols, err) in itertools.imap(dump_module, list_files()): if status == 0: module_line = symbols[:symbols.index('\n')] module_line = module_line.split() hash_id = module_line[3] module = ' '.join(module_line[4:]) if sys.platform in ['win32', 'cygwin']: mod_name = module[:module.rindex('.pdb')] else: mod_name = module symbolfile = StringIO.StringIO(symbols) info = tarfile.TarInfo( "%(module)s/%(hash_id)s/%(mod_name)s.sym" % dict(module=module, hash_id=hash_id, mod_name=mod_name)) info.size = symbolfile.len out.addfile(info, symbolfile) else: print >> sys.stderr, "warning: failed to dump symbols for '%s': %s" % ( filename, err) out.close() missing_modules = [ m for (m, _) in itertools.ifilter(lambda (k, v): not v, found_required.iteritems()) ] if missing_modules: print >> sys.stderr, "failed to generate %s" % viewer_symbol_file os.remove(viewer_symbol_file) raise MissingModuleError(missing_modules) symbols = tarfile.open(viewer_symbol_file, 'r:bz2') tarfile_members = symbols.getnames() symbols.close() for required_module in viewer_exes: def match_module_basename(m): return os.path.splitext(required_module)[0].lower() \ == os.path.splitext(os.path.basename(m))[0].lower() # there must be at least one .sym file in tarfile_members that matches # each required module (ignoring file extensions) if not reduce(operator.or_, itertools.imap(match_module_basename, tarfile_members)): print >> sys.stderr, "failed to find required %s in generated %s" \ % (required_module, viewer_symbol_file) os.remove(viewer_symbol_file) raise MissingModuleError([required_module]) print "successfully generated %s including required modules '%s'" % ( viewer_symbol_file, viewer_exes) return 0
def export(self): # missmedia_action = 0 #-------------------------------------------------------------- # def remove_clicked(): # # File is lost => remove all references and the object itself # for p_id in self.db.iter_family_handles(): # p = self.db.get_family_from_handle(p_id) # nl = p.get_media_list() # for o in nl: # if o.get_reference_handle() == m_id: # nl.remove(o) # p.set_media_list(nl) # self.db.commit_family(p,None) # for key in self.db.iter_person_handles(): # p = self.db.get_person_from_handle(key) # nl = p.get_media_list() # for o in nl: # if o.get_reference_handle() == m_id: # nl.remove(o) # p.set_media_list(nl) # self.db.commit_person(p,None) # for key in self.db.get_source_handles(): # p = self.db.get_source_from_handle(key) # nl = p.get_media_list() # for o in nl: # if o.get_reference_handle() == m_id: # nl.remove(o) # p.set_media_list(nl) # self.db.commit_source(p,None) # for key in self.db.get_place_handles(): # p = self.db.get_place_from_handle(key) # nl = p.get_media_list() # for o in nl: # if o.get_reference_handle() == m_id: # nl.remove(o) # p.set_media_list(nl) # self.db.commit_place(p,None) # for key in self.db.get_event_handles(): # p = self.db.get_event_from_handle(key) # nl = p.get_media_list() # for o in nl: # if o.get_reference_handle() == m_id: # nl.remove(o) # p.set_media_list(nl) # self.db.commit_event(p,None) # self.db.remove_media(m_id,None) # def leave_clicked(): # # File is lost => do nothing, leave as is # pass # def select_clicked(): # # File is lost => select a file to replace the lost one # def fs_close_window(obj): # pass # def fs_ok_clicked(obj): # name = fs_top.get_filename() # if os.path.isfile(name): # archive.add(name) # fs_top = gtk.FileChooserDialog("%s - GRAMPS" % _("Select file"), # buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL, # gtk.STOCK_OK, Gtk.ResponseType.OK) # ) # response = fs_top.run() # if response == Gtk.ResponseType.OK: # fs_ok_clicked(fs_top) # elif response == gtk.RESPONSE_CANCEL: # fs_close_window(fs_top) # fs_top.destroy() #--------------------------------------------------------------- try: archive = tarfile.open(self.filename,'w:gz') except EnvironmentError as msg: log.warn(str(msg)) self.user.notify_error(_('Failure writing %s') % self.filename, str(msg)) return 0 # Write media files first, since the database may be modified # during the process (i.e. when removing object) for m_id in self.db.get_media_handles(sort_handles=True): mobject = self.db.get_media_from_handle(m_id) filename = media_path_full(self.db, mobject.get_path()) archname = str(mobject.get_path()) if os.path.isfile(filename) and os.access(filename, os.R_OK): archive.add(filename, archname) # else: # # File is lost => ask what to do # if missmedia_action == 0: # mmd = MissingMediaDialog( # _("Media object could not be found"), # _("%(file_name)s is referenced in the database, " # "but no longer exists. The file may have been " # "deleted or moved to a different location. " # "You may choose to either remove the reference " # "from the database, keep the reference to the " # "missing file, or select a new file." 
# ) % { 'file_name' : filename }, # remove_clicked, leave_clicked, select_clicked) # missmedia_action = mmd.default_action # elif missmedia_action == 1: # remove_clicked() # elif missmedia_action == 2: # leave_clicked() # elif missmedia_action == 3: # select_clicked() # Write XML now g = BytesIO() gfile = XmlWriter(self.db, self.user, 2) gfile.write_handle(g) tarinfo = tarfile.TarInfo('data.gramps') tarinfo.size = len(g.getvalue()) tarinfo.mtime = time.time() if not win(): tarinfo.uid = os.getuid() tarinfo.gid = os.getgid() g.seek(0) archive.addfile(tarinfo, g) archive.close() g.close() return True
def createPackedInputSandbox(sandbox_files, inws, name):
    """Put all sandbox_files into a tarball called name and write it into the input workspace.
    This function is called by the Ganga client at submission time.
    Arguments:
    'sandbox_files': a list of File or FileBuffer objects.
    'inws': an InputFileWorkspace object
    Return: a list containing a path to the tarball
    """

    #    from Ganga.Core import FileWorkspace
    #    from Ganga.GPIDev.Lib.File import File

    #    tgzfile = os.path.join(tmpdir,name)

    tgzfile = inws.getPath(name)

    import tarfile
    import stat

    logger.debug("Creating packed Sandbox with %s sandbox files." % len(sandbox_files))

    #
    # Current release with os module
    #
    # wsdir = os.path.join(tmpdir,"ws")
    # ws = FileWorkspace.FileWorkspace(wsdir)
    # ws.create()
    # for f in sandbox_files:
    # ws.writefile(f)

    # if os.system("tar -C %s -czf %s ."%(wsdir,tgzfile)) !=0:
    # print "ERROR:: can't create tarball file with InputSandbox"

    #
    # Future release with tarball module

    if mimetypes.guess_type(tgzfile)[1] in ['gzip']:
        file_format = 'gz'
    elif mimetypes.guess_type(tgzfile)[1] in ['bzip2']:
        file_format = 'bz2'
    else:
        file_format = ''

    with open(tgzfile, 'wb') as this_tarfile:
        tf = tarfile.open(name=tgzfile, fileobj=this_tarfile, mode="w:%s" % file_format)
        tf.dereference = True  # --not needed in Windows

        from Ganga.GPIDev.Lib.File.FileBuffer import FileBuffer
        from Ganga.GPIDev.Lib.File.File import File
        from Ganga.GPIDev.Base.Proxy import isType

        for f in sandbox_files:
            fileobj = None
            if isType(f, FileBuffer):
                contents = f.getContents()  # is it FileBuffer?
                # print "Getting FileBuffer Contents"

                from StringIO import StringIO
                fileobj = StringIO(contents)

                tinfo = tarfile.TarInfo()
                # FIX for Ganga/test/Internals/FileBuffer_Sandbox
                # Don't keep the './' on files as looking for an exact filename
                # afterwards won't work
                if f.subdir == os.curdir:
                    tinfo.name = os.path.basename(f.name)
                else:
                    tinfo.name = os.path.join(f.subdir, os.path.basename(f.name))
                import time
                tinfo.mtime = time.time()
                tinfo.size = fileobj.len
            else:
                # except AttributeError as err:
                # File
                # print "Getting File %s" % f.name
                # tf.add(f.name,os.path.join(f.subdir,os.path.basename(f.name)))
                logger.debug("Opening file for sandbox: %s" % f.name)
                try:
                    fileobj = open(f.name)
                except Exception as err:
                    raise SandboxError("File '%s' does not exist." % f.name)

                tinfo = tf.gettarinfo(
                    f.name, os.path.join(f.subdir, os.path.basename(f.name)))

            if f.isExecutable():
                tinfo.mode = tinfo.mode | stat.S_IXUSR
            tf.addfile(tinfo, fileobj)
            fileobj.close()

        tf.close()

    return [tgzfile]
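# A compact sketch (assumed names, Python 3 io.BytesIO in place of the
# Python 2 StringIO used above) of the two paths the sandbox packer takes: an
# in-memory buffer needs a hand-built TarInfo with size and mtime set
# explicitly, while an on-disk file can reuse gettarinfo().
import io
import os
import tarfile
import time


def _add_buffer_and_file(tf, arcname, payload, path_on_disk):
    # In-memory payload: build the TarInfo by hand.
    info = tarfile.TarInfo(arcname)
    info.size = len(payload)
    info.mtime = time.time()
    tf.addfile(info, io.BytesIO(payload))

    # On-disk file: let the TarFile derive size, mode and mtime.
    disk_info = tf.gettarinfo(path_on_disk,
                              arcname=os.path.basename(path_on_disk))
    with open(path_on_disk, 'rb') as fobj:
        tf.addfile(disk_info, fobj)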
def write_archive(self, treeish, archive, timestamp=None, prefix=''):
    """Write treeish into an archive

    If no timestamp is provided and 'treeish' is a commit, its committer
    timestamp will be used. Otherwise the current time will be used.

    All path names in the archive are prefixed with 'prefix', which
    defaults to an empty string.

    Arguments:

    treeish
        The treeish to write.
    archive
        An archive from the 'tarfile' module
    timestamp
        Timestamp to use for the files in the archive.
    prefix
        Extra prefix to add to the path names in the archive.

    Example::

        >>> import tarfile, pygit2
        >>> with tarfile.open('foo.tar', 'w') as archive:
        ...     repo = pygit2.Repository('.')
        ...     repo.write_archive(repo.head.target, archive)
    """

    # Try to get a tree from whatever we got
    if isinstance(treeish, Tree):
        tree = treeish

    if isinstance(treeish, Oid) or is_string(treeish):
        treeish = self[treeish]

    # if we don't have a timestamp, try to get it from a commit
    if not timestamp:
        try:
            commit = treeish.peel(Commit)
            timestamp = commit.committer.time
        except Exception:
            pass

    # as a last resort, use the current timestamp
    if not timestamp:
        timestamp = int(time())

    tree = treeish.peel(Tree)

    index = Index()
    index.read_tree(tree)

    for entry in index:
        content = self[entry.id].read_raw()
        info = tarfile.TarInfo(prefix + entry.path)
        info.size = len(content)
        info.mtime = timestamp
        info.uname = info.gname = 'root'  # just because git does this
        if entry.mode == GIT_FILEMODE_LINK:
            info.type = tarfile.SYMTYPE
            info.linkname = content
            info.mode = 0o777  # symlinks get placeholder
            info.size = 0
            archive.addfile(info)
        else:
            archive.addfile(info, StringIO(content))
def add_file(self, name, kind=tarfile.REGTYPE, content=None, link=None,
             file_content=None, uid=0, gid=0, uname='', gname='', mtime=None,
             mode=None):
    """Add a file to the current tar.

    Args:
      name: the name of the file to add.
      kind: the type of the file to add, see tarfile.*TYPE.
      content: a textual content to put in the file.
      link: if the file is a link, the destination of the link.
      file_content: file to read the content from. Provide either this
        one or `content` to specify the content of the file.
      uid: owner user identifier.
      gid: owner group identifier.
      uname: owner user name.
      gname: owner group name.
      mtime: modification time to put in the archive.
      mode: unix permission mode of the file; defaults to 0644 for regular
        files and 0755 otherwise.
    """
    if file_content and os.path.isdir(file_content):
        # Recurse into directory
        self.add_dir(name, file_content, uid, gid, uname, gname, mtime, mode)
        return

    if not (name == self.root_directory or name.startswith('/') or
            name.startswith(self.root_directory + '/')):
        name = os.path.join(self.root_directory, name)
    if kind == tarfile.DIRTYPE:
        name = name.rstrip('/')
        if name in self.directories:
            return
    if mtime is None:
        mtime = self.default_mtime

    components = name.rsplit('/', 1)
    if len(components) > 1:
        d = components[0]
        self.add_file(d,
                      tarfile.DIRTYPE,
                      uid=uid,
                      gid=gid,
                      uname=uname,
                      gname=gname,
                      mtime=mtime,
                      mode=0o755)
    tarinfo = tarfile.TarInfo(name)
    tarinfo.mtime = mtime
    tarinfo.uid = uid
    tarinfo.gid = gid
    tarinfo.uname = uname
    tarinfo.gname = gname
    tarinfo.type = kind
    if mode is None:
        tarinfo.mode = 0o644 if kind == tarfile.REGTYPE else 0o755
    else:
        tarinfo.mode = mode
    if link:
        tarinfo.linkname = link
    if content:
        content_bytes = six.ensure_binary(content, 'utf-8')
        tarinfo.size = len(content_bytes)
        self._addfile(tarinfo, io.BytesIO(content_bytes))
    elif file_content:
        with open(file_content, 'rb') as f:
            tarinfo.size = os.fstat(f.fileno()).st_size
            self._addfile(tarinfo, f)
    else:
        if kind == tarfile.DIRTYPE:
            self.directories.add(name)
        self._addfile(tarinfo)
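# For reference, a plain-tarfile sketch (illustrative names and values only) of
# the entry kinds this helper emits (regular file, directory, symlink), with
# the same ownership, mode, and mtime fields set explicitly.
import io
import tarfile


def _write_reference_entries(tar, mtime=0):
    directory = tarfile.TarInfo("pkg")
    directory.type = tarfile.DIRTYPE
    directory.mode = 0o755
    directory.mtime = mtime
    tar.addfile(directory)

    payload = b"hello\n"
    regular = tarfile.TarInfo("pkg/hello.txt")
    regular.size = len(payload)
    regular.mode = 0o644
    regular.mtime = mtime
    tar.addfile(regular, io.BytesIO(payload))

    link = tarfile.TarInfo("pkg/hello-link")
    link.type = tarfile.SYMTYPE
    link.linkname = "hello.txt"
    link.mode = 0o777
    link.mtime = mtime
    tar.addfile(link)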
def add_to_tar(tar, name, sio_obj, perm=420): info = tarfile.TarInfo(name=name) info.size = flen(sio_obj) info.mode = perm sio_obj.seek(0) tar.addfile(info, sio_obj)
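# flen() is not defined in this snippet; a common implementation (assumed here)
# measures a file-like object by seeking to the end and restoring the position.
def flen(fileobj):
    current = fileobj.tell()
    fileobj.seek(0, 2)  # 2 == os.SEEK_END
    length = fileobj.tell()
    fileobj.seek(current)
    return length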
def makepkg(self, path): """Creates an Arch Linux package archive. A package archive is generated in the location 'path', based on the data from the object. """ archive_files = [] # .PKGINFO data = ["pkgname = %s" % self.name] data.append("pkgver = %s" % self.version) data.append("pkgdesc = %s" % self.desc) data.append("url = %s" % self.url) data.append("builddate = %s" % self.builddate) data.append("packager = %s" % self.packager) data.append("size = %s" % self.size) if self.arch: data.append("arch = %s" % self.arch) for i in self.license: data.append("license = %s" % i) for i in self.replaces: data.append("replaces = %s" % i) for i in self.groups: data.append("group = %s" % i) for i in self.depends: data.append("depend = %s" % i) for i in self.optdepends: data.append("optdepend = %s" % i) for i in self.conflicts: data.append("conflict = %s" % i) for i in self.provides: data.append("provides = %s" % i) for i in self.backup: data.append("backup = %s" % i) archive_files.append((".PKGINFO", "\n".join(data))) # .INSTALL if any(self.install.values()): archive_files.append((".INSTALL", self.installfile())) self.path = os.path.join(path, self.filename()) util.mkdir(os.path.dirname(self.path)) # Generate package metadata tar = tarfile.open(self.path, "w:gz") for name, data in archive_files: info = tarfile.TarInfo(name) info.size = len(data) tar.addfile(info, StringIO(data)) # Generate package file system for name in self.files: fileinfo = util.getfileinfo(name) info = tarfile.TarInfo(fileinfo["filename"]) if fileinfo["hasperms"]: info.mode = fileinfo["perms"] elif fileinfo["isdir"]: info.mode = 0o755 if fileinfo["isdir"]: info.type = tarfile.DIRTYPE tar.addfile(info) elif fileinfo["islink"]: info.type = tarfile.SYMTYPE info.linkname = fileinfo["link"] tar.addfile(info) else: # TODO wow what a hack, adding a newline to match mkfile? filedata = name + "\n" info.size = len(filedata) tar.addfile(info, StringIO(filedata)) tar.close()
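# A small read-back sketch (illustrative only) for packages produced by makepkg
# above: .PKGINFO is a plain "key = value" text member, so it can be parsed
# straight out of the generated tarball.
import tarfile


def _read_pkginfo(pkg_path):
    with tarfile.open(pkg_path, "r:gz") as tar:
        pkginfo = tar.extractfile(".PKGINFO").read().decode("utf-8")
    fields = []
    for line in pkginfo.splitlines():
        key, _, value = line.partition(" = ")
        fields.append((key, value))
    return fields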
def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('topic', type=str,
                        help='Name of Kafka topic to listen to.')
    parser.add_argument('--group', type=str,
                        help='Globally unique name of the consumer group. '
                        'Consumers in the same group will share messages '
                        '(i.e., only one consumer will receive a message, '
                        'as in a queue). Default is value of $HOSTNAME.')
    parser.add_argument('--tarName', type=str, help='Name of tar file.')
    avrogroup = parser.add_mutually_exclusive_group()
    avrogroup.add_argument('--decode', dest='avroFlag', action='store_true',
                           help='Decode from Avro format. (default)')
    avrogroup.add_argument('--decode-off', dest='avroFlag',
                           action='store_false',
                           help='Do not decode from Avro format.')
    parser.set_defaults(avroFlag=True)

    args = parser.parse_args()

    # Configure consumer connection to Kafka broker
    conf = {
        'bootstrap.servers': 'epyc.astro.washington.edu:9092,epyc.astro.washington.edu:9093,epyc.astro.washington.edu:9094',
        'default.topic.config': {
            'auto.offset.reset': 'smallest'
        }
    }
    if args.group:
        conf['group.id'] = args.group
    else:
        conf['group.id'] = os.environ['HOSTNAME']

    # Configure Avro reader schema
    schema_files = [
        "./ztf-avro-alert/schema/candidate.avsc",
        "./ztf-avro-alert/schema/cutout.avsc",
        "./ztf-avro-alert/schema/prv_candidate.avsc",
        "./ztf-avro-alert/schema/alert.avsc"
    ]

    # Start consumer and collect alerts in a stream
    with alertConsumer.AlertConsumer(args.topic, schema_files, **conf) as streamReader:
        with tarfile.open("./" + args.tarName + ".tar", "a") as tar:
            while True:
                try:
                    msg = streamReader.poll(decode=args.avroFlag)
                    if msg is None:
                        print('currenttime: ', int(strftime('%H')))
                        if (int(strftime('%H')) >= stopTIME):
                            print("break \n")
                            break
                        else:
                            print("continue \n")
                            continue
                    else:
                        for record in msg:
                            #record0 = msg_text(record)
                            candidate_data = record.get('candidate')
                            fn = str(candidate_data['candid']) + ".avro"
                            with io.BytesIO() as avro_file:
                                record0 = [record]
                                fastavro.writer(avro_file, (combineSchemas(schema_files)), record0)
                                avro_file.seek(0)
                                tarinfo = tarfile.TarInfo(name=fn)
                                tarinfo.size = len(avro_file.getvalue())
                                tarinfo.mtime = time.time()
                                tarinfo.mode = 0o744
                                tarinfo.type = tarfile.REGTYPE
                                tarinfo.uid = tarinfo.gid = 0
                                tarinfo.uname = tarinfo.gname = "root"
                                tar.addfile(tarinfo, avro_file)
                            #print( "%s \t %8.9f \t %8.5f \t %8.5f \n" % \
                            #    (record.get('objectId'),candidate_data['jd'],candidate_data['ra'],candidate_data['dec']) )

                except alertConsumer.EopError as e:
                    # Write when reaching end of partition
                    sys.stderr.write(e.message)
                    #continue
                except IndexError:
                    sys.stderr.write('%% Data cannot be decoded\n')
                except UnicodeDecodeError:
                    sys.stderr.write('%% Unexpected data format received\n')
                except KeyboardInterrupt:
                    sys.stderr.write('%% Aborted by user\n')
                    break

    print('we reached the end of stream at: {}'.format(
        strftime("%b %d %Y %H:%M:%S")))
    sys.exit()