Example #1
    def output_archive(self, imagefile=None, tarname=None, zipname=None):
        """Write the changed and/or new files to a tarfile or a ZIP file. """
        import zipfile, tarfile, StringIO, datetime

        tfile = None
        zfile = None

        to_archive = self.new_files.copy()
        to_archive = to_archive.union(
            set([val[1] for val in self.changed_content]))
        to_archive = to_archive.union(
            set([val[1] for val in self.changed_properties]))

        # Make sure we are just writing out inodes that have file contents
        to_archive = filter(
            lambda fi: fi.allocated() and fi.has_tag("inode") and
            fi.has_contents() and (fi.name_type() == '' or fi.name_type() == 'r'),
            to_archive)

        if len(to_archive) == 0:
            print(
                "No archive created, as no allocated files created or modified"
            )
            return

        if tarname:
            print(">>> Creating tar file: %s" % tarname)
            tfile = tarfile.TarFile(tarname, mode="w")

        if zipname:
            print(">>> Creating zip file: %s" % zipname)
            zfile = zipfile.ZipFile(zipname, mode="w", allowZip64=True)

        files_written = set()
        content_error_log = []
        for fi in to_archive:
            filename = fi.filename()
            fncount = 1
            while filename in files_written:
                filename = "%s.%d" % (fi.filename(), fnperm)
                fncount += 1
            contents = None
            try:
                contents = fi.contents(imagefile)
            except ValueError as ve:
                if ve.message.startswith("icat error"):
                    #Some files cannot be recovered, even from images that do not seem corrupted; log the icat command that failed.
                    content_error_log.append(ve.message)
                else:
                    #This is a more interesting error, so have process die to report immediately.
                    raise
            if contents:
                if tfile:
                    info = tarfile.TarInfo(name=filename)
                    info.mtime = fi.mtime()
                    info.atime = fi.atime()
                    info.ctime = fi.ctime()
                    info.uid = fi.uid()
                    info.gid = fi.gid()
                    info.size = fi.filesize()
                    # addfile requires a 'file', so let's make one
                    string = StringIO.StringIO()
                    string.write(contents)
                    string.seek(0)
                    tfile.addfile(tarinfo=info, fileobj=string)
                if zfile:
                    mtimestamp = fi.mtime().timestamp()
                    info = zipfile.ZipInfo(filename)
                    if mtimestamp:
                        #mtime might be null
                        info.date_time = datetime.datetime.fromtimestamp(
                            mtimestamp).utctimetuple()
                    info.internal_attr = 1
                    info.external_attr = 2175008768  # specifies mode 0644
                    zfile.writestr(info, contents)
        if tfile: tfile.close()
        if zfile: zfile.close()
        if len(content_error_log) > 0:
            sys.stderr.write("Errors retrieving file contents:\n")
            sys.stderr.write("\n".join(content_error_log))
            sys.stderr.write("\n")
Example #2
def create_tar_info(name, size):
    info = tarfile.TarInfo(name)
    info.size = size
    info.mtime = time.time()
    return info
Example #3
    def build(
        self,
        client,
        image_spec,
        memory_limit,
        build_args,
        cache_from,
        extra_build_kwargs,
        safe_mode,
    ):
        tarf = io.BytesIO()
        tar = tarfile.open(fileobj=tarf, mode="w")
        dockerfile_tarinfo = tarfile.TarInfo("Dockerfile")
        dockerfile = self.render().encode("utf-8")
        dockerfile_tarinfo.size = len(dockerfile)

        tar.addfile(dockerfile_tarinfo, io.BytesIO(dockerfile))

        def _filter_tar(tar):
            # We need to unset these for build_script_files we copy into tar
            # Otherwise they seem to vary each time, preventing effective use
            # of the cache!
            # https://github.com/docker/docker-py/pull/1582 is related
            tar.uname = ""
            tar.gname = ""
            tar.uid = int(build_args.get("NB_UID", 1000))
            tar.gid = int(build_args.get("NB_UID", 1000))
            return tar

        for src in sorted(self.get_build_script_files()):
            dest_path, src_path = self.generate_build_context_filename(src)
            tar.add(src_path, dest_path, filter=_filter_tar)

        tar.add(ENTRYPOINT_FILE, "repo2docker-entrypoint", filter=_filter_tar)
        tar.add(PACKAGE_JSON, "package.json", filter=_filter_tar)

        tar.add(".", "src/", filter=_filter_tar)

        tar.close()
        tarf.seek(0)

        # If you work on this bit of code check the corresponding code in
        # buildpacks/docker.py where it is duplicated
        if not isinstance(memory_limit, int):
            raise ValueError("The memory limit has to be specified as an"
                             "integer but is '{}'".format(type(memory_limit)))
        limits = {}
        if memory_limit:
            # We want to always disable swap. Docker expects `memswap` to
            # be total allowable memory, *including* swap - while `memory`
            # points to non-swap memory. We set both values to the same so
            # we use no swap.
            limits = {"memory": memory_limit, "memswap": memory_limit}

        build_kwargs = dict(
            fileobj=tarf,
            tag=image_spec,
            custom_context=True,
            buildargs=build_args,
            decode=True,
            forcerm=True,
            rm=True,
            container_limits=limits,
            cache_from=cache_from,
        )

        build_kwargs.update(extra_build_kwargs)

        for line in client.build(**build_kwargs):
            yield line
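
The cache-friendly part of Example #3 is the filter= callback passed to TarFile.add(), which rewrites each TarInfo before it is written so ownership metadata cannot invalidate Docker's layer cache. A standalone sketch of that normalization hook, with illustrative paths and IDs rather than repo2docker's real values:

import tarfile

def normalize(info: tarfile.TarInfo) -> tarfile.TarInfo:
    # Pin ownership and timestamps so the archive bytes stay identical
    # across machines and runs.
    info.uid = info.gid = 1000
    info.uname = info.gname = ""
    info.mtime = 0
    return info

with tarfile.open("context.tar", "w") as tar:
    tar.add("src", arcname="src", filter=normalize)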
Example #4
    def export(self, file_name):
        """Exports workflow for use on DTV.
        """
        exported = [u for u in self if hasattr(u, "export")]
        if len(exported) == 0:
            raise ValueError("No units support export. Implement export() "
                             "method in at least one.")
        obj = {
            "workflow": self.name,
            "checksum": self.checksum,
            "units": [{
                "class": {
                    "name": unit.__class__.__name__,
                    "uuid": unit.__class__.__id__
                },
                "data": unit.export()
            } for unit in exported]
        }
        for index, unit in enumerate(exported):
            obj["units"][index]["links"] = [
                exported.index(u) for u in sorted(unit.links_to.keys())
                if u in exported
            ]
        # TODO(v.markovtsev): check the resulting graph's connectivity
        # TODO(v.markovtsev): check for single entry and exit points

        import json

        arrays = []

        def array_file_name(arr, index):
            return "%04d_%s" % (index, "x".join(arr.shape))

        def export_numpy_array(arr):
            if isinstance(arr, numpy.ndarray):
                arrays.append(arr)
                return array_file_name(arr, len(arrays) - 1)
            raise TypeError("Objects of class other than numpy.ndarray are "
                            "not supported")

        try:
            with tarfile.open(file_name, "w:gz") as tar:
                io = six.BytesIO()
                json.dump(obj,
                          io,
                          indent=4,
                          sort_keys=True,
                          default=export_numpy_array)
                ti = tarfile.TarInfo("contents.json")
                ti.size = io.tell()
                ti.mode = int("666", 8)
                io.seek(0)
                tar.addfile(ti, fileobj=io)
                for index, arr in enumerate(arrays):
                    io = six.BytesIO()
                    numpy.save(io, arr)
                    ti = tarfile.TarInfo(array_file_name(arr, index) + ".npy")
                    ti.size = io.tell()
                    ti.mode = int("666", 8)
                    io.seek(0)
                    tar.addfile(ti, fileobj=io)
        except:
            self.exception("Failed to export to %s", file_name)
Example #5
    def debug_download(self, job):
        """
        Job to stream debug file.

        This method is meant to be used in conjunction with `core.download` to get the debug
        downloaded via HTTP.
        """
        job.set_progress(0, 'Generating debug file')
        debug_job = self.middleware.call_sync('system.debug')

        standby_debug = None
        is_freenas = self.middleware.call_sync('system.is_freenas')
        if not is_freenas and self.middleware.call_sync('failover.licensed'):
            try:
                standby_debug = self.middleware.call_sync(
                    'failover.call_remote', 'system.debug', [], {'job': True}
                )
            except Exception:
                self.logger.warn('Failed to get debug from standby node', exc_info=True)
            else:
                remote_ip = self.middleware.call_sync('failover.remote_ip')
                url = self.middleware.call_sync(
                    'failover.call_remote', 'core.download', ['filesystem.get', [standby_debug], 'debug.txz'],
                )[1]

                url = f'http://{remote_ip}:6000{url}'
                standby_debug = io.BytesIO()
                with requests.get(url, stream=True) as r:
                    for i in r.iter_content(chunk_size=1048576):
                        if standby_debug.tell() > 20971520:
                            raise CallError(f'Standby debug file is bigger than 20MiB.')
                        standby_debug.write(i)

        debug_job.wait_sync()
        if debug_job.error:
            raise CallError(debug_job.error)

        job.set_progress(90, 'Preparing debug file for streaming')

        if standby_debug:
            # Debug file cannot be big on HA because we put both debugs in memory
            # so they can be downloaded at once.
            try:
                if os.stat(debug_job.result).st_size > 20971520:
                    raise CallError(f'Debug file is bigger than 20MiB.')
            except FileNotFoundError:
                raise CallError('Debug file was not found, try again.')

            network = self.middleware.call_sync('network.configuration.config')
            node = self.middleware.call_sync('failover.node')

            tario = io.BytesIO()
            with tarfile.open(fileobj=tario, mode='w') as tar:

                if node == 'A':
                    my_hostname = network['hostname']
                    remote_hostname = network['hostname_b']
                else:
                    my_hostname = network['hostname_b']
                    remote_hostname = network['hostname']

                tar.add(debug_job.result, f'{my_hostname}.txz')

                tarinfo = tarfile.TarInfo(f'{remote_hostname}.txz')
                tarinfo.size = standby_debug.tell()
                standby_debug.seek(0)
                tar.addfile(tarinfo, fileobj=standby_debug)

            tario.seek(0)
            shutil.copyfileobj(tario, job.pipes.output.w)
        else:
            with open(debug_job.result, 'rb') as f:
                shutil.copyfileobj(f, job.pipes.output.w)
        job.pipes.output.w.close()
Example #6
def write_tar(src_fs, file, compression=None, encoding="utf-8", walker=None):
    """
    Write the contents of a filesystem to a tar file.

    :param file: Destination file, may be a file name or an open file
        object.
    :type file: str or file-like.
    :param compression: Compression to use.
    :type compression: str
    :param encoding: The encoding to use for filenames. The default is
        ``"utf-8"``.
    :type encoding: str
    :param walker: A :class:`~fs.walk.Walker` instance, or None to use
        default walker. You can use this to specify which files you
        want to compress.
    :type walker: Walker or None

    """

    type_map = {
        ResourceType.block_special_file: tarfile.BLKTYPE,
        ResourceType.character: tarfile.CHRTYPE,
        ResourceType.directory: tarfile.DIRTYPE,
        ResourceType.fifo: tarfile.FIFOTYPE,
        ResourceType.file: tarfile.REGTYPE,
        ResourceType.socket: tarfile.AREGTYPE,  # no type for socket
        ResourceType.symlink: tarfile.SYMTYPE,
        ResourceType.unknown: tarfile.AREGTYPE,  # no type for unknown
    }

    mode = 'w:{}'.format(compression or '')
    try:
        _tar = tarfile.open(fileobj=file, mode=mode)
    except (TypeError, AttributeError):
        _tar = tarfile.open(file, mode=mode)

    walker = walker or Walker()
    with _tar:
        gen_walk = walker.info(src_fs,
                               namespaces=["details", "stat", "access"])
        for path, info in gen_walk:
            # Tar names must be relative
            tar_name = relpath(path)
            if not six.PY3:
                # Python2 expects bytes filenames
                tar_name = tar_name.encode(encoding, 'replace')

            tar_info = tarfile.TarInfo(tar_name)

            if info.has_namespace('stat'):
                mtime = info.get('stat', 'st_mtime', None) or time.time()
            else:
                mtime = info.modified or time.time()

            if isinstance(mtime, datetime):
                mtime = datetime_to_epoch(mtime)
            if isinstance(mtime, float):
                mtime = int(mtime)
            tar_info.mtime = mtime

            for tarattr, infoattr in {
                    'uid': 'uid',
                    'gid': 'gid',
                    'uname': 'user',
                    'gname': 'group'
            }.items():
                if getattr(info, infoattr) is not None:
                    setattr(tar_info, tarattr, getattr(info, infoattr))

            tar_info.mode = getattr(info.permissions, 'mode', 420)

            if info.is_dir:
                tar_info.type = tarfile.DIRTYPE
                _tar.addfile(tar_info)
            else:
                tar_info.type = type_map.get(info.type, tarfile.REGTYPE)
                tar_info.size = info.size
                with src_fs.openbin(path) as bin_file:
                    _tar.addfile(tar_info, bin_file)
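
For context, write_tar is meant to be driven with a PyFilesystem object and an ordinary binary file. A rough usage sketch, assuming pyfilesystem2's open_fs and the Walker class the docstring already refers to, with an illustrative directory name:

from fs import open_fs
from fs.walk import Walker

with open_fs("my_project") as src_fs:
    with open("my_project.tar.gz", "wb") as f:
        # Compress everything except VCS metadata.
        write_tar(src_fs, f, compression="gz",
                  walker=Walker(exclude_dirs=[".git"]))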
Example #7
class UiRequestPlugin(object):
    def formatTableRow(self, row, class_name=""):
        back = []
        for format, val in row:
            if val is None:
                formatted = "n/a"
            elif format == "since":
                if val:
                    formatted = "%.0f" % (time.time() - val)
                else:
                    formatted = "n/a"
            else:
                formatted = format % val
            back.append("<td>%s</td>" % formatted)
        return "<tr class='%s'>%s</tr>" % (class_name.encode("utf8"),
                                           "".join(back).encode("utf8"))

    def getObjSize(self, obj, hpy=None):
        if hpy:
            return float(hpy.iso(obj).domisize) / 1024
        else:
            return 0

    # /Stats entry point
    def actionStats(self):
        import gc
        import sys
        from Ui import UiRequest
        from Db import Db
        from Crypt import CryptConnection

        hpy = None
        if self.get.get("size") == "1":  # Calc obj size
            try:
                import guppy
                hpy = guppy.hpy()
            except:
                pass
        self.sendHeader()

        if "Multiuser" in PluginManager.plugin_manager.plugin_names and not config.multiuser_local:
            yield "This function is disabled on this proxy"
            raise StopIteration

        s = time.time()
        main = sys.modules["main"]

        # Style
        yield """
        <style>
         * { font-family: monospace }
         table td, table th { text-align: right; padding: 0px 10px }
         .connections td { white-space: nowrap }
         .serving-False { opacity: 0.3 }
        </style>
        """

        # Memory
        yield "rev%s | " % config.rev
        yield "%s | " % main.file_server.ip_external_list
        yield "Port: %s | " % main.file_server.port
        yield "IP Network: %s | " % main.file_server.supported_ip_types
        yield "Opened: %s | " % main.file_server.port_opened
        yield "Crypt: %s | " % CryptConnection.manager.crypt_supported
        yield "In: %.2fMB, Out: %.2fMB  | " % (
            float(main.file_server.bytes_recv) / 1024 / 1024,
            float(main.file_server.bytes_sent) / 1024 / 1024)
        yield "Peerid: %s  | " % main.file_server.peer_id
        yield "Time correction: %.2fs" % main.file_server.getTimecorrection()

        try:
            import psutil
            process = psutil.Process(os.getpid())
            mem = process.get_memory_info()[0] / float(2**20)
            yield "Mem: %.2fMB | " % mem
            yield "Threads: %s | " % len(process.threads())
            yield "CPU: usr %.2fs sys %.2fs | " % process.cpu_times()
            yield "Files: %s | " % len(process.open_files())
            yield "Sockets: %s | " % len(process.connections())
            yield "Calc size <a href='?size=1'>on</a> <a href='?size=0'>off</a>"
        except Exception:
            pass
        yield "<br>"

        # Connections
        yield "<b>Connections</b> (%s, total made: %s, in: %s, out: %s):<br>" % (
            len(main.file_server.connections),
            main.file_server.last_connection_id, main.file_server.num_incoming,
            main.file_server.num_outgoing)
        yield "<table class='connections'><tr> <th>id</th> <th>type</th> <th>ip</th> <th>open</th> <th>crypt</th> <th>ping</th>"
        yield "<th>buff</th> <th>bad</th> <th>idle</th> <th>open</th> <th>delay</th> <th>cpu</th> <th>out</th> <th>in</th> <th>last sent</th>"
        yield "<th>wait</th> <th>version</th> <th>time</th> <th>sites</th> </tr>"
        for connection in main.file_server.connections:
            if "cipher" in dir(connection.sock):
                cipher = connection.sock.cipher()[0]
                tls_version = connection.sock.version()
            else:
                cipher = connection.crypt
                tls_version = ""
            if "time" in connection.handshake and connection.last_ping_delay:
                time_correction = connection.handshake[
                    "time"] - connection.handshake_time - connection.last_ping_delay
            else:
                time_correction = 0.0
            yield self.formatTableRow([
                ("%3d", connection.id), ("%s", connection.type),
                ("%s:%s", (connection.ip, connection.port)),
                ("%s", connection.handshake.get("port_opened")),
                ("<span title='%s %s'>%s</span>", (cipher, tls_version,
                                                   connection.crypt)),
                ("%6.3f", connection.last_ping_delay),
                ("%s", connection.incomplete_buff_recv),
                ("%s", connection.bad_actions),
                ("since",
                 max(connection.last_send_time, connection.last_recv_time)),
                ("since", connection.start_time),
                ("%.3f",
                 max(-1,
                     connection.last_sent_time - connection.last_send_time)),
                ("%.3f", connection.cpu_time),
                ("%.0fkB", connection.bytes_sent / 1024),
                ("%.0fkB", connection.bytes_recv / 1024),
                ("<span title='Recv: %s'>%s</span>",
                 (connection.last_cmd_recv, connection.last_cmd_sent)),
                ("%s", connection.waiting_requests.keys()),
                ("%s r%s", (connection.handshake.get("version"),
                            connection.handshake.get("rev", "?"))),
                ("%.2fs", time_correction), ("%s", connection.sites)
            ])
        yield "</table>"

        # Trackers
        yield "<br><br><b>Trackers:</b><br>"
        yield "<table class='trackers'><tr> <th>address</th> <th>request</th> <th>successive errors</th> <th>last_request</th></tr>"
        for tracker_address, tracker_stat in sorted(
                sys.modules["Site.SiteAnnouncer"].global_stats.iteritems()):
            yield self.formatTableRow([
                ("%s", tracker_address), ("%s", tracker_stat["num_request"]),
                ("%s", tracker_stat["num_error"]),
                ("%.0f min ago",
                 min(999, (time.time() - tracker_stat["time_request"]) / 60))
            ])
        yield "</table>"

        if "AnnounceShare" in PluginManager.plugin_manager.plugin_names:
            yield "<br><br><b>Shared trackers:</b><br>"
            yield "<table class='trackers'><tr> <th>address</th> <th>added</th> <th>found</th> <th>latency</th> <th>successive errors</th> <th>last_success</th></tr>"
            from AnnounceShare import AnnounceSharePlugin
            for tracker_address, tracker_stat in sorted(
                    AnnounceSharePlugin.tracker_storage.getTrackers(
                    ).iteritems()):
                yield self.formatTableRow([
                    ("%s", tracker_address),
                    ("%.0f min ago",
                     min(999,
                         (time.time() - tracker_stat["time_added"]) / 60)),
                    ("%.0f min ago",
                     min(999,
                         (time.time() - tracker_stat.get("time_found", 0)) /
                         60)),
                    ("%.3fs", tracker_stat["latency"]),
                    ("%s", tracker_stat["num_error"]),
                    ("%.0f min ago",
                     min(999,
                         (time.time() - tracker_stat["time_success"]) / 60)),
                ])
            yield "</table>"

        # Tor hidden services
        yield "<br><br><b>Tor hidden services (status: %s):</b><br>" % main.file_server.tor_manager.status.encode(
            "utf8")
        for site_address, onion in main.file_server.tor_manager.site_onions.items(
        ):
            yield "- %-34s: %s<br>" % (site_address, onion.encode("utf8"))

        # Db
        yield "<br><br><b>Db</b>:<br>"
        for db in sys.modules["Db.Db"].opened_dbs:
            tables = [
                row["name"] for row in db.execute(
                    "SELECT name FROM sqlite_master WHERE type = 'table'").
                fetchall()
            ]
            table_rows = {}
            for table in tables:
                table_rows[table] = db.execute("SELECT COUNT(*) AS c FROM %s" %
                                               table).fetchone()["c"]
            db_size = os.path.getsize(db.db_path) / 1024.0 / 1024.0
            yield "- %.3fs: %s %.3fMB, table rows: %s<br>" % (
                time.time() - db.last_query_time, db.db_path.encode("utf8"),
                db_size, json.dumps(table_rows, sort_keys=True))

        # Sites
        yield "<br><br><b>Sites</b>:"
        yield "<table>"
        yield "<tr><th>address</th> <th>connected</th> <th title='connected/good/total'>peers</th> <th>content.json</th> <th>out</th> <th>in</th>  </tr>"
        for site in sorted(self.server.sites.values(),
                           lambda a, b: cmp(a.address, b.address)):
            yield self.formatTableRow([
                ("""<a href='#' onclick='document.getElementById("peers_%s").style.display="initial"; return false'>%s</a>""",
                 (site.address, site.address)),
                ("%s", [
                    peer.connection.id for peer in site.peers.values()
                    if peer.connection and peer.connection.connected
                ]),
                ("%s/%s/%s", (len([
                    peer for peer in site.peers.values()
                    if peer.connection and peer.connection.connected
                ]), len(site.getConnectablePeers(100)), len(site.peers))),
                ("%s (loaded: %s)",
                 (len(site.content_manager.contents),
                  len([
                      key for key, val in dict(
                          site.content_manager.contents).iteritems() if val
                  ]))),
                ("%.0fkB", site.settings.get("bytes_sent", 0) / 1024),
                ("%.0fkB", site.settings.get("bytes_recv", 0) / 1024),
            ], "serving-%s" % site.settings["serving"])
            yield "<tr><td id='peers_%s' style='display: none; white-space: pre' colspan=6>" % site.address
            for key, peer in site.peers.items():
                if peer.time_found:
                    time_found = int(time.time() - peer.time_found) / 60
                else:
                    time_found = "--"
                if peer.connection:
                    connection_id = peer.connection.id
                else:
                    connection_id = None
                if site.content_manager.has_optional_files:
                    yield "Optional files: %4s " % len(peer.hashfield)
                time_added = (time.time() - peer.time_added) / (60 * 60 * 24)
                yield "(#%4s, rep: %2s, err: %s, found: %3s min, add: %.1f day) %30s -<br>" % (
                    connection_id, peer.reputation, peer.connection_error,
                    time_found, time_added, key)
            yield "<br></td></tr>"
        yield "</table>"

        # Big files
        yield "<br><br><b>Big files</b>:<br>"
        for site in self.server.sites.values():
            if not site.settings.get("has_bigfile"):
                continue
            bigfiles = {}
            yield """<a href="#" onclick='document.getElementById("bigfiles_%s").style.display="initial"; return false'>%s</a><br>""" % (
                site.address, site.address)
            for peer in site.peers.values():
                if not peer.time_piecefields_updated:
                    continue
                for sha512, piecefield in peer.piecefields.iteritems():
                    if sha512 not in bigfiles:
                        bigfiles[sha512] = []
                    bigfiles[sha512].append(peer)

            yield "<div id='bigfiles_%s' style='display: none'>" % site.address
            for sha512, peers in bigfiles.iteritems():
                yield "<br> - " + sha512 + " (hash id: %s)<br>" % site.content_manager.hashfield.getHashId(
                    sha512)
                yield "<table>"
                for peer in peers:
                    yield "<tr><td>" + peer.key + "</td><td>" + peer.piecefields[
                        sha512].tostring() + "</td></tr>"
                yield "</table>"
            yield "</div>"

        # Cmd stats
        yield "<div style='float: left'>"
        yield "<br><br><b>Sent commands</b>:<br>"
        yield "<table>"
        for stat_key, stat in sorted(
                main.file_server.stat_sent.items(),
                lambda a, b: cmp(a[1]["bytes"], b[1]["bytes"]),
                reverse=True):
            yield "<tr><td>%s</td><td style='white-space: nowrap'>x %s =</td><td>%.0fkB</td></tr>" % (
                stat_key, stat["num"], stat["bytes"] / 1024)
        yield "</table>"
        yield "</div>"

        yield "<div style='float: left; margin-left: 20%; max-width: 50%'>"
        yield "<br><br><b>Received commands</b>:<br>"
        yield "<table>"
        for stat_key, stat in sorted(
                main.file_server.stat_recv.items(),
                lambda a, b: cmp(a[1]["bytes"], b[1]["bytes"]),
                reverse=True):
            yield "<tr><td>%s</td><td style='white-space: nowrap'>x %s =</td><td>%.0fkB</td></tr>" % (
                stat_key, stat["num"], stat["bytes"] / 1024)
        yield "</table>"
        yield "</div>"
        yield "<div style='clear: both'></div>"

        # No more if not in debug mode
        if not config.debug:
            raise StopIteration

        # Object types

        obj_count = {}
        for obj in gc.get_objects():
            obj_type = str(type(obj))
            if obj_type not in obj_count:
                obj_count[obj_type] = [0, 0]
            obj_count[obj_type][0] += 1  # Count
            obj_count[obj_type][1] += float(sys.getsizeof(obj)) / 1024  # Size

        yield "<br><br><b>Objects in memory (types: %s, total: %s, %.2fkb):</b><br>" % (
            len(obj_count), sum([stat[0] for stat in obj_count.values()]),
            sum([stat[1] for stat in obj_count.values()]))

        for obj, stat in sorted(obj_count.items(),
                                key=lambda x: x[1][0],
                                reverse=True):  # Sorted by count
            yield " - %.1fkb = %s x <a href=\"/Listobj?type=%s\">%s</a><br>" % (
                stat[1], stat[0], obj, cgi.escape(obj))

        # Classes

        class_count = {}
        for obj in gc.get_objects():
            obj_type = str(type(obj))
            if obj_type != "<type 'instance'>":
                continue
            class_name = obj.__class__.__name__
            if class_name not in class_count:
                class_count[class_name] = [0, 0]
            class_count[class_name][0] += 1  # Count
            class_count[class_name][1] += float(
                sys.getsizeof(obj)) / 1024  # Size

        yield "<br><br><b>Classes in memory (types: %s, total: %s, %.2fkb):</b><br>" % (
            len(class_count), sum([stat[0] for stat in class_count.values()]),
            sum([stat[1] for stat in class_count.values()]))

        for obj, stat in sorted(class_count.items(),
                                key=lambda x: x[1][0],
                                reverse=True):  # Sorted by count
            yield " - %.1fkb = %s x <a href=\"/Dumpobj?class=%s\">%s</a><br>" % (
                stat[1], stat[0], obj, cgi.escape(obj))

        from greenlet import greenlet
        objs = [obj for obj in gc.get_objects() if isinstance(obj, greenlet)]
        yield "<br>Greenlets (%s):<br>" % len(objs)
        for obj in objs:
            yield " - %.1fkb: %s<br>" % (self.getObjSize(
                obj, hpy), cgi.escape(repr(obj).encode("utf8")))

        from Worker import Worker
        objs = [obj for obj in gc.get_objects() if isinstance(obj, Worker)]
        yield "<br>Workers (%s):<br>" % len(objs)
        for obj in objs:
            yield " - %.1fkb: %s<br>" % (self.getObjSize(
                obj, hpy), cgi.escape(repr(obj)))

        from Connection import Connection
        objs = [obj for obj in gc.get_objects() if isinstance(obj, Connection)]
        yield "<br>Connections (%s):<br>" % len(objs)
        for obj in objs:
            yield " - %.1fkb: %s<br>" % (self.getObjSize(
                obj, hpy), cgi.escape(repr(obj)))

        from socket import socket
        objs = [obj for obj in gc.get_objects() if isinstance(obj, socket)]
        yield "<br>Sockets (%s):<br>" % len(objs)
        for obj in objs:
            yield " - %.1fkb: %s<br>" % (self.getObjSize(
                obj, hpy), cgi.escape(repr(obj)))

        from msgpack import Unpacker
        objs = [obj for obj in gc.get_objects() if isinstance(obj, Unpacker)]
        yield "<br>Msgpack unpacker (%s):<br>" % len(objs)
        for obj in objs:
            yield " - %.1fkb: %s<br>" % (self.getObjSize(
                obj, hpy), cgi.escape(repr(obj)))

        from Site import Site
        objs = [obj for obj in gc.get_objects() if isinstance(obj, Site)]
        yield "<br>Sites (%s):<br>" % len(objs)
        for obj in objs:
            yield " - %.1fkb: %s<br>" % (self.getObjSize(
                obj, hpy), cgi.escape(repr(obj)))

        objs = [
            obj for obj in gc.get_objects()
            if isinstance(obj, self.server.log.__class__)
        ]
        yield "<br>Loggers (%s):<br>" % len(objs)
        for obj in objs:
            yield " - %.1fkb: %s<br>" % (self.getObjSize(
                obj, hpy), cgi.escape(repr(obj.name)))

        objs = [obj for obj in gc.get_objects() if isinstance(obj, UiRequest)]
        yield "<br>UiRequests (%s):<br>" % len(objs)
        for obj in objs:
            yield " - %.1fkb: %s<br>" % (self.getObjSize(
                obj, hpy), cgi.escape(repr(obj)))

        from Peer import Peer
        objs = [obj for obj in gc.get_objects() if isinstance(obj, Peer)]
        yield "<br>Peers (%s):<br>" % len(objs)
        for obj in objs:
            yield " - %.1fkb: %s<br>" % (self.getObjSize(
                obj, hpy), cgi.escape(repr(obj)))

        objs = [(key, val) for key, val in sys.modules.iteritems()
                if val is not None]
        objs.sort()
        yield "<br>Modules (%s):<br>" % len(objs)
        for module_name, module in objs:
            yield " - %.3fkb: %s %s<br>" % (self.getObjSize(
                module, hpy), module_name, cgi.escape(repr(module)))

        gc.collect()  # Explicit garbage collection
        yield "Done in %.1f" % (time.time() - s)

    def actionDumpobj(self):

        import gc
        import sys

        self.sendHeader()

        if "Multiuser" in PluginManager.plugin_manager.plugin_names and not config.multiuser_local:
            yield "This function is disabled on this proxy"
            raise StopIteration

        # No more if not in debug mode
        if not config.debug:
            yield "Not in debug mode"
            raise StopIteration

        class_filter = self.get.get("class")

        yield """
        <style>
         * { font-family: monospace; white-space: pre }
         table * { text-align: right; padding: 0px 10px }
        </style>
        """

        objs = gc.get_objects()
        for obj in objs:
            obj_type = str(type(obj))
            if obj_type != "<type 'instance'>" or obj.__class__.__name__ != class_filter:
                continue
            yield "%.1fkb %s... " % (float(sys.getsizeof(obj)) / 1024,
                                     cgi.escape(str(obj)))
            for attr in dir(obj):
                yield "- %s: %s<br>" % (attr,
                                        cgi.escape(str(getattr(obj, attr))))
            yield "<br>"

        gc.collect()  # Explicit garbage collection

    def actionListobj(self):

        import gc
        import sys

        self.sendHeader()

        if "Multiuser" in PluginManager.plugin_manager.plugin_names and not config.multiuser_local:
            yield "This function is disabled on this proxy"
            raise StopIteration

        # No more if not in debug mode
        if not config.debug:
            yield "Not in debug mode"
            raise StopIteration

        type_filter = self.get.get("type")

        yield """
        <style>
         * { font-family: monospace; white-space: pre }
         table * { text-align: right; padding: 0px 10px }
        </style>
        """

        yield "Listing all %s objects in memory...<br>" % cgi.escape(
            type_filter)

        ref_count = {}
        objs = gc.get_objects()
        for obj in objs:
            obj_type = str(type(obj))
            if obj_type != type_filter:
                continue
            refs = [
                ref for ref in gc.get_referrers(obj) if
                hasattr(ref, "__class__") and ref.__class__.__name__ not in [
                    "list", "dict", "function", "type", "frame", "WeakSet",
                    "tuple"
                ]
            ]
            if not refs:
                continue
            try:
                yield "%.1fkb <span title=\"%s\">%s</span>... " % (
                    float(sys.getsizeof(obj)) / 1024, cgi.escape(
                        str(obj)), cgi.escape(str(obj)[0:100].ljust(100)))
            except:
                continue
            for ref in refs:
                yield " ["
                if "object at" in str(ref) or len(str(ref)) > 100:
                    yield str(ref.__class__.__name__)
                else:
                    yield str(ref.__class__.__name__) + ":" + cgi.escape(
                        str(ref))
                yield "] "
                ref_type = ref.__class__.__name__
                if ref_type not in ref_count:
                    ref_count[ref_type] = [0, 0]
                ref_count[ref_type][0] += 1  # Count
                ref_count[ref_type][1] += float(
                    sys.getsizeof(obj)) / 1024  # Size
            yield "<br>"

        yield "<br>Object referrer (total: %s, %.2fkb):<br>" % (
            len(ref_count), sum([stat[1] for stat in ref_count.values()]))

        for obj, stat in sorted(ref_count.items(),
                                key=lambda x: x[1][0],
                                reverse=True)[0:30]:  # Sorted by count
            yield " - %.1fkb = %s x %s<br>" % (stat[1], stat[0],
                                               cgi.escape(str(obj)))

        gc.collect()  # Explicit garbage collection

    def actionBenchmark(self):
        import sys
        import gc
        from contextlib import contextmanager

        output = self.sendHeader()

        if "Multiuser" in PluginManager.plugin_manager.plugin_names and not config.multiuser_local:
            yield "This function is disabled on this proxy"
            raise StopIteration

        @contextmanager
        def benchmark(name, standard):
            s = time.time()
            output("- %s" % name)
            try:
                yield 1
            except Exception, err:
                output("<br><b>! Error: %s</b><br>" % err)
            taken = time.time() - s
            if taken > 0:
                multipler = standard / taken
            else:
                multipler = 99
            if multipler < 0.3:
                speed = "Sloooow"
            elif multipler < 0.5:
                speed = "Ehh"
            elif multipler < 0.8:
                speed = "Goodish"
            elif multipler < 1.2:
                speed = "OK"
            elif multipler < 1.7:
                speed = "Fine"
            elif multipler < 2.5:
                speed = "Fast"
            elif multipler < 3.5:
                speed = "WOW"
            else:
                speed = "Insane!!"
            output("%.3fs [x%.2f: %s]<br>" % (taken, multipler, speed))
            time.sleep(0.01)

        yield """
        <style>
         * { font-family: monospace }
         table * { text-align: right; padding: 0px 10px }
        </style>
        """

        yield "Benchmarking Ainkuraddo %s (rev%s) Python %s on: %s...<br>" % (
            config.version, config.rev, sys.version, sys.platform)

        t = time.time()

        # CryptBitcoin
        yield "<br>CryptBitcoin:<br>"
        from Crypt import CryptBitcoin

        # seed = CryptBitcoin.newSeed()
        # yield "- Seed: %s<br>" % seed
        seed = "e180efa477c63b0f2757eac7b1cce781877177fe0966be62754ffd4c8592ce38"

        with benchmark("hdPrivatekey x 10", 0.7):
            for i in range(10):
                privatekey = CryptBitcoin.hdPrivatekey(seed, i * 10)
                yield "."
            valid = "5JsunC55XGVqFQj5kPGK4MWgTL26jKbnPhjnmchSNPo75XXCwtk"
            assert privatekey == valid, "%s != %s" % (privatekey, valid)

        data = "Hello" * 1024  # 5k
        with benchmark("sign x 10", 0.35):
            for i in range(10):
                yield "."
                sign = CryptBitcoin.sign(data, privatekey)
            valid = "G1GXaDauZ8vX/N9Jn+MRiGm9h+I94zUhDnNYFaqMGuOiBHB+kp4cRPZOL7l1yqK5BHa6J+W97bMjvTXtxzljp6w="
            assert sign == valid, "%s != %s" % (sign, valid)

        address = CryptBitcoin.privatekeyToAddress(privatekey)
        if CryptBitcoin.opensslVerify:  # OpenSSL available
            with benchmark("openssl verify x 100", 0.37):
                for i in range(100):
                    if i % 10 == 0:
                        yield "."
                    ok = CryptBitcoin.verify(data, address, sign)
                assert ok, "does not verify from %s" % address
        else:
            yield " - openssl verify x 100...not avalible :(<br>"

        openssl_verify_bk = CryptBitcoin.opensslVerify  # Emulate openssl not being available
        CryptBitcoin.opensslVerify = None
        with benchmark("pure-python verify x 10", 1.6):
            for i in range(10):
                yield "."
                ok = CryptBitcoin.verify(data, address, sign)
            assert ok, "does not verify from %s" % address
        CryptBitcoin.opensslVerify = openssl_verify_bk

        # CryptHash
        yield "<br>CryptHash:<br>"
        from Crypt import CryptHash
        from cStringIO import StringIO

        data = StringIO("Hello" * 1024 * 1024)  # 5m
        with benchmark("sha256 5M x 10", 0.6):
            for i in range(10):
                data.seek(0)
                hash = CryptHash.sha256sum(data)
                yield "."
            valid = "8cd629d9d6aff6590da8b80782a5046d2673d5917b99d5603c3dcb4005c45ffa"
            assert hash == valid, "%s != %s" % (hash, valid)

        data = StringIO("Hello" * 1024 * 1024)  # 5m
        with benchmark("sha512 5M x 10", 0.6):
            for i in range(10):
                data.seek(0)
                hash = CryptHash.sha512sum(data)
                yield "."
            valid = "9ca7e855d430964d5b55b114e95c6bbb114a6d478f6485df93044d87b108904d"
            assert hash == valid, "%s != %s" % (hash, valid)

        with benchmark("os.urandom(256) x 1000", 0.0065):
            for i in range(10):
                for y in range(100):
                    data = os.urandom(256)
                yield "."

        # Msgpack
        import msgpack
        yield "<br>Msgpack: (version: %s)<br>" % ".".join(
            map(str, msgpack.version))
        binary = 'fqv\xf0\x1a"e\x10,\xbe\x9cT\x9e(\xa5]u\x072C\x8c\x15\xa2\xa8\x93Sw)\x19\x02\xdd\t\xfb\xf67\x88\xd9\xee\x86\xa1\xe4\xb6,\xc6\x14\xbb\xd7$z\x1d\xb2\xda\x85\xf5\xa0\x97^\x01*\xaf\xd3\xb0!\xb7\x9d\xea\x89\xbbh8\xa1"\xa7]e(@\xa2\xa5g\xb7[\xae\x8eE\xc2\x9fL\xb6s\x19\x19\r\xc8\x04S\xd0N\xe4]?/\x01\xea\xf6\xec\xd1\xb3\xc2\x91\x86\xd7\xf4K\xdf\xc2lV\xf4\xe8\x80\xfc\x8ep\xbb\x82\xb3\x86\x98F\x1c\xecS\xc8\x15\xcf\xdc\xf1\xed\xfc\xd8\x18r\xf9\x80\x0f\xfa\x8cO\x97(\x0b]\xf1\xdd\r\xe7\xbf\xed\x06\xbd\x1b?\xc5\xa0\xd7a\x82\xf3\xa8\xe6@\xf3\ri\xa1\xb10\xf6\xd4W\xbc\x86\x1a\xbb\xfd\x94!bS\xdb\xaeM\x92\x00#\x0b\xf7\xad\xe9\xc2\x8e\x86\xbfi![%\xd31]\xc6\xfc2\xc9\xda\xc6v\x82P\xcc\xa9\xea\xb9\xff\xf6\xc8\x17iD\xcf\xf3\xeeI\x04\xe9\xa1\x19\xbb\x01\x92\xf5nn4K\xf8\xbb\xc6\x17e>\xa7 \xbbv'
        data = {
            "int": 1024 * 1024 * 1024,
            "float": 12345.67890,
            "text": "hello" * 1024,
            "binary": binary
        }
        with benchmark("pack 5K x 10 000", 0.78):
            for i in range(10):
                for y in range(1000):
                    data_packed = msgpack.packb(data)
                yield "."
            valid = """\x84\xa3int\xce@\x00\x00\x00\xa4text\xda\x14\x00hellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohell
ohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohello\xa5float\xcb@\xc8\x1c\xd6\xe61\xf8\xa1\xa6binary\xda\x01\x00fqv\xf0\x1a"e\x10,\xbe\x9cT\x9e(\xa5]u\x072C\x8c\x15\xa2\xa8\x93Sw)\x19\x02\xdd\t\xfb\xf67\x88\xd9\xee\x86\xa1\xe4\xb6,\xc6\x14\xbb\xd7$z\x1d\xb2\xda\x85\xf5\xa0\x97^\x01*\xaf\xd3\xb0!\xb7\x9d\xea\x89\xbbh8\xa1"\xa7]e(@\xa2\xa5g\xb7[\xae\x8eE\xc2\x9fL\xb6s\x19\x19\r\xc8\x04S\xd0N\xe4]?/\x01\xea\xf6\xec\xd1\xb3\xc2\x91\x86\xd7\xf4K\xdf\xc2lV\xf4\xe8\x80\xfc\x8ep\xbb\x82\xb3\x86\x98F\x1c\xecS\xc8\x15\xcf\xdc\xf1\xed\xfc\xd8\x18r\xf9\x80\x0f\xfa\x8cO\x97(\x0b]\xf1\xdd\r\xe7\xbf\xed\x06\xbd\x1b?\xc5\xa0\xd7a\x82\xf3\xa8\xe6@\xf3\ri\xa1\xb10\xf6\xd4W\xbc\x86\x1a\xbb\xfd\x94!bS\xdb\xaeM\x92\x00#\x0b\xf7\xad\xe9\xc2\x8e\x86\xbfi![%\xd31]\xc6\xfc2\xc9\xda\xc6v\x82P\xcc\xa9\xea\xb9\xff\xf6\xc8\x17iD\xcf\xf3\xeeI\x04\xe9\xa1\x19\xbb\x01\x92\xf5nn4K\xf8\xbb\xc6\x17e>\xa7 \xbbv"""
            assert data_packed == valid, "%s<br>!=<br>%s" % (repr(data_packed),
                                                             repr(valid))

        with benchmark("unpack 5K x 10 000", 1.2):
            for i in range(10):
                for y in range(1000):
                    data_unpacked = msgpack.unpackb(data_packed)
                yield "."
            assert data == data_unpacked, "%s != %s" % (data_unpacked, data)

        with benchmark("streaming unpack 5K x 10 000", 1.4):
            for i in range(10):
                unpacker = msgpack.Unpacker()
                for y in range(1000):
                    unpacker.feed(data_packed)
                    for data_unpacked in unpacker:
                        pass
                yield "."
            assert data == data_unpacked, "%s != %s" % (data_unpacked, data)

        # Db
        from Db import Db
        import sqlite3
        yield "<br>Db: (version: %s, API: %s)<br>" % (sqlite3.sqlite_version,
                                                      sqlite3.version)

        schema = {
            "db_name": "TestDb",
            "db_file": "%s/benchmark.db" % config.data_dir,
            "maps": {
                ".*": {
                    "to_table": {
                        "test": "test"
                    }
                }
            },
            "tables": {
                "test": {
                    "cols": [["test_id", "INTEGER"], ["title", "TEXT"],
                             ["json_id", "INTEGER REFERENCES json (json_id)"]],
                    "indexes":
                    ["CREATE UNIQUE INDEX test_key ON test(test_id, json_id)"],
                    "schema_changed":
                    1426195822
                }
            }
        }

        if os.path.isfile("%s/benchmark.db" % config.data_dir):
            os.unlink("%s/benchmark.db" % config.data_dir)

        with benchmark("Open x 10", 0.13):
            for i in range(10):
                db = Db(schema, "%s/benchmark.db" % config.data_dir)
                db.checkTables()
                db.close()
                yield "."

        db = Db(schema, "%s/benchmark.db" % config.data_dir)
        db.checkTables()
        import json

        with benchmark("Insert x 10 x 1000", 1.0):
            for u in range(10):  # 10 users
                data = {"test": []}
                for i in range(1000):  # 1000 lines of data
                    data["test"].append({
                        "test_id":
                        i,
                        "title":
                        "Testdata for %s message %s" % (u, i)
                    })
                json.dump(data,
                          open("%s/test_%s.json" % (config.data_dir, u), "w"))
                db.updateJson("%s/test_%s.json" % (config.data_dir, u))
                os.unlink("%s/test_%s.json" % (config.data_dir, u))
                yield "."

        with benchmark("Buffered insert x 100 x 100", 1.3):
            cur = db.getCursor()
            cur.execute("BEGIN")
            cur.logging = False
            for u in range(100, 200):  # 100 users
                data = {"test": []}
                for i in range(100):  # 100 lines of data
                    data["test"].append({
                        "test_id":
                        i,
                        "title":
                        "Testdata for %s message %s" % (u, i)
                    })
                json.dump(data,
                          open("%s/test_%s.json" % (config.data_dir, u), "w"))
                db.updateJson("%s/test_%s.json" % (config.data_dir, u),
                              cur=cur)
                os.unlink("%s/test_%s.json" % (config.data_dir, u))
                if u % 10 == 0:
                    yield "."
            cur.execute("COMMIT")

        yield " - Total rows in db: %s<br>" % db.execute(
            "SELECT COUNT(*) AS num FROM test").fetchone()[0]

        with benchmark("Indexed query x 1000", 0.25):
            found = 0
            cur = db.getCursor()
            cur.logging = False
            for i in range(1000):  # 1000x by test_id
                res = cur.execute("SELECT * FROM test WHERE test_id = %s" % i)
                for row in res:
                    found += 1
                if i % 100 == 0:
                    yield "."

            assert found == 20000, "Found: %s != 20000" % found

        with benchmark("Not indexed query x 100", 0.6):
            found = 0
            cur = db.getCursor()
            cur.logging = False
            for i in range(100):  # 100x by json_id
                res = cur.execute("SELECT * FROM test WHERE json_id = %s" % i)
                for row in res:
                    found += 1
                if i % 10 == 0:
                    yield "."

            assert found == 18900, "Found: %s != 18900" % found

        with benchmark("Like query x 100", 1.8):
            found = 0
            cur = db.getCursor()
            cur.logging = False
            for i in range(100):  # 100x by title LIKE
                res = cur.execute(
                    "SELECT * FROM test WHERE title LIKE '%%message %s%%'" % i)
                for row in res:
                    found += 1
                if i % 10 == 0:
                    yield "."

            assert found == 38900, "Found: %s != 38900" % found

        db.close()
        if os.path.isfile("%s/benchmark.db" % config.data_dir):
            os.unlink("%s/benchmark.db" % config.data_dir)

        gc.collect()  # Explicit garbage collection

        # Zip
        yield "<br>Compression:<br>"
        import zipfile
        test_data = "Test" * 1024
        file_name = "\xc3\x81rv\xc3\xadzt\xc5\xb0r\xc5\x91t\xc3\xbck\xc3\xb6r\xc3\xb3g\xc3\xa9p\xe4\xb8\xad\xe5\x8d\x8e%s.txt"

        with benchmark("Zip pack x 10", 0.12):
            for i in range(10):
                with zipfile.ZipFile('%s/test.zip' % config.data_dir,
                                     'w') as archive:
                    for y in range(100):
                        zip_info = zipfile.ZipInfo(file_name % y,
                                                   (1980, 1, 1, 0, 0, 0))
                        zip_info.compress_type = zipfile.ZIP_DEFLATED
                        zip_info.create_system = 3
                        archive.writestr(zip_info, test_data)
                yield "."

            hash = CryptHash.sha512sum(
                open("%s/test.zip" % config.data_dir, "rb"))
            valid = "f6ef623e6653883a1758db14aa593350e26c9dc53a8406d6e6defd6029dbd483"
            assert hash == valid, "Invalid hash: %s != %s<br>" % (hash, valid)

        with benchmark("Zip unpack x 10", 0.2):
            for i in range(10):
                with zipfile.ZipFile('%s/test.zip' %
                                     config.data_dir) as archive:
                    for y in range(100):
                        assert archive.read(file_name % y) == test_data
                yield "."

        if os.path.isfile("%s/test.zip" % config.data_dir):
            os.unlink("%s/test.zip" % config.data_dir)

        # Tar.gz
        import tarfile
        import struct

        # Monkey patch GzipFile._write_gzip_header to use a fixed date, keeping the hash independent of the current time
        def nodate_write_gzip_header(self):
            self.mtime = 0
            original_write_gzip_header(self)

        import gzip
        original_write_gzip_header = gzip.GzipFile._write_gzip_header
        gzip.GzipFile._write_gzip_header = nodate_write_gzip_header

        test_data_io = StringIO("Test" * 1024)
        with benchmark("Tar.gz pack x 10", 0.3):
            for i in range(10):
                with tarfile.open('%s/test.tar.gz' % config.data_dir,
                                  'w:gz') as archive:
                    for y in range(100):
                        test_data_io.seek(0)
                        tar_info = tarfile.TarInfo(file_name % y)
                        tar_info.size = 4 * 1024
                        archive.addfile(tar_info, test_data_io)
                yield "."

            hash = CryptHash.sha512sum(
                open("%s/test.tar.gz" % config.data_dir, "rb"))
            valid = "4704ebd8c987ed6f833059f1de9c475d443b0539b8d4c4cb8b49b26f7bbf2d19"
            assert hash == valid, "Invalid hash: %s != %s<br>" % (hash, valid)

        with benchmark("Tar.gz unpack x 10", 0.2):
            for i in range(10):
                with tarfile.open('%s/test.tar.gz' % config.data_dir,
                                  'r:gz') as archive:
                    for y in range(100):
                        assert archive.extractfile(file_name %
                                                   y).read() == test_data
                yield "."

        if os.path.isfile("%s/test.tar.gz" % config.data_dir):
            os.unlink("%s/test.tar.gz" % config.data_dir)

        # Tar.bz2
        import tarfile
        test_data_io = StringIO("Test" * 1024)
        with benchmark("Tar.bz2 pack x 10", 2.0):
            for i in range(10):
                with tarfile.open('%s/test.tar.bz2' % config.data_dir,
                                  'w:bz2') as archive:
                    for y in range(100):
                        test_data_io.seek(0)
                        tar_info = tarfile.TarInfo(file_name % y)
                        tar_info.size = 4 * 1024
                        archive.addfile(tar_info, test_data_io)
                yield "."

            hash = CryptHash.sha512sum(
                open("%s/test.tar.bz2" % config.data_dir, "rb"))
            valid = "90cba0b4d9abaa37b830bf37e4adba93bfd183e095b489ebee62aaa94339f3b5"
            assert hash == valid, "Invalid hash: %s != %s<br>" % (hash, valid)

        with benchmark("Tar.bz2 unpack x 10", 0.5):
            for i in range(10):
                with tarfile.open('%s/test.tar.bz2' % config.data_dir,
                                  'r:bz2') as archive:
                    for y in range(100):
                        assert archive.extractfile(file_name %
                                                   y).read() == test_data
                yield "."

        if os.path.isfile("%s/test.tar.bz2" % config.data_dir):
            os.unlink("%s/test.tar.bz2" % config.data_dir)

        yield "<br>Done. Total: %.2fs" % (time.time() - t)
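
The benchmark above keeps the tar.gz hash stable by monkey-patching the gzip header to a fixed mtime. As a minimal standalone sketch (not part of the benchmark, standard library only), the same effect can be had by writing the tar stream through a gzip.GzipFile constructed with mtime=0:

import gzip
import io
import tarfile

def deterministic_targz(files):
    # files: iterable of (name, data_bytes) pairs; returns tar.gz bytes
    buf = io.BytesIO()
    with gzip.GzipFile(filename="", mode="wb", fileobj=buf, mtime=0) as gz:
        with tarfile.open(fileobj=gz, mode="w") as tar:
            for name, data in files:
                info = tarfile.TarInfo(name)
                info.size = len(data)
                info.mtime = 0  # pin the per-member timestamp too
                tar.addfile(info, io.BytesIO(data))
    return buf.getvalue()
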
Beispiel #8
0
 def add_file(self, file_object, filename):
     info = self._archive.gettarinfo(arcname=filename, fileobj=file_object)
     self._archive.addfile(info, fileobj=file_object)
Beispiel #9
0
def create_tar(files, output_folder=None):
    '''create_tar will take a list of files (each a dictionary
    with name, permission, and content) and write the tarfile
    (a sha256 sum name is used) to the output_folder.
    If there is no output folder specified, the
    tar is written to a temporary folder.
    '''
    if output_folder is None:
        output_folder = tempfile.mkdtemp()

    finished_tar = None
    additions = []
    contents = []

    for entity in files:
        info = tarfile.TarInfo(name=entity['name'])
        info.mode = entity['mode']
        info.mtime = int(datetime.datetime.now().strftime('%s'))
        info.uid = entity["uid"]
        info.gid = entity["gid"]
        info.uname = entity["uname"]
        info.gname = entity["gname"]

        # Get size from stringIO write
        filey = io.StringIO()
        content = None
        try:  # python3
            info.size = filey.write(entity['content'])
            content = io.BytesIO(entity['content'].encode('utf8'))
        except Exception:  # python2
            info.size = int(filey.write(entity['content'].decode('utf-8')))
            content = io.BytesIO(entity['content'].encode('utf8'))

        if content is not None:
            addition = {'content': content,
                        'info': info}
            additions.append(addition)
            contents.append(content)

    # Now generate the sha256 name based on content
    if len(additions) > 0:
        hashy = get_content_hash(contents)
        finished_tar = "%s/sha256:%s.tar.gz" % (output_folder, hashy)

        # Warn the user if it already exists
        if os.path.exists(finished_tar):
            msg = "metadata file %s already exists " % finished_tar
            msg += "will over-write."
            bot.debug(msg)

        # Add all content objects to file
        tar = tarfile.open(finished_tar, "w:gz")
        for a in additions:
            tar.addfile(a["info"], a["content"])
        tar.close()

    else:
        msg = "No contents, environment or labels"
        msg += " for tarfile, will not generate."
        bot.debug(msg)

    return finished_tar
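
A hypothetical call to create_tar above; the dictionary keys simply mirror what the function reads (name, mode, uid, gid, uname, gname, content), and the file name and content here are invented for illustration:

files = [{
    "name": "labels.json",
    "mode": 0o644,
    "uid": 0,
    "gid": 0,
    "uname": "root",
    "gname": "root",
    "content": '{"example": true}',
}]
archive_path = create_tar(files)  # written to a temporary folder
# archive_path looks like <output_folder>/sha256:<digest>.tar.gz
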
Beispiel #10
0
def GetAndUploadStateDeltaDiff(blockNum, lastBlockNum):
    global start
    # check if there is diff and buffer the diff_output
    bashCommand = "aws s3 sync --dryrun --delete temp/persistence/stateDelta " + getBucketString(
        PERSISTENCE_SNAPSHOT_NAME) + "/persistence/stateDelta"
    process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE)
    diff_output, error = process.communicate()
    str_diff_output = diff_output.decode("utf-8")
    if re.match(r'^\s*$', str_diff_output):
        logging.warning("No state delta diff, interesting...")
        tf = tarfile.open("stateDelta_" + str(blockNum) + ".tar.gz",
                          mode="w:gz")
        t = tarfile.TarInfo("stateDelta_" + str(blockNum))
        t.type = tarfile.DIRTYPE
        tf.addfile(t)
        tf.close()
        bashCommand = "aws s3 cp stateDelta_" + str(
            blockNum) + ".tar.gz " + getBucketString(
                STATEDELTA_DIFF_NAME) + "/stateDelta_" + str(
                    blockNum) + ".tar.gz"
        process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE)
        output, error = process.communicate()
        logging.info("DUMMY upload: State-delta Diff for new txBlk :" +
                     str(blockNum) + ") in Remote S3 bucket: " +
                     getBucketString(STATEDELTA_DIFF_NAME) + " is Synced")
        os.remove("stateDelta_" + str(blockNum) + ".tar.gz")
        start = (int)(
            time.time())  # reset inactive start time - delta was uploaded
        return 1

    if (blockNum % NUM_FINAL_BLOCK_PER_POW == 0 or (lastBlockNum == 0)):
        # we don't need to upload a diff here; upload the complete stateDelta instead
        tf = tarfile.open("stateDelta_" + str(blockNum) + ".tar.gz",
                          mode="w:gz")
        tf.add("temp/persistence/stateDelta",
               arcname=os.path.basename("persistence/stateDelta_" +
                                        str(blockNum)))
        tf.close()
        bashCommand = "aws s3 cp stateDelta_" + str(
            blockNum) + ".tar.gz " + getBucketString(
                STATEDELTA_DIFF_NAME) + "/stateDelta_" + str(
                    blockNum) + ".tar.gz"
        process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE)
        output, error = process.communicate()
        logging.info("New state-delta snapshot for new ds epoch (TXBLK:" +
                     str(blockNum) + ") in Remote S3 bucket: " +
                     getBucketString(STATEDELTA_DIFF_NAME) + " is Synced")
        os.remove("stateDelta_" + str(blockNum) + ".tar.gz")
        start = (int)(
            time.time())  # reset inactive start time - delta was uploaded
        return 0

    str_diff_output = str_diff_output.strip()
    splitted = str_diff_output.split('\n')
    result = []
    if (len(splitted) > 0):
        for x in splitted:
            tok = x.split(' ')
            # skip deleted files
            if (len(tok) >= 3 and tok[1] == "upload:"):
                result.append(tok[2])

        tf = tarfile.open("stateDelta_" + str(blockNum) + ".tar.gz",
                          mode="w:gz")
        for x in result:
            tf.add(x,
                   arcname="stateDelta_" + str(blockNum) + "/" + path_leaf(x))
        tf.close()
        bashCommand = "aws s3 cp stateDelta_" + str(
            blockNum) + ".tar.gz " + getBucketString(
                STATEDELTA_DIFF_NAME) + "/stateDelta_" + str(
                    blockNum) + ".tar.gz"
        process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE)
        output, error = process.communicate()
        logging.info("State-delta Diff for new txBlk :" + str(blockNum) +
                     ") in Remote S3 bucket: " +
                     getBucketString(STATEDELTA_DIFF_NAME) + " is Synced")
        os.remove("stateDelta_" + str(blockNum) + ".tar.gz")
        start = (int)(
            time.time())  # reset inactive start time - delta was uploaded
        return 0  #success
    return 1
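
The dummy upload above builds a tar.gz whose only member is a directory entry. A minimal sketch of that trick on its own, with made-up names:

import tarfile

def make_empty_dir_targz(path, dirname):
    with tarfile.open(path, mode="w:gz") as tf:
        info = tarfile.TarInfo(dirname)
        info.type = tarfile.DIRTYPE
        tf.addfile(info)  # directory members carry no data

# e.g. make_empty_dir_targz("placeholder.tar.gz", "stateDelta_0")
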
Beispiel #11
0
 def _add_file(f, fname, buf):
     info = tarfile.TarInfo(fname)
     info.size = len(buf)
     f.addfile(info, BytesIO(buf))
Beispiel #12
0
def SyncLocalToS3Persistence(blockNum, lastBlockNum):

    # Try uploading stateDelta diff to S3
    result = GetAndUploadStateDeltaDiff(blockNum, lastBlockNum)

    # Try syncing S3 with latest persistence only if NUM_DSBLOCK blocks have crossed.
    if ((blockNum + 1) % (NUM_DSBLOCK * NUM_FINAL_BLOCK_PER_POW) == 0
            or lastBlockNum == 0):
        bashCommand = "aws s3 sync --delete temp/persistence " + getBucketString(
            PERSISTENCE_SNAPSHOT_NAME
        ) + "/persistence --exclude 'diagnosticNodes/*' --exclude 'diagnosticCoinb/*' "
        process = subprocess.Popen(bashCommand,
                                   universal_newlines=True,
                                   shell=True,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT)
        output, error = process.communicate()
        if re.match(r'^\s*$', output):
            logging.warning("No entire persistence diff, interesting...")
        else:
            logging.info("Remote S3 bucket: " +
                         getBucketString(PERSISTENCE_SNAPSHOT_NAME) +
                         "/persistence is entirely Synced")
        # clear the state-delta bucket now.
        if (lastBlockNum != 0):
            CleanS3StateDeltas()
            CleanS3PersistenceDiffs()
    elif (result == 0):
        # we still need to sync persistence (except state, stateRoot, contractCode, contractStateData, contractStateIndex) so that the stateDelta and persistence diffs can be computed correctly for the next block
        bashCommand = "aws s3 sync --delete temp/persistence " + getBucketString(
            PERSISTENCE_SNAPSHOT_NAME
        ) + "/persistence --exclude '*' --include 'microBlockKeys/*' --include 'microBlocks*' --include 'dsBlocks/*' --include 'minerInfoDSComm/*' --include 'minerInfoShards/*' --include 'dsCommittee/*' --include 'shardStructure/*' --include 'txBlocks/*' --include 'VCBlocks/*' --include 'blockLinks/*' --include 'metaData/*' --include 'stateDelta/*' --include 'txEpochs/*' --include 'txBodies*' --include 'extSeedPubKeys/*' "
        process = subprocess.Popen(bashCommand,
                                   universal_newlines=True,
                                   shell=True,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT)
        str_diff_output, error = process.communicate()
        logging.info(
            "Remote S3 bucket: " + getBucketString(PERSISTENCE_SNAPSHOT_NAME) +
            "/persistence is Synced without state/stateRoot/contractCode/contractStateData/contractStateIndex"
        )

        if re.match(
                r'^\s*$', str_diff_output
        ):  # if output of sync command is either empty or just whitespaces
            print("No persistence diff, interesting...")
            tf = tarfile.open("diff_persistence_" + str(blockNum) + ".tar.gz",
                              mode="w:gz")
            t = tarfile.TarInfo("diff_persistence_" + str(blockNum))
            t.type = tarfile.DIRTYPE
            tf.addfile(t)
            tf.close()
            bashCommand = "aws s3 cp diff_persistence_" + str(
                blockNum) + ".tar.gz " + getBucketString(
                    PERSISTENCE_SNAPSHOT_NAME) + "/diff_persistence_" + str(
                        blockNum) + ".tar.gz"
            process = subprocess.Popen(bashCommand.split(),
                                       stdout=subprocess.PIPE)
            output, error = process.communicate()
            logging.info("DUMMY upload: persistence Diff for new txBlk :" +
                         str(blockNum) + ") in Remote S3 bucket: " +
                         getBucketString(PERSISTENCE_SNAPSHOT_NAME) +
                         " is Synced")
            os.remove("diff_persistence_" + str(blockNum) + ".tar.gz")
        else:
            str_diff_output = str_diff_output.strip()
            splitted = str_diff_output.split('\n')
            result = []
            if (len(splitted) > 0):
                for x in splitted:
                    tok = x.split(' ')
                    # skip deleted files
                    if (len(tok) >= 3 and tok[0] == "upload:"):
                        result.append(tok[1])

            tf = tarfile.open("diff_persistence_" + str(blockNum) + ".tar.gz",
                              mode="w:gz")
            for x in result:
                print(x)
                tf.add(x,
                       arcname="diff_persistence_" + str(blockNum) + "/" +
                       x.split("persistence/", 1)[1])
            tf.close()
            bashCommand = "aws s3 cp diff_persistence_" + str(
                blockNum) + ".tar.gz " + getBucketString(
                    PERSISTENCE_SNAPSHOT_NAME) + "/diff_persistence_" + str(
                        blockNum) + ".tar.gz"
            process = subprocess.Popen(bashCommand.split(),
                                       stdout=subprocess.PIPE)
            output, error = process.communicate()
            logging.info(
                "Persistence Diff for new txBlk :" + str(blockNum) +
                ") in Remote S3 bucket: " +
                getBucketString(PERSISTENCE_SNAPSHOT_NAME) +
                " is Synced without state/stateroot/contractCode/contractStateData/contractStateIndex"
            )
            os.remove("diff_persistence_" + str(blockNum) + ".tar.gz")
    else:
        logging.info("Not supposed to upload state now!")
Beispiel #13
0
def export_emails_archive(data_set_id,
                          email_ids=["f9c9c59a-7fe8-11e5-bb05-08002705cb99"]):
    cherrypy.log("email.get_attachments_sender(index=%s, attachment_id=%s)" %
                 (data_set_id, email_ids))
    if not data_set_id:
        return tangelo.HTTPStatusCode(400,
                                      "invalid service call - missing index")
    # if not email:
    #     return tangelo.HTTPStatusCode(400, "invalid service call - missing attachment_id")

    # elasticsearch.exceptions.ConnectionTimeout: ConnectionTimeout caused by - ReadTimeoutError(HTTPConnectionPool(host='10.1.70.143', port=9200): Read timed out. (read timeout=10))
    es = Elasticsearch([{
        "host": "10.1.70.143",
        "port": 9200
    }],
                       request_timeout=60)
    # TODO can implement with multiple doc_types and combine attachments in
    emails = es.mget(index=data_set_id,
                     doc_type="emails",
                     body={"docs": [{
                         "_id": id
                     } for id in email_ids]})

    # TODO filename
    filename = "export.tar.gz"
    tangelo.content_type("application/x-gzip")
    header("Content-Disposition", 'attachment; filename="{}"'.format(filename))

    string_buffer = cStringIO.StringIO()
    tar = tarfile.open(mode='w:gz', fileobj=string_buffer)

    # Add each email to the tar
    for email_source in emails["docs"]:

        email = email_source["_source"]

        tarinfo_parent = tarfile.TarInfo(name=email["id"])
        tarinfo_parent.type = tarfile.DIRTYPE
        tarinfo_parent.mode = 0755
        tarinfo_parent.mtime = time.time()
        tar.addfile(tarinfo_parent)

        tarinfo = tarfile.TarInfo(email["id"] + "/" + email["id"] + ".json")
        # TODO -- email transformation
        data_string = json.dumps(email)
        fobj = cStringIO.StringIO(data_string)

        tarinfo.size = len(data_string)
        tarinfo.mode = 0644
        tarinfo.mtime = time.time()
        tar.addfile(tarinfo, fobj)

        # Get the attachments
        if email["attachments"]:
            attachments = es.mget(index=data_set_id,
                                  doc_type="attachments",
                                  body={
                                      "docs": [{
                                          "_id": attch["guid"]
                                      } for attch in email["attachments"]]
                                  })
            for attachment_source in attachments["docs"]:
                attachment = attachment_source["_source"]
                filename = attachment["filename"]
                attch_data = str(base64.b64decode(attachment["contents64"]))

                tarinfo_attch = tarfile.TarInfo(email["id"] + "/" + filename)
                tarinfo_attch.size = len(attch_data)
                tarinfo_attch.mode = 0644
                tarinfo_attch.mtime = time.time()
                tar.addfile(tarinfo_attch, cStringIO.StringIO(attch_data))
    tar.close()

    return string_buffer.getvalue()
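
A reduced, Python 3 style sketch of the packaging pattern above (one directory member plus one JSON file per record, built entirely in memory); the record layout is assumed for illustration, not taken from the service:

import io
import json
import tarfile
import time

def records_to_targz(records):
    buf = io.BytesIO()
    with tarfile.open(mode="w:gz", fileobj=buf) as tar:
        for rec in records:
            dir_info = tarfile.TarInfo(name=rec["id"])
            dir_info.type = tarfile.DIRTYPE
            dir_info.mode = 0o755
            dir_info.mtime = time.time()
            tar.addfile(dir_info)

            payload = json.dumps(rec).encode("utf-8")
            info = tarfile.TarInfo("%s/%s.json" % (rec["id"], rec["id"]))
            info.size = len(payload)
            info.mode = 0o644
            info.mtime = time.time()
            tar.addfile(info, io.BytesIO(payload))
    return buf.getvalue()
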
Beispiel #14
0
def export_attachments(data_set_id,
                       outfile,
                       sender='',
                       attachment_extension='jpg',
                       date_bounds=None):
    print(
        "export_attachments(index=%s, sender=%s, attachment_extension=%s, date_bounds=%s)"
        % (data_set_id, sender, attachment_extension, date_bounds))
    if not data_set_id:
        print "invalid service call - missing index"
        return 1
    # elasticsearch.exceptions.ConnectionTimeout: ConnectionTimeout caused by - ReadTimeoutError(HTTPConnectionPool(host='10.1.70.143', port=9200): Read timed out. (read timeout=10))
    es = Elasticsearch([{"host": "10.1.70.143", "port": 9200}], timeout=60)

    # TODO get accurate count -- this is not strictly needed as attachments will be accessed as inner docs on the email_address
    max_inner_attachments_returned = 100000

    # Get all attachments by extension
    rows = []
    body = _attch_nested__ext_query(
        sender,
        attachment_extension,
        date_bounds,
        max_inner_attachments_returned=max_inner_attachments_returned)
    print body
    addresses_count = es.count(index=data_set_id,
                               doc_type="email_address",
                               body=body)["count"]
    print "total addresses: " + str(addresses_count)
    addresses = es.search(index=data_set_id,
                          doc_type="email_address",
                          body=body,
                          size=addresses_count)
    for address in addresses["hits"]["hits"]:
        rows += [[
            address["_source"]["addr"], attachment["_source"]["guid"],
            attachment["_source"]["filename"],
            attachment["_source"]["datetime"]
        ] for attachment in address["inner_hits"]["sender_attachments"]["hits"]
                 ["hits"]]

    print "total attachments: " + str(len(rows))

    #  start tar.gz
    # tar = tarfile.open(mode='w:gz', name="big-export.tar.gz")
    # Start tar
    tar = tarfile.open(mode='w', name=outfile)

    csv_string_buffer = cStringIO.StringIO()
    csv_file = csv.writer(csv_string_buffer)

    # Add all rows to attachment csv
    csv_file.writerows(rows)
    tarinfo = tarfile.TarInfo("attachments.csv")

    tarinfo.size = csv_string_buffer.tell()
    tarinfo.mode = 0644
    tarinfo.mtime = time.time()
    csv_string_buffer.seek(0)

    tar.addfile(tarinfo, csv_string_buffer)

    # This is the buffer size of how many attachments to pull from ES at each iteration
    num_returned = 3
    index = 0
    # Paging
    while index < len(rows):
        # Get num_returned attachments from ES
        attachments = es.mget(index=data_set_id,
                              doc_type="attachments",
                              body={
                                  "docs":
                                  [{
                                      "_id": row[1]
                                  }
                                   for row in rows[index:index + num_returned]]
                              })
        index += num_returned

        # Add all attachments to the archive
        for attachment_source in attachments["docs"]:
            attachment = attachment_source["_source"]
            filename = attachment["filename"]
            attch_data = str(base64.b64decode(attachment["contents64"]))

            tarinfo_attch = tarfile.TarInfo(attachment["guid"] + "/" +
                                            filename)
            tarinfo_attch.size = len(attch_data)
            tarinfo_attch.mode = 0644
            tarinfo_attch.mtime = time.time()
            tar.addfile(tarinfo_attch, cStringIO.StringIO(attch_data))

    tar.close()
Beispiel #15
0
def make_bin(basename, filelist, type, kver, sign=0, jailbreak=0):
    fd, tgz_fname = tempfile.mkstemp()
    os.close(fd)
    tar = tarfile.open(tgz_fname, "w:gz")

    dat_list = ""

    if sign:
        fd, keyfile = tempfile.mkstemp()
        fs = os.fdopen(fd, "wb")
        fs.write(SIGN_KEY)
        fs.close()

    if jailbreak:
        random.seed()

        # Create fake symlink
        namedir = '__dir' + str(random.randint(1000, 9999))
        tarinfo = tarfile.TarInfo(namedir)
        tarinfo.type = tarfile.SYMTYPE
        tarinfo.linkname = KINDLE_HACK_DIR
        tar.addfile(tarinfo)

        # Create new key
        fd, tmpfile = tempfile.mkstemp()
        fs = os.fdopen(fd, "wb")
        fs.write(NEW_KEY)
        fs.close()
        tarinfo = tar.gettarinfo(tmpfile,
                                 arcname=namedir + '/' + KINDLE_HACK_KEYNAME)
        add_tarfile(tarinfo, tmpfile, tar)
        os.remove(tmpfile)

        # Create additional install script
        nameinstall = '_install' + str(random.randint(1000, 9999)) + '.sh'
        fd, tmpinstall = tempfile.mkstemp()
        fs = os.fdopen(fd, "wb")
        fs.write(INSTALL_SCRIPT)
        fs.close()
        tarinfo = tar.gettarinfo(tmpinstall, arcname=nameinstall)
        add_tarfile(tarinfo, tmpinstall, tar)
        if sign:
            create_sig(keyfile, tmpinstall, tar, finalname=nameinstall)

        # Creating extra script signature

    if sign:
        for name in filelist:
            print "calculating signature for %s" % name
            create_sig(keyfile, name, tar)

    for name in filelist:
        print "adding %s" % name
        tarinfo = tar.gettarinfo(name)
        if name.endswith(".sh"):
            fid = 129
        else:
            fid = 128
        add_tarfile(tarinfo, name, tar)

        fsize = os.path.getsize(name) / 64
        inf = open(name, "rb")
        dat_list += "%d %s %s %d %s\n" % (fid, s_md5(
            inf.read()), name, fsize, name + "_file")
        inf.close()

    if jailbreak:
        fsize = os.path.getsize(tmpinstall) / 64
        inf = open(tmpinstall, "rb")
        dat_list += "%d %s %s %d %s\n" % (129, s_md5(
            inf.read()), nameinstall, fsize, nameinstall + "_file")
        inf.close()
        os.remove(tmpinstall)

    fd, tmpdat = tempfile.mkstemp()
    fs = os.fdopen(fd, "wb")
    fs.write(dat_list)
    fs.close()
    tarinfo = tar.gettarinfo(tmpdat, arcname=basename + '.dat')
    add_tarfile(tarinfo, tmpdat, tar)

    # Sign the bundle file, too (needed since fw 3.x)
    if sign:
        print "calculating signature for bundle file"
        create_sig(keyfile, tmpdat, tar, basename + '.dat')
        os.remove(keyfile)

    os.remove(tmpdat)
    tar.close()
    convert_bin(basename, tgz_fname, type, kver)
    os.remove(tgz_fname)
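
The fake symlink above is just a TarInfo of type SYMTYPE carrying a linkname and no payload; a minimal sketch of that step:

import tarfile

def add_symlink(tar, name, target):
    info = tarfile.TarInfo(name)
    info.type = tarfile.SYMTYPE
    info.linkname = target
    tar.addfile(info)  # symlink members have no file content
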
Beispiel #16
0
 def add_file(filename, contents):
   info = tarfile.TarInfo(filename)
   info.size = len(contents)
   tar.addfile(tarinfo=info, fileobj=cStringIO.StringIO(contents))
Beispiel #17
0
def _add_file(tar, filename, content):
    tar_info = tarfile.TarInfo(name=filename)
    tar_info.size = len(content)
    content = io.BytesIO(content)
    content.seek(0)
    tar.addfile(tar_info, content)
Beispiel #18
0
def create(info, verbose=False):
    tmp_dir_base_path = join(dirname(info['_outpath']), "tmp")
    try:
        os.makedirs(tmp_dir_base_path)
    except Exception:
        pass
    tmp_dir = tempfile.mkdtemp(dir=tmp_dir_base_path)
    preconda_write_files(info, tmp_dir)

    preconda_tarball = join(tmp_dir, 'preconda.tar.bz2')
    postconda_tarball = join(tmp_dir, 'postconda.tar.bz2')
    pre_t = tarfile.open(preconda_tarball, 'w:bz2')
    post_t = tarfile.open(postconda_tarball, 'w:bz2')
    for dist in preconda_files:
        fn = filename_dist(dist)
        pre_t.add(join(tmp_dir, fn), 'pkgs/' + fn)
    for key in 'pre_install', 'post_install':
        if key in info:
            pre_t.add(
                info[key],
                'pkgs/%s.sh' % key,
                filter=make_executable if has_shebang(info[key]) else None)
    cache_dir = join(tmp_dir, 'cache')
    if isdir(cache_dir):
        for cf in os.listdir(cache_dir):
            if cf.endswith(".json"):
                pre_t.add(join(cache_dir, cf), 'pkgs/cache/' + cf)
    for dist in info['_dists']:
        if filename_dist(dist).endswith(".conda"):
            _dist = filename_dist(dist)[:-6]
        elif filename_dist(dist).endswith(".tar.bz2"):
            _dist = filename_dist(dist)[:-8]
        record_file = join(_dist, 'info', 'repodata_record.json')
        record_file_src = join(tmp_dir, record_file)
        record_file_dest = join('pkgs', record_file)
        pre_t.add(record_file_src, record_file_dest)
    pre_t.addfile(tarinfo=tarfile.TarInfo("conda-meta/history"))
    post_t.add(join(tmp_dir, 'conda-meta', 'history'), 'conda-meta/history')
    pre_t.close()
    post_t.close()

    tarball = join(tmp_dir, 'tmp.tar')
    t = tarfile.open(tarball, 'w')
    t.add(preconda_tarball, basename(preconda_tarball))
    t.add(postconda_tarball, basename(postconda_tarball))
    if 'license_file' in info:
        t.add(info['license_file'], 'LICENSE.txt')
    for dist in info['_dists']:
        fn = filename_dist(dist)
        t.add(join(info['_download_dir'], fn), 'pkgs/' + fn)
    t.close()

    conda_exec = info["_conda_exe"]
    header = get_header(conda_exec, tarball, info)
    shar_path = info['_outpath']
    with open(shar_path, 'wb') as fo:
        fo.write(header.encode('utf-8'))
        for payload in [conda_exec, tarball]:
            with open(payload, 'rb') as fi:
                while True:
                    chunk = fi.read(262144)
                    if not chunk:
                        break
                    fo.write(chunk)

    os.unlink(tarball)
    os.chmod(shar_path, 0o755)
    shutil.rmtree(tmp_dir)
Beispiel #19
0
 def add(self, path):
     self.tar.addfile(tarfile.TarInfo(name=path))
Beispiel #20
0
    def tar(self, uncompressed_size=65536, num_files=1, min_file_size=4096, compression=None):
        """Generate a bytes object containing a random valid tar file.

        The number and sizes of files contained inside the resulting archive can be controlled
        using the following arguments:

        - ``uncompressed_size`` - the total size of files before compression, 64 KiB by default
        - ``num_files`` - the number of files archived in the resulting tar file, 1 by default
        - ``min_file_size`` - the minimum size of each file before compression, 4 KiB by default

        No compression is used by default, but setting ``compression`` to one of the values listed
        below will use the corresponding compression type.

        - ``'bzip2'`` or ``'bz2'`` for BZIP2
        - ``'lzma'`` or ``'xz'`` for LZMA
        - ``'gzip'`` or ``'gz'`` for GZIP

        :sample: uncompressed_size=256, num_files=4, min_file_size=32
        :sample: uncompressed_size=256, num_files=32, min_file_size=4, compression='bz2'
        """
        if any([
            not isinstance(num_files, int) or num_files <= 0,
            not isinstance(min_file_size, int) or min_file_size <= 0,
            not isinstance(uncompressed_size, int) or uncompressed_size <= 0,
        ]):
            raise ValueError(
                '`num_files`, `min_file_size`, and `uncompressed_size` must be positive integers',
            )
        if min_file_size * num_files > uncompressed_size:
            raise AssertionError(
                '`uncompressed_size` is smaller than the calculated minimum required size',
            )
        if compression in ['gzip', 'gz']:
            mode = 'w:gz'
        elif compression in ['bzip2', 'bz2']:
            mode = 'w:bz2'
        elif compression in ['lzma', 'xz']:
            mode = 'w:xz'
        else:
            mode = 'w'

        tar_buffer = io.BytesIO()
        remaining_size = uncompressed_size
        with tarfile.open(mode=mode, fileobj=tar_buffer) as tar_handle:
            for file_number in range(1, num_files + 1):
                file_buffer = io.BytesIO()
                filename = self.generator.pystr() + str(file_number)

                max_allowed_size = remaining_size - (num_files - file_number) * min_file_size
                if file_number < num_files:
                    file_size = self.generator.random.randint(min_file_size, max_allowed_size)
                    remaining_size = remaining_size - file_size
                else:
                    file_size = remaining_size

                tarinfo = tarfile.TarInfo(name=filename)
                data = self.generator.binary(file_size)
                file_buffer.write(data)
                tarinfo.size = len(file_buffer.getvalue())
                file_buffer.seek(0)
                tar_handle.addfile(tarinfo, file_buffer)
                file_buffer.close()
        return tar_buffer.getvalue()
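
A hedged usage sketch: assuming this method is the misc provider of the Faker library (as the :sample: lines suggest), it would be reachable from a Faker instance roughly like this:

from faker import Faker

fake = Faker()
payload = fake.tar(uncompressed_size=256, num_files=4,
                   min_file_size=32, compression="gz")
with open("random.tar.gz", "wb") as fh:
    fh.write(payload)
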
Beispiel #21
0
def write_to_tar(url_file, out_file, makevocab=False):
    """Reads the tokenized .story files corresponding to the urls listed in the
       url_file and writes them to out_file.
    """
    print("Making bin file for URLs listed in {}...".format(url_file))
    url_list = [line.strip() for line in open(url_file)]
    url_hashes = get_url_hashes(url_list)
    story_fnames = [s + ".story" for s in url_hashes]
    num_stories = len(story_fnames)

    if makevocab:
        vocab_counter = collections.Counter()
    idx_to_story_dict = {}

    with tarfile.open(out_file, 'w') as writer:
        for idx, s in enumerate(story_fnames):
            if idx % 1000 == 0:
                print("Writing story {} of {}; {:.2f} percent done".format(
                    idx, num_stories,
                    float(idx) * 100.0 / float(num_stories)))

            # Look in the tokenized story dirs to find the .story file
            # corresponding to this url
            if os.path.isfile(os.path.join(cnn_tokenized_stories_dir, s)):
                story_file = os.path.join(cnn_tokenized_stories_dir, s)
            elif os.path.isfile(os.path.join(dm_tokenized_stories_dir, s)):
                story_file = os.path.join(dm_tokenized_stories_dir, s)
            else:
                print("Error: Couldn't find tokenized story file {} in either"
                      " tokenized story directories {} and {}. Was there an"
                      " error during tokenization?".format(
                          s, cnn_tokenized_stories_dir,
                          dm_tokenized_stories_dir))
                continue  # skip this story; story_file would be undefined below

            # Get the strings to write to .bin file
            article_sents, abstract_sents = get_art_abs(story_file)

            # Write to JSON file
            js_example = {}
            js_example['id'] = s.replace('.story', '')
            js_example['article'] = article_sents
            js_example['abstract'] = abstract_sents
            js_serialized = json.dumps(js_example, indent=4).encode()
            save_file = io.BytesIO(js_serialized)
            tar_info = tarfile.TarInfo('{}/{}.json'.format(
                os.path.basename(out_file).replace('.tar', ''), idx))
            tar_info.size = len(js_serialized)
            writer.addfile(tar_info, save_file)
            idx_to_story_dict[idx] = s

            # Write the vocab to file, if applicable
            if makevocab:
                art_tokens = ' '.join(article_sents).split()
                abs_tokens = ' '.join(abstract_sents).split()
                tokens = art_tokens + abs_tokens
                tokens = [t.strip() for t in tokens]  # strip
                tokens = [t for t in tokens if t != ""]  # remove empty
                vocab_counter.update(tokens)

    print("Finished writing file {}\n".format(out_file))
    with open(os.path.join(finished_files_dir, "id_to_story_dict.pkl"),
              'wb') as story_dict_f:
        pkl.dump(idx_to_story_dict, story_dict_f)

    # write vocab to file
    if makevocab:
        print("Writing vocab file...")
        with open(os.path.join(finished_files_dir, "vocab_cnt.pkl"),
                  'wb') as vocab_file:
            pkl.dump(vocab_counter, vocab_file)
        print("Finished writing vocab file")
Beispiel #22
0
    def get_submission_archive(self,
                               submissions,
                               sub_folders,
                               aggregations,
                               archive_file=None):
        """
        :param submissions: a list of submissions
        :param sub_folders: possible values:
            []: put all submissions in /
            ['taskid']: put all submissions for each task in a different directory /taskid/
            ['username']: put all submissions for each user in a different directory /username/
            ['taskid','username']: /taskid/username/
            ['username','taskid']: /username/taskid/
        :return: a file-like object containing a tgz archive of all the submissions
        """
        tmpfile = archive_file if archive_file is not None else tempfile.TemporaryFile(
        )
        tar = tarfile.open(fileobj=tmpfile, mode='w:gz')

        for submission in submissions:
            submission = self.get_input_from_submission(submission)

            submission_yaml = io.BytesIO(
                inginious.common.custom_yaml.dump(submission).encode('utf-8'))

            # Considering multiple single submissions for each user
            for username in submission["username"]:
                # Compute base path in the tar file
                base_path = "/"
                for sub_folder in sub_folders:
                    if sub_folder == 'taskid':
                        base_path = submission['taskid'] + base_path
                    elif sub_folder == 'username':
                        base_path = '_' + '-'.join(
                            submission['username']) + base_path
                        base_path = base_path[1:]
                    elif sub_folder == 'aggregation':
                        if aggregations[username] is None:
                            # If classrooms are not used, and user is not grouped, his classroom is replaced by None
                            base_path = '_' + '-'.join(
                                submission['username']) + base_path
                            base_path = base_path[1:]
                        else:
                            base_path = (
                                aggregations[username]["description"] + " (" +
                                str(aggregations[username]["_id"]) +
                                ")").replace(" ", "_") + base_path

                    base_path = '/' + base_path
                base_path = base_path[1:]

                submission_yaml_fname = base_path + str(
                    submission["_id"]) + '/submission.test'

                # Avoid putting two times the same submission on the same place
                if submission_yaml_fname not in tar.getnames():

                    info = tarfile.TarInfo(name=submission_yaml_fname)
                    info.size = submission_yaml.getbuffer().nbytes
                    info.mtime = time.mktime(
                        submission["submitted_on"].timetuple())

                    # Add file in tar archive
                    tar.addfile(info, fileobj=submission_yaml)

                    # If there is an archive, add it too
                    if 'archive' in submission and submission[
                            'archive'] is not None and submission[
                                'archive'] != "":
                        subfile = self._gridfs.get(submission['archive'])
                        subtar = tarfile.open(fileobj=subfile, mode="r:gz")

                        for member in subtar.getmembers():
                            subtarfile = subtar.extractfile(member)
                            member.name = base_path + str(
                                submission["_id"]) + "/archive/" + member.name
                            tar.addfile(member, subtarfile)

                        subtar.close()
                        subfile.close()

                    # If there are files that were uploaded by the student, add them
                    if submission['input'] is not None:
                        for pid, problem in submission['input'].items():
                            # If problem is a dict, it is a file (from the specification of the problems)
                            if isinstance(problem, dict):
                                # Get the extension (match extensions with more than one dot too)
                                DOUBLE_EXTENSIONS = [
                                    '.tar.gz', '.tar.bz2', '.tar.bz', '.tar.xz'
                                ]
                                ext = ""
                                if not problem['filename'].endswith(
                                        tuple(DOUBLE_EXTENSIONS)):
                                    _, ext = os.path.splitext(
                                        problem['filename'])
                                else:
                                    for t_ext in DOUBLE_EXTENSIONS:
                                        if problem['filename'].endswith(t_ext):
                                            ext = t_ext

                                subfile = io.BytesIO(
                                    base64.b64decode(problem['value']))
                                taskfname = base_path + str(
                                    submission["_id"]
                                ) + '/uploaded_files/' + pid + ext

                                # Generate file info
                                info = tarfile.TarInfo(name=taskfname)
                                info.size = subfile.getbuffer().nbytes
                                info.mtime = time.mktime(
                                    submission["submitted_on"].timetuple())

                                # Add file in tar archive
                                tar.addfile(info, fileobj=subfile)

        # Close tarfile and put tempfile cursor at 0
        tar.close()
        tmpfile.seek(0)
        return tmpfile
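
A reduced sketch of the nested-archive step above: copy every member of an inner tar.gz into an already open outer tar under a new name prefix, reusing each member's TarInfo:

import tarfile

def copy_members_with_prefix(outer_tar, inner_fileobj, prefix):
    with tarfile.open(fileobj=inner_fileobj, mode="r:gz") as inner:
        for member in inner.getmembers():
            data = inner.extractfile(member) if member.isfile() else None
            member.name = prefix + member.name
            outer_tar.addfile(member, data)
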
Beispiel #23
0
def main(configuration, viewer_dir, viewer_exes, libs_suffix, dump_syms_tool,
         viewer_symbol_file):
    print "generate_breakpad_symbols run with args: %s" % str(
        (configuration, viewer_dir, viewer_exes, libs_suffix, dump_syms_tool,
         viewer_symbol_file))

    if not re.match("release", configuration, re.IGNORECASE):
        print "skipping breakpad symbol generation for non-release build."
        return 0

    # split up list of viewer_exes
    # "'Second Life' SLPlugin" becomes ['Second Life', 'SLPlugin']
    viewer_exes = shlex.split(viewer_exes)

    found_required = dict([(module, False) for module in viewer_exes])

    def matches(f):
        if f in viewer_exes:
            found_required[f] = True
            return True
        return fnmatch.fnmatch(f, libs_suffix)

    def list_files():
        for (dirname, subdirs, filenames) in os.walk(viewer_dir):
            #print "scanning '%s' for modules..." % dirname
            for f in itertools.ifilter(matches, filenames):
                yield os.path.join(dirname, f)

    def dump_module(m):
        print "dumping module '%s' with '%s'..." % (m, dump_syms_tool)
        child = subprocess.Popen([dump_syms_tool, m], stdout=subprocess.PIPE)
        out, err = child.communicate()
        return (m, child.returncode, out, err)

    out = tarfile.open(viewer_symbol_file, 'w:bz2')

    for (filename, status, symbols,
         err) in itertools.imap(dump_module, list_files()):
        if status == 0:
            module_line = symbols[:symbols.index('\n')]
            module_line = module_line.split()
            hash_id = module_line[3]
            module = ' '.join(module_line[4:])
            if sys.platform in ['win32', 'cygwin']:
                mod_name = module[:module.rindex('.pdb')]
            else:
                mod_name = module
            symbolfile = StringIO.StringIO(symbols)
            info = tarfile.TarInfo(
                "%(module)s/%(hash_id)s/%(mod_name)s.sym" %
                dict(module=module, hash_id=hash_id, mod_name=mod_name))
            info.size = symbolfile.len
            out.addfile(info, symbolfile)
        else:
            print >> sys.stderr, "warning: failed to dump symbols for '%s': %s" % (
                filename, err)

    out.close()

    missing_modules = [
        m for (m, _) in itertools.ifilter(lambda (k, v): not v,
                                          found_required.iteritems())
    ]
    if missing_modules:
        print >> sys.stderr, "failed to generate %s" % viewer_symbol_file
        os.remove(viewer_symbol_file)
        raise MissingModuleError(missing_modules)

    symbols = tarfile.open(viewer_symbol_file, 'r:bz2')
    tarfile_members = symbols.getnames()
    symbols.close()

    for required_module in viewer_exes:

        def match_module_basename(m):
            return os.path.splitext(required_module)[0].lower() \
                   == os.path.splitext(os.path.basename(m))[0].lower()

        # there must be at least one .sym file in tarfile_members that matches
        # each required module (ignoring file extensions)
        if not reduce(operator.or_,
                      itertools.imap(match_module_basename, tarfile_members)):
            print >> sys.stderr, "failed to find required %s in generated %s" \
                    % (required_module, viewer_symbol_file)
            os.remove(viewer_symbol_file)
            raise MissingModuleError([required_module])

    print "successfully generated %s including required modules '%s'" % (
        viewer_symbol_file, viewer_exes)

    return 0
Beispiel #24
0
    def export(self):
#        missmedia_action = 0
        #--------------------------------------------------------------
        # def remove_clicked():
        #     # File is lost => remove all references and the object itself
        #     for p_id in self.db.iter_family_handles():
        #         p = self.db.get_family_from_handle(p_id)
        #         nl = p.get_media_list()
        #         for o in nl:
        #             if o.get_reference_handle() == m_id:
        #                 nl.remove(o)
        #         p.set_media_list(nl)
        #         self.db.commit_family(p,None)
        #     for key in self.db.iter_person_handles():
        #         p = self.db.get_person_from_handle(key)
        #         nl = p.get_media_list()
        #         for o in nl:
        #             if o.get_reference_handle() == m_id:
        #                 nl.remove(o)
        #         p.set_media_list(nl)
        #         self.db.commit_person(p,None)
        #     for key in self.db.get_source_handles():
        #         p = self.db.get_source_from_handle(key)
        #         nl = p.get_media_list()
        #         for o in nl:
        #             if o.get_reference_handle() == m_id:
        #                 nl.remove(o)
        #         p.set_media_list(nl)
        #         self.db.commit_source(p,None)
        #     for key in self.db.get_place_handles():
        #         p = self.db.get_place_from_handle(key)
        #         nl = p.get_media_list()
        #         for o in nl:
        #             if o.get_reference_handle() == m_id:
        #                 nl.remove(o)
        #         p.set_media_list(nl)
        #         self.db.commit_place(p,None)
        #     for key in self.db.get_event_handles():
        #         p = self.db.get_event_from_handle(key)
        #         nl = p.get_media_list()
        #         for o in nl:
        #             if o.get_reference_handle() == m_id:
        #                 nl.remove(o)
        #         p.set_media_list(nl)
        #         self.db.commit_event(p,None)
        #     self.db.remove_media(m_id,None)

        # def leave_clicked():
        #     # File is lost => do nothing, leave as is
        #     pass

        # def select_clicked():
        #     # File is lost => select a file to replace the lost one
        #     def fs_close_window(obj):
        #         pass

        #     def fs_ok_clicked(obj):
        #         name = fs_top.get_filename()
        #         if os.path.isfile(name):
        #             archive.add(name)

        #     fs_top = gtk.FileChooserDialog("%s - GRAMPS" % _("Select file"),
        #                 buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
        #                          gtk.STOCK_OK, Gtk.ResponseType.OK)
        #                 )
        #     response = fs_top.run()
        #     if response == Gtk.ResponseType.OK:
        #         fs_ok_clicked(fs_top)
        #     elif response == gtk.RESPONSE_CANCEL:
        #         fs_close_window(fs_top)

        #     fs_top.destroy()
        #---------------------------------------------------------------

        try:
            archive = tarfile.open(self.filename,'w:gz')
        except EnvironmentError as msg:
            log.warn(str(msg))
            self.user.notify_error(_('Failure writing %s') % self.filename, str(msg))
            return 0

        # Write media files first, since the database may be modified
        # during the process (i.e. when removing object)
        for m_id in self.db.get_media_handles(sort_handles=True):
            mobject = self.db.get_media_from_handle(m_id)
            filename = media_path_full(self.db, mobject.get_path())
            archname = str(mobject.get_path())
            if os.path.isfile(filename) and os.access(filename, os.R_OK):
                archive.add(filename, archname)
#             else:
#                 # File is lost => ask what to do
#                 if missmedia_action == 0:
#                     mmd = MissingMediaDialog(
#                         _("Media object could not be found"),
#                         _("%(file_name)s is referenced in the database, "
#                           "but no longer exists. The file may have been "
#                           "deleted or moved to a different location. "
#                           "You may choose to either remove the reference "
#                           "from the database, keep the reference to the "
#                           "missing file, or select a new file."
#                           ) % { 'file_name' : filename },
#                         remove_clicked, leave_clicked, select_clicked)
#                     missmedia_action = mmd.default_action
#                 elif missmedia_action == 1:
#                     remove_clicked()
#                 elif missmedia_action == 2:
#                     leave_clicked()
#                 elif missmedia_action == 3:
#                     select_clicked()

        # Write XML now
        g = BytesIO()
        gfile = XmlWriter(self.db, self.user, 2)
        gfile.write_handle(g)
        tarinfo = tarfile.TarInfo('data.gramps')
        tarinfo.size = len(g.getvalue())
        tarinfo.mtime = time.time()
        if not win():
            tarinfo.uid = os.getuid()
            tarinfo.gid = os.getgid()
        g.seek(0)
        archive.addfile(tarinfo, g)
        archive.close()
        g.close()

        return True
Beispiel #25
0
def createPackedInputSandbox(sandbox_files, inws, name):
    """Put all sandbox_files into tarball called name and write it into to the input workspace.
       This function is called by Ganga client at the submission time.
       Arguments:
                'sandbox_files': a list of File or FileBuffer objects.
                'inws': a InputFileWorkspace object
       Return: a list containing a path to the tarball
       """

    #    from Ganga.Core import FileWorkspace
    #    from Ganga.GPIDev.Lib.File import File

    #    tgzfile = os.path.join(tmpdir,name)

    tgzfile = inws.getPath(name)

    import tarfile
    import stat

    logger.debug("Creating packed Sandbox with %s many sandbox files." %
                 len(sandbox_files))

    #
    # Current release with os module
    #

    #   wsdir = os.path.join(tmpdir,"ws")
    #   ws = FileWorkspace.FileWorkspace(wsdir)
    #   ws.create()
    #   for f in sandbox_files:
    #       ws.writefile(f)

    # if os.system("tar -C %s -czf %s ."%(wsdir,tgzfile)) !=0:
    #       print "ERROR:: can't create tarball file with InputSandbox"

    #
    # Future release with tarball module

    if mimetypes.guess_type(tgzfile)[1] in ['gzip']:
        file_format = 'gz'
    elif mimetypes.guess_type(tgzfile)[1] in ['bzip2']:
        file_format = 'bz2'
    else:
        file_format = ''

    with open(tgzfile, 'w:%s' % file_format) as this_tarfile:
        tf = tarfile.open(name=tgzfile, fileobj=this_tarfile, mode="w:gz")
        tf.dereference = True  # --not needed in Windows

        from Ganga.GPIDev.Lib.File.FileBuffer import FileBuffer
        from Ganga.GPIDev.Lib.File.File import File
        from Ganga.GPIDev.Base.Proxy import isType

        for f in sandbox_files:
            fileobj = None
            if isType(f, FileBuffer):
                contents = f.getContents()  # is it FileBuffer?
                # print "Getting FileBuffer Contents"

                from StringIO import StringIO
                fileobj = StringIO(contents)

                tinfo = tarfile.TarInfo()
                # FIX for Ganga/test/Internals/FileBuffer_Sandbox
                # Don't keep the './' on files as looking for an exact filename
                # afterwards won't work
                if f.subdir == os.curdir:
                    tinfo.name = os.path.basename(f.name)
                else:
                    tinfo.name = os.path.join(f.subdir,
                                              os.path.basename(f.name))
                import time
                tinfo.mtime = time.time()
                tinfo.size = fileobj.len

            else:
                #   except AttributeError as err:         # File
                # print "Getting File %s" % f.name
                # tf.add(f.name,os.path.join(f.subdir,os.path.basename(f.name)))
                logger.debug("Opening file for sandbox: %s" % f.name)
                try:
                    fileobj = open(f.name)
                except Exception as err:
                    raise SandboxError("File '%s' does not exist." % f.name)

                tinfo = tf.gettarinfo(
                    f.name, os.path.join(f.subdir, os.path.basename(f.name)))

            if f.isExecutable():
                tinfo.mode = tinfo.mode | stat.S_IXUSR
            tf.addfile(tinfo, fileobj)
            fileobj.close()
        tf.close()

    return [tgzfile]
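
A minimal sketch of the executable-bit handling above: build a TarInfo from an existing file with gettarinfo() and OR the user-execute bit into its mode before adding it:

import stat
import tarfile

def add_as_executable(tar, path, arcname):
    info = tar.gettarinfo(path, arcname)
    info.mode |= stat.S_IXUSR
    with open(path, "rb") as fobj:
        tar.addfile(info, fobj)
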
Beispiel #26
0
    def write_archive(self, treeish, archive, timestamp=None, prefix=''):
        """Write treeish into an archive

        If no timestamp is provided and 'treeish' is a commit, its committer
        timestamp will be used. Otherwise the current time will be used.

        All path names in the archive are added to 'prefix', which defaults to
        an empty string.

        Arguments:

        treeish
            The treeish to write.
        archive
            An archive from the 'tarfile' module
        timestamp
            Timestamp to use for the files in the archive.
        prefix
            Extra prefix to add to the path names in the archive.

        Example::

            >>> import tarfile, pygit2
            >>> with tarfile.open('foo.tar', 'w') as archive:
            ...     repo = pygit2.Repository('.')
            ...     repo.write_archive(repo.head.target, archive)
        """

        # Try to get a tree from whatever we got
        if isinstance(treeish, Tree):
            tree = treeish

        if isinstance(treeish, Oid) or is_string(treeish):
            treeish = self[treeish]

        # if we don't have a timestamp, try to get it from a commit
        if not timestamp:
            try:
                commit = treeish.peel(Commit)
                timestamp = commit.committer.time
            except Exception:
                pass

        # as a last resort, use the current timestamp
        if not timestamp:
            timestamp = int(time())

        tree = treeish.peel(Tree)

        index = Index()
        index.read_tree(tree)

        for entry in index:
            content = self[entry.id].read_raw()
            info = tarfile.TarInfo(prefix + entry.path)
            info.size = len(content)
            info.mtime = timestamp
            info.uname = info.gname = 'root' # just because git does this
            if entry.mode == GIT_FILEMODE_LINK:
                info.type = tarfile.SYMTYPE
                info.linkname = content
                info.mode = 0o777 # symlinks get placeholder
                info.size = 0
                archive.addfile(info)
            else:
                archive.addfile(info, StringIO(content))
Beispiel #27
0
    def add_file(self,
                 name,
                 kind=tarfile.REGTYPE,
                 content=None,
                 link=None,
                 file_content=None,
                 uid=0,
                 gid=0,
                 uname='',
                 gname='',
                 mtime=None,
                 mode=None):
        """Add a file to the current tar.

    Args:
      name: the name of the file to add.
      kind: the type of the file to add, see tarfile.*TYPE.
      content: a textual content to put in the file.
      link: if the file is a link, the destination of the link.
      file_content: file to read the content from. Provide either this
          one or `content` to specifies a content for the file.
      uid: owner user identifier.
      gid: owner group identifier.
      uname: owner user names.
      gname: owner group names.
      mtime: modification time to put in the archive.
      mode: unix permission mode of the file, default 0644 (0755 for directories).
    """
        if file_content and os.path.isdir(file_content):
            # Recurse into directory
            self.add_dir(name, file_content, uid, gid, uname, gname, mtime,
                         mode)
            return
        if not (name == self.root_directory or name.startswith('/')
                or name.startswith(self.root_directory + '/')):
            name = os.path.join(self.root_directory, name)
        if kind == tarfile.DIRTYPE:
            name = name.rstrip('/')
            if name in self.directories:
                return
        if mtime is None:
            mtime = self.default_mtime

        components = name.rsplit('/', 1)
        if len(components) > 1:
            d = components[0]
            self.add_file(d,
                          tarfile.DIRTYPE,
                          uid=uid,
                          gid=gid,
                          uname=uname,
                          gname=gname,
                          mtime=mtime,
                          mode=0o755)
        tarinfo = tarfile.TarInfo(name)
        tarinfo.mtime = mtime
        tarinfo.uid = uid
        tarinfo.gid = gid
        tarinfo.uname = uname
        tarinfo.gname = gname
        tarinfo.type = kind
        if mode is None:
            tarinfo.mode = 0o644 if kind == tarfile.REGTYPE else 0o755
        else:
            tarinfo.mode = mode
        if link:
            tarinfo.linkname = link
        if content:
            content_bytes = six.ensure_binary(content, 'utf-8')
            tarinfo.size = len(content_bytes)
            self._addfile(tarinfo, io.BytesIO(content_bytes))
        elif file_content:
            with open(file_content, 'rb') as f:
                tarinfo.size = os.fstat(f.fileno()).st_size
                self._addfile(tarinfo, f)
        else:
            if kind == tarfile.DIRTYPE:
                self.directories.add(name)
            self._addfile(tarinfo)
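The parent-directory bookkeeping above can be illustrated with a small standalone sketch (the helper name `add_with_parents` and the file names are illustrative, not taken from the listing): missing directory entries are emitted first with mode 0755, then the regular file itself with mode 0644.

import io
import tarfile

def add_with_parents(tar, name, data, seen_dirs):
    """Emit DIRTYPE entries for missing parents, then a 0o644 regular file."""
    parent = ''
    for part in name.split('/')[:-1]:
        parent = '%s%s/' % (parent, part)
        if parent not in seen_dirs:
            d = tarfile.TarInfo(parent.rstrip('/'))
            d.type = tarfile.DIRTYPE
            d.mode = 0o755
            tar.addfile(d)
            seen_dirs.add(parent)
    info = tarfile.TarInfo(name)
    info.size = len(data)
    info.mode = 0o644
    tar.addfile(info, io.BytesIO(data))

with tarfile.open('demo.tar', 'w') as tar:
    seen = set()
    add_with_parents(tar, 'app/config/settings.ini', b'[core]\n', seen)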
Beispiel #28
0
def add_to_tar(tar, name, sio_obj, perm=420):
    # perm=420 is the decimal form of octal 0o644; flen() is presumably a
    # helper defined elsewhere in the module that returns the buffer length.
    info = tarfile.TarInfo(name=name)
    info.size = flen(sio_obj)
    info.mode = perm
    sio_obj.seek(0)
    tar.addfile(info, sio_obj)
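A short usage sketch, assuming the `add_to_tar` helper above is in scope and that `flen(buf)` simply returns the number of bytes held by the buffer:

import io
import tarfile

def flen(buf):
    # assumed behaviour of the helper referenced above
    return len(buf.getvalue())

payload = io.BytesIO(b'hello from memory\n')
with tarfile.open('inmem.tar', 'w') as tar:
    add_to_tar(tar, 'greeting.txt', payload)   # perm defaults to 420, i.e. 0o644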
Beispiel #29
0
    def makepkg(self, path):
        """Creates an Arch Linux package archive.

        A package archive is generated in the location 'path', based on the data
        from the object.
        """
        archive_files = []

        # .PKGINFO
        data = ["pkgname = %s" % self.name]
        data.append("pkgver = %s" % self.version)
        data.append("pkgdesc = %s" % self.desc)
        data.append("url = %s" % self.url)
        data.append("builddate = %s" % self.builddate)
        data.append("packager = %s" % self.packager)
        data.append("size = %s" % self.size)
        if self.arch:
            data.append("arch = %s" % self.arch)
        for i in self.license:
            data.append("license = %s" % i)
        for i in self.replaces:
            data.append("replaces = %s" % i)
        for i in self.groups:
            data.append("group = %s" % i)
        for i in self.depends:
            data.append("depend = %s" % i)
        for i in self.optdepends:
            data.append("optdepend = %s" % i)
        for i in self.conflicts:
            data.append("conflict = %s" % i)
        for i in self.provides:
            data.append("provides = %s" % i)
        for i in self.backup:
            data.append("backup = %s" % i)
        archive_files.append((".PKGINFO", "\n".join(data)))

        # .INSTALL
        if any(self.install.values()):
            archive_files.append((".INSTALL", self.installfile()))

        self.path = os.path.join(path, self.filename())
        util.mkdir(os.path.dirname(self.path))

        # Generate package metadata
        tar = tarfile.open(self.path, "w:gz")
        for name, data in archive_files:
            info = tarfile.TarInfo(name)
            info.size = len(data)
            tar.addfile(info, StringIO(data))

        # Generate package file system
        for name in self.files:
            fileinfo = util.getfileinfo(name)
            info = tarfile.TarInfo(fileinfo["filename"])
            if fileinfo["hasperms"]:
                info.mode = fileinfo["perms"]
            elif fileinfo["isdir"]:
                info.mode = 0o755
            if fileinfo["isdir"]:
                info.type = tarfile.DIRTYPE
                tar.addfile(info)
            elif fileinfo["islink"]:
                info.type = tarfile.SYMTYPE
                info.linkname = fileinfo["link"]
                tar.addfile(info)
            else:
                # TODO wow what a hack, adding a newline to match mkfile?
                filedata = name + "\n"
                info.size = len(filedata)
                tar.addfile(info, StringIO(filedata))

        tar.close()
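The metadata step above reduces to writing generated strings into a gzip-compressed tar. A Python 3 sketch of just that core pattern (the archive name and .PKGINFO fields here are illustrative):

import io
import tarfile

pkginfo = "pkgname = demo\npkgver = 1.0-1\n"          # illustrative fields
with tarfile.open('demo-1.0-1-any.pkg.tar.gz', 'w:gz') as tar:
    data = pkginfo.encode('utf-8')                    # tarfile needs a bytes stream
    info = tarfile.TarInfo('.PKGINFO')
    info.size = len(data)
    info.mode = 0o644
    tar.addfile(info, io.BytesIO(data))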
def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('topic',
                        type=str,
                        help='Name of Kafka topic to listen to.')
    parser.add_argument('--group',
                        type=str,
                        help='Globally unique name of the consumer group. '
                        'Consumers in the same group will share messages '
                        '(i.e., only one consumer will receive a message, '
                        'as in a queue). Default is value of $HOSTNAME.')
    parser.add_argument('--tarName', type=str, help='Name of tar file.')
    avrogroup = parser.add_mutually_exclusive_group()
    avrogroup.add_argument('--decode',
                           dest='avroFlag',
                           action='store_true',
                           help='Decode from Avro format. (default)')
    avrogroup.add_argument('--decode-off',
                           dest='avroFlag',
                           action='store_false',
                           help='Do not decode from Avro format.')
    parser.set_defaults(avroFlag=True)

    args = parser.parse_args()

    # Configure consumer connection to Kafka broker
    conf = {
        'bootstrap.servers':
        'epyc.astro.washington.edu:9092,epyc.astro.washington.edu:9093,epyc.astro.washington.edu:9094',
        'default.topic.config': {
            'auto.offset.reset': 'smallest'
        }
    }
    if args.group:
        conf['group.id'] = args.group
    else:
        conf['group.id'] = os.environ['HOSTNAME']

    # Configure Avro reader schema
    schema_files = [
        "./ztf-avro-alert/schema/candidate.avsc",
        "./ztf-avro-alert/schema/cutout.avsc",
        "./ztf-avro-alert/schema/prv_candidate.avsc",
        "./ztf-avro-alert/schema/alert.avsc"
    ]

    # Start consumer and collect alerts in a stream
    with alertConsumer.AlertConsumer(args.topic, schema_files,
                                     **conf) as streamReader:

        with tarfile.open("./" + args.tarName + ".tar", "a") as tar:
            while True:

                try:
                    msg = streamReader.poll(decode=args.avroFlag)

                    if msg is None:

                        print('currenttime: ', int(strftime('%H')))
                        if (int(strftime('%H')) >= stopTIME):
                            print("break \n")

                            break
                        else:
                            print("continue \n")
                            continue

                    else:
                        for record in msg:

                            #record0 = msg_text(record)
                            candidate_data = record.get('candidate')
                            fn = str(candidate_data['candid']) + ".avro"

                            with io.BytesIO() as avro_file:
                                record0 = [record]
                                fastavro.writer(avro_file,
                                                (combineSchemas(schema_files)),
                                                record0)
                                avro_file.seek(0)
                                tarinfo = tarfile.TarInfo(name=fn)
                                tarinfo.size = len(avro_file.getvalue())
                                tarinfo.mtime = time.time()
                                tarinfo.mode = 0o744
                                tarinfo.type = tarfile.REGTYPE
                                tarinfo.uid = tarinfo.gid = 0
                                tarinfo.uname = tarinfo.gname = "root"
                                tar.addfile(tarinfo, avro_file)

                            #print( "%s \t %8.9f \t %8.5f \t %8.5f \n" % \
                            #    (record.get('objectId'),candidate_data['jd'],candidate_data['ra'],candidate_data['dec']) )

                except alertConsumer.EopError as e:
                    # Write when reaching end of partition
                    sys.stderr.write(e.message)
                    #continue
                except IndexError:
                    sys.stderr.write('%% Data cannot be decoded\n')
                except UnicodeDecodeError:
                    sys.stderr.write('%% Unexpected data format received\n')
                except KeyboardInterrupt:
                    sys.stderr.write('%% Aborted by user\n')
                    break

            print('we reached the end of stream at: {}'.format(
                strftime("%b %d %Y %H:%M:%S")))

            sys.exit()
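The inner loop above boils down to a reusable pattern: serialize a record into an in-memory buffer, size the TarInfo from that buffer, and append it to an already-open tar. A standalone sketch of just that pattern (a JSON payload stands in for the Avro bytes, and all names are illustrative):

import io
import json
import tarfile
import time

def append_record(tar, member_name, payload_bytes):
    """Append an in-memory payload to an open tarfile as a regular 0o744 member."""
    info = tarfile.TarInfo(name=member_name)
    info.size = len(payload_bytes)
    info.mtime = time.time()
    info.mode = 0o744
    info.uid = info.gid = 0
    info.uname = info.gname = 'root'
    tar.addfile(info, io.BytesIO(payload_bytes))

with tarfile.open('alerts.tar', 'a') as tar:
    record = {'candid': 123456789, 'ra': 150.1, 'dec': 2.2}   # illustrative record
    append_record(tar, '%d.json' % record['candid'],
                  json.dumps(record).encode('utf-8'))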