def test_memory_usage_coordinates():
    """
    Watch out for high memory usage on huge spatial files
    """

    ntf = tempfile.NamedTemporaryFile()

    tracemalloc.start()

    snap1 = tracemalloc.take_snapshot()

    # create a "flat" cube
    cube,_ = utilities.generate_gaussian_cube(shape=[1,2000,2000])
    sz = _.dtype.itemsize

    snap1b = tracemalloc.take_snapshot()
    diff = snap1b.compare_to(snap1, 'lineno')
    diffvals = np.array([dd.size_diff for dd in diff])
    # at this point, the generated cube should still exist in memory
    assert diffvals.max()*u.B >= 2000**2*sz*u.B

    del _
    snap2 = tracemalloc.take_snapshot()
    diff = snap2.compare_to(snap1b, 'lineno')
    assert diff[0].size_diff*u.B < -0.3*u.MB

    print(cube)

    # printing the cube should not occupy any more memory
    # (it will allocate a few bytes for the cache, but should *not*
    # load the full 2000x2000 coordinate arrays for RA, Dec)
    snap3 = tracemalloc.take_snapshot()
    diff = snap3.compare_to(snap2, 'lineno')
    assert sum([dd.size_diff for dd in diff])*u.B < 100*u.kB
Example #2
    def req_debug_handler(self):
        global mem_stat
        req = urlparse.urlparse(self.path).query
        reqs = urlparse.parse_qs(req, keep_blank_values=True)

        try:
            import tracemalloc
            import gc
            gc.collect()

            if not mem_stat or "reset" in reqs:
                mem_stat = tracemalloc.take_snapshot()

            snapshot = tracemalloc.take_snapshot()

            if "compare" in reqs:
                top_stats = snapshot.compare_to(mem_stat, 'traceback')
            else:
                top_stats = snapshot.statistics('traceback')

            python_lib = os.path.join(root_path, "python27")

            dat = ""
            for stat in top_stats[:100]:
                print("%s memory blocks: %.1f KiB" % (stat.count, stat.size / 1024))
                lines = stat.traceback.format()
                ll = "\n".join(lines)
                ln = len(lines)
                pl = ""
                for i in xrange(ln, 0, -1):
                    line = lines[i - 1]
                    print(line)
                    if line[8:].startswith(python_lib):
                        break
                    if not line.startswith("  File"):
                        pl = line
                        continue
                    if not line[8:].startswith(root_path):
                        break
                    ll = line[8:] + "\n" + pl

                if ll[0] == "[":
                    pass

                dat += "%d KB, count:%d %s\n" % (stat.size / 1024, stat.count, ll)

            if hasattr(threading, "_start_trace"):
                dat += "\n\nThread stat:\n"
                for path in threading._start_trace:
                    n = threading._start_trace[path]
                    if n <= 1:
                        continue
                    dat += "%s => %d\n\n" % (path, n)

            self.send_response("text/plain", dat)
        except Exception as e:
            xlog.exception("debug:%r", e)
            self.send_response("text/html", "no mem_top")
Example #3
def main():
    # Parse command line options
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file', help='input ninja file')
    parser.add_argument('--encoding', default='utf-8',
                        help='ninja file encoding')
    parser.add_argument('--ninja-deps', help='.ninja_deps file')
    args = parser.parse_args()

    if DEBUG_ALLOC:
        tracemalloc.start(25)
        tc_start = tracemalloc.take_snapshot()

    # Parse ninja file
    manifest = Parser().parse(args.input_file, args.encoding, args.ninja_deps)

    if DEBUG_ALLOC:
        tc_end = tracemalloc.take_snapshot()

    for rule in manifest.rules:
        print('rule', rule.name)

    for build in manifest.builds:
        print('build')
        for path in build.explicit_outs:
            print('  explicit_out:', path)
        for path in build.implicit_outs:
            print('  implicit_out:', path)
        for path in build.explicit_ins:
            print('  explicit_in:', path)
        for path in build.implicit_ins:
            print('  implicit_in:', path)
        for path in build.prerequisites:
            print('  prerequisites:', path)
        for path in build.depfile_implicit_ins:
            print('  depfile_implicit_in:', path)

    for pool in manifest.pools:
        print('pool', pool.name)

    for default in manifest.defaults:
        print('default')
        for path in default.outs:
            print('  out:', path)

    if DEBUG_ALLOC:
        top_stats = tc_end.compare_to(tc_start, 'traceback')
        with open('tracemalloc.log', 'w') as fp:
            for s in top_stats:
                print('', file=fp)
                print('========================================', file=fp)
                print(s, file=fp)
                for line in s.traceback.format():
                    print(line, file=fp)
Example #4
 def measure_memory_diff(self, func):
     import tracemalloc
     tracemalloc.start()
     try:
         before = tracemalloc.take_snapshot()
         # Keep the result and only delete it after taking a snapshot
         res = func()
         after = tracemalloc.take_snapshot()
         del res
         return after.compare_to(before, 'lineno')
     finally:
         tracemalloc.stop()
Example #5
def memory_obj(args: tuple, packet: ircp.Packet, ___: dict):
    """ Print the biggest memory hogs """
    if not _IS_TRACING:
        return packet.notice(
            "Sorry, but tracing is currently disabled. "
            'Please restart probot with the "PYTHONTRACEMALLOC=NFRAME" '
            "environment variable."
        )

    snapshot = tracemalloc.take_snapshot()
    top_stats = snapshot.statistics("filename")

    num = 0
    if len(args) >= 2:
        try:
            num = int(args[1])
        except ValueError:
            return packet.notice("Your argument must be an integer")
    else:
        return packet.notice("You must specify an object to inspect!")

    if len(top_stats) >= num:
        output = [packet.notice("Memory hog #{}".format(num))]
        obj = top_stats[num]
        trace = tracemalloc.get_object_traceback(obj)
        for line in trace:
            output.append(packet.notice(line))
        return output
    else:
        return packet.notice("Sorry, but that object does not exist")
 def flush_and_send(self, cursor = None, wal_end = None):
     if os.getenv("DEBUG_MODE") == "yes":
         snapshot = tracemalloc.take_snapshot()
         display_top(snapshot)
     if len(self.batch_msgs) > 0:
         comp_batch_msgs = zlib.compress(json.dumps(self.batch_msgs).encode('utf-8'))
         if os.getenv("DEBUG_MODE") == "yes" or os.getenv("ROW_LEVEL_MODE") == "yes":
             print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f") + " : sending batch, length: " + str(len(self.batch_msgs)))
             print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f") + " : uncompressed batch, length: " + str(len(json.dumps(self.batch_msgs))))
             print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f") + " : compressed batch, length: " + str(len(comp_batch_msgs)))
         sending_not_successful = True
         while sending_not_successful:
             try:
                 requests.post(self.rest_api_endpoint, headers=self.headers, data=comp_batch_msgs)
                 sending_not_successful = False
             except requests.exceptions.ConnectionError:
                 print("can't send POST request. Retrying in 2 seconds ...")
                 time.sleep(2)
         if os.getenv("DEBUG_MODE") == "yes" or os.getenv("ROW_LEVEL_MODE") == "yes":
             print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f") + " : sending batch successful ...")
             
         self.batch_msgs.clear()
         
         if cursor is not None and wal_end is not None:
             try:
                 if os.getenv("DEBUG_MODE") == "yes" or os.getenv("ROW_LEVEL_MODE") == "yes":
                     print("syncing with pg server single")
                 cursor.send_replication_feedback(flush_lsn=wal_end)
             except AttributeError:
                 print("cannot sync wal_log, since no valid cursor was provided ...")
         signal.alarm(TIMEOUT)
Example #7
    def wrapper(*args, **kwargs):
        import sys
        do_prof, do_tracemalloc, do_traceopen = 3 * [False]
        if len(sys.argv) > 1:
            do_prof = sys.argv[1] == "prof"
            do_tracemalloc = sys.argv[1] == "tracemalloc"
            do_traceopen = sys.argv[1] == "traceopen"

        if do_prof or do_tracemalloc or do_traceopen: sys.argv.pop(1)

        if do_prof:
            print("Entering profiling mode...")
            import pstats, cProfile, tempfile
            prof_file = kwargs.pop("prof_file", None)
            if prof_file is None:
                _, prof_file = tempfile.mkstemp()
                print("Profiling data stored in %s" % prof_file)

            sortby = kwargs.pop("sortby", "time")
            cProfile.runctx("main()", globals(), locals(), prof_file)
            s = pstats.Stats(prof_file)
            s.strip_dirs().sort_stats(sortby).print_stats()
            return 0

        elif do_tracemalloc:
            print("Entering tracemalloc mode...")
            # Requires py3.4
            try:
                import tracemalloc
            except ImportError:
                print("Error while trying to import tracemalloc (requires py3.4)")
                raise SystemExit(1)

            tracemalloc.start()
            retcode = main(*args, **kwargs)
            snapshot = tracemalloc.take_snapshot()
            top_stats = snapshot.statistics('lineno')

            n = min(len(top_stats), 20)
            print("[Top %d]" % n)
            for stat in top_stats[:n]:
                print(stat)

        elif do_traceopen:
            try:
                import psutil
            except ImportError:
                print("traceopen requires psutil module")
                raise SystemExit(1)
            import os
            p = psutil.Process(os.getpid())
            retcode = main(*args, **kwargs)
            print("open_files", p.open_files())

        else:
            retcode = main(*args, **kwargs)

        return retcode
Example #8
    def test(self):
        timings = []
        memory_usage = []
        tracemalloc.start()

        for i in range(self.repeat):
            before_memory = tracemalloc.take_snapshot()
            start_time = time.time()

            self.bench()

            end_time = time.time()
            after_memory = tracemalloc.take_snapshot()
            timings.append(end_time - start_time)
            memory_usage.append(sum([t.size for t in after_memory.compare_to(before_memory, 'filename')]))

        print("time min:", min(timings), "max:", max(timings), "avg:", sum(timings) / len(timings))  # NOQA
        print("memory min:", min(memory_usage), "max:", max(memory_usage), "avg:", sum(memory_usage) / len(memory_usage))  # NOQA
 def tracemalloc_tool():
     # .. cross-platform but requires Python 3.4 or higher ..
     stat = next(filter(lambda item: str(item).startswith(filename),
                        tracemalloc.take_snapshot().statistics('filename')))
     mem = stat.size / _TWO_20
     if timestamps:
         return mem, time.time()
     else:
         return mem
Example #10
def take_snapshot():
    snapshot = tracemalloc.take_snapshot()
    return snapshot.filter_traces(
        (
            tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
            tracemalloc.Filter(False, "tracemalloc"),
            tracemalloc.Filter(False, "<unknown>"),
        )
    )
Example #11
def sigterm_handler(signal, frame):
    snapshot = tracemalloc.take_snapshot()
    top_stats = snapshot.statistics('lineno')
    if tracemallocfile:
        with open(tracemallocfile, 'w+') as f:
            f.write("SIGTERM tracemalloc before shutdown [ Top 10 ]\n")
            for stat in top_stats[:10]:
                f.write("{}\n".format(stat))
        sys.exit(0)
Example #12
 def display_biggest_traceback():
     snapshot = tracemalloc.take_snapshot()
     top_stats = snapshot.statistics('traceback')
 
     # pick the biggest memory block
     stat = top_stats[0]
     print("%s memory blocks: %.1f KiB" % (stat.count, stat.size / 1024))
     for line in stat.traceback.format():
         print(line)
Example #13
def take_snapshots():
    all_snapshots = []
    for loop in range(NGET_SNAPSHOT):
        objs = [alloc_object() for index in range(NOBJECTS)]
        snapshot = tracemalloc.take_snapshot()
        objs = None
        all_snapshots.append(snapshot)
        snapshots = None
    all_snapshots = None
Example #14
    def test_snapshot_save_attr(self):
        # take a snapshot with a new attribute
        snapshot = tracemalloc.take_snapshot()
        snapshot.test_attr = "new"
        snapshot.dump(support.TESTFN)
        self.addCleanup(support.unlink, support.TESTFN)

        # load() should recreate the attribute
        snapshot2 = tracemalloc.Snapshot.load(support.TESTFN)
        self.assertEqual(snapshot2.test_attr, "new")
 def test_does_not_leak_too_much(self):
     tracemalloc.start()
     gc.collect()
     series = []
     snapshot1 = tracemalloc.take_snapshot()
     for i in range(100):
         try:
             execute_script(self.feature, self)
         except Exception:
             pass
         gc.collect()
         snapshot2 = tracemalloc.take_snapshot()
         stats = snapshot2.compare_to(snapshot1, "lineno")
         snapshot1 = snapshot2
         series.append(sum(stat.size / 1024 for stat in stats))
     tracemalloc.stop()
     series = series[1:]  # ignore first run, which creates regex
     cv = statistics.stdev(series) / statistics.mean(series)
     assert cv < 0.1
Example #16
async def test_leak_in_transport(zipkin_url, client, loop):

    tracemalloc.start()

    endpoint = az.create_endpoint('simple_service')
    tracer = await az.create(zipkin_url, endpoint, sample_rate=1,
                             send_interval=0.0001, loop=loop)

    await asyncio.sleep(5)
    gc.collect()
    snapshot1 = tracemalloc.take_snapshot()

    await asyncio.sleep(10)
    gc.collect()
    snapshot2 = tracemalloc.take_snapshot()

    top_stats = snapshot2.compare_to(snapshot1, 'lineno')
    count = sum(s.count for s in top_stats)
    await tracer.close()
    assert count < 400  # in case of leak this number is around 901452
Example #17
def _stop_memory_tracing():
    try:
        import tracemalloc
    except ImportError:
        return

    snapshot = tracemalloc.take_snapshot()

    _log_memory_top(snapshot)

    tracemalloc.stop()
Example #18
def print_malloc_context(**kwargs):
    """
    :param \**kwargs: see print_malloc_snapshot
    """
    if tracemalloc is None:
        logger.error('tracemalloc required')
        return
    tracemalloc.start()
    yield
    snapshot = tracemalloc.take_snapshot()
    print_malloc_snapshot(snapshot, **kwargs)
Example #19
    def test_snapshot(self):
        obj, source = allocate_bytes(123)

        # take a snapshot
        snapshot = tracemalloc.take_snapshot()

        # write on disk
        snapshot.dump(support.TESTFN)
        self.addCleanup(support.unlink, support.TESTFN)

        # load from disk
        snapshot2 = tracemalloc.Snapshot.load(support.TESTFN)
        self.assertEqual(snapshot2.traces, snapshot.traces)

        # tracemalloc must be tracing memory allocations to take a snapshot
        tracemalloc.stop()
        with self.assertRaises(RuntimeError) as cm:
            tracemalloc.take_snapshot()
        self.assertEqual(str(cm.exception),
                         "the tracemalloc module must be tracing memory "
                         "allocations to take a snapshot")
Example #20
def dump_tracemalloc():
    """
    Dumps memory usage information to file
    """
    gc.collect()
    snapshot = tracemalloc.take_snapshot()

    output_file = PROFILING_OUTPUT_FMT % get_filename_fmt()
    with open(output_file, 'wb') as fp:
        cPickle.dump(snapshot, fp, 2)

    # Make sure the snapshot goes away
    snapshot = None
Example #21
def tracemalloc_dump() -> None:
    if not tracemalloc.is_tracing():
        logger.warning("pid {}: tracemalloc off, nothing to dump"
                       .format(os.getpid()))
        return
    # Despite our name for it, `timezone_now` always deals in UTC.
    basename = "snap.{}.{}".format(os.getpid(),
                                   timezone_now().strftime("%F-%T"))
    path = os.path.join(settings.TRACEMALLOC_DUMP_DIR, basename)
    os.makedirs(settings.TRACEMALLOC_DUMP_DIR, exist_ok=True)

    gc.collect()
    tracemalloc.take_snapshot().dump(path)

    procstat = open('/proc/{}/stat'.format(os.getpid()), 'rb').read().split()
    rss_pages = int(procstat[23])
    logger.info("tracemalloc dump: tracing {} MiB ({} MiB peak), using {} MiB; rss {} MiB; dumped {}"
                .format(tracemalloc.get_traced_memory()[0] // 1048576,
                        tracemalloc.get_traced_memory()[1] // 1048576,
                        tracemalloc.get_tracemalloc_memory() // 1048576,
                        rss_pages // 256,
                        basename))
    def stop_profiler(self):
        self.agent.log('Deactivating memory allocation profiler.')

        with self.profile_lock:
            if self.overhead_monitor:
                self.overhead_monitor.cancel()
                self.overhead_monitor = None

            if tracemalloc.is_tracing():
                snapshot = tracemalloc.take_snapshot()
                self.agent.log('Allocation profiler memory overhead {0} bytes'.format(tracemalloc.get_tracemalloc_memory()))
                tracemalloc.stop()
                self.process_snapshot(snapshot, time.time() - self.start_ts)
Example #23
def showMemoryTrace():
    try:
        import tracemalloc  # @UnresolvedImport
    except ImportError:
        pass
    else:
        snapshot = tracemalloc.take_snapshot()
        stats = snapshot.statistics("lineno")

        printLine("Top 50 memory allocations:")
        for count, stat in enumerate(stats):
            if count == 50:
                break
            printLine(stat)
Example #24
def trace_print():
    global snapshot
    snapshot2 = tracemalloc.take_snapshot()
    snapshot2 = snapshot2.filter_traces((
        tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
        tracemalloc.Filter(False, "<unknown>"),
        tracemalloc.Filter(False, tracemalloc.__file__),
    ))

    if snapshot is not None:
        print("="*10, " Begin Trace:")
        top_stats = snapshot2.compare_to(snapshot, "lineno", cumulative=True)
        for stat in top_stats[:10]:
            print(stat)
    snapshot = snapshot2
Example #25
def memprofile():
    import resource
    import tracemalloc

    tracemalloc.start()

    ast = parse_file('/tmp/197.c')

    print('Memory usage: %s (kb)' %
            resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)

    snapshot = tracemalloc.take_snapshot()
    print("[ tracemalloc stats ]")
    for stat in snapshot.statistics('lineno')[:20]:
        print(stat)
 def take_snapshot(self):
     filename = (filename_pattern
                 % (os.getpid(), self.counter))
     t0 = time.time()
     print("Write snapshot into %s..." % filename, file=sys.__stderr__)
     gc.collect()
     snapshot = tracemalloc.take_snapshot()
     with open(filename, "wb") as fp:
         # Pickle version 2 can be read by Python 2 and Python 3
         pickle.dump(snapshot, fp, 2)
     snapshot = None
     dt = time.time() - t0
     print("Snapshot written into %s (%.1f sec)" % (filename, dt),
           file=sys.__stderr__)
     self.counter += 1
def snapshot(filename=None):
    with _lock:
        if not is_running():
            logging.error('Memory profiler must be running '
                          'to take snapshots')
            return

        name = filename or _make_snapshot_name()
        gc.collect()
        snap = tracemalloc.take_snapshot()
        with gzip.open(name + '.gz', 'wb') as fp:
            # Pickle version 2 can be read by Python 2 and Python 3
            pickle.dump(snap, fp, 2)
            snap = None
        return name
Example #28
    def test_create_snapshot(self):
        raw_traces = [(5, (("a.py", 2),))]

        with contextlib.ExitStack() as stack:
            stack.enter_context(patch.object(tracemalloc, "is_tracing", return_value=True))
            stack.enter_context(patch.object(tracemalloc, "get_traceback_limit", return_value=5))
            stack.enter_context(patch.object(tracemalloc, "_get_traces", return_value=raw_traces))

            snapshot = tracemalloc.take_snapshot()
            self.assertEqual(snapshot.traceback_limit, 5)
            self.assertEqual(len(snapshot.traces), 1)
            trace = snapshot.traces[0]
            self.assertEqual(trace.size, 5)
            self.assertEqual(len(trace.traceback), 1)
            self.assertEqual(trace.traceback[0].filename, "a.py")
            self.assertEqual(trace.traceback[0].lineno, 2)
    def trace(self, signum, frame):  # pylint: disable=unused-argument
        """ Signal handler used to take snapshots of the running process. """

        # the last pending signal after trace_stop
        if not self.profiling:
            return

        gc.collect()

        snapshot = tracemalloc.take_snapshot()
        timestamp = time.time()
        sample_data = (timestamp, snapshot)

        # *Must* use the HIGHEST_PROTOCOL, otherwise the serialization will
        # use GBs of memory
        pickle.dump(sample_data, self.trace_stream, protocol=pickle.HIGHEST_PROTOCOL)
        self.trace_stream.flush()
Example #30
 def run(self):
     if hasattr(signal, 'pthread_sigmask'):
         # Available on UNIX with Python 3.3+
         signal.pthread_sigmask(signal.SIG_BLOCK, range(1, signal.NSIG))
     while True:
         logger.debug('Sleeping {0} seconds...'.format(self.interval))
         time.sleep(self.interval)
         filename = ("/tmp/tracemalloc-%d-%04d.dump"
                     % (os.getpid(), self.counter))
         logger.info("Write snapshot into %s..." % filename)
         gc.collect()
         snapshot = tracemalloc.take_snapshot()
         snapshot.dump(filename)
         self.snapshot_q.put(filename)
         logger.debug('Queue size: {0}'.format(self.snapshot_q.qsize()))
         snapshot = None
         logger.info("Snapshot written into %s" % filename)
         self.counter += 1
def _run(**kwargs):
    # Make sure that, if one worker crashes, the entire MPI process is aborted
    def handle_exception(exc_type, exc_value, exc_traceback):
        sys.__excepthook__(exc_type, exc_value, exc_traceback)
        sys.stderr.flush()
        if hvd.size() > 1:
            mpi.COMM_WORLD.Abort(1)

    sys.excepthook = handle_exception

    track_memory = kwargs['trace_memory']
    disable_logging = bool(kwargs['disable_logging'])
    warm_up_cycles = kwargs['warm_up_cycles']
    log_after_warm_up = kwargs['log_after_warm_up']
    screenshot_merge = kwargs['screenshot_merge']
    clear_checkpoints = list(
        filter(None, kwargs['clear_checkpoints'].split(':')))

    if 'all' in clear_checkpoints:
        clear_checkpoints = CHECKPOINT_ABBREVIATIONS.keys()

    if track_memory:
        tracemalloc.start(25)

    kwargs = del_out_of_setup_args(kwargs)
    expl, log_par = setup(**kwargs)
    local_logger.info('setup done')

    # We only need one MPI worker to log the results
    local_logger.info('Initializing logger')
    logger = None
    traj_logger = None
    if hvd.rank() == 0 and not disable_logging:
        logger = SimpleLogger(log_par.base_path + '/log.txt')
        traj_logger = SimpleLogger(log_par.base_path + '/traj_log.txt')

    ########################
    # START THE EXPERIMENT #
    ########################

    local_logger.info('Starting experiment')
    checkpoint_tracker = CheckpointTracker(log_par, expl)
    prev_checkpoint = None
    merged_dict = {}
    sil_trajectories = []
    if screenshot_merge[0:9] == 'from_dir:':
        screen_shot_dir = screenshot_merge[9:]
    else:
        screen_shot_dir = f'{log_par.base_path}/screen_shots'

    local_logger.info('Initiate cycle')
    expl.init_cycle()
    local_logger.info('Initiating Cycle done')

    if kwargs['expl_state'] is not None:
        local_logger.info('Performing warm up cycles...')
        expl.start_warm_up()
        for i in range(warm_up_cycles):
            if hvd.rank() == 0:
                local_logger.info(f'Running warm up cycle: {i}')
            expl.run_cycle()
        expl.end_warm_up()
        checkpoint_tracker.n_iters = expl.cycles
        checkpoint_tracker.log_warmup = log_after_warm_up
        local_logger.info('Performing warm up cycles... done')

    while checkpoint_tracker.should_continue():
        # Run one iteration
        if hvd.rank() == 0:
            local_logger.info(f'Running cycle: {checkpoint_tracker.n_iters}')

        checkpoint_tracker.pre_cycle()
        expl.run_cycle()
        checkpoint_tracker.post_cycle()

        write_checkpoint = None
        if hvd.rank() == 0:
            write_checkpoint = checkpoint_tracker.calc_write_checkpoint()
        write_checkpoint = mpi.get_comm_world().bcast(write_checkpoint, root=0)
        checkpoint_tracker.set_should_write_checkpoint(write_checkpoint)

        # Code that should be executed by all workers at a checkpoint generation
        if checkpoint_tracker.should_write_checkpoint():
            local_logger.debug(
                f'Rank: {hvd.rank()} is exchanging screenshots for checkpoint: {expl.frames_compute}'
            )
            screenshots = expl.trajectory_gatherer.env.recursive_getattr(
                'rooms')
            if screenshot_merge == 'mpi':
                screenshots = flatten_lists(
                    mpi.COMM_WORLD.allgather(screenshots))
            merged_dict = {}
            for screenshot_dict in screenshots:
                for key, value in screenshot_dict.items():
                    if key not in merged_dict:
                        merged_dict[key] = value
                    else:
                        after_threshold_screenshot_taken_merged = merged_dict[
                            key][0]
                        after_threshold_screenshot_taken_current = screenshot_dict[
                            key][0]
                        if after_threshold_screenshot_taken_current and not after_threshold_screenshot_taken_merged:
                            merged_dict[key] = value

            if screenshot_merge == 'disk':
                for key, value in merged_dict.items():
                    filename = f'{screen_shot_dir}/{key}_{hvd.rank()}.png'
                    os.makedirs(screen_shot_dir, exist_ok=True)
                    if not os.path.isfile(filename):
                        im = Image.fromarray(value[1])
                        im.save(filename)
                        im_array = imageio.imread(filename)
                        assert (im_array == value[1]).all()

                mpi.COMM_WORLD.barrier()

            local_logger.debug('Merging SIL trajectories')
            sil_trajectories = [expl.prev_selected_traj]
            if hvd.size() > 1:
                sil_trajectories = flatten_lists(
                    mpi.COMM_WORLD.allgather(sil_trajectories))
            local_logger.debug(
                f'Rank: {hvd.rank()} is done merging trajectories for checkpoint: {expl.frames_compute}'
            )

            expl.sync_before_checkpoint()
            local_logger.debug(
                f'Rank: {hvd.rank()} is done synchronizing for checkpoint: {expl.frames_compute}'
            )

        # Code that should be executed only by the master
        if hvd.rank() == 0 and not disable_logging:
            gatherer = expl.trajectory_gatherer
            return_success_rate = -1
            if gatherer.nb_return_goals_chosen > 0:
                return_success_rate = gatherer.nb_return_goals_reached / gatherer.nb_return_goals_chosen
            exploration_success_rate = -1
            if gatherer.nb_exploration_goals_chosen > 0:
                exploration_success_rate = gatherer.nb_exploration_goals_reached / gatherer.nb_exploration_goals_chosen

            cum_success_rate = 0
            for reached in expl.archive.cells_reached_dict.values():
                success_rate = sum(reached) / len(reached)
                cum_success_rate += success_rate
            mean_success_rate = cum_success_rate / len(expl.archive.archive)

            logger.write('it', checkpoint_tracker.n_iters)
            logger.write('score', expl.archive.max_score)
            logger.write('cells', len(expl.archive.archive))
            logger.write('ret_suc', return_success_rate)
            logger.write('exp_suc', exploration_success_rate)
            logger.write('rew_mean', gatherer.reward_mean)
            logger.write('len_mean', gatherer.length_mean)
            logger.write('ep', gatherer.nb_of_episodes)
            logger.write('arch_suc', mean_success_rate)
            logger.write('cum_suc', cum_success_rate)
            logger.write('frames', expl.frames_compute)

            if len(gatherer.loss_values) > 0:
                loss_values = np.mean(gatherer.loss_values, axis=0)
                assert len(loss_values) == len(gatherer.model.loss_names)
                for (loss_value, loss_name) in zip(loss_values,
                                                   gatherer.model.loss_names):
                    logger.write(loss_name, loss_value)

            stored_frames = 0
            for traj in expl.archive.cell_trajectory_manager.full_trajectories.values(
            ):
                stored_frames += len(traj)

            logger.write('sil_frames', stored_frames)

            nb_no_score_cells = len(expl.archive.archive)
            for weight in expl.archive.cell_selector.selector_weights:
                if hasattr(weight, 'max_score_dict'):
                    nb_no_score_cells = len(weight.max_score_dict)
            logger.write('no_score_cells', nb_no_score_cells)

            cells_found_ret = 0
            cells_found_rand = 0
            cells_found_policy = 0
            for cell_key in expl.archive.archive:
                cell_info = expl.archive.archive[cell_key]
                if cell_info.ret_discovered == global_const.EXP_STRAT_NONE:
                    cells_found_ret += 1
                elif cell_info.ret_discovered == global_const.EXP_STRAT_RAND:
                    cells_found_rand += 1
                elif cell_info.ret_discovered == global_const.EXP_STRAT_POLICY:
                    cells_found_policy += 1

            logger.write('cells_found_ret', cells_found_ret)
            logger.write('cells_found_rand', cells_found_rand)
            logger.write('cells_found_policy', cells_found_policy)
            logger.flush()

            traj_manager = expl.archive.cell_trajectory_manager
            new_trajectories = sorted(
                traj_manager.new_trajectories,
                key=lambda t: traj_manager.cell_trajectories[t].frame_finished)
            for traj_id in new_trajectories:
                traj_info = traj_manager.cell_trajectories[traj_id]
                traj_logger.write('it', checkpoint_tracker.n_iters)
                traj_logger.write('frame', traj_info.frame_finished)
                traj_logger.write('exp_strat', traj_info.exp_strat)
                traj_logger.write('exp_new_cells', traj_info.exp_new_cells)
                traj_logger.write('ret_new_cells', traj_info.ret_new_cells)
                traj_logger.write('score', traj_info.score)
                traj_logger.write('total_actions', traj_info.total_actions)
                traj_logger.write('id', traj_info.id)
                traj_logger.flush()

            # Code that should be executed by only the master at a checkpoint generation
            if checkpoint_tracker.should_write_checkpoint():
                local_logger.info(
                    f'Rank: {hvd.rank()} is writing checkpoint: {expl.frames_compute}'
                )
                filename = f'{log_par.base_path}/{expl.frames_compute:0{log_par.n_digits}}'

                # Save pictures
                if len(log_par.save_pictures) > 0:
                    if screenshot_merge == 'disk':
                        for file_name in os.listdir(screen_shot_dir):
                            if file_name.endswith('.png'):
                                room = int(file_name.split('_')[0])
                                if room not in merged_dict:
                                    screen_shot = imageio.imread(
                                        f'{screen_shot_dir}/{file_name}')
                                    merged_dict[room] = (True, screen_shot)

                    elif screenshot_merge[0:9] == 'from_dir:':
                        for file_name in os.listdir(screen_shot_dir):
                            if file_name.endswith('.png'):
                                room = int(file_name.split('.')[0])
                                if room not in merged_dict:
                                    screen_shot = imageio.imread(
                                        f'{screen_shot_dir}/{file_name}')
                                    merged_dict[room] = (True, screen_shot)

                    render_pictures(log_par, expl, filename, prev_checkpoint,
                                    merged_dict, sil_trajectories)

                # Save archive state
                if log_par.save_archive:
                    save_state(expl.get_state(), filename + ARCHIVE_POSTFIX)
                    expl.archive.cell_trajectory_manager.dump(filename +
                                                              TRAJ_POSTFIX)

                # Save model
                if log_par.save_model:
                    expl.trajectory_gatherer.save_model(filename +
                                                        MODEL_POSTFIX)

                # Clean up previous checkpoint.
                if prev_checkpoint:
                    for checkpoint_type in clear_checkpoints:
                        if checkpoint_type in CHECKPOINT_ABBREVIATIONS:
                            postfix = CHECKPOINT_ABBREVIATIONS[checkpoint_type]
                        else:
                            postfix = checkpoint_type
                        with contextlib.suppress(FileNotFoundError):
                            local_logger.debug(
                                f'Removing old checkpoint: {prev_checkpoint + postfix}'
                            )
                            os.remove(prev_checkpoint + postfix)
                prev_checkpoint = filename

                if track_memory:
                    snapshot = tracemalloc.take_snapshot()
                    display_top(snapshot)

                if PROFILER:
                    local_logger.info(
                        f'ITERATION: {checkpoint_tracker.n_iters}')
                    PROFILER.disable()
                    PROFILER.dump_stats(filename + '.stats')
                    PROFILER.enable()

    local_logger.info(f'Rank {hvd.rank()} finished experiment')
    mpi.get_comm_world().barrier()
Example #32
tracemalloc.start()
test2()
# print('collectable objects', gc.DEBUG_COLLECTABLE)
# print('uncollectable objects', gc.DEBUG_UNCOLLECTABLE)
gc.set_debug(gc.DEBUG_UNCOLLECTABLE)
print(gc.garbage)
# Return the hexadecimal address of the function
print(hex(id(test2)))
# Return all objects tracked by the gc
print(len(gc.get_objects()))
# Return the objects that test2 directly refers to
print(gc.get_referents(test2))
# Return the objects that refer to test2
print(gc.get_referrers(test2))

snapshot = tracemalloc.take_snapshot()
top_stats = snapshot.statistics('lineno')

print("[ Top 10 ]")
for stat in top_stats[:10]:
    print(stat)
# gc.disable() # whether we disable here actually makes no difference
# gc.set_debug(gc.DEBUG_COLLECTABLE)
# for _ in range(1):
#     test2()
# print(gc.garbage)
# gc.collect()
print("**********")
# time.sleep(5)

def getMemory():
    """
    Returns the memory allocated at a given point in time
    """
    return tracemalloc.take_snapshot()
Example #34
 def get_traced_memory(self):
     # Get the traced size in the domain
     snapshot = tracemalloc.take_snapshot()
     domain_filter = tracemalloc.DomainFilter(True, self.domain)
     snapshot = snapshot.filter_traces([domain_filter])
     return sum(trace.size for trace in snapshot.traces)
def run_SVMreject(trainname, testname, n):

    trainDataPath = '/albona/nobackup/biostat/datasets/singlecell/tabulaMuris_benchmark/' + trainname + '.csv'
    trainLabelsPath = '/albona/nobackup/biostat/datasets/singlecell/tabulaMuris_benchmark/' + trainname + '_label.csv'

    testDataPath = '/albona/nobackup/biostat/datasets/singlecell/tabulaMuris_benchmark/' + testname + '.csv'
    testLabelsPath = '/albona/nobackup/biostat/datasets/singlecell/tabulaMuris_benchmark/' + testname + '_label.csv'

    # read the data
    train = pd.read_csv(trainDataPath, index_col=0, sep=',')
    test = pd.read_csv(testDataPath, index_col=0, sep=',')

    y_train = pd.read_csv(trainLabelsPath, header=0, index_col=0, sep=',')
    y_train = y_train['x'].ravel()
    y_test = pd.read_csv(testLabelsPath, header=0, index_col=0, sep=',')
    y_test = y_test['x'].ravel()

    truelab = []
    pred = []

    train = train.transpose()
    test = test.transpose()

    now = time.time()
    tracemalloc.start()

    Classifier = LinearSVC()
    clf = CalibratedClassifierCV(Classifier)

    clf.fit(train, y_train)

    snapshot = tracemalloc.take_snapshot()
    mem_train = display_top(snapshot)

    later = time.time()
    time_train = int(later - now)

    now = time.time()
    tracemalloc.start()

    predicted = clf.predict(test)

    snapshot = tracemalloc.take_snapshot()
    mem_test = display_top(snapshot)

    later = time.time()
    time_test = int(later - now)

    prob = np.max(clf.predict_proba(test), axis=1)

    unlabeled = np.where(prob < 0.7)
    predicted[unlabeled] = 'Unassigned'

    truelab = y_test
    pred = predicted

    truelab = pd.DataFrame(truelab)
    pred = pd.DataFrame(pred)

    os.chdir("/dora/nobackup/yuec/scclassify/benchmark/SVMreject/vary_test")

    truelab.to_csv(n + "_SVMreject_true.csv", index=False)
    pred.to_csv(n + "_SVMreject_pred.csv", index=False)

    return mem_train, time_train, mem_test, time_test
# > Traceback where an object was allocated
# > Statistics on allocated memory blocks per filename and per line number: total size, number and average size of allocated memory blocks
# > Compute the differences between two snapshots to detect memory leaks
# To trace most memory blocks allocated by Python, the module should be started as early as possible by setting the PYTHONTRACEMALLOC environment
# variable to 1, or by using the -X tracemalloc command line option. The tracemalloc.start() function can be called at runtime to start tracing Python
# memory allocations.
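#
# For example (assumed invocation forms; "my_app.py" is a placeholder):
#   PYTHONTRACEMALLOC=1 python my_app.py
#   python -X tracemalloc=25 my_app.py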
#
# Compute differences.
#
# Take two snapshots and display the differences:
#

import tracemalloc

tracemalloc.start()

# ... start your application ...

snapshot1 = tracemalloc.take_snapshot()

# ... call the function leaking memory ...

snapshot2 = tracemalloc.take_snapshot()

top_stats = snapshot2.compare_to(snapshot1, 'lineno')

print("[ Top 10 differences ]")

for stat in top_stats[:10]:
    print(stat)
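
# The comments at the top of this example also list per-object tracebacks and
# per-filename statistics. A minimal sketch of those two modes, continuing from
# the snapshot taken above (`leaked` is a made-up stand-in for a suspect object):

leaked = [bytes(1024) for _ in range(100)]

# Traceback of the frames where a specific traced object was allocated:
tb = tracemalloc.get_object_traceback(leaked)
if tb is not None:
    print("\n".join(tb.format()))

# Statistics grouped per filename instead of per line number:
for stat in snapshot2.statistics('filename')[:10]:
    print(stat)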
Example #37
def get_allocated_memory():
    gc.collect()
    snapshot = tracemalloc.take_snapshot()
    current = snapshot.statistics('filename')
    return sum(stat.size for stat in current) / (2**20)
Example #38
 def disable(self):
     """
     Stop profiling calls.
     """
     self.snapshot = tracemalloc.take_snapshot()
def memorytrace_print():
    global tracemalloc_start
    snapshot = tracemalloc.take_snapshot()
    top_stats = snapshot.compare_to(tracemalloc_start, "lineno")
    for stat in top_stats[:10]:
        print(stat)
def memorytrace_start():
    global tracemalloc_start
    if tracemalloc_start is None:
        print("Begin memorytrace")
        tracemalloc.start()
        tracemalloc_start = tracemalloc.take_snapshot()
Example #41
        for ds in data_from.get('ds',[]):
            ds['exec_stat']='0:未开始'
    for one in config_data.get('vars',[]):
        one['exec_stat']='0:未开始'
    for one in config_data.get('template_output_act',[]):
        one['exec_stat']='0:未开始'
    for one in config_data.get('text_tpls',[]):
        one['exec_stat']='0:未开始'


if __name__ == '__main__':
    import glb
    import objgraph  
    import gc,tracemalloc
    tracemalloc.start()
    b_snapshot = tracemalloc.take_snapshot()
    for i in range(10):
        with glb.db_connect() as conn:
            with conn.cursor(as_dict=True) as cursor:
                cursor.execute("SELECT * FROM zhanbao_tbl WHERE  id=4274 order by id asc")
                row = cursor.fetchone()
                while row:
                    b1_snapshot = tracemalloc.take_snapshot()
                    try:
                        print('worker_no:'+ row['worker_no']+"\t"+ str(row['id']) +"     "+ str(tracemalloc.get_traced_memory()))
                        files_template_exec(row['id'],json.loads(row['config_txt']),row['worker_no'],glb.config['UPLOAD_FOLDER'] ,wx_queue=glb.msg_queue)  
                        print("====================================")
                        print('worker_no:'+ row['worker_no']+"\t"+ str(row['id']) +"     "+ str(tracemalloc.get_traced_memory()))
                        print("====================================")
                        snapshot2 = tracemalloc.take_snapshot()
                        top_stats = snapshot2.compare_to(b1_snapshot, 'lineno')
Example #42
depending on a threshold: 128 KB by default. The threshold is dynamic nowadays.
Use mallopt(M_MMAP_THRESHOLD, nbytes) to change this threshold.

See also:

* http://pushingtheweb.com/2010/06/python-and-tcmalloc/
* http://sourceware.org/ml/libc-alpha/2006-03/msg00033.html
* http://www.linuxdevcenter.com/pub/a/linux/2006/11/30/linux-out-of-memory.html?page=2
* http://cloudfundoo.wordpress.com/2012/05/18/minor-page-faults-and-dynamic-memory-allocation-in-linux/
"""
import gc
import sys
import tracemalloc

tracemalloc.start()
previous_snapshot = tracemalloc.take_snapshot()


def dump_memory():
    if 1:
        global previous_snapshot

        snapshot = tracemalloc.take_snapshot()
        exclude = tracemalloc.Filter(False, tracemalloc.__file__)
        snapshot = snapshot.filter_traces([exclude])
        top_stats = snapshot.compare_to(previous_snapshot, 'lineno')
        previous_snapshot = snapshot

        print("[ Top 10 ]")
        for stat in top_stats[:10]:
            print(stat)
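
# The docstring fragment above mentions mallopt(M_MMAP_THRESHOLD, nbytes) for
# changing glibc's mmap threshold. A hedged sketch of calling it through ctypes
# (Linux/glibc only; M_MMAP_THRESHOLD is -3 in glibc's malloc.h; verify against
# your own headers before relying on it):
import ctypes

libc = ctypes.CDLL("libc.so.6", use_errno=True)
M_MMAP_THRESHOLD = -3
# Serve allocations below 256 KiB from the main heap instead of mmap.
if libc.mallopt(M_MMAP_THRESHOLD, 256 * 1024) != 1:
    print("mallopt(M_MMAP_THRESHOLD, ...) failed")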
Example #43
def main(argv):
    global csv_output_filename
    start1 = datetime.datetime.now()

    try:
        opts, args = getopt.getopt(argv, "ha:n:s:t:v:p:i:", [
            "agent=", "numflows=", "flowsize=", "timesteps=", "interval=",
            "proportion=", "iter="
        ])
    except getopt.GetoptError:
        print(
            'run-experiments.py -a <agent> -n <numflows> -s <flowsize> -t <timesteps> -v <interval> -p <proportion> -i <iter>'
        )
        sys.exit(2)

    agent = None
    num_flows = None
    flows_size = None
    timesteps = None
    interval = None
    iter = None
    proportion = None

    for opt, arg in opts:
        if opt == '-h':
            print(
                'run-experiments.py -a <agent> -n <numflows> -s <flowsize> -t <timesteps> -v <interval> -p <proportion> -i <iter>'
            )
            sys.exit()
        elif opt in ("-a", "--agent"):
            agent = arg
        elif opt in ("-n", "--numflows"):
            num_flows = arg
        elif opt in ("-s", "--flowsize"):
            flows_size = arg
        elif opt in ("-t", "--timesteps"):
            timesteps = arg
        elif opt in ("-i", "--iter"):
            iter = arg
        elif opt in ("-v", "--interval"):
            interval = arg
        elif opt in ("-p", "--proportion"):
            proportion = arg

    csv_output_filename = './output-experiments-app/{0}-{1}_flows-{2}-{3}_steps-{4}_sec-prop_{5}-v_{6}.csv'.format(
        agent, num_flows, flows_size, timesteps, interval, proportion, iter)

    print(
        'Running: agent = {0}, number of flows = {1}, flows size = {2}, timesteps = {3}, interval = {4}, proportion = {5}, iter = {6}'
        .format(agent, num_flows, flows_size, timesteps, interval, proportion,
                iter))

    tracemalloc.start()
    start_time = datetime.datetime.now()

    if agent == 'F-HET':
        wait_time = int(timesteps) * 2
        print('wait_time = ', wait_time)
        time.sleep(wait_time)
    elif agent == 'F' or agent == 'F2':
        flow_size_bits = int(flows_size.strip('M')) * 8
        wait_time = (int(num_flows) * flow_size_bits / 10) * 4
        print('wait_time = ', wait_time)
        time.sleep(wait_time)
    else:
        env, original_env = createVectorizedEnv()

        # trainLookAheadAgent(env, agent)
        testAgent(env, original_env, agent, timesteps)

        time_interval = datetime.datetime.now() - start_time
        snapshot = tracemalloc.take_snapshot()
        memory_usage = getTopMemoryUsage(snapshot)

        output_filename_compcosts = './output-experiments-app/{0}-{1}_flows-{2}-{3}steps-{4}_sec-prop_{5}-v_{6}-compcosts.txt'.format(
            agent, num_flows, flows_size, timesteps, interval, proportion,
            iter)

        with open(output_filename_compcosts, 'w+') as output_file:
            output_file.write("%s\n" % time_interval)
            output_file.write("%s\n" % memory_usage)

        print('File {0} created.'.format(output_filename_compcosts))
Example #44
def run_moana(trainname, testname, n):

    
    
    DataPath  = '/albona/nobackup/biostat/datasets/singlecell/tabulaMuris_benchmark/'+trainname+'.csv'  
    matrix = ExpMatrix.read_tsv(DataPath, sep = ',')    
    LabelsPath = '/albona/nobackup/biostat/datasets/singlecell/tabulaMuris_benchmark/'+trainname+'_label.csv'
    truelab = pd.read_csv(LabelsPath, header=0,index_col=0, sep=',')
    data = ExpMatrix(X = matrix.X, genes = matrix.genes, cells = matrix.cells)
    
    data.genes.name = 'Genes'
    data.cells.name = 'Cells'
    data.index.name = 'Genes'
    data.columns.name = 'Cells'
    
    l = CellAnnVector(cells=data.cells, data=truelab['x'].values)
    
    
    
    
    now = time.time()
    
    tracemalloc.start() 
    clf = CellTypeClassifier()
    clf.fit(matrix = data, cell_labels = l)
    snapshot = tracemalloc.take_snapshot()
    mem_train = display_top(snapshot)
    
    later = time.time()
    time_train = int(later - now)
  

    
    DataPath  = '/albona/nobackup/biostat/datasets/singlecell/tabulaMuris_benchmark/'+testname+'.csv'  
    matrix = ExpMatrix.read_tsv(DataPath, sep = ',') 
    data = ExpMatrix(X = matrix.X, genes = matrix.genes, cells = matrix.cells)
    data.genes.name = 'Genes'
    data.cells.name = 'Cells'
    data.index.name = 'Genes'
    data.columns.name = 'Cells'
        
    
    
    now = time.time()
    
    tracemalloc.start() 
    predictions = clf.predict(data) 
    snapshot = tracemalloc.take_snapshot()
    mem_test = display_top(snapshot)
    
    later = time.time()
    time_test = int(later - now)
  

    
    
    
    predictions = np.asarray(predictions)
    pred = pd.DataFrame(predictions)
        
        
    LabelsPath = '/albona/nobackup/biostat/datasets/singlecell/tabulaMuris_benchmark/'+testname+'_label.csv'
    truelab =  pd.read_csv(LabelsPath, header=0,index_col=None, sep=',')
    
    os.chdir("/dora/nobackup/yuec/scclassify/benchmark/moanna/vary_celltype")
       
        
    truelab.to_csv(n + "_moana_True.csv", index = False)
    pred.to_csv( n  + "_moana_Pred.csv", index = False)
    
      
    return mem_train, time_train, mem_test, time_test
Example #45
#!/usr/bin/env PYTHONHASHSEED=1234 python3

# Copyright 2014-2019 Brett Slatkin, Pearson Education Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import tracemalloc

tracemalloc.start(10)  # Set stack depth
time1 = tracemalloc.take_snapshot()  # Before snapshot

import waste_memory

x = waste_memory.run()  # Usage to debug
time2 = tracemalloc.take_snapshot()  # After snapshot

stats = time2.compare_to(time1, 'lineno')  # Compare snapshots
for stat in stats[:3]:
    print(stat)
Example #46
def assignment_b_suffixarray_2():

    # For testing.
    class TestNormalizer(Normalizer):

        _table = str.maketrans({'Ø': 'O'})

        def canonicalize(self, buffer: str) -> str:
            return buffer

        def normalize(self, token: str) -> str:
            return token.upper().translate(self._table)

    # For testing.
    class TestDocument(Document):

        def __init__(self, document_id: int, a: str, b: str):
            self._document_id = document_id
            self._a = a
            self._b = b

        def get_document_id(self) -> int:
            return self._document_id

        def get_field(self, field_name: str, default: str) -> str:
            if field_name == "a":
                return self._a
            if field_name == "b":
                return self._b
            return default

    # For testing.
    class TestCorpus(Corpus):
        def __init__(self):
            self._docs = []
            self._docs.append(TestDocument(len(self._docs), "ø  o\n\n\nø\n\no", "ø o\nø   \no"))
            self._docs.append(TestDocument(len(self._docs), "ba", "b bab"))
            self._docs.append(TestDocument(len(self._docs), "ø  o Ø o", "ø o"))
            self._docs.append(TestDocument(len(self._docs), "øO" * 10000, "o"))
            self._docs.append(TestDocument(len(self._docs), "cbab o øbab Ø ", "ø o " * 10000))

        def __iter__(self):
            return iter(self._docs)

        def size(self) -> int:
            return len(self._docs)

        def get_document(self, document_id: int) -> Document:
            return self._docs[document_id]

    # Run the tests!
    for fields in [("b",), ("a", "b")]:

        # Create the suffix array over the given set of fields. Measure memory usage. If memory usage is
        # excessive, most likely the implementation is copying strings or doing other silly stuff instead
        # of working with buffer indices. The naive reference implementation is not in any way optimized,
        # and uses about 1.5 MB of memory on this corpus.
        tracemalloc.start()
        snapshot1 = tracemalloc.take_snapshot()
        engine = SuffixArray(TestCorpus(), fields, TestNormalizer(), BrainDeadTokenizer())
        snapshot2 = tracemalloc.take_snapshot()
        for statistic in snapshot2.compare_to(snapshot1, "filename"):
            if statistic.traceback[0].filename == inspect.getfile(SuffixArray):
                assert statistic.size_diff < 2000000, f"Memory usage is {statistic.size_diff}"
        tracemalloc.stop()
        results = []

        def process(m):
            results.append((m['document'].document_id, m['score']))

        expected_results = {
            ('b',): (
                ('bab', [(1, 1)]),
                ('ø o', [(4, 19999), (0, 3), (2, 1)]),
                ('o O', [(4, 19999), (0, 3), (2, 1)]),
                ('oooooo', []),
                ('o o o o', [(4, 19997), (0, 1)]),
            ),
            ('a', 'b'): (
                ('bab', [(1, 1)]),
                ('ø o', [(4, 20000), (0, 6), (2, 4)]),
                ('o O', [(4, 20000), (0, 6), (2, 4)]),
                ('oøØOøO', [(3, 1), ]),
                ('o o o o', [(4, 19997), (0, 2), (2, 1)]),
            )
        }

        for query, expected in expected_results[fields]:
            results.clear()
            engine.evaluate(query, {'hit_count': 10}, process)
            assert results == expected
Example #47
def get_allocated_khash_memory():
    snapshot = tracemalloc.take_snapshot()
    snapshot = snapshot.filter_traces(
        (tracemalloc.DomainFilter(True, ht.get_hashtable_trace_domain()), ))
    return sum(map(lambda x: x.size, snapshot.traces))
def main():
    args = parse_args()
    program_start = time.time()

    if args.trace_malloc:
        tracemalloc.start()

    print(
        "Virtual Change/Drift Detection - Association Rule Mining using Python."
    )
    print("Drift Algorithm: {}".format(args.drift_algorithm))
    print("Input file: {}".format(args.input))
    print("Output file prefix: {}".format(args.output))
    print("Training window size: {}".format(args.training_window_size))
    print("Minimum confidence: {}".format(args.min_confidence))
    print("Minimum support: {}".format(args.min_support))
    print("Minimum lift: {}".format(args.min_lift))
    if args.fixed_drift_confidence is not None:
        print("Fixed drift confidence of: {}".format(
            args.fixed_drift_confidence))
    print("Tracing memory allocations: {}".format(args.trace_malloc))
    print("Save rules: {}".format(args.save_rules))
    print("Generating maximal itemsets: {}".format(args.maximal_itemsets))

    reader = iter(DatasetReader(args.input))
    transaction_num = 0
    end_of_last_window = 0
    cohort_num = 1
    volatility_detector = make_volatility_detector(args)
    while True:
        window = take(args.training_window_size, reader)
        if len(window) == 0:
            break
        print("")
        print("Mining window [{},{}]".format(transaction_num,
                                             transaction_num + len(window)))
        end_of_last_window = transaction_num + len(window)
        transaction_num += len(window)
        print("Running FP-Growth...", flush=True)
        start = time.time()

        (itemsets, itemset_counts,
         num_transactions) = mine_fp_tree(window, args.min_support,
                                          args.maximal_itemsets)
        assert (num_transactions == len(window))

        duration = time.time() - start
        print("FPGrowth mined {} items in {:.2f} seconds".format(
            len(itemsets), duration))

        print("Generating rules...", flush=True)
        start = time.time()
        rules = list(
            generate_rules(itemsets, itemset_counts, num_transactions,
                           args.min_confidence, args.min_lift))
        duration = time.time() - start
        print("Generated {} rules in {:.2f} seconds".format(
            len(rules), duration),
              flush=True)

        if len(rules) == 0:
            print("No rules; just noise. Skipping change detection.")
            print(
                "Consider increasing training window size or lowering minsup/conf."
            )
            continue

        if args.save_rules:
            start = time.time()
            output_filename = args.output + "." + str(cohort_num)
            cohort_num += 1
            write_rules_to_file(rules, output_filename)
            print("Wrote rules for cohort {} to file {} in {:.2f} seconds".
                  format(cohort_num, output_filename, duration),
                  flush=True)

        drift_detector = make_drift_detector(args, volatility_detector)
        drift_detector.train(window, rules)

        # Read transactions until a drift is detected.
        for transaction in reader:
            transaction_num += 1
            drift = drift_detector.check_for_drift(transaction,
                                                   transaction_num)
            if drift is not None:
                print(
                    "Detected drift of type {} at transaction {}, {} after end of training window"
                    .format(drift.drift_type, transaction_num,
                            transaction_num - end_of_last_window))
                if drift.hellinger_value is not None:
                    print(
                        "Hellinger value: {}, confidence interval: {} ± {} ([{},{}])"
                        .format(drift.hellinger_value, drift.mean,
                                drift.confidence,
                                drift.mean - drift.confidence,
                                drift.mean + drift.confidence))
                # Record the drift in the volatility detector. This is used inside
                # the drift detector to help determine how large a confidence interval
                # is required when detecting drifts.
                if volatility_detector is not None:
                    volatility_detector.add(transaction_num)
                # Break out of the inner loop, we'll jump back up to the top and mine
                # a new training window.
                break

        if len(window) < args.training_window_size:
            break

    print("\nEnd of stream\n")

    duration = time.time() - program_start
    print("Total runtime {:.2f} seconds".format(duration))

    if args.trace_malloc:
        (_, peak_memory) = tracemalloc.get_traced_memory()
        tracemalloc_memory = tracemalloc.get_tracemalloc_memory()
        print("Peak memory usage: {:.3f} MB".format(peak_memory / 10**6))
        print("tracemalloc overhead: {:.3f} MB".format(
            (tracemalloc_memory / 10**6)))
        print("Peak memory usage minus tracemalloc overhead: {:.3f} MB".format(
            (peak_memory - tracemalloc_memory) / 10**6))
        snapshot = tracemalloc.take_snapshot()
        bytes_allocated = sum(x.size for x in snapshot.traces)
        print("Total traced memory allocated: {:.3f} MB".format(
            bytes_allocated / 10**6))

        tracemalloc.stop()

    return 0
def getMemory():
    """
    Take a snapshot of the memory allocated at this instant.
    """
    return tracemalloc.take_snapshot()
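# A minimal sketch of the start-up that the trace_malloc reporting in main()
# above relies on (hypothetical -- the real entry point and argument parsing
# are not shown in this example). Tracing must begin before main() allocates
# anything, otherwise get_traced_memory() / take_snapshot() have nothing to
# report.
if __name__ == "__main__":
    import sys
    import tracemalloc

    if "--trace-malloc" in sys.argv:  # stands in for args.trace_malloc
        tracemalloc.start()
    sys.exit(main())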
Beispiel #50
0
 async def wrapper(*args, **kwargs):
     snapshot = tracemalloc.take_snapshot()
     res = await f(*args, **kwargs)
     self.stats[f.__name__] = tracemalloc.take_snapshot().compare_to(
         snapshot, 'lineno')
     return res
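# The wrapper above is a fragment: `f` and `self.stats` come from an enclosing
# scope that is not shown. A minimal sketch of what that enclosing decorator
# might look like (hypothetical -- the MemoryStats name and profile() method
# are assumptions, not part of the original example):
import functools
import tracemalloc


class MemoryStats:
    def __init__(self):
        self.stats = {}  # per-coroutine allocation diffs, keyed by function name

    def profile(self, f):
        @functools.wraps(f)
        async def wrapper(*args, **kwargs):
            snapshot = tracemalloc.take_snapshot()
            res = await f(*args, **kwargs)
            self.stats[f.__name__] = tracemalloc.take_snapshot().compare_to(
                snapshot, 'lineno')
            return res

        return wrapper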
def getMemory():
    """Take a snapshot of the memory currently allocated."""
    return tracemalloc.take_snapshot()
Beispiel #52
0
def print_stat():
    snapshot = tracemalloc.take_snapshot()
    stats = snapshot.statistics('lineno')
    for s in stats:
        print(s)
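# print_stat() above dumps every statistic, which can run to thousands of
# lines. A variant sketch that filters out import-machinery noise and keeps
# only the top entries (the limit of 10 is an arbitrary choice, not from the
# original example):
def print_top_stats(limit=10):
    snapshot = tracemalloc.take_snapshot()
    snapshot = snapshot.filter_traces((
        tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
        tracemalloc.Filter(False, "<unknown>"),
    ))
    for s in snapshot.statistics('lineno')[:limit]:
        print(s)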
Beispiel #53
0
#!/usr/bin/env python3.8
# top_n.py
import tracemalloc

tracemalloc.start(10)  # set the stack depth kept per allocation
time1 = tracemalloc.take_snapshot()  # snapshot taken before

import waste_memory

x = waste_memory.run()  # Usage to debug
time2 = tracemalloc.take_snapshot()  # snapshot taken after

stats = time2.compare_to(time1, 'lineno')  # compare the two snapshots
for stat in stats[:3]:
    print(stat)
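# A follow-up sketch (not part of the original example): comparing the same
# two snapshots by 'traceback' instead of 'lineno' shows the full call stack
# behind the biggest allocation, using the 10 frames kept by
# tracemalloc.start(10) above.
stats = time2.compare_to(time1, 'traceback')
top = stats[0]
print('\n'.join(top.traceback.format()))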
Beispiel #54
0
 def capture(self, idx=1):
     """捕捉最新的快照到指定的槽位"""
     self.__dict__['snap%d' %
                   idx] = tracemalloc.take_snapshot().filter_traces(
                       self.filters)
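 # A companion sketch (hypothetical; not part of the original example) showing
 # how two captured slots might be compared -- it assumes the same 'snapN'
 # attribute naming used by capture() above:
 def diff(self, idx1=1, idx2=2, key_type='lineno'):
     """Compare the snapshot in slot idx2 against the one in slot idx1."""
     older = self.__dict__['snap%d' % idx1]
     newer = self.__dict__['snap%d' % idx2]
     return newer.compare_to(older, key_type)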
Beispiel #55
0
    def __enter__(self):
        # type: () -> MeasureMemory

        self.snapshot = tracemalloc.take_snapshot()
        return self
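    # A matching __exit__ sketch (an assumption -- the rest of the
    # MeasureMemory class is not shown in this example): compare the snapshot
    # taken on entry against one taken on exit and keep the diff.
    def __exit__(self, exc_type, exc_value, traceback):
        # type: (...) -> None
        self.diff = tracemalloc.take_snapshot().compare_to(
            self.snapshot, 'lineno')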
Beispiel #56
0
def test_history_performance():
    try:
        tracemalloc.start()
    except:
        pass

    for _ in range( 3 ):
        path		= "/tmp/test_performance_%d" % random.randint( 100000, 999999 )
        if not os.path.exists( path ):
            break
    assert not os.path.exists( path ), "Couldn't find an unused name: %s" % path

    files		= []
    try:
        day		= 24*60*60
        dur		= 3*day		# a few days worth of data
        regstps		= 0.0,5.0	# 0-5secs between updates
        numfiles	= dur//day+1	# ~1 file/day, but at least 2
        values		= {}		# Initial register values
        regscount	= 1000		# Number of different registers
        regschanged	= 1,10		# From 1-10 registers per row
        regsbase	= 40001

        start		= timer()

        now = beg	= start - dur
        linecnt		= 0
        for e in reversed( range( numfiles )):
            f		= path + (( '.%d' % e ) if e else '') # 0'th file has no extension
            files.append( f )
            with logger( f ) as l:
                if values:
                    l.write( values, now=now ); linecnt += 1
                while now < beg + len(files) * dur/numfiles:
                    lst	= now
                    now += random.uniform( *regstps )
                    assert now >= lst
                    assert timestamp( now ) >= timestamp( lst ), "now: %s, timestamp(now): %s" % ( now, timestamp( now ))
                    updates = {}
                    for _ in range( random.randint( *regschanged )):
                        updates[random.randint( regsbase, regsbase + regscount - 1 )] = random.randint( 0, (1<<16) - 1 )
                    values.update( updates )
                    l.write( updates, now=now ); linecnt += 1
                lst 	= now
                now    += random.uniform( *regstps )
                assert now >= lst
                assert timestamp( now ) >= timestamp( lst )
            if e:
                # Compress .1 onward using a random format; randomly delete the original
                # uncompressed file, so sometimes both compressed and uncompressed files exist
                if random.choice( (True, False, False, False) ):
                    continue # Don't make a compressed version of some files
                fz	 = f + '.%s' % random.choice( ('gz', 'bz2', 'xz') )
                files.append( fz )
                with opener( fz, mode='wb' ) as fd:
                    with open( f, 'rb' ) as rd:
                        fd.write( rd.read() )
                if random.choice( (True, False, False) ):
                    continue # Don't remove some of the uncompressed files
                os.unlink( f )
                files.pop( files.index( f ))

        logging.warning( "Generated data in %.3fs; lines: %d", timer() - start, linecnt )

        # Start somewhere within 0-1% of the dur past the beg, forcing the loader to look back
        # to find the first file.  Try to do it all in the next 'playback' seconds (just to push
        # it to the max), in 'chunks' pieces.
        historical	= timestamp( random.uniform( beg + dur*0/100, beg + dur*1/100 ))
        basis		= timer()
        playback	= 2.0 * dur/day # Can sustain ~2 seconds / day of history on a single CPU
        chunks		= 1000
        factor		= dur / playback
        lookahead	= 60.0
        duration	= None
        if random.choice( (True,False) ):
            duration	= random.uniform( dur * 98/100, dur * 102/100 )

        begoff		= historical.value - beg
        endoff		= 0 if duration is None else (( historical.value + duration ) - ( beg + dur ))
        logging.warning( "Playback starts at beginning %s %s, duration %s, ends at ending %s %s",
                         timestamp( beg ), format_offset( begoff, ms=False ),
                         None if duration is None else format_offset( duration, ms=False, symbols='-+' ),
                         timestamp( beg + dur ), format_offset( endoff, ms=False ))

        ld		= loader(
            path, historical=historical, basis=basis, factor=factor, lookahead=lookahead, duration=duration )
        eventcnt	= 0
        slept		= 0
        cur		= None
        while ld:
            once	= False
            while ld.state < ld.AWAITING or not once:
                once		= True
                upcoming	= None
                limit		= random.randint( 0, 250 )
                if random.choice( (True,False) ):
                    upcoming	= ld.advance()
                    if random.choice( (True,False) ) and cur:
                        # ~25% of the time, provide an 'upcoming' timestamp that is between the
                        # current advancing historical time and the last load time.
                        upcoming-= random.uniform( 0, upcoming.value - cur.value )
                cur,events	= ld.load( upcoming=upcoming, limit=limit )
                eventcnt       += len( events )
                advance		= ld.advance()
                offset		= advance.value - cur.value
                logging.detail( "%s loaded up to %s (%s w/ upcoming %14s); %4d future, %4d values: %4d events / %4d limit" ,
                                ld, cur, format_offset( offset ),
                                format_offset( upcoming.value - advance.value ) if upcoming is not None else None,
                                len( ld.future ), len( ld.values ), len( events ), limit )

            logging.warning( "%s loaded up to %s; %3d future, %4d values: %6d events total",
                                ld, cur, len( ld.future ), len( ld.values ), eventcnt )
            try:
                snapshot	= tracemalloc.take_snapshot()
                display_top( snapshot, limit=10 )
            except:
                pass

            time.sleep( playback/chunks )
            slept	       += playback/chunks

        elapsed		= timer() - basis
        eventtps	= eventcnt // ( elapsed - slept )
        logging.error( "Playback in %.3fs (slept %.3fs); events: %d ==> %d historical records/sec",
                       elapsed, slept, eventcnt, eventtps )
        if not logging.getLogger().isEnabledFor( logging.NORMAL ):
            # Ludicrously low threshold, to pass tests on very slow machines
            assert eventtps >= 1000, \
                "Historical event processing performance low: %d records/sec" % eventtps
        try:
            display_biggest_traceback()
        except:
            pass

    except Exception as exc:
        logging.normal( "Test failed: %s", exc )
        '''
        for f in files:
            logging.normal( "%s:\n    %s", f, "    ".join( l for l in open( f )))
        '''
        raise

    finally:
        for f in files:
            logging.detail( "unlinking %s", f )
            try:
                os.unlink( f )
            except:
                pass
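# display_top() and display_biggest_traceback() used above are project helpers
# that are not shown in this example. A sketch of what display_top() typically
# looks like, adapted from the snapshot recipe in the tracemalloc docs (an
# assumption about the project's actual helper):
import tracemalloc

def display_top(snapshot, key_type='lineno', limit=10):
    snapshot = snapshot.filter_traces((
        tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
        tracemalloc.Filter(False, "<unknown>"),
    ))
    top_stats = snapshot.statistics(key_type)
    print("Top %s lines" % limit)
    for index, stat in enumerate(top_stats[:limit], 1):
        frame = stat.traceback[0]
        print("#%s: %s:%s: %.1f KiB"
              % (index, frame.filename, frame.lineno, stat.size / 1024))
    other = top_stats[limit:]
    if other:
        size = sum(stat.size for stat in other)
        print("%s other: %.1f KiB" % (len(other), size / 1024))
    total = sum(stat.size for stat in top_stats)
    print("Total allocated size: %.1f KiB" % (total / 1024))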
Beispiel #57
0
from cProfile import Profile
from pstats import Stats

profiler = Profile()
profiler.runcall(my_program)  # my_program: the callable being profiled
stats = Stats(profiler)
stats.strip_dirs()
stats.sort_stats('cumulative')
stats.print_callers()

################################
# detecting memory leaks
################################
import os
import hashlib

import tracemalloc

tracemalloc.start(10)
time1 = tracemalloc.take_snapshot()

#TODO: to debug .. not working on windows
#import waste_memory
#x = waste_memory.run()

time2 = tracemalloc.take_snapshot()
stats = time2.compare_to(time1, 'lineno')
for stat in stats[:3]:
    print(stat)


class Obj(object):
    def __init__(self):
        self.x = os.urandom(100)
        self.y = hashlib.sha1(self.x).hexdigest()
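# The waste_memory import above is commented out; the Obj class is the kind of
# allocation such a module would make. A stand-in sketch (an assumption, not
# the original waste_memory module) so the snapshot comparison above has
# something to show:
def run(count=1000):
    """Allocate a pile of Obj instances so tracemalloc has blocks to attribute."""
    return [Obj() for _ in range(count)]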
Beispiel #58
0
    def print_stats(self, path, coverage=None):
        ''' Prints stats about the generated path
        Args:
            path: An Nx3 array with waypoints
            coverage: Optional precomputed coverage efficiency; computed from
                the coverable point cloud if None
        '''

        print("Done with planning. Calculating stats.")

        if not len(path):
            self.print("ERROR: Length of path is 0.")

        end_time = timeit.default_timer()
        snapshot = tracemalloc.take_snapshot()
        nbr_of_points_in_path = len(path)

        def get_memory_consumption(snapshot, key_type='lineno'):
            ''' Calculates memory consumption of the algorithm in KiB
            '''
            snapshot = snapshot.filter_traces((
                tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
                tracemalloc.Filter(False, "<unknown>"),
            ))
            top_stats = snapshot.statistics(key_type)
            total = sum(stat.size for stat in top_stats)
            return total / 1024

        def get_length_of_path(path):
            ''' Calculates length of the path in meters
            '''
            length = 0
            for point_idx in range(len(path) - 1):
                length += np.linalg.norm(path[point_idx] - path[point_idx + 1])
            return length

        def get_total_rotation(path):
            ''' Calculates the total rotation made by the robot while executing the path
            '''
            rotation = 0

            for point_idx in range(len(path) - 2):
                prev = (path[point_idx + 1] -
                        path[point_idx]) / np.linalg.norm(path[point_idx] -
                                                          path[point_idx + 1])
                next = (path[point_idx + 2] - path[point_idx + 1]
                        ) / np.linalg.norm(path[point_idx + 2] -
                                           path[point_idx + 1])
                dot_product = np.dot(prev, next)
                curr_rotation = np.arccos(dot_product)
                if not np.isnan(curr_rotation):
                    rotation += abs(curr_rotation)

            return rotation

        length_of_path = get_length_of_path(path)
        rotation = get_total_rotation(path[:, 0:2])
        #unessecary_coverage_mean = self.coverable_pcd.get_coverage_count_per_point(path)
        computational_time = end_time - self.start_time
        if coverage is None:
            coverage = self.coverable_pcd.get_coverage_efficiency()

        memory_consumption = get_memory_consumption(snapshot)

        print_text = "\n" + "=" * 20
        print_text += "\nAlgorithm: " + self.name
        print_text += "\nCoverage efficiency: " + str(round(coverage * 100,
                                                            2)) + "%"
        print_text += "\nNumber of waypoints: " + str(nbr_of_points_in_path)
        print_text += "\nLength of path: " + str(
            round(length_of_path)) + " meter"
        print_text += "\nTotal rotation: " + str(round(rotation)) + " rad"
        #print_text += "\nVisits per point: " + str(unessecary_coverage_mean)
        print_text += "\nComputational time: " + str(
            round(computational_time, 1)) + " sec"
        print_text += "\nMemory consumption: " + str(
            round(memory_consumption, 1)) + " KiB"

        print_text += "\n" + "=" * 20
        self.print(print_text)

        return {
            "Algorithm": self.name,
            "Coverage efficiency": round(coverage * 100, 2),
            "Number of waypoints": nbr_of_points_in_path,
            "Length of path": round(length_of_path),
            "Total rotation": round(rotation),
            #"Visits per point": unessecary_coverage_mean,
            "Computational time": round(computational_time, 1),
            "Memory consumption": round(memory_consumption)
        }
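# Note: tracemalloc must already be tracing when print_stats() runs, otherwise
# the snapshot above is empty and memory consumption is reported as ~0 KiB. A
# usage sketch with hypothetical names (SomePlanner and plan() are assumptions,
# not shown in this example):
#
#     import tracemalloc
#     tracemalloc.start()
#     planner = SomePlanner(...)   # assumed to set self.name, self.start_time, ...
#     path = planner.plan()        # assumed to return an Nx3 waypoint array
#     results = planner.print_stats(path)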
Beispiel #59
0
import tracemalloc as t

print("*start")
print([t._format_size(x, False) for x in t.get_traced_memory()])
t.start()

L = [[_ for _ in range(10000)] for i in range(100)]
print("*gen")
print([t._format_size(x, False) for x in t.get_traced_memory()])

snapshot = t.take_snapshot()
for stats in snapshot.statistics("traceback")[:3]:
    print(stats)

print("----------------------------------------")
snapshot = t.take_snapshot()
for stats in snapshot.statistics("lineno", cumulative=True)[:3]:
    print(stats)

t.stop()
print([t._format_size(x, False) for x in t.get_traced_memory()])
Beispiel #60
0
 async def debug(self, ctx):
     if tracemalloc.is_tracing():
         snapshot = tracemalloc.take_snapshot()
         top_stats = snapshot.statistics("lineno")
         await ctx.send("```" + "\n".join([str(x) for x in top_stats[:10]]) + "```")
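# The is_tracing() check above means the command reports nothing unless tracing
# was started elsewhere. A minimal sketch (an assumption -- the bot's start-up
# code is not part of this example) of enabling it when the module loads:
import tracemalloc

if not tracemalloc.is_tracing():
    tracemalloc.start(25)  # keep 25 frames per allocation for readable tracebacks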