Example #1
def _start_memory_tracing():
    try:
        import tracemalloc
    except ImportError:
        return

    tracemalloc.start()
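A guarded start like this is typically paired with a snapshot report later in the program. A minimal sketch of that follow-up step (not from the project above, just the standard-library API):

def _report_top_allocations(limit=10):
    import tracemalloc
    if not tracemalloc.is_tracing():
        return
    snapshot = tracemalloc.take_snapshot()
    # statistics('lineno') groups allocations by source line
    for stat in snapshot.statistics('lineno')[:limit]:
        print(stat)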
Example #2
def startMemoryTracing():
    try:
        import tracemalloc  # @UnresolvedImport
    except ImportError:
        pass
    else:
        tracemalloc.start()
Example #3
def test_memory_usage_coordinates():
    """
    Watch out for high memory usage on huge spatial files
    """

    ntf = tempfile.NamedTemporaryFile()

    tracemalloc.start()

    snap1 = tracemalloc.take_snapshot()

    # create a "flat" cube
    cube,_ = utilities.generate_gaussian_cube(shape=[1,2000,2000])
    sz = _.dtype.itemsize

    snap1b = tracemalloc.take_snapshot()
    diff = snap1b.compare_to(snap1, 'lineno')
    diffvals = np.array([dd.size_diff for dd in diff])
    # at this point, the generated cube should still exist in memory
    assert diffvals.max()*u.B >= 2000**2*sz*u.B

    del _
    snap2 = tracemalloc.take_snapshot()
    diff = snap2.compare_to(snap1b, 'lineno')
    assert diff[0].size_diff*u.B < -0.3*u.MB

    print(cube)

    # printing the cube should not occupy any more memory
    # (it will allocate a few bytes for the cache, but should *not*
    # load the full 2000x2000 coordinate arrays for RA, Dec)
    snap3 = tracemalloc.take_snapshot()
    diff = snap3.compare_to(snap2, 'lineno')
    assert sum([dd.size_diff for dd in diff])*u.B < 100*u.kB
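The snapshot-diff pattern used in this test generalizes. A minimal self-contained sketch, assuming only the standard library:

import tracemalloc

tracemalloc.start()
before = tracemalloc.take_snapshot()
data = [bytes(1000) for _ in range(1000)]  # allocate roughly 1 MB
after = tracemalloc.take_snapshot()
# compare_to() returns StatisticDiff objects with size_diff and count_diff
for diff in after.compare_to(before, 'lineno')[:3]:
    print(diff)
tracemalloc.stop()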
Example #4
    def test_stop_track(self):
        tracemalloc.start()
        tracemalloc.stop()

        with self.assertRaises(RuntimeError):
            self.track()
        self.assertIsNone(self.get_traceback())
Example #5
    def test_stop_untrack(self):
        tracemalloc.start()
        self.track()

        tracemalloc.stop()
        with self.assertRaises(RuntimeError):
            self.untrack()
Example #6
    def test_get_traces_intern_traceback(self):
        # dummy wrappers to get more useful and identical frames in the traceback
        def allocate_bytes2(size):
            return allocate_bytes(size)

        def allocate_bytes3(size):
            return allocate_bytes2(size)

        def allocate_bytes4(size):
            return allocate_bytes3(size)

        # Ensure that two identical tracebacks are not duplicated
        tracemalloc.stop()
        tracemalloc.start(4)
        obj_size = 123
        obj1, obj1_traceback = allocate_bytes4(obj_size)
        obj2, obj2_traceback = allocate_bytes4(obj_size)

        traces = tracemalloc._get_traces()

        trace1 = self.find_trace(traces, obj1_traceback)
        trace2 = self.find_trace(traces, obj2_traceback)
        size1, traceback1 = trace1
        size2, traceback2 = trace2
        self.assertEqual(traceback2, traceback1)
        self.assertIs(traceback2, traceback1)
Example #7
    def wrapper(*args, **kwargs):
        import sys
        do_prof, do_tracemalloc, do_traceopen = 3 * [False]
        if len(sys.argv) > 1:
            do_prof = sys.argv[1] == "prof"
            do_tracemalloc = sys.argv[1] == "tracemalloc"
            do_traceopen = sys.argv[1] == "traceopen"

        if do_prof or do_tracemalloc or do_traceopen: sys.argv.pop(1)

        if do_prof:
            print("Entering profiling mode...")
            import pstats, cProfile, tempfile
            prof_file = kwargs.pop("prof_file", None)
            if prof_file is None:
                _, prof_file = tempfile.mkstemp()
                print("Profiling data stored in %s" % prof_file)

            sortby = kwargs.pop("sortby", "time")
            cProfile.runctx("main()", globals(), locals(), prof_file)
            s = pstats.Stats(prof_file)
            s.strip_dirs().sort_stats(sortby).print_stats()
            return 0

        elif do_tracemalloc:
            print("Entering tracemalloc mode...")
            # Requires py3.4
            try:
                import tracemalloc
            except ImportError:
                print("Error while trying to import tracemalloc (requires py3.4)")
                raise SystemExit(1)

            tracemalloc.start()
            retcode = main(*args, **kwargs)
            snapshot = tracemalloc.take_snapshot()
            top_stats = snapshot.statistics('lineno')

            n = min(len(top_stats), 20)
            print("[Top %d]" % n)
            for stat in top_stats[:20]:
                print(stat)

        elif do_traceopen:
            try:
                import psutil
            except ImportError:
                print("traceopen requires psutil module")
                raise SystemExit(1)
            import os
            p = psutil.Process(os.getpid())
            retcode = main(*args, **kwargs)
            print("open_files", p.open_files())

        else:
            retcode = main(*args, **kwargs)

        return retcode
Example #8
    def peak_monitor_start(self):
        self.peak_monitoring = True

        # start RAM tracing
        tracemalloc.start()

        # this thread samples RAM usage as long as the current epoch of the fit loop is running
        peak_monitor_thread = threading.Thread(target=self.peak_monitor_func)
        peak_monitor_thread.daemon = True
        peak_monitor_thread.start()
Example #9
 def __init__(self, interval=60, n_frames=None):
     self.snapshot_q = multiprocessing.Queue()
     self.interval = interval
     self.n_frames = n_frames
     if self.n_frames:
         tracemalloc.start(self.n_frames)
     else:
         tracemalloc.start()
     self.counter = 1
     logger.info('Tracemalloc started')
     super(TakeSnapshot, self).__init__()
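Snapshots are picklable, which is presumably why this worker ships them over a multiprocessing.Queue; they can also be written to disk and analyzed offline (sketch using the standard-library dump/load API):

import tracemalloc

tracemalloc.start()
snapshot = tracemalloc.take_snapshot()
snapshot.dump('snap.bin')                      # serialize snapshot to disk
restored = tracemalloc.Snapshot.load('snap.bin')
print(restored.statistics('lineno')[:3])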
Example #10
def main():
    # Parse command line options
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file', help='input ninja file')
    parser.add_argument('--encoding', default='utf-8',
                        help='ninja file encoding')
    parser.add_argument('--ninja-deps', help='.ninja_deps file')
    args = parser.parse_args()

    if DEBUG_ALLOC:
        tracemalloc.start(25)
        tc_start = tracemalloc.take_snapshot()

    # Parse ninja file
    manifest = Parser().parse(args.input_file, args.encoding, args.ninja_deps)

    if DEBUG_ALLOC:
        tc_end = tracemalloc.take_snapshot()

    for rule in manifest.rules:
        print('rule', rule.name)

    for build in manifest.builds:
        print('build')
        for path in build.explicit_outs:
            print('  explicit_out:', path)
        for path in build.implicit_outs:
            print('  implicit_out:', path)
        for path in build.explicit_ins:
            print('  explicit_in:', path)
        for path in build.implicit_ins:
            print('  implicit_in:', path)
        for path in build.prerequisites:
            print('  prerequisites:', path)
        for path in build.depfile_implicit_ins:
            print('  depfile_implicit_in:', path)

    for pool in manifest.pools:
        print('pool', pool.name)

    for default in manifest.defaults:
        print('default')
        for path in default.outs:
            print('  out:', path)

    if DEBUG_ALLOC:
        top_stats = tc_end.compare_to(tc_start, 'traceback')
        with open('tracemalloc.log', 'w') as fp:
            for s in top_stats:
                print('', file=fp)
                print('========================================', file=fp)
                print(s, file=fp)
                for line in s.traceback.format():
                    print(line, file=fp)
Example #11
@contextmanager  # requires: from contextlib import contextmanager
def print_malloc_context(**kwargs):
    """
    :param \**kwargs: see print_malloc_snapshot
    """
    if tracemalloc is None:
        logger.error('tracemalloc required')
        return
    tracemalloc.start()
    yield
    snapshot = tracemalloc.take_snapshot()
    print_malloc_snapshot(snapshot, **kwargs)
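A hypothetical call site for the context manager above (print_malloc_snapshot and its keyword arguments are defined elsewhere in that project):

with print_malloc_context():
    data = [str(i) for i in range(100_000)]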
Example #12
    def check_track(self, release_gil):
        nframe = 5
        tracemalloc.start(nframe)

        size = tracemalloc.get_traced_memory()[0]

        frames = self.track(release_gil, nframe)
        self.assertEqual(self.get_traceback(),
                         tracemalloc.Traceback(frames))

        self.assertEqual(self.get_traced_memory(), self.size)
Example #13
def start():
    """ Starts application memory profiling """
    global tracemalloc

    logging.debug("Starting memory profiling")

    import tracemalloc

    with _lock:
        if is_running():
            raise RuntimeError('Memory profiler is already running')
        tracemalloc.start(_FRAMES)
Example #14
def start_tracemalloc_dump():
    """
    If the environment variable W3AF_PYTRACEMALLOC is set to 1, then we start
    the thread that will dump the memory usage data which can be retrieved
    using tracemalloc module.

    :return: None
    """
    # save 25 frames
    tracemalloc.start(25)

    dump_data_every_thread(dump_tracemalloc, DELAY_MINUTES, SAVE_TRACEMALLOC_PTR)
Example #15
    def test_track_already_tracked(self):
        nframe = 5
        tracemalloc.start(nframe)

        # track a first time
        self.track()

        # calling _PyTraceMalloc_Track() must remove the old trace and add
        # a new trace with the new traceback
        frames = self.track(nframe=nframe)
        self.assertEqual(self.get_traceback(),
                         tracemalloc.Traceback(frames))
Example #16
 def measure_memory_diff(self, func):
     import tracemalloc
     tracemalloc.start()
     try:
         before = tracemalloc.take_snapshot()
         # Keep the result and only delete it after taking a snapshot
         res = func()
         after = tracemalloc.take_snapshot()
         del res
         return after.compare_to(before, 'lineno')
     finally:
         tracemalloc.stop()
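A hypothetical caller of measure_memory_diff, showing how the returned list of StatisticDiff objects might be reported inside a test:

 # Hypothetical call site; the lambda stands in for any workload under test.
 diffs = self.measure_memory_diff(lambda: bytearray(10_000_000))
 for stat in diffs[:5]:
     print(stat)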
Example #17
 def exec_with_profiler(filename, profiler, backend):
     choose_backend(backend)
     if _backend == 'tracemalloc' and has_tracemalloc:
         tracemalloc.start()
     builtins.__dict__['profile'] = profiler
     # shadow the profile decorator defined above
     ns = dict(_CLEAN_GLOBALS, profile=profiler)
     try:
         with open(filename) as f:
             exec(compile(f.read(), filename, 'exec'), ns, ns)
     finally:
         if has_tracemalloc and tracemalloc.is_tracing():
             tracemalloc.stop()
Example #18
def exec_with_profiler(filename, profiler, backend, passed_args=[]):
    from runpy import run_module
    builtins.__dict__['profile'] = profiler
    ns = dict(_CLEAN_GLOBALS, profile=profiler)
    _backend = choose_backend(backend)
    sys.argv = [filename] + passed_args
    try:
        if _backend == 'tracemalloc' and has_tracemalloc:
            tracemalloc.start()
        with open(filename) as f:
            exec(compile(f.read(), filename, 'exec'), ns, ns)
    finally:
        if has_tracemalloc and tracemalloc.is_tracing():
            tracemalloc.stop()
Example #19
def memprofile():
    import resource
    import tracemalloc

    tracemalloc.start()

    ast = parse_file('/tmp/197.c')

    print('Memory usage: %s (kb)' %
            resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)

    snapshot = tracemalloc.take_snapshot()
    print("[ tracemalloc stats ]")
    for stat in snapshot.statistics('lineno')[:20]:
        print(stat)
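One portability caveat for the resource-based line above: ru_maxrss is reported in kilobytes on Linux but in bytes on macOS, so a portable report needs a platform check (sketch):

import sys
import resource

peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
if sys.platform == 'darwin':
    peak //= 1024  # macOS reports bytes, not kilobytes
print('Memory usage: %s (kb)' % peak)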
Example #20
    def test_untrack(self):
        tracemalloc.start()

        self.track()
        self.assertIsNotNone(self.get_traceback())
        self.assertEqual(self.get_traced_memory(), self.size)

        # untrack must remove the trace
        self.untrack()
        self.assertIsNone(self.get_traceback())
        self.assertEqual(self.get_traced_memory(), 0)

        # calling _PyTraceMalloc_Untrack() multiple times must not crash
        self.untrack()
        self.untrack()
Example #21
    def start(self):
        if self.profiling:
            return

        self.profiling = True

        now = datetime.now()
        trace_file = '{:%Y%m%d_%H%M}_trace.pickle'.format(now)
        trace_path = os.path.join(self.datadir, trace_file)
        self.trace_stream = open(trace_path, 'wb')  # pickle output requires binary mode
        tracemalloc.start(15)

        # Take snapshots at slower pace because the size of the samples is not
        # negligible, the de/serialization is slow and uses lots of memory.
        self.timer = Timer(self.trace, interval=MINUTE * 5)
Example #22
def run_module_with_profiler(module, profiler, backend, passed_args=[]):
    from runpy import run_module
    builtins.__dict__['profile'] = profiler
    ns = dict(_CLEAN_GLOBALS, profile=profiler)
    _backend = choose_backend(backend)
    sys.argv = [module] + passed_args
    if PY2:
        run_module(module, run_name="__main__", init_globals=ns)
    else:
        if _backend == 'tracemalloc' and has_tracemalloc:
            tracemalloc.start()
        try:
            run_module(module, run_name="__main__", init_globals=ns)
        finally:
            if has_tracemalloc and tracemalloc.is_tracing():
                tracemalloc.stop()
Example #23
def main(standalone=False):
    tracemalloc.start()

    if standalone:
        app = QLocalApplication(sys.argv)
        # Cleanlooks
        # Plastique
        # Motif
        # CDE
        style = QtGui.QStyleFactory.create('Plastique')
        #app.setStyle(style)
        #app.setGraphicsSystem('raster')

    w = QMainWindow()

    if standalone:
        sys.exit(app.exec_())
Example #24
    def test(self):
        timings = []
        memory_usage = []
        tracemalloc.start()

        for i in range(self.repeat):
            before_memory = tracemalloc.take_snapshot()
            start_time = time.time()

            self.bench()

            end_time = time.time()
            after_memory = tracemalloc.take_snapshot()
            timings.append(end_time - start_time)
            memory_usage.append(sum([t.size for t in after_memory.compare_to(before_memory, 'filename')]))

        print("time min:", min(timings), "max:", max(timings), "avg:", sum(timings) / len(timings))  # NOQA
        print("memory min:", min(memory_usage), "max:", max(memory_usage), "avg:", sum(memory_usage) / len(memory_usage))  # NOQA
Example #25
 def test_does_not_leak_too_much(self):
     tracemalloc.start()
     gc.collect()
     series = []
     snapshot1 = tracemalloc.take_snapshot()
     for i in range(100):
         try:
             execute_script(self.feature, self)
         except Exception:
             pass
         gc.collect()
         snapshot2 = tracemalloc.take_snapshot()
         stats = snapshot2.compare_to(snapshot1, "lineno")
         snapshot1 = snapshot2
         series.append(sum(stat.size / 1024 for stat in stats))
     tracemalloc.stop()
     series = series[1:]  # ignore first run, which creates regex
     cv = statistics.stdev(series) / statistics.mean(series)
     assert cv < 0.1
Example #26
    def test_set_traceback_limit(self):
        obj_size = 10

        tracemalloc.stop()
        self.assertRaises(ValueError, tracemalloc.start, -1)

        tracemalloc.stop()
        tracemalloc.start(10)
        obj2, obj2_traceback = allocate_bytes(obj_size)
        traceback = tracemalloc.get_object_traceback(obj2)
        self.assertEqual(len(traceback), 10)
        self.assertEqual(traceback, obj2_traceback)

        tracemalloc.stop()
        tracemalloc.start(1)
        obj, obj_traceback = allocate_bytes(obj_size)
        traceback = tracemalloc.get_object_traceback(obj)
        self.assertEqual(len(traceback), 1)
        self.assertEqual(traceback, obj_traceback)
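The limit being tested here can also be queried at run time (minimal sketch, standard-library API only):

import tracemalloc

tracemalloc.start(10)
print(tracemalloc.get_traceback_limit())  # -> 10
tracemalloc.stop()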
Example #27
async def test_leak_in_transport(zipkin_url, client, loop):

    tracemalloc.start()

    endpoint = az.create_endpoint('simple_service')
    tracer = await az.create(zipkin_url, endpoint, sample_rate=1,
                             send_interval=0.0001, loop=loop)

    await asyncio.sleep(5)
    gc.collect()
    snapshot1 = tracemalloc.take_snapshot()

    await asyncio.sleep(10)
    gc.collect()
    snapshot2 = tracemalloc.take_snapshot()

    top_stats = snapshot2.compare_to(snapshot1, 'lineno')
    count = sum(s.count for s in top_stats)
    await tracer.close()
    assert count < 400  # in case of leak this number is around 901452
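Diffs like the one above are often dominated by tracemalloc's own bookkeeping and the import machinery; the documented Filter API can exclude those traces before comparing (sketch, reusing snapshot2 from above):

snapshot2 = snapshot2.filter_traces((
    tracemalloc.Filter(False, tracemalloc.__file__),
    tracemalloc.Filter(False, '<frozen importlib._bootstrap>'),
))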
Example #28
def profile(func=None, stream=None, precision=1, backend='psutil'):
    """
    Decorator that will run the function and print a line-by-line profile
    """
    global _backend
    _backend = backend
    if not _backend_chosen:
        choose_backend()
    if _backend == 'tracemalloc' and not tracemalloc.is_tracing():
        tracemalloc.start()
    if func is not None:
        def wrapper(*args, **kwargs):
            prof = LineProfiler()
            val = prof(func)(*args, **kwargs)
            show_results(prof, stream=stream, precision=precision)
            return val
        return wrapper
    else:
        def inner_wrapper(f):
            return profile(f, stream=stream, precision=precision, backend=backend)
        return inner_wrapper
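A hypothetical use of the decorator above with the tracemalloc backend (only the argument names visible in the signature are assumed):

@profile(backend='tracemalloc')
def build():
    return [str(i) for i in range(100_000)]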
Example #29
    def compute(self):
        args = self.args

        if args.track_memory:
            if MS_WINDOWS:
                from perf._win_memory import get_peak_pagefile_usage
            else:
                from perf._memory import PeakMemoryUsageThread
                mem_thread = PeakMemoryUsageThread()
                mem_thread.start()

        if args.tracemalloc:
            import tracemalloc
            tracemalloc.start()

        WorkerTask.compute(self)

        if args.tracemalloc:
            traced_peak = tracemalloc.get_traced_memory()[1]
            tracemalloc.stop()

            if not traced_peak:
                raise RuntimeError("tracemalloc didn't trace any Python "
                                   "memory allocation")

            # drop timings, replace them with the memory peak
            self._set_memory_value(traced_peak)

        if args.track_memory:
            if MS_WINDOWS:
                mem_peak = get_peak_pagefile_usage()
            else:
                mem_thread.stop()
                mem_peak = mem_thread.peak_usage

            if not mem_peak:
                raise RuntimeError("failed to get the memory peak usage")

            # drop timings, replace them with the memory peak
            self._set_memory_value(mem_peak)
Example #30
    def pyfaidx_fasta(n):
        print('timings for pyfaidx.Fasta')
        ti = []
        tf = []
        for _ in range(n):
            t = time.time()
            f = pyfaidx.Fasta(fa_file.name)
            ti.append(time.time() - t)

            t = time.time()
            read_dict(f, headers)
            tf.append(time.time() - t)
            os.remove(index)
        # profile memory usage and report timings
        tracemalloc.start()
        f = pyfaidx.Fasta(fa_file.name)
        read_dict(f, headers)
        os.remove(index)
        print(tracemalloc.get_traced_memory())
        print(mean(ti))
        print(mean(tf)/nreads/10*1000*1000)
        tracemalloc.stop()
Example #31
def main():
    # workaround, start tracing IPA imports and API init ASAP
    if any('--enable-tracemalloc' in arg for arg in sys.argv):
        tracemalloc.start()

    try:
        ccname = get_ccname()
    except ValueError as e:
        print("ERROR:", e, file=sys.stderr)
        print(
            "\nliteserver requires a KRB5CCNAME env var and "
            "a valid Kerberos TGT:\n",
            file=sys.stderr)
        print("    export KRB5CCNAME=~/.ipa/ccache", file=sys.stderr)
        print("    kinit\n", file=sys.stderr)
        sys.exit(1)

    api = init_api(ccname)

    if api.env.lite_tracemalloc:
        # print memory snapshot of import + init
        snapshot = tracemalloc.take_snapshot()
        display_tracemalloc(snapshot, limit=api.env.lite_tracemalloc)
        del snapshot
        # From here on, only trace requests.
        tracemalloc.clear_traces()

    if os.path.isfile(api.env.lite_pem):
        ctx = ssl.create_default_context(purpose=ssl.Purpose.CLIENT_AUTH)
        ctx.load_cert_chain(api.env.lite_pem)
    else:
        ctx = None

    app = NotFound()
    app = DispatcherMiddleware(
        app, {
            '/ipa': KRBCheater(api.Backend.wsgi_dispatch, ccname),
        })

    # only profile api calls
    if api.env.lite_profiler == '-':
        print('Profiler enabled, stats are written to stderr.')
        app = ProfilerMiddleware(app, stream=sys.stderr, restrictions=(30, ))
    elif api.env.lite_profiler:
        profile_dir = os.path.abspath(api.env.lite_profiler)
        print("Profiler enable, profiles are stored in '{}'.".format(
            profile_dir))
        app = ProfilerMiddleware(app, profile_dir=profile_dir)

    if api.env.lite_tracemalloc:
        app = TracemallocMiddleware(app, api)

    app = StaticFilesMiddleware(app, STATIC_FILES)
    app = redirect_ui(app)

    run_simple(
        hostname=api.env.lite_host,
        port=api.env.lite_port,
        application=app,
        processes=5,
        ssl_context=ctx,
        use_reloader=True,
        # debugger doesn't work because framework catches all exceptions
        # use_debugger=not api.env.webui_prod,
        # use_evalex=not api.env.webui_prod,
    )
Example #32
def main():
    args = parse_args()

    if args.profile is True:
        safe_makedirs(args.save_profile_to)
        tracemalloc.start()

    # Path format: cv_splits/nfolds/[1...iterations]/[1.csv ... nfolds.csv]
    datadir = os.path.join(
        args.cvdir, "{0:02d}/{1:02d}".format(args.nfolds, args.iteration))
    # Path format: results/qa_strategy/nfolds/[1...iterations]/[<col>.txt]
    query_strategy_str = args.query_strategy
    if args.model_change is True:
        query_strategy_str += "_mc"
    if args.qs_kwargs != []:
        kwargs_str = '_'.join(args.qs_kwargs)
        query_strategy_str += "_{}".format(kwargs_str).replace('=', '-')
    resultsdir = os.path.join(
        args.resultsdir,
        "{0}/{1:02d}/{2:02d}".format(query_strategy_str, args.nfolds,
                                     args.iteration))
    # If results exist, abort.
    if os.path.exists(resultsdir):
        raise OSError(
            "Results directory '{}' exists. Delete it or use --resultsdir option."
            .format(resultsdir))
    else:
        safe_makedirs(resultsdir)

    # Create CV data files for this run only if they do not already exist.
    # Column to keep from features file. E.g. column='indicator_type=VERB'
    column = "all"
    if not os.path.exists(datadir):
        print("Creating CV data files at {}.".format(datadir), flush=True)
        X, y, feature_names = select_features(args.trainfile, column,
                                              args.percentage,
                                              args.save_ranked_features)
        # Split the data according to n-fold CV and save the splits.
        dirname = os.path.join(datadir, column)
        make_cv_files(X,
                      y,
                      feature_names,
                      dirname,
                      nfolds=args.nfolds,
                      random_state=args.iteration)

    if args.profile is True:
        snapshot = tracemalloc.take_snapshot()
        snapshot_outfile = os.path.join(args.save_profile_to, "main_1.out")
        snapshot.dump(snapshot_outfile)

    # Run the active learner.
    print("Running model: '{}'".format(query_strategy_str), flush=True)
    print("Using column: '{}'".format(column), flush=True)
    print("Iteration {} ".format(args.iteration), flush=True)
    cv_basepath = os.path.join(datadir, column)
    # all.scores.shape = (nfolds, ndraws)
    qs_kwargs = process_qs_kwargs(args.qs_kwargs)
    all_scores, choice_orders = run_active_learner(
        cv_basepath,
        args.nfolds,
        args.ndraws,
        args.query_strategy,
        args.model_change,
        qs_kwargs,
        save_profile_to=args.save_profile_to)

    if args.profile is True:
        snapshot = tracemalloc.take_snapshot()
        snapshot_outfile = os.path.join(args.save_profile_to, "main_2.out")
        snapshot.dump(snapshot_outfile)

    # In case the number of examples is not divisible by nfolds, truncate all folds to the shortest.
    min_length = np.min([len(scores) for scores in all_scores])
    all_scores = [scores[:min_length] for scores in all_scores]
    # Compute the mean score over CV folds.
    avg_scores = np.mean(np.array(all_scores), axis=0)

    # Find when the learner reached target performance.
    try:
        first = np.where(avg_scores >= args.score_threshold)[0][0]
    except IndexError:
        first = "NEVER"
    print("Acheived {0} AUC at iteration {1}".format(args.score_threshold,
                                                     first),
          flush=True)

    # Save results
    avg_results_outfile = os.path.join(resultsdir, "avg.txt")
    print("Writing average scores to {}".format(avg_results_outfile),
          flush=True)
    with open(avg_results_outfile, 'w') as outF:
        for score in avg_scores:
            outF.write("{}\n".format(score))

    cv_results_dir = os.path.join(resultsdir, "cv_results")
    safe_makedirs(cv_results_dir)
    print("Writing CV fold scores to {}/".format(cv_results_dir), flush=True)
    for (cvfold, results) in enumerate(all_scores):
        cv_results_outfile = os.path.join(cv_results_dir,
                                          "{0:02d}.txt".format(cvfold + 1))
        with open(cv_results_outfile, 'w') as cv_outF:
            for result in results:
                cv_outF.write("{}\n".format(result))

    print("Writing choice orders to {}/".format(cv_results_dir), flush=True)
    for (cvfold, order) in enumerate(choice_orders):
        choice_order_outfile = os.path.join(
            cv_results_dir, "order-{0:02d}.p".format(cvfold + 1))
        pickle.dump(order, open(choice_order_outfile, "wb"))

    if args.profile is True:
        snapshot = tracemalloc.take_snapshot()
        snapshot_outfile = os.path.join(args.save_profile_to, "main_3.out")
        snapshot.dump(snapshot_outfile)
Example #33
def launch_model(images,
                 labels,
                 adam_coef=0.01,
                 t_size=0.2,
                 batch_s=100,
                 nb_epoches=10):
    print(
        '------------------------------------------------------------------------------------------------\n'
    )
    print(
        '{0}\nAdam coef: {4}\tTrain size: {5}%\tPercentage of test data:{1}\tBatch size: {2}\tNb epoches:{3}'
        .format(os.path.basename(__file__), t_size, batch_s, nb_epoches,
                adam_coef, t_size))
    # Split dataset =  % training dataset 20% test_dataset
    X_train, X_test, y_train, y_test = train_test_split(images,
                                                        labels,
                                                        test_size=t_size,
                                                        random_state=11)

    # Transform training and test datasets into PyTorch dataset
    ## Tensors
    tensor_x_train = torch.Tensor(X_train)
    tensor_y_train = torch.Tensor(y_train)
    tensor_x_test = torch.Tensor(X_test)
    tensor_y_test = torch.Tensor(y_test)

    ## Convert labels float type into long type (labels need to be type long)
    tensor_y_train = tensor_y_train.long()
    tensor_y_test = tensor_y_test.long()

    ## Create TensorDataset
    tensorDataset_train = TensorDataset(tensor_x_train, tensor_y_train)
    tensorDataset_test = TensorDataset(tensor_x_test, tensor_y_test)

    ## Create dataloaders
    train_loader = DataLoader(tensorDataset_train,
                              batch_size=batch_s)  # batch_s samples / batch
    test_loader = DataLoader(tensorDataset_test, batch_size=batch_s)

    # Start timer and save memory capacity
    start = time.time()
    tracemalloc.start()

    # Init model
    network = cnn.CNN()
    optimizer = optim.Adam(network.parameters(), lr=adam_coef)

    # Launch epoches
    for epoch in range(nb_epoches):
        total_loss = 0
        total_correct = 0

        for batch in train_loader:  # Get batch
            images, labels = batch
            preds = network(images)  # Pass Batch
            loss = F.cross_entropy(preds, labels)  # Calculate Loss

            # Update hyperparameters
            optimizer.zero_grad()
            loss.backward()  # Calculate Gradients
            optimizer.step()  # Update Weights

            # Save loss and number of good prediction / batch
            total_loss += loss.item()
            total_correct += get_num_correct(preds, labels)
        print("Epoch:", epoch, "Total_correct:", total_correct, "Loss:",
              total_loss)

    # Calculate accuracy for test dataset
    with torch.no_grad():
        test_preds = get_all_preds(network, test_loader)
    all_labels = tensor_y_test
    preds_correct = get_num_correct(test_preds, all_labels)
    print('Total correct:{0}/{1}'.format(preds_correct, len(y_test)))
    accuracy = preds_correct * 100 / len(y_test)
    timer = time.time() - start
    current, peak = tracemalloc.get_traced_memory()
    diff = peak - current
    print('Accuracy: {0} %'.format(accuracy))
    print(
        '------------------------------------------------------------------------------------------------\n'
    )
    return network, {
        'Epoches': nb_epoches,
        'Batchs': batch_s,
        'Accuracies': float("{:.2f}".format(accuracy)),
        'Test_size': t_size,
        'Adam_coef': adam_coef,
        'Timer': float("{:.4f}".format(timer)),
        'Mem_current': current,
        'Mem_peak': peak,
        'Mem_diff': diff
    }
Example #34
def compute(polygon,
            polygon_id,
            s_id,
            t_id,
            s,
            t,
            algorithm_list,
            run_timeout=3600,
            max_time=5,
            min_runs=5,
            max_runs=20):
    """Compute the benchmark for one start/end pair in one polygon with all known algorithms."""
    from socket import gethostname
    from datetime import datetime
    from gsp import makestep_shortest_path, delaunay_shortest_path, lee_preparata_shortest_path, \
        trapezoid_shortest_path
    import gc
    import tracemalloc
    import sys
    import traceback
    from time import process_time as timer

    m_run = model.Run.create(
        polygon_id=polygon_id,
        host=model.Host.create_or_get(name=gethostname())[0],
        start=datetime.now(),
        end=datetime.now(),
        s=model.PolygonPoint.get(id=s_id),
        t=model.PolygonPoint.get(id=t_id),
        version=model.Version.get())
    logging.debug('Created run "%s"', m_run)

    for algorithm, sp in dict(
            delaunay=delaunay_shortest_path,
            makestep=makestep_shortest_path,
            trapezoid=trapezoid_shortest_path,
            lee_preparata=lee_preparata_shortest_path).items():
        if algorithm not in algorithm_list:
            continue

        m_algorithm, _ = model.Algorithm.create_or_get(name=algorithm)
        logging.info('Running algorithm "%s"', m_algorithm)
        gc.collect()
        tracemalloc.start()
        try:
            signal.alarm(run_timeout)
            path = list(sp(polygon, s, t))
            signal.alarm(0)
        except BaseException:
            traceback.print_exc(file=sys.stderr)
            continue
        else:
            memory = tracemalloc.get_traced_memory()

            gc.collect()
            tracemalloc.stop()

            m_instance = model.Instance.create(run=m_run,
                                               algorithm=m_algorithm,
                                               memory=memory[1] - memory[0],
                                               path_length=len(path))
            logging.debug('Saved instance "%s"', m_instance)

            logging.debug('Creating resulting path: "%s"', path)
            for i, point in enumerate(path):
                try:
                    m_point = model.Point.get(x=point.x, y=point.y)
                except model.Point.DoesNotExist:
                    for tmp_polygon_point in m_run.polygon.polygon_points:
                        if tmp_polygon_point.point.as_geometry() == point:
                            m_polygon_point = tmp_polygon_point
                            break
                else:
                    m_polygon_point = model.PolygonPoint.get(
                        point=m_point,
                        polygon_id=polygon_id,
                        is_vertex=isinstance(point, PolygonPoint))
                model.PathPoint.create(instance=m_instance,
                                       index=i,
                                       polygon_point=m_polygon_point)

            for property, value in sp.properties.items():
                m_property, _ = model.PropertyName.create_or_get(name=property)
                if isinstance(value, int):
                    model.IntegerProperty.create(instance=m_instance,
                                                 name=m_property,
                                                 value=value)

        total_time = 0
        runs = 0

        times = []

        while runs < min_runs or max_time > total_time and runs < max_runs:
            try:
                signal.alarm(run_timeout)
                gc.disable()
                start = timer()
                list(sp(polygon, s, t))
                time = timer() - start
                gc.enable()
                signal.alarm(0)
            except BaseException:
                traceback.print_exc(file=sys.stderr)
                break
            else:
                times.append(time)
                total_time += time
                runs += 1

        if len(times) > 0:
            with model.db.atomic():
                model.Time.insert_many(
                    dict(instance=m_instance, time=t)
                    for t in times).execute()

            m_instance.median_time = median(times)
            m_instance.save()

    m_run.end = datetime.now()
    m_run.save()

    return (polygon_id, s_id, t_id)
Example #35
 def start():
     tracemalloc.start(self.MAX_TRACEBACK_SIZE)
Example #36
# top_n.py
import tracemalloc
tracemalloc.start(10)  # save up to 10 stack frames

time1 = tracemalloc.take_snapshot()
import waste_memory
x = waste_memory.run()
time2 = tracemalloc.take_snapshot()

stats = time2.compare_to(time1, 'lineno')
for stat in stats[:3]:
    print(stat)
Example #37
 def start_tracemalloc(self):
     tracemalloc.start()
Example #38
 def setUpClass(cls) -> None:
     """Setup test class with a facade and ten contacts."""
     tracemalloc.start()
     logging.basicConfig(stream=sys.stderr, level=cls.pref_loglevel)
     asyncssh.logging.set_log_level(cls.pref_loglevel)
Example #39
found_objects = gc.get_objects()
print('%d objects after' % len(found_objects))
for obj in found_objects[:3]:
    print(repr(obj)[:100])

'''
4756 objects before
14873 objects after
<waste_memory.MyObject object at 0x1063f6940>
<waste_memory.MyObject object at 0x1063f6978>
<waste_memory.MyObject object at 0x1063f69b0>'''

# doesn't show where the object came from

import tracemalloc
tracemalloc.start(10)  # Save up to 10 stack frames

time1 = tracemalloc.take_snapshot()
import waste_memory
x = waste_memory.run()
time2 = tracemalloc.take_snapshot()

stats = time2.compare_to(time1, 'lineno')
for stat in stats[:3]:
    print(stat)

'''
waste_memory.py:6: size=2235 KiB (+2235 KiB), count=29981 (+29981), average=76 B
waste_memory.py:7: size=869 KiB (+869 KiB), count=10000 (+10000), average=89 B
waste_memory.py:12: size=547 KiB (+547 KiB), count=10000 (+10000), average=56 B
'''
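Grouping the same comparison by 'traceback' instead of 'lineno' shows the full allocation stack for the biggest offender (sketch, reusing the snapshots above):

stats = time2.compare_to(time1, 'traceback')
top = stats[0]
print('\n'.join(top.traceback.format()))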
Example #40
import os
import time
import platform
import shutil
from datetime import datetime

# reduce resource request for threading
# for OpenWrt
import threading
try:
    threading.stack_size(128 * 1024)
except:
    pass

try:
    import tracemalloc
    tracemalloc.start(10)
except:
    pass

try:
    raw_input  # python 2
except NameError:
    raw_input = input  # python 3

current_path = os.path.dirname(os.path.abspath(__file__))
root_path = os.path.abspath(os.path.join(current_path, os.pardir))
data_path = os.path.abspath(
    os.path.join(root_path, os.pardir, os.pardir, 'data'))
data_launcher_path = os.path.join(data_path, 'launcher')
python_path = os.path.join(root_path, 'python27', '1.0')
noarch_lib = os.path.abspath(os.path.join(python_path, 'lib', 'noarch'))
Example #41
def main():
    tracemalloc.start()
    print(createDict(50000))
    current, peak = tracemalloc.get_traced_memory()
    print(f"Current memory usage is {current / 10**6}MB; Peak was {peak / 10**6}MB")
    tracemalloc.stop()
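Since Python 3.9 the peak reported by get_traced_memory() can be reset between phases without restarting tracing, which avoids stop()/start() churn (sketch; phase1 and phase2 stand for hypothetical workloads):

import tracemalloc

tracemalloc.start()
phase1()  # hypothetical workload
print(tracemalloc.get_traced_memory())  # (current, peak) for phase 1
tracemalloc.reset_peak()                # Python 3.9+
phase2()  # hypothetical workload
print(tracemalloc.get_traced_memory())  # peak now reflects phase 2 only
tracemalloc.stop()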
Example #42
def read_write(plot=False):
    # mesh = generate_tetrahedral_mesh()
    mesh = generate_triangular_mesh()
    print(mesh)
    mem_size = mesh.points.nbytes + mesh.cells[0].data.nbytes
    mem_size /= 1024.0 ** 2
    print(f"mem_size: {mem_size:.2f} MB")

    formats = {
        "Abaqus": (meshio.abaqus.write, meshio.abaqus.read, ["out.inp"]),
        "Ansys (ASCII)": (
            lambda f, m: meshio.ansys.write(f, m, binary=False),
            meshio.ansys.read,
            ["out.ans"],
        ),
        # "Ansys (binary)": (
        #     lambda f, m: meshio.ansys.write(f, m, binary=True),
        #     meshio.ansys.read,
        #     ["out.ans"],
        # ),
        "AVS-UCD": (meshio.avsucd.write, meshio.avsucd.read, ["out.ucd"]),
        # "CGNS": (meshio.cgns.write, meshio.cgns.read, ["out.cgns"]),
        "Dolfin-XML": (meshio.dolfin.write, meshio.dolfin.read, ["out.xml"]),
        "Exodus": (meshio.exodus.write, meshio.exodus.read, ["out.e"]),
        # "FLAC3D": (meshio.flac3d.write, meshio.flac3d.read, ["out.f3grid"]),
        "Gmsh 4.1 (ASCII)": (
            lambda f, m: meshio.gmsh.write(f, m, binary=False),
            meshio.gmsh.read,
            ["out.msh"],
        ),
        "Gmsh 4.1 (binary)": (
            lambda f, m: meshio.gmsh.write(f, m, binary=True),
            meshio.gmsh.read,
            ["out.msh"],
        ),
        "MDPA": (meshio.mdpa.write, meshio.mdpa.read, ["out.mdpa"]),
        "MED": (meshio.med.write, meshio.med.read, ["out.med"]),
        "Medit": (meshio.medit.write, meshio.medit.read, ["out.mesh"]),
        "MOAB": (meshio.h5m.write, meshio.h5m.read, ["out.h5m"]),
        "Nastran": (meshio.nastran.write, meshio.nastran.read, ["out.bdf"]),
        "OBJ": (meshio.obj.write, meshio.obj.read, ["out.obj"]),
        "OFF": (meshio.off.write, meshio.off.read, ["out.off"]),
        "Permas": (meshio.permas.write, meshio.permas.read, ["out.dato"]),
        "PLY (binary)": (
            lambda f, m: meshio.ply.write(f, m, binary=True),
            meshio.ply.read,
            ["out.ply"],
        ),
        "PLY (ASCII)": (
            lambda f, m: meshio.ply.write(f, m, binary=False),
            meshio.ply.read,
            ["out.ply"],
        ),
        "STL (binary)": (
            lambda f, m: meshio.stl.write(f, m, binary=True),
            meshio.stl.read,
            ["out.stl"],
        ),
        "STL (ASCII)": (
            lambda f, m: meshio.stl.write(f, m, binary=False),
            meshio.stl.read,
            ["out.stl"],
        ),
        # "TetGen": (meshio.tetgen.write, meshio.tetgen.read, ["out.node", "out.ele"],),
        "VTK (binary)": (
            lambda f, m: meshio.vtk.write(f, m, binary=True),
            meshio.vtk.read,
            ["out.vtk"],
        ),
        "VTK (ASCII)": (
            lambda f, m: meshio.vtk.write(f, m, binary=False),
            meshio.vtk.read,
            ["out.vtk"],
        ),
        "VTU (binary, uncompressed)": (
            lambda f, m: meshio.vtu.write(f, m, binary=True, compression=None),
            meshio.vtu.read,
            ["out.vtu"],
        ),
        "VTU (binary, zlib)": (
            lambda f, m: meshio.vtu.write(f, m, binary=True, compression="zlib"),
            meshio.vtu.read,
            ["out.vtu"],
        ),
        "VTU (binary, LZMA)": (
            lambda f, m: meshio.vtu.write(f, m, binary=True, compression="lzma"),
            meshio.vtu.read,
            ["out.vtu"],
        ),
        "VTU (ASCII)": (
            lambda f, m: meshio.vtu.write(f, m, binary=False),
            meshio.vtu.read,
            ["out.vtu"],
        ),
        "Wavefront .obj": (meshio.obj.write, meshio.obj.read, ["out.obj"]),
        # "wkt": ".wkt",
        "XDMF (binary)": (
            lambda f, m: meshio.xdmf.write(f, m, data_format="Binary"),
            meshio.xdmf.read,
            ["out.xdmf", "out0.bin", "out1.bin"],
        ),
        "XDMF (HDF, GZIP)": (
            lambda f, m: meshio.xdmf.write(f, m, data_format="HDF", compression="gzip"),
            meshio.xdmf.read,
            ["out.xdmf", "out.h5"],
        ),
        "XDMF (HDF, uncompressed)": (
            lambda f, m: meshio.xdmf.write(f, m, data_format="HDF", compression=None),
            meshio.xdmf.read,
            ["out.xdmf", "out.h5"],
        ),
        "XDMF (XML)": (
            lambda f, m: meshio.xdmf.write(f, m, data_format="XML"),
            meshio.xdmf.read,
            ["out.xdmf"],
        ),
    }

    # formats = {
    #     # "VTK (ASCII)": formats["VTK (ASCII)"],
    #     # "VTK (binary)": formats["VTK (binary)"],
    #     # "VTU (ASCII)": formats["VTU (ASCII)"],
    #     # "VTU (binary)": formats["VTU (binary)"],
    #     # "Gmsh 4.1 (binary)": formats["Gmsh 4.1 (binary)"],
    #     # "FLAC3D": formats["FLAC3D"],
    #     "MDPA": formats["MDPA"],
    # }

    # max_key_length = max(len(key) for key in formats)

    elapsed_write = []
    elapsed_read = []
    file_sizes = []
    peak_memory_write = []
    peak_memory_read = []

    print()
    print(
        "format                      "
        + "write (s)    "
        + "read(s)      "
        + "file size    "
        + "write mem    "
        + "read mem "
    )
    print()
    with tempfile.TemporaryDirectory() as directory:
        directory = pathlib.Path(directory)
        for name, (writer, reader, filenames) in formats.items():
            filename = directory / filenames[0]

            tracemalloc.start()
            t = time.time()
            writer(filename, mesh)
            # snapshot = tracemalloc.take_snapshot()
            elapsed_write.append(time.time() - t)
            peak_memory_write.append(tracemalloc.get_traced_memory()[1])
            tracemalloc.stop()

            file_sizes.append(sum(os.stat(directory / f).st_size for f in filenames))

            tracemalloc.start()
            t = time.time()
            reader(filename)
            elapsed_read.append(time.time() - t)
            peak_memory_read.append(tracemalloc.get_traced_memory()[1])
            tracemalloc.stop()
            print(
                "{:<26}  {:e} {:e} {:e} {:e} {:e}".format(
                    name,
                    elapsed_write[-1],
                    elapsed_read[-1],
                    file_sizes[-1] / 1024.0 ** 2,
                    peak_memory_write[-1] / 1024.0 ** 2,
                    peak_memory_read[-1] / 1024.0 ** 2,
                )
            )

    names = list(formats.keys())
    # convert to MB
    file_sizes = np.array(file_sizes)
    file_sizes = file_sizes / 1024.0 ** 2
    peak_memory_write = np.array(peak_memory_write)
    peak_memory_write = peak_memory_write / 1024.0 ** 2
    peak_memory_read = np.array(peak_memory_read)
    peak_memory_read = peak_memory_read / 1024.0 ** 2

    if plot:
        plot_speed(names, elapsed_write, elapsed_read)
        plot_file_sizes(names, file_sizes, mem_size)
        plot_memory_usage(names, peak_memory_write, peak_memory_read, mem_size)
Example #43
    def correct(self,
                method: str = 'filter',
                gmms: Optional[pd.core.series.Series] = None,
                fit: bool = True,
                adaptive_num_components: bool = False,
                max_iterations: int = 10,
                num_flank_components: int = 2,
                verbose: bool = False,
                monitor: bool = False) -> Optional[pd.core.frame.DataFrame]:
        """Correct the normalized expression in analysis.dataset.

        Parameters:
            method: str, default: 'filter'
                The possible methods are 'filter', 'noise' and 'none'. 'filter' use the
                posterior probability to replace the normalized expression with zero if
                the fitted model predict the values belong to the centered component.
                'noise' add additional noise to the normalized expression based on the
                standard deviation of the centered component. 'none' fits the mixture
                model, but does not correct for the expression.

            gmms: Optional[pd.core.series.Series] (optional)
                If gmms is provided and fit is False, use the provided gmms to correct
                the expression values.

            fit: bool, default: False
                Force to fit the gaussian mixture model given the expression in
                analysis.dataset.

            adaptive_num_components: bool, default: False
                Enable using likelihood ratio test to determine the optimal number of
                components.

            max_iterations: int, default: 10
                Maximum number of iterations for expectation-maximization. Must be a
                positive integer.

            num_flank_components: int, default: 2
                Number of non-centered mixture components. Must be a positive integer.

            verbose: bool, default: False
                Enable verbose output.

            monitor: bool, default: False
                Monitor the memory and computational time usage. (The result is stored
                to ./tmp/correction.log). The result contains 6 columns separated by
                tab:
                    1. timestamp
                    2. event
                    3. elapsed time
                    4. CPU time
                    5. peak memory usage (Mb)
                    6. sample size

        Returns:
            pd.core.frame.DataFrame
                Return the corrected expression.

        """
        if monitor:
            os.makedirs('./tmp', exist_ok=True)
            logging.basicConfig(filename='./tmp/correction.log',
                                encoding='utf-8',
                                level=logging.DEBUG)

        if method not in {'filter', 'noise', 'none'}:
            method = 'filter'
            message = ''.join(
                ("method should be one of {'filter,', 'noise', 'none'}. ",
                 "set to 'filter'"))
            warnings.warn(message, RuntimeWarning)

        if gmms:
            self.gmms = gmms
            message = ''.join((
                'Provided with gmms, ignore parameters: adaptive_num_components, ',
                'max_iterations, num_flank_components.'))
            warnings.warn(message, RuntimeWarning)
        elif self.gmms is None or fit:
            cpu_time_start = time.process_time()
            time_start = time.time()
            tracemalloc.start()
            if verbose:
                message = ''.join(
                    ('Start fitting:\n',
                     f'{"Groups":30}{"Log-likelihood":>14}{"Components":>14}'))
                print(message)
            kwargs = {
                'adaptive_num_components': adaptive_num_components,
                'max_iterations': max_iterations,
                'num_flank_components': num_flank_components,
                'verbose': verbose
            }
            group_df = self.dataset.xprs.stack().groupby('Group')
            self.gmms = group_df.apply(self.__fit, **kwargs)
            if verbose:
                print('Completed fitting successfully.\n')
            if monitor:
                message = '\t'.join(
                    (time.ctime(), 'fitting',
                     f'{time.process_time() - cpu_time_start:.3f}',
                     f'{time.time() - time_start:.3f}',
                     f'{tracemalloc.get_traced_memory()[1] * 9.53 * 1e-7:.3f}',
                     f'{self.dataset.xprs.shape[0]}'))
                logging.debug(message)

        if verbose:
            print('Start correction')

        if method == 'filter':
            correct_function = self.__correct_with_filter
        elif method == 'noise':
            correct_function = self.__correct_with_noise

        cpu_time_start = time.process_time()
        time_start = time.time()
        tracemalloc.stop()
        tracemalloc.start()
        if method in {'filter', 'noise'}:
            corrected = self.dataset.xprs.apply(
                correct_function,
                axis=1,
                result_type='broadcast',
            )
        else:
            corrected = self.dataset.xprs.copy()

        if verbose:
            print('Completed correction successfully.')

        if monitor:
            message = '\t'.join(
                (time.ctime(), 'correct',
                 f'{time.process_time() - cpu_time_start:.3f}',
                 f'{time.time() - time_start:.3f}',
                 f'{tracemalloc.get_traced_memory()[1] * 9.53 * 1e-7:>.3f}',
                 f'{self.dataset.xprs.shape[0]}'))
            logging.debug(message)
        tracemalloc.stop()

        return corrected.transpose()
Example #44
    def setUpClass(cls) -> None:
        """Setup test class with a facade and ten contacts."""
        tracemalloc.start()
        logging.basicConfig(stream=sys.stderr, level=logging.INFO)

        cls.secret = os.urandom(32)
Example #45
# from python 3.4
import tracemalloc

tracemalloc.start(10)  # save up to 10 stack frames

time1 = tracemalloc.take_snapshot()
import waste_memory

x = waste_memory.run()
time2 = tracemalloc.take_snapshot()

stats = time2.compare_to(time1, 'lineno')
for stat in stats[:3]:
    print(stat)
# >>>
# ../59_tracemalloc.py/waste_memory.py:7: size=2235 KiB (+2235 KiB), count=29985 (+29985), average=76 B
# ../59_tracemalloc.py/waste_memory.py:8: size=869 KiB (+869 KiB), count=10000 (+10000), average=89 B
# ../59_tracemalloc.py/waste_memory.py:13: size=547 KiB (+547 KiB), count=10000 (+10000), average=56 B
Example #46
def run_memleak_test(bench, iterations, report):
    tracemalloc.start()

    starti = min(50, iterations // 2)
    endi = iterations

    malloc_arr = np.empty((endi, ), dtype=np.int64)
    rss_arr = np.empty((endi, ), dtype=np.int64)
    rss_peaks = np.empty((endi, ), dtype=np.int64)
    nobjs_arr = np.empty((endi, ), dtype=np.int64)
    garbage_arr = np.empty((endi, ), dtype=np.int64)
    open_files_arr = np.empty((endi, ), dtype=np.int64)
    rss_peak = 0

    p = psutil.Process()

    for i in range(endi):
        bench()

        gc.collect()

        rss = p.memory_info().rss
        malloc, peak = tracemalloc.get_traced_memory()
        nobjs = len(gc.get_objects())
        garbage = len(gc.garbage)
        open_files = len(p.open_files())
        print("{0: 4d}: pymalloc {1: 10d}, rss {2: 10d}, nobjs {3: 10d}, "
              "garbage {4: 4d}, files: {5: 4d}".format(i, malloc, rss, nobjs,
                                                       garbage, open_files))

        malloc_arr[i] = malloc
        rss_arr[i] = rss
        if rss > rss_peak:
            rss_peak = rss
        rss_peaks[i] = rss_peak
        nobjs_arr[i] = nobjs
        garbage_arr[i] = garbage
        open_files_arr[i] = open_files

    print('Average memory consumed per loop: {:1.4f} bytes\n'.format(
        np.sum(rss_peaks[starti + 1:] - rss_peaks[starti:-1]) /
        (endi - starti)))

    from matplotlib import pyplot as plt
    fig, (ax1, ax2, ax3) = plt.subplots(3)
    ax1b = ax1.twinx()
    ax1.plot(malloc_arr, 'r')
    ax1b.plot(rss_arr, 'b')
    ax1.set_ylabel('pymalloc', color='r')
    ax1b.set_ylabel('rss', color='b')

    ax2b = ax2.twinx()
    ax2.plot(nobjs_arr, 'r')
    ax2b.plot(garbage_arr, 'b')
    ax2.set_ylabel('total objects', color='r')
    ax2b.set_ylabel('garbage objects', color='b')

    ax3.plot(open_files_arr)
    ax3.set_ylabel('open file handles')

    if not report.endswith('.pdf'):
        report = report + '.pdf'
    fig.tight_layout()
    fig.savefig(report, format='pdf')
Example #47
    def __init__(self):
        # type: () -> None

        tracemalloc.start()

        self.total = None  # type: Optional[int]
Example #48
def _test_some_code_for_memory_leaks(
    desc: str,
    init: Optional[Callable[[], None]],
    code: Callable[[], None],
    repeats: int,
    max_num_trials: int = 1,
) -> List[Suspect]:
    """Runs given code (and init code) n times and checks for memory leaks.

    Args:
        desc: A descriptor of the test.
        init: Optional code to be executed initially.
        code: The actual code to be checked for producing memory leaks.
        repeats: How many times to repeatedly execute `code`.
        max_num_trials: The maximum number of trials to run. A new trial is only
            run, if the previous one produced a memory leak. For all non-1st trials,
            `repeats` calculates as: actual_repeats = `repeats` * (trial + 1), where
            the first trial is 0.

    Returns:
        A list of Suspect objects, describing possible memory leaks. If list
        is empty, no leaks have been found.
    """

    def _i_print(i):
        if (i + 1) % 10 == 0:
            print(".", end="" if (i + 1) % 100 else f" {i + 1}\n", flush=True)

    # Do n trials to make sure a found leak is really one.
    suspicious = set()
    suspicious_stats = []
    for trial in range(max_num_trials):
        # Store up to n frames of each call stack.
        tracemalloc.start(20)

        table = defaultdict(list)

        # Repeat running code for n times.
        # Increase repeat value with each trial to make sure stats are more
        # solid each time (avoiding false positives).
        actual_repeats = repeats * (trial + 1)

        print(f"{desc} {actual_repeats} times.")

        # Initialize if necessary.
        if init is not None:
            init()
        # Run `code` n times, each time taking a memory snapshot.
        for i in range(actual_repeats):
            _i_print(i)
            code()
            _take_snapshot(table, suspicious)
        print("\n")

        # Check, which traces have moved up in their memory consumption
        # constantly over time.
        suspicious.clear()
        suspicious_stats.clear()
        # Suspicious memory allocation found?
        suspects = _find_memory_leaks_in_table(table)
        for suspect in sorted(suspects, key=lambda s: s.memory_increase, reverse=True):
            # Only print out the biggest offender:
            if len(suspicious) == 0:
                _pprint_suspect(suspect)
                print("-> added to retry list")
            suspicious.add(suspect.traceback)
            suspicious_stats.append(suspect)

        tracemalloc.stop()

        # Some suspicious memory allocations found.
        if len(suspicious) > 0:
            print(f"{len(suspicious)} suspects found. Top-ten:")
            for i, s in enumerate(suspicious_stats):
                if i > 10:
                    break
                print(
                    f"{i}) line={s.traceback[-1]} mem-increase={s.memory_increase}B "
                    f"slope={s.slope}B/detection rval={s.rvalue}"
                )
        # Nothing suspicious found -> Exit trial loop and return.
        else:
            print("No remaining suspects found -> returning")
            break

    # Print out final top offender.
    if len(suspicious_stats) > 0:
        _pprint_suspect(suspicious_stats[0])

    return suspicious_stats
Example #49
 def start_trace(self):
     assert not self.__traces
     self.__traces = True
     tracemalloc.start()
Example #50
    def test_is_tracing(self):
        tracemalloc.stop()
        self.assertFalse(tracemalloc.is_tracing())

        tracemalloc.start()
        self.assertTrue(tracemalloc.is_tracing())
Example #51
 def __enter__(self):
     self._begin = time.time()
     if self._measure_memory:
         tracemalloc.start(10)
     return self
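Example #51 shows only __enter__; a matching __exit__ for such a timer might look like this (hypothetical sketch, the attribute names are assumptions):

 def __exit__(self, exc_type, exc_value, tb):
     self._elapsed = time.time() - self._begin  # _elapsed is an assumed attribute
     if self._measure_memory:
         self._current, self._peak = tracemalloc.get_traced_memory()
         tracemalloc.stop()
     return False  # do not suppress exceptions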
Example #52
    def setUp(self):
        if tracemalloc.is_tracing():
            self.skipTest("tracemalloc must be stopped before the test")

        tracemalloc.start(1)
Example #53
    def trainModel(self):
        for thread in self.imageGenerationThreads:
            thread.start()

        self.model, imageNet = self.createCoreModel()

        bestAccuracy = None
        allAccuracies = []

        def epochCallback(epoch, logs):
            nonlocal bestAccuracy
            self.measuringAccuracy = True
            if self.parameters['trainingAugmentation']['rotationEasing'][
                    'easing'] == 'epoch':
                self.updateRotationValues(epoch)

            if epoch % 5 == 0:
                imageNet.save(f"model-epoch-{epoch}.h5")
                imageNet.save_weights(f"model-epoch-{epoch}-weights.h5")
            imageNet.save(f"model-current.h5")
            imageNet.save_weights(f"model-current-weights.h5")

            if epoch % self.parameters['epochsBeforeAccuracyMeasurement'] == (
                    self.parameters['epochsBeforeAccuracyMeasurement'] - 1):
                accuracy = self.measureAccuracy(self.model)
                if bestAccuracy is None or accuracy > bestAccuracy:
                    bestAccuracy = accuracy
                    imageNet.save_weights(f"model-best-weights.h5")
                allAccuracies.append(accuracy)
            self.measuringAccuracy = False

        rollingAverage9 = None
        rollingAverage95 = None
        rollingAverage99 = None
        rollingAverage995 = None
        # Define currentSnapshot up front: batchCallback declares it nonlocal
        # and may run during the pre-training fit below, before the
        # trackMemory block further down resets it.
        currentSnapshot = None

        def batchCallback(batch, log):
            nonlocal rollingAverage9, rollingAverage95, rollingAverage99, rollingAverage995, currentSnapshot
            # if batch % 100 == 0:
            #     self.memory_tracker.print_diff()
            if rollingAverage9 is None:
                rollingAverage95 = log['loss']
                rollingAverage9 = log['loss']
                rollingAverage99 = log['loss']
                rollingAverage995 = log['loss']

            rollingAverage9 = log['loss'] * 0.1 + rollingAverage9 * 0.9
            rollingAverage95 = log['loss'] * 0.05 + rollingAverage95 * 0.95
            rollingAverage99 = log['loss'] * 0.01 + rollingAverage99 * 0.99
            rollingAverage995 = log['loss'] * 0.005 + rollingAverage995 * 0.995

            trend95 = '+' if rollingAverage9 > rollingAverage95 else '-'
            trend99 = '+' if rollingAverage95 > rollingAverage99 else '-'
            trend995 = '+' if rollingAverage99 > rollingAverage995 else '-'
            trend = trend95 + trend99 + trend995

            if batch % 10 == 0:
                gc.collect()

            if self.trackMemory:
                if batch % 50 == 0 and len(
                        self.augmentedRealImages) >= self.maxImagesToGenerate:
                    gc.collect()

                    self.memory_tracker.print_diff()
                    snapshot = tracemalloc.take_snapshot()
                    if currentSnapshot is not None:
                        top_stats = snapshot.compare_to(
                            currentSnapshot, 'traceback')

                        for stat in sorted(top_stats,
                                           key=lambda stat: stat.size_diff,
                                           reverse=True)[:10]:
                            print("count", stat.count_diff, "size",
                                  stat.size_diff)
                            print('\n'.join(stat.traceback.format()))

                        for stat in sorted(top_stats,
                                           key=lambda stat: stat.size_diff,
                                           reverse=False)[:10]:
                            print("count", stat.count_diff, "size",
                                  stat.size_diff)
                            print('\n'.join(stat.traceback.format()))
                    currentSnapshot = snapshot

            print("  batch loss", log['loss'], "  rl9  ", rollingAverage9,
                  "  rl99", rollingAverage99, "  trend ", trend)

        testNearestNeighbor = LambdaCallback(on_epoch_end=epochCallback,
                                             on_batch_end=batchCallback)

        learningRateScheduler = LearningRateScheduler(
            lambda epoch, lr: self.learningRateForEpoch(epoch))

        callbacks = [testNearestNeighbor, learningRateScheduler]
        optimizer = None

        if 'loadFile' in self.parameters:
            imageNet.load_weights(self.parameters['loadFile'])

        if self.enableTensorboard:
            tensorBoardCallback = CustomTensorBoard(
                user_defined_freq=1,
                log_dir='./logs',
                histogram_freq=5,
                batch_size=self.parameters['neuralNetwork']['batchSize'],
                write_graph=True,
                write_grads=False,
                write_images=False,
                embeddings_freq=0,
                embeddings_layer_names=None,
                embeddings_metadata=None,
                embeddings_data=None,
                update_freq='batch')
            callbacks.append(tensorBoardCallback)

        if self.trainFinalLayersFirst:
            imageNet.layers[0].trainable = False

            if self.parameters['neuralNetwork']['optimizer'][
                    'optimizerName'] == 'adam':
                optimizer = Adam(self.learningRateForEpoch(self.startEpoch),
                                 beta_1=0.99,
                                 beta_2=0.999)
            elif self.parameters['neuralNetwork']['optimizer'][
                    'optimizerName'] == 'nadam':
                optimizer = Nadam(self.learningRateForEpoch(self.startEpoch),
                                  beta_1=0.99,
                                  beta_2=0.999)
            elif self.parameters['neuralNetwork']['optimizer'][
                    'optimizerName'] == 'rmsprop':
                optimizer = RMSprop(self.learningRateForEpoch(self.startEpoch))
            elif self.parameters['neuralNetwork']['optimizer'][
                    'optimizerName'] == 'sgd':
                optimizer = SGD(self.learningRateForEpoch(self.startEpoch))

            self.model.compile(loss=self.createTripletLoss(),
                               optimizer=optimizer)

            self.model.summary()
            self.model.count_params()
            self.running = True

            testingGenerator = self.generateBatch(testing=True)
            trainingGenerator = self.generateBatch(testing=False)
            self.model.fit_generator(
                generator=trainingGenerator,
                steps_per_epoch=self.parameters['stepsPerEpoch'],
                epochs=self.pretrainEpochs,
                validation_data=testingGenerator,
                validation_steps=self.parameters['validationSteps'],
                workers=1,
                use_multiprocessing=False,
                max_queue_size=self.parameters['maxQueueSize'],
                callbacks=callbacks,
                initial_epoch=self.startEpoch)

            imageNet.layers[0].trainable = True

        if self.parameters['neuralNetwork']['optimizer'][
                'optimizerName'] == 'adam':
            optimizer = Adam(self.learningRateForEpoch(self.startEpoch),
                             beta_1=0.99,
                             beta_2=0.999)
        elif self.parameters['neuralNetwork']['optimizer'][
                'optimizerName'] == 'nadam':
            optimizer = Nadam(self.learningRateForEpoch(self.startEpoch),
                              beta_1=0.99,
                              beta_2=0.999)
        elif self.parameters['neuralNetwork']['optimizer'][
                'optimizerName'] == 'rmsprop':
            optimizer = RMSprop(self.learningRateForEpoch(self.startEpoch))
        elif self.parameters['neuralNetwork']['optimizer'][
                'optimizerName'] == 'sgd':
            optimizer = SGD(self.learningRateForEpoch(self.startEpoch))

        self.model.compile(loss=self.createTripletLoss(), optimizer=optimizer)

        self.model.summary()
        self.model.count_params()
        self.running = True

        if self.trackMemory:
            tracemalloc.start(5)
            currentSnapshot = None

        currentEpoch = self.startEpoch
        while currentEpoch < self.epochs:
            testingGenerator = self.generateBatch(testing=True)
            trainingGenerator = self.generateBatch(testing=False)
            self.model.fit_generator(
                generator=trainingGenerator,
                steps_per_epoch=self.parameters['stepsPerEpoch'],
                epochs=currentEpoch + 1,
                validation_data=testingGenerator,
                validation_steps=self.parameters['validationSteps'],
                workers=1,
                use_multiprocessing=False,
                max_queue_size=self.parameters['maxQueueSize'],
                callbacks=callbacks,
                initial_epoch=currentEpoch)
            currentEpoch += 1

        imageNet.save(f"model-final.h5")
        imageNet.save_weights(f"model-final-weights.h5")

        self.finished = True
        time.sleep(5)
        self.imageGenerationExecutor.shutdown()
        del self.imageGenerationExecutor
        K.clear_session()
        self.session.close()
        del self.session
        time.sleep(5)

        slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
            list(range(len(allAccuracies))), allAccuracies)

        return min(1, max(0, slope * 20))
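
The batch callback above diffs raw snapshots, so allocations made by tracemalloc itself and by the import machinery can dominate the top stats. A hedged refinement using the standard Snapshot.filter_traces API; the helper name diff_snapshots is made up:

import tracemalloc

def diff_snapshots(old, new, limit=10):
    # Drop tracemalloc's own bookkeeping and import-system allocations
    # before comparing, so the diff reflects application code.
    filters = (
        tracemalloc.Filter(False, tracemalloc.__file__),
        tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
    )
    old = old.filter_traces(filters)
    new = new.filter_traces(filters)
    for stat in sorted(new.compare_to(old, "traceback"),
                       key=lambda s: s.size_diff, reverse=True)[:limit]:
        print("count", stat.count_diff, "size", stat.size_diff)
        print("\n".join(stat.traceback.format()))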
Example #54
0
def _run(**kwargs):
    # Make sure that, if one worker crashes, the entire MPI process is aborted
    def handle_exception(exc_type, exc_value, exc_traceback):
        sys.__excepthook__(exc_type, exc_value, exc_traceback)
        sys.stderr.flush()
        if hvd.size() > 1:
            mpi.COMM_WORLD.Abort(1)

    sys.excepthook = handle_exception

    track_memory = kwargs['trace_memory']
    disable_logging = bool(kwargs['disable_logging'])
    warm_up_cycles = kwargs['warm_up_cycles']
    log_after_warm_up = kwargs['log_after_warm_up']
    screenshot_merge = kwargs['screenshot_merge']
    clear_checkpoints = list(
        filter(None, kwargs['clear_checkpoints'].split(':')))

    if 'all' in clear_checkpoints:
        clear_checkpoints = CHECKPOINT_ABBREVIATIONS.keys()

    if track_memory:
        tracemalloc.start(25)

    kwargs = del_out_of_setup_args(kwargs)
    expl, log_par = setup(**kwargs)
    local_logger.info('setup done')

    # We only need one MPI worker to log the results
    local_logger.info('Initializing logger')
    logger = None
    traj_logger = None
    if hvd.rank() == 0 and not disable_logging:
        logger = SimpleLogger(log_par.base_path + '/log.txt')
        traj_logger = SimpleLogger(log_par.base_path + '/traj_log.txt')

    ########################
    # START THE EXPERIMENT #
    ########################

    local_logger.info('Starting experiment')
    checkpoint_tracker = CheckpointTracker(log_par, expl)
    prev_checkpoint = None
    merged_dict = {}
    sil_trajectories = []
    if screenshot_merge.startswith('from_dir:'):
        screen_shot_dir = screenshot_merge[9:]
    else:
        screen_shot_dir = f'{log_par.base_path}/screen_shots'

    local_logger.info('Initiate cycle')
    expl.init_cycle()
    local_logger.info('Initiating Cycle done')

    if kwargs['expl_state'] is not None:
        local_logger.info('Performing warm up cycles...')
        expl.start_warm_up()
        for i in range(warm_up_cycles):
            if hvd.rank() == 0:
                local_logger.info(f'Running warm up cycle: {i}')
            expl.run_cycle()
        expl.end_warm_up()
        checkpoint_tracker.n_iters = expl.cycles
        checkpoint_tracker.log_warmup = log_after_warm_up
        local_logger.info('Performing warm up cycles... done')

    while checkpoint_tracker.should_continue():
        # Run one iteration
        if hvd.rank() == 0:
            local_logger.info(f'Running cycle: {checkpoint_tracker.n_iters}')

        checkpoint_tracker.pre_cycle()
        expl.run_cycle()
        checkpoint_tracker.post_cycle()

        write_checkpoint = None
        if hvd.rank() == 0:
            write_checkpoint = checkpoint_tracker.calc_write_checkpoint()
        write_checkpoint = mpi.get_comm_world().bcast(write_checkpoint, root=0)
        checkpoint_tracker.set_should_write_checkpoint(write_checkpoint)

        # Code that should be executed by all workers at a checkpoint generation
        if checkpoint_tracker.should_write_checkpoint():
            local_logger.debug(
                f'Rank: {hvd.rank()} is exchanging screenshots for checkpoint: {expl.frames_compute}'
            )
            screenshots = expl.trajectory_gatherer.env.recursive_getattr(
                'rooms')
            if screenshot_merge == 'mpi':
                screenshots = flatten_lists(
                    mpi.COMM_WORLD.allgather(screenshots))
            merged_dict = {}
            for screenshot_dict in screenshots:
                for key, value in screenshot_dict.items():
                    if key not in merged_dict:
                        merged_dict[key] = value
                    else:
                        after_threshold_screenshot_taken_merged = merged_dict[
                            key][0]
                        after_threshold_screenshot_taken_current = screenshot_dict[
                            key][0]
                        if after_threshold_screenshot_taken_current and not after_threshold_screenshot_taken_merged:
                            merged_dict[key] = value

            if screenshot_merge == 'disk':
                for key, value in merged_dict.items():
                    filename = f'{screen_shot_dir}/{key}_{hvd.rank()}.png'
                    os.makedirs(screen_shot_dir, exist_ok=True)
                    if not os.path.isfile(filename):
                        im = Image.fromarray(value[1])
                        im.save(filename)
                        im_array = imageio.imread(filename)
                        assert (im_array == value[1]).all()

                mpi.COMM_WORLD.barrier()

            local_logger.debug('Merging SIL trajectories')
            sil_trajectories = [expl.prev_selected_traj]
            if hvd.size() > 1:
                sil_trajectories = flatten_lists(
                    mpi.COMM_WORLD.allgather(sil_trajectories))
            local_logger.debug(
                f'Rank: {hvd.rank()} is done merging trajectories for checkpoint: {expl.frames_compute}'
            )

            expl.sync_before_checkpoint()
            local_logger.debug(
                f'Rank: {hvd.rank()} is done synchronizing for checkpoint: {expl.frames_compute}'
            )

        # Code that should be executed only by the master
        if hvd.rank() == 0 and not disable_logging:
            gatherer = expl.trajectory_gatherer
            return_success_rate = -1
            if gatherer.nb_return_goals_chosen > 0:
                return_success_rate = gatherer.nb_return_goals_reached / gatherer.nb_return_goals_chosen
            exploration_success_rate = -1
            if gatherer.nb_exploration_goals_chosen > 0:
                exploration_success_rate = gatherer.nb_exploration_goals_reached / gatherer.nb_exploration_goals_chosen

            cum_success_rate = 0
            for reached in expl.archive.cells_reached_dict.values():
                success_rate = sum(reached) / len(reached)
                cum_success_rate += success_rate
            mean_success_rate = cum_success_rate / len(expl.archive.archive)

            logger.write('it', checkpoint_tracker.n_iters)
            logger.write('score', expl.archive.max_score)
            logger.write('cells', len(expl.archive.archive))
            logger.write('ret_suc', return_success_rate)
            logger.write('exp_suc', exploration_success_rate)
            logger.write('rew_mean', gatherer.reward_mean)
            logger.write('len_mean', gatherer.length_mean)
            logger.write('ep', gatherer.nb_of_episodes)
            logger.write('arch_suc', mean_success_rate)
            logger.write('cum_suc', cum_success_rate)
            logger.write('frames', expl.frames_compute)

            if len(gatherer.loss_values) > 0:
                loss_values = np.mean(gatherer.loss_values, axis=0)
                assert len(loss_values) == len(gatherer.model.loss_names)
                for (loss_value, loss_name) in zip(loss_values,
                                                   gatherer.model.loss_names):
                    logger.write(loss_name, loss_value)

            stored_frames = 0
            for traj in expl.archive.cell_trajectory_manager.full_trajectories.values(
            ):
                stored_frames += len(traj)

            logger.write('sil_frames', stored_frames)

            nb_no_score_cells = len(expl.archive.archive)
            for weight in expl.archive.cell_selector.selector_weights:
                if hasattr(weight, 'max_score_dict'):
                    nb_no_score_cells = len(weight.max_score_dict)
            logger.write('no_score_cells', nb_no_score_cells)

            cells_found_ret = 0
            cells_found_rand = 0
            cells_found_policy = 0
            for cell_key in expl.archive.archive:
                cell_info = expl.archive.archive[cell_key]
                if cell_info.ret_discovered == global_const.EXP_STRAT_NONE:
                    cells_found_ret += 1
                elif cell_info.ret_discovered == global_const.EXP_STRAT_RAND:
                    cells_found_rand += 1
                elif cell_info.ret_discovered == global_const.EXP_STRAT_POLICY:
                    cells_found_policy += 1

            logger.write('cells_found_ret', cells_found_ret)
            logger.write('cells_found_rand', cells_found_rand)
            logger.write('cells_found_policy', cells_found_policy)
            logger.flush()

            traj_manager = expl.archive.cell_trajectory_manager
            new_trajectories = sorted(
                traj_manager.new_trajectories,
                key=lambda t: traj_manager.cell_trajectories[t].frame_finished)
            for traj_id in new_trajectories:
                traj_info = traj_manager.cell_trajectories[traj_id]
                traj_logger.write('it', checkpoint_tracker.n_iters)
                traj_logger.write('frame', traj_info.frame_finished)
                traj_logger.write('exp_strat', traj_info.exp_strat)
                traj_logger.write('exp_new_cells', traj_info.exp_new_cells)
                traj_logger.write('ret_new_cells', traj_info.ret_new_cells)
                traj_logger.write('score', traj_info.score)
                traj_logger.write('total_actions', traj_info.total_actions)
                traj_logger.write('id', traj_info.id)
                traj_logger.flush()

            # Code that should be executed by only the master at a checkpoint generation
            if checkpoint_tracker.should_write_checkpoint():
                local_logger.info(
                    f'Rank: {hvd.rank()} is writing checkpoint: {expl.frames_compute}'
                )
                filename = f'{log_par.base_path}/{expl.frames_compute:0{log_par.n_digits}}'

                # Save pictures
                if len(log_par.save_pictures) > 0:
                    if screenshot_merge == 'disk':
                        for file_name in os.listdir(screen_shot_dir):
                            if file_name.endswith('.png'):
                                room = int(file_name.split('_')[0])
                                if room not in merged_dict:
                                    screen_shot = imageio.imread(
                                        f'{screen_shot_dir}/{file_name}')
                                    merged_dict[room] = (True, screen_shot)

                    elif screenshot_merge.startswith('from_dir:'):
                        for file_name in os.listdir(screen_shot_dir):
                            if file_name.endswith('.png'):
                                room = int(file_name.split('.')[0])
                                if room not in merged_dict:
                                    screen_shot = imageio.imread(
                                        f'{screen_shot_dir}/{file_name}')
                                    merged_dict[room] = (True, screen_shot)

                    render_pictures(log_par, expl, filename, prev_checkpoint,
                                    merged_dict, sil_trajectories)

                # Save archive state
                if log_par.save_archive:
                    save_state(expl.get_state(), filename + ARCHIVE_POSTFIX)
                    expl.archive.cell_trajectory_manager.dump(filename +
                                                              TRAJ_POSTFIX)

                # Save model
                if log_par.save_model:
                    expl.trajectory_gatherer.save_model(filename +
                                                        MODEL_POSTFIX)

                # Clean up previous checkpoint.
                if prev_checkpoint:
                    for checkpoint_type in clear_checkpoints:
                        if checkpoint_type in CHECKPOINT_ABBREVIATIONS:
                            postfix = CHECKPOINT_ABBREVIATIONS[checkpoint_type]
                        else:
                            postfix = checkpoint_type
                        with contextlib.suppress(FileNotFoundError):
                            local_logger.debug(
                                f'Removing old checkpoint: {prev_checkpoint + postfix}'
                            )
                            os.remove(prev_checkpoint + postfix)
                prev_checkpoint = filename

                if track_memory:
                    snapshot = tracemalloc.take_snapshot()
                    display_top(snapshot)

                if PROFILER:
                    local_logger.info(
                        f'ITERATION: {checkpoint_tracker.n_iters}')
                    PROFILER.disable()
                    PROFILER.dump_stats(filename + '.stats')
                    PROFILER.enable()

    local_logger.info(f'Rank {hvd.rank()} finished experiment')
    mpi.get_comm_world().barrier()
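
Example #54 calls display_top(snapshot) without defining it. A plausible definition is the well-known helper from the tracemalloc documentation; that the original used exactly this version is an assumption:

import linecache
import tracemalloc

def display_top(snapshot, key_type='lineno', limit=10):
    # Hide import machinery and unknown frames so application code dominates.
    snapshot = snapshot.filter_traces((
        tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
        tracemalloc.Filter(False, "<unknown>"),
    ))
    top_stats = snapshot.statistics(key_type)

    print("Top %s lines" % limit)
    for index, stat in enumerate(top_stats[:limit], 1):
        frame = stat.traceback[0]
        print("#%s: %s:%s: %.1f KiB"
              % (index, frame.filename, frame.lineno, stat.size / 1024))
        line = linecache.getline(frame.filename, frame.lineno).strip()
        if line:
            print('    %s' % line)

    other = top_stats[limit:]
    if other:
        size = sum(stat.size for stat in other)
        print("%s other: %.1f KiB" % (len(other), size / 1024))
    total = sum(stat.size for stat in top_stats)
    print("Total allocated size: %.1f KiB" % (total / 1024))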
Example #55
0
def Main(params_dict):
    '''
    Entry point.
    
    Args:
        params_dict:    `dict` of parameters.
    '''
    from algowars.extractablehistoricaldata.facade_alpha_vantage import FacadeAlphaVantage
    # Import logger utilities from the standard logging module.
    from logging import getLogger, StreamHandler, NullHandler, DEBUG, ERROR

    import tracemalloc
    tracemalloc.start()

    with open(params_dict["api_key"], "r") as f:
        params_dict["api_key"] = f.read()

    if params_dict["verbose"] is True:
        print("Load historical data.")

    logger = getLogger("accelbrainbase")
    handler = StreamHandler()
    if params_dict["verbose"] is True:
        handler.setLevel(DEBUG)
        logger.setLevel(DEBUG)
    else:
        handler.setLevel(ERROR)
        logger.setLevel(ERROR)
    logger.addHandler(handler)

    logger = getLogger("pygan")
    handler = StreamHandler()
    if params_dict["verbose"] is True:
        handler.setLevel(DEBUG)
        logger.setLevel(DEBUG)
    else:
        handler.setLevel(ERROR)
        logger.setLevel(ERROR)
    logger.addHandler(handler)

    extractable_historical_data = FacadeAlphaVantage(
        api_key=params_dict["api_key"],
        logs_dir=params_dict["logs_dir"],
    )
    stock_master_df = pd.read_csv(params_dict["ticker_master_data"])

    if params_dict["stock_choiced"] != "all":
        if params_dict["stock_choiced"] == "random":
            ticker_key_arr = np.arange(stock_master_df.shape[0])
            np.random.shuffle(ticker_key_arr)
            extracted_num = np.random.randint(low=5,
                                              high=ticker_key_arr.shape[0])
            stock_master_df = stock_master_df.iloc[
                ticker_key_arr[:extracted_num]]
        else:
            ticker_key_list = params_dict["stock_choiced"].split(",")
            stock_master_df = stock_master_df[stock_master_df.ticker.isin(
                ticker_key_list)]

    ticker_list = stock_master_df.ticker.values.tolist()

    volatility_GAN = VolatilityGAN(
        extractable_historical_data=extractable_historical_data,
        ticker_list=ticker_list,
        start_date=params_dict["start_date"],
        end_date=params_dict["end_date"],
        batch_size=params_dict["batch_size"],
        seq_len=params_dict["seq_len"],
        learning_rate=params_dict["learning_rate"],
        g_params_path=params_dict["g_params_path"],
        re_e_params_path=params_dict["re_e_params_path"],
        d_params_path=params_dict["d_params_path"],
        transfer_flag=params_dict["transfer_flag"],
    )

    if params_dict["verbose"] is True:
        print("Build volatility GAN.")

    try:
        volatility_GAN.learn(iter_n=params_dict["item_n"],
                             k_step=params_dict["k_step"])
    except KeyboardInterrupt:
        print("KeyboardInterrupt.")

    volatility_GAN.save_parameters(
        g_params_path=params_dict["g_params_path"],
        re_e_params_path=params_dict["re_e_params_path"],
        d_params_path=params_dict["d_params_path"],
    )

    d_logs_list, g_logs_list = volatility_GAN.extract_logs()
    feature_matching_arr = volatility_GAN.extract_feature_matching_logs()

    if params_dict["verbose"] is True:
        print("Training volatility AAE is end.")
        print("-" * 100)
        print("D logs:")
        print(d_logs_list[-5:])
        print("-" * 100)
        print("G logs:")
        print(g_logs_list[-5:])

    generated_stock_df_list, true_stock_df, rest_generated_stock_df_list = volatility_GAN.inference(
        params_dict["generated_start_date"],
        params_dict["generated_end_date"],
        ticker_list,
    )

    pd.concat(generated_stock_df_list).to_csv(params_dict["logs_dir"] +
                                              "generated_volatility.csv",
                                              index=False)
    pd.concat(rest_generated_stock_df_list).to_csv(
        params_dict["logs_dir"] + "generated_rest_volatility.csv", index=False)
    true_stock_df.to_csv(params_dict["logs_dir"] + "true_volatility.csv",
                         index=False)
    np.save(params_dict["logs_dir"] + "d_logs", d_logs_list)
    np.save(params_dict["logs_dir"] + "g_logs", g_logs_list)
    np.save(params_dict["logs_dir"] + "feature_matching_logs",
            feature_matching_arr)

    print("end.")

    snapshot = tracemalloc.take_snapshot()
    top_stats = snapshot.statistics('lineno')

    print("[ Top 10 ]")
    for stat in top_stats[:10]:
        print(stat)
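
Example #55 only reports per-line statistics at the end of the run. If overall footprint matters more than individual sites, tracemalloc can also report current and peak usage directly; a minimal self-contained sketch (the workload is made up, and reset_peak requires Python 3.9+):

import tracemalloc

def workload():
    return [bytes(1000) for _ in range(10_000)]

tracemalloc.start()
data = workload()
current, peak = tracemalloc.get_traced_memory()
print(f"current={current / 10**6:.2f} MB, peak={peak / 10**6:.2f} MB")
tracemalloc.reset_peak()  # isolate the next phase's peak measurement
tracemalloc.stop()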
Example #56
0
        maxv = float(input('Enter the maximum value you want: '))
        print("Analyzing data")
        print(ct.req1(catalog, caract.lower(), minv, maxv))
    elif int(inputs[0]) == 3:
        pass  # placeholder in the source; this menu option is not implemented
    elif int(inputs[0]) == 4:
        mini = float(input('Enter the minimum instrumentalness value: '))
        maxi = float(input('Enter the maximum instrumentalness value: '))
        mint = float(input('Enter the minimum tempo value: '))
        maxt = float(input('Enter the maximum tempo value: '))
        ct.req3(catalog, mini, maxi, mint, maxt)
    elif int(inputs[0]) == 5:
        delta_time = -1.0
        delta_memory = -1.0

        tracemalloc.start()
        start_time = getTime()
        start_memory = getMemory()
        l = 0
        h = True
        while h:
            g = input('Enter the genre you want: ')
            l += ct.req4v1(catalog, g)
            s = int(
                input(
                    'If you want another genre enter 1; if you do not want to continue enter 0: '
                ))
            if s == 0:
                p = int(
                    input(
                        'If you want a custom genre enter 1, if not enter 0: '
Example #57
0
def blockMerge(ALLBLOCKFILE, BLOCKSTOMERGE, BLOCKPATH, spimi_index):
    print(
        "=============== Merging SPIMI blocks into final inverted index... ==============="
    )
    tracemalloc.start()
    Filewrite = open('Merge/invert_index.txt', "w+")
    iterlist = []
    term = ""
    current_term = ""
    startmerge = time.process_time()
    for BLOCKFILE in ALLBLOCKFILE:
        print("File Name:", BLOCKFILE)
        print("-- Reading into memory... ", BLOCKFILE.split(".txt", 1)[0])

        finaldict = {}

        l = open(BLOCKPATH + BLOCKFILE)
        Fileread = open('Merge/invert_index.txt')
        Initialfile = Fileread.read()
        if (Initialfile.strip()):
            lst = Initialfile.strip().split('\n')
            for i in range(len(lst)):
                val = lst[i].split(" -----------> ")
                finaldict[val[0]] = val[1]
        else:
            finaldict = {}

        iterlist = (l.read().strip().split('\n'))
        for l2 in range(len(iterlist)):
            ksplit = iterlist[l2].split(
                " -----------> "
            )  # ['aaaaaq', '[[2136, 1]]'] OR ['aam', '[[1782, 1], [1786, 1]]']
            if (finaldict.get(ksplit[0]) != None):
                postlingvalold = json.loads(finaldict.get(
                    ksplit[0]))  # [[1,4],[2,5]]
                newblock = json.loads(ksplit[1])
                for i in range(len(newblock)):
                    if newblock[i] not in postlingvalold:
                        #print("THIS IS THE NEWBLOCK MATCHING THE CONDITION : ", newblock[i])
                        postlingvalold.append(newblock[i])
                finaldict[ksplit[0]] = str(postlingvalold)
            else:
                current_term = ksplit[0]
                term = term + current_term.capitalize()
                finaldict[ksplit[0]] = ksplit[1]

        # NOTE: a bare sorted(finaldict) call here would be a no-op; the
        # write loop below already iterates sorted(finaldict.items()).

        Filewrite = open('Merge/invert_index.txt', "w+")
        Filewrite1 = open('Merge/invert_actual_index.txt', "w+")
        indexwriter1 = open('Merge/index_cp_1.txt', "w+")
        indexwriter1.write(term)
        for key, value in sorted(finaldict.items()):
            Filewrite.write(key + " -----------> " + value + "\n")
            Filewrite1.write(key + " -----------> " + value + "\n")
        print("Finished merging block: ",
              BLOCKFILE.split(".txt", 1)[0], " and writing to disk")
        endmerge = time.process_time()
        eachmerge = endmerge - startmerge
        print("\n Time taken after each Block merge : ", eachmerge, "\n")
        Fileread.close()
        Filewrite.close()
        Filewrite1.close()
        indexwriter1.close()
        l.close()
        current, peak = tracemalloc.get_traced_memory()
        print(f" After merge: current memory usage is {current / 10**6}MB; "
              f"peak was {peak / 10**6}MB")
    # Stop tracing once all blocks are merged; stopping inside the loop
    # would discard the traces after the first block.
    tracemalloc.stop()
Example #58
0
from contextlib import contextmanager

@contextmanager
def activated_tracemalloc():
    tracemalloc.start()
    try:
        yield
    finally:
        tracemalloc.stop()
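
A usage sketch for the context manager above; the workload is a made-up example:

import tracemalloc

with activated_tracemalloc():
    data = [{"i": i} for i in range(100_000)]
    for stat in tracemalloc.take_snapshot().statistics("lineno")[:3]:
        print(stat)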
Example #59
0
def main():
    config = ConfigParser.ConfigParser()
    my_path = Path(__file__).parent.parent
    ini_path = os.path.join(my_path, 'config', 'server.ini')
    config.read(ini_path)
    YAML_DIR = config['SERVICE']['yaml_directory']
    METRIC_YAML = config['SERVICE']['metrics_yaml']
    METRIC_YML_PATH = os.path.join(my_path, YAML_DIR, METRIC_YAML)
    SPDX_URL = config['EXTERNAL']['spdx_license_github']
    DATACITE_API_REPO = config['EXTERNAL']['datacite_api_repo']
    RE3DATA_API = config['EXTERNAL']['re3data_api']
    METADATACATALOG_API = config['EXTERNAL']['metadata_catalog']
    isDebug = config.getboolean('SERVICE', 'debug_mode')
    data_files_limit = int(config['SERVICE']['data_files_limit'])
    metric_specification = config['SERVICE']['metric_specification']

    preproc = Preprocessor()
    preproc.retrieve_metrics_yaml(METRIC_YML_PATH, data_files_limit,
                                  metric_specification)
    print('Total metrics defined: {}'.format(preproc.get_total_metrics()))

    preproc.retrieve_licenses(SPDX_URL, isDebug)
    preproc.retrieve_datacite_re3repos(RE3DATA_API, DATACITE_API_REPO, isDebug)
    preproc.retrieve_metadata_standards(METADATACATALOG_API, isDebug)
    preproc.retrieve_science_file_formats(isDebug)
    preproc.retrieve_long_term_file_formats(isDebug)

    print('Total SPDX licenses : {}'.format(preproc.get_total_licenses()))
    print('Total re3repositories found from datacite api : {}'.format(
        len(preproc.getRE3repositories())))
    print('Total subjects area of imported metadata standards : {}'.format(
        len(preproc.metadata_standards)))
    start = False
    usedatacite = True
    tracemalloc.start()
    n = 1
    for identifier in testpids:

        print(identifier)
        print(n)
        n += 1
        if identifier == startpid or not startpid:
            start = True
        if start:
            ft = FAIRCheck(uid=identifier,
                           test_debug=debug,
                           metadata_service_url=metadata_service_endpoint,
                           metadata_service_type=metadata_service_type,
                           use_datacite=usedatacite)

            #ft = FAIRCheck(uid=identifier,  test_debug=True, use_datacite=usedatacite)

            uid_result, pid_result = ft.check_unique_persistent()
            ft.retrieve_metadata_embedded(ft.extruct_result)
            include_embedded = True
            if ft.repeat_pid_check:
                uid_result, pid_result = ft.check_unique_persistent()
            ft.retrieve_metadata_external()

            core_metadata_result = ft.check_minimal_metatadata()
            content_identifier_included_result = ft.check_content_identifier_included(
            )
            access_level_result = ft.check_data_access_level()
            license_result = ft.check_license()
            relatedresources_result = ft.check_relatedresources()
            check_searchable_result = ft.check_searchable()
            data_content_metadata = ft.check_data_content_metadata()
            data_file_format_result = ft.check_data_file_format()
            community_standards_result = ft.check_community_metadatastandards()
            data_provenance_result = ft.check_data_provenance()
            formal_representation_result = ft.check_formal_metadata()
            semantic_vocabulary_result = ft.check_semantic_vocabulary()
            metadata_preserved_result = ft.check_metadata_preservation()
            standard_protocol_metadata_result = ft.check_standardised_protocol_metadata(
            )
            standard_protocol_data_result = ft.check_standardised_protocol_data(
            )

            results = [
                uid_result, pid_result, core_metadata_result,
                content_identifier_included_result, check_searchable_result,
                access_level_result, formal_representation_result,
                semantic_vocabulary_result, license_result,
                data_file_format_result, data_provenance_result,
                relatedresources_result, community_standards_result,
                data_content_metadata, metadata_preserved_result,
                standard_protocol_data_result,
                standard_protocol_metadata_result
            ]
            #results=[core_metadata_result,uid_result, pid_result]
            #print(ft.metadata_merged)
            debug_messages = ft.get_log_messages_dict()
            ft.logger_message_stream.flush()
            ft.get_assessment_summary(results)
            for res_k, res_v in enumerate(results):
                if ft.isDebug:
                    debug_list = debug_messages.get(res_v['metric_identifier'])
                    #debug_list= ft.msg_filter.getMessage(res_v['metric_identifier'])
                    if debug_list is not None:
                        results[res_k]['test_debug'] = debug_messages.get(
                            res_v['metric_identifier'])
                    else:
                        results[res_k]['test_debug'] = [
                            'INFO: No debug messages received'
                        ]
                else:
                    results[res_k]['test_debug'] = ['INFO: Debugging disabled']
                    debug_messages = {}
            print(json.dumps(results, indent=4, sort_keys=True))
            #remove unused logger handlers and filters to avoid memory leaks
            ft.logger.handlers = [ft.logger.handlers[-1]]
            #ft.logger.filters = [ft.logger.filters]
            current, peak = tracemalloc.get_traced_memory()
            print(
                f"Current memory usage is {current / 10 ** 6}MB; Peak was {peak / 10 ** 6}MB"
            )
            snapshot = tracemalloc.take_snapshot()
            top_stats = snapshot.statistics('traceback')

            # pick the biggest memory block
            stat = top_stats[0]
            print("%s memory blocks: %.1f KiB" %
                  (stat.count, stat.size / 1024))
            for line in stat.traceback.format():
                print(line)

            for i, stat in enumerate(snapshot.statistics('filename')[:5], 1):
                print(i, str(stat))

            #preproc.logger.
            gc.collect()
    tracemalloc.stop()
Example #60
0
def config_logger(
    name: str,
    level: int = logging.INFO,
    write_to_file: bool = True,
    use_stackdriver: bool = False,
    stackdriver_level: int = logging.INFO,
    stackdriver_name: Optional[str] = None,
    tracemalloc: bool = False,
    upload_func: Optional[Callable[[str, str], None]] = None,
    upload_frequency: Optional[float] = None,
    custom_loggers_config: Optional[Dict[str, Dict]] = None,
    format: str = LOG_FORMAT,
    logdir: str = "logs",
) -> None:

    logger = logging.getLogger()

    if name.endswith(".py"):
        name = name.rsplit(".", 1)[0]

    handlers: Dict[str, LogConfig] = {
        "default": {
            "level": logging.getLevelName(level),
            "formatter": "standard",
            "class": "logging.StreamHandler",
        }
    }
    if write_to_file:
        os.makedirs(logdir, exist_ok=True)
        handlers.update({
            "file": {
                "level": "INFO",
                "formatter": "standard",
                "class": "logging.handlers.RotatingFileHandler",
                "filename": f"{logdir}/{name}.log",
                "maxBytes": 1024 * 1024 * 100,
                "backupCount": 3,
                "delay": True,
            },
            "file_debug": {
                "level": "DEBUG",
                "formatter": "standard",
                "class": "logging.handlers.RotatingFileHandler",
                "filename": f"{logdir}/{name}.debug.log",
                "maxBytes": 1024 * 1024 * 100,
                "backupCount": 3,
                "delay": True,
            },
            "web_access": {
                "level": "DEBUG",
                "formatter": "",
                "class": "logging.handlers.RotatingFileHandler",
                "filename": f"{logdir}/access.log",
                "maxBytes": 1024,
                "backupCount": 0,
                "delay": True,
            },
        })
    else:
        handlers.update({
            "file": {
                "class": "logging.NullHandler",
            },
            "file_debug": {
                "class": "logging.NullHandler",
            },
            "web_access": {
                "class": "logging.NullHandler",
            },
        })

    logging.config.dictConfig({
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            "standard": {
                "format": format
            },
        },
        "handlers": handlers,
        "loggers": {
            "": {
                "handlers": ["default", "file", "file_debug"],
                "level": "DEBUG",
                "propagate": True,
            },
            "cherrypy.access": {
                "handlers": ["web_access"],
                "level": "WARN",
                "propagate": False,
            },
            "sanic.access": {
                "handlers": ["web_access"],
                "level": "WARN",
                "propagate": False,
            },
            "libav.AVBSFContext": {
                "handlers": ["default", "file", "file_debug"],
                "level": "CRITICAL",
                "propagate": False,
            },
            "libav.swscaler": {
                "handlers": ["default", "file", "file_debug"],
                "level": "CRITICAL",
                "propagate": False,
            },
            "datadog.api": {
                "handlers": [],
                "level": "ERROR",
                "propagate": False
            },
            **(custom_loggers_config or {}),
        },
    })

    if use_stackdriver:
        import google.cloud.logging
        from google.cloud.logging.handlers import CloudLoggingHandler
        from google.cloud.logging.handlers.handlers import EXCLUDED_LOGGER_DEFAULTS

        # noinspection PyUnresolvedReferences
        client = google.cloud.logging.Client()
        # client.setup_logging()

        handler = CloudLoggingHandler(client, name=stackdriver_name or name)
        handler.setLevel(stackdriver_level)
        logger.addHandler(handler)
        for logger_name in EXCLUDED_LOGGER_DEFAULTS + (
                "urllib3.connectionpool", ):
            exclude = logging.getLogger(logger_name)
            exclude.propagate = False
            # exclude.addHandler(logging.StreamHandler())

    if tracemalloc:
        # The boolean parameter shadows the module name; this import rebinds
        # `tracemalloc` to the actual module for the rest of the block.
        import tracemalloc

        tracemalloc.start()

        tracemalloc_logger = logging.getLogger("tracemalloc")

        def tracemalloc_loop():
            while True:
                time.sleep(5 * 60)
                snapshot = tracemalloc.take_snapshot()
                top_stats = snapshot.statistics("lineno")
                tracemalloc_logger.info(f"tracemalloc:")
                for stat in top_stats[:10]:
                    tracemalloc_logger.info(f"  {stat}")

        Thread(target=tracemalloc_loop, name="tracemalloc",
               daemon=True).start()

    # if use_stackdriver_error:
    #     from google.cloud import error_reporting
    #     client = error_reporting.Client()

    # if use_datadog:
    #     import datadog
    #     from datadog_logger import DatadogLogHandler
    #     datadog.initialize(api_key=os.environ['DATADOG_API_KEY'], app_key=os.environ['DATADOG_APP_KEY'])
    #     datadog_handler = DatadogLogHandler(
    #         tags=[
    #             f'host:{socket.gethostname()}',
    #             f'pid:{os.getpid()}',
    #             f'stack:{name}',
    #             'type:log'],
    #         mentions=[],
    #         level=logging.INFO
    #     )
    #     logger.addHandler(datadog_handler)

    for _ in range(3):
        logger.info("")
    logger.info(
        f'Command: "{" ".join(sys.argv)}", pid={os.getpid()}, name={name}')
    if use_stackdriver:
        logger.info(
            f"Connected to google cloud logging. Using name={name!r}. Logging class: {logging.getLoggerClass()}"
        )

    upload_logs_settings["write_to_file"] = write_to_file
    if write_to_file and upload_func and upload_frequency:
        upload_logs_settings["upload_func"] = upload_func
        file: str = handlers["file"]["filename"]
        file_debug: str = handlers["file_debug"]["filename"]
        # noinspection PyTypeChecker
        upload_logs_settings["args"] = file, file_debug

        def upload_loop() -> None:
            while True:
                assert upload_frequency
                assert upload_func
                time.sleep(upload_frequency)
                upload_func(handlers["file"]["filename"],
                            handlers["file_debug"]["filename"])

        logger.info(f"Uploading log files every {upload_frequency}s")
        Thread(target=upload_loop, daemon=True).start()

    logging.getLogger("tensorflow").setLevel(logging.ERROR)
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"