Example #1
0
    def test_memory_cleanup_with_till(self):
        """
        Chain many Till timers onto a single Signal, fire it, then verify the
        timer daemon releases roughly as many objects as were created.
        """
        objgraph.growth()  # prime objgraph's internal baseline

        root = Signal()
        for i in range(100000):
            if i % 1000 == 0:
                Log.note("at {{num}} tills", num=i)
            root = root | Till(seconds=100000)
            mid_mem = psutil.Process(os.getpid()).memory_info().rss
            if mid_mem > 1000 * 1000 * 1000:
                # stop early once resident memory passes ~1GB
                Log.note("{{num}} Till triggers created", num=i)
                break
        trigger = Signal()
        root = root | trigger

        growth = objgraph.growth(limit=4)
        growth and Log.note("More object\n{{growth}}", growth=growth)

        trigger.go()
        root.wait()  # THERE SHOULD BE NO DELAY HERE

        for _ in range(20):
            try:
                Till(seconds=0.1).wait()  # LET TIMER DAEMON CLEANUP
                current = [(t, objgraph.count(t), objgraph.count(t) - c)
                           for t, c, d in growth]
                Log.note("Object count\n{{current}}", current=current)

                # NUMBER OF OBJECTS CLEANED UP SHOULD MATCH NUMBER OF OBJECTS CREATED
                for (_, _, cd), (_, _, gd) in zip(current, growth):
                    self.assertAlmostEqual(-cd, gd, places=2)
                return
            except Exception:
                # best-effort retry: counts may not have settled yet
                pass
        Log.error("object counts did not go down")
Example #2
0
 def test_growth(self):
     """Creating one instance of a brand-new class must show up as growth of exactly one."""
     objgraph.growth(limit=None)  # take the baseline snapshot
     instance = type('MyClass', (), {'__module__': 'mymodule'})()  # noqa
     new_records = objgraph.growth(limit=None)
     matches = [record for record in new_records if record[0] == 'MyClass']
     self.assertEqual(1, len(matches))
     self.assertEqual(1, matches[0][2])
    def test_no_leak(self):
        """
        Upsert 100 documents, run the same query five times, and assert that
        dict/list object counts do not grow beyond a small allowed delta per
        iteration; finally dump the largest tracemalloc differences.
        """
        import tracemalloc
        tracemalloc.start(25)
        snapshot = tracemalloc.take_snapshot()
        doc = {'field1': "value1"}
        for i in range(100):
            key = str(i)
            self.bucket.default_collection().upsert(key, doc)

        if self.is_realserver:
            # NOTE(review): this template has no {} placeholders, so the three
            # .format() arguments are ignored -- confirm whether substitution
            # of bucket/scope/collection names was intended here
            statement = "SELECT * FROM default:`default` USE KEYS[$1];".format(
                self.cluster_info.bucket_name, self.coll._self_scope.name,
                self.coll._self_name)
        else:
            statement = "'SELECT mockrow'"
        # maximum tolerated per-iteration growth for these builtin types
        counts = Counter({"builtins.dict": 1, "builtins.list": 2})

        objgraph.growth(shortnames=False)  # baseline snapshot

        for i in range(5):
            args = [str(i)] if self.is_realserver else []
            print("PRE: key: {}".format(i))
            result = self.cluster.query(statement, *args)
            try:
                stuff = list(result)
                metadata = result.meta
                del stuff
                del result
                del metadata
                gc.collect()
                print("POST: key: {}".format(i))
            except Exception:
                # best-effort: iterating the result / reading meta may fail
                # against the mock server (was a bare except, which also
                # swallowed SystemExit/KeyboardInterrupt)
                pass
            growth = objgraph.growth(shortnames=False)
            print("growth is {}".format(growth))
            if i > 0:
                for entry in growth:
                    key = entry[0]
                    if key in ('builtins.dict', 'builtins.list'):
                        self.assertLessEqual(
                            entry[2], counts[key],
                            "{} count should not grow more than {}".format(
                                key, counts[key]))
            print("\n")
            del growth
            gc.collect()
        snapshot2 = tracemalloc.take_snapshot()

        top_stats = snapshot2.compare_to(snapshot, 'lineno')
        import logging
        logging.error("[ Top 10 differences ]")
        for stat in top_stats[:10]:
            logging.error(stat)
        # pick the biggest memory block
        top_stats = snapshot2.statistics('traceback')
        stat = top_stats[0]
        logging.error("%s memory blocks: %.1f KiB" %
                      (stat.count, stat.size / 1024))
        for line in stat.traceback.format():
            logging.error(line)
Example #4
0
 def test_growth(self):
     """A single new MyClass instance should be reported with a delta of one."""
     objgraph.growth(limit=None)  # establish the baseline
     obj = type('MyClass', (), {'__module__': 'mymodule'})()  # noqa
     records = [r for r in objgraph.growth(limit=None) if r[0] == 'MyClass']
     self.assertEqual(1, len(records))
     self.assertEqual(1, records[0][2])
Example #5
0
def test_no_memory_leaks():
    """
    Run the dagster daemon against an example repo with a schedule and a
    sensor active, and poll objgraph until object counts for dagster modules
    stop growing; fail if they are still growing after 5 minutes.
    """
    with instance_for_test(
            overrides={
                "run_coordinator": {
                    "module": "dagster.core.run_coordinator",
                    "class": "QueuedRunCoordinator",
                },
                "run_launcher": {
                    "class": "DefaultRunLauncher",
                    "module": "dagster.core.launcher.default_run_launcher",
                    "config": {
                        "wait_for_processes": False,
                    },
                },
            }) as instance:
        with get_example_repo(instance) as repo:

            external_schedule = repo.get_external_schedule(
                "always_run_schedule")
            external_sensor = repo.get_external_sensor("always_on_sensor")

            instance.start_schedule(external_schedule)
            instance.start_sensor(external_sensor)

            with daemon_controller_from_instance(
                    instance,
                    workspace_load_target=workspace_load_target(),
                    wait_for_processes_on_exit=True,
            ) as controller:
                start_time = time.time()

                # baseline snapshot; only objects from dagster modules count
                growth = objgraph.growth(
                    limit=10,
                    filter=lambda obj: inspect.getmodule(obj) and "dagster" in
                    inspect.getmodule(obj).__name__,
                )
                while True:
                    time.sleep(30)

                    controller.check_daemon_threads()
                    controller.check_daemon_heartbeats()

                    # growth since the previous call; empty means settled
                    growth = objgraph.growth(
                        limit=10,
                        filter=lambda obj: inspect.getmodule(obj) and "dagster"
                        in inspect.getmodule(obj).__name__,
                    )
                    if not growth:
                        print(  # pylint: disable=print-call
                            f"Memory stopped growing after {int(time.time() - start_time)} seconds"
                        )
                        break

                    if (time.time() - start_time) > 300:
                        raise Exception(
                            "Memory still growing after 5 minutes. Most recent growth: "
                            + str(growth))

                    print("Growth: " + str(growth))  # pylint: disable=print-call
 def process(self, input_interface):
     """Run every submodule over *input_interface* and merge their generated facts."""
     logging.info("Start the assertion generation module")
     generated_facts = []
     logging.info(objgraph.growth())
     # For now in sequence, we could make it be parallel
     for submodule in self._submodules:
         result = submodule.process(input_interface)
         generated_facts.extend(result.get_generated_facts())
         submodule.clean()
         # log object growth after each submodule to spot leaks early
         logging.info(objgraph.growth())
         logging.info(objgraph.most_common_types())
     return input_interface.add_generated_facts(generated_facts)
Example #7
0
def _log_objects(*_):
    """Log the current objgraph growth report at critical level."""
    # Deferred import: objgraph is only needed when this handler fires, so it
    # stays out of module import time (usually only one part of this
    # integration is in use at once).
    import objgraph  # pylint: disable=import-outside-toplevel

    _LOGGER.critical("Memory Growth: %s", objgraph.growth(limit=100))
Example #8
0
def growth(collect=True, shortnames=False, tags=None):
    """
    Report object-count growth since the previous call.

    Parameters
    ----------
    collect : bool
        Force a garbage collection before sampling.
    shortnames : bool
        Passed through to objgraph.growth.
    tags : optional
        Passed to _apply_tags on each emitted event.

    Returns
    -------
    dict or None
        {'sourcetype': 'memleak.growth', 'events': [...]} when growth was
        found (and this is not the baseline-establishing first call),
        otherwise None.
    """
    log.debug('growth(%s, %s)', collect, shortnames)
    if collect:
        log.debug(' forcing garbage collection')
        gc.collect()

    pid = os.getpid()
    # renamed from `growth` so the local no longer shadows this function
    deltas = objgraph.growth(shortnames=shortnames)

    if STATE['growth']:
        # first call only establishes the baseline; report nothing
        log.debug(' first query')
        STATE['growth'] = False
        return None

    if deltas:
        log.debug(' found growth')
        events = [{
            'time': _now(),
            'pid': pid,
            'type': t,
            'count': c,
            'delta': d
        } for t, c, d in deltas]
        _apply_tags(tags, *events)
        return {'sourcetype': 'memleak.growth', 'events': events}

    log.debug(' no new growth found')
    return None
Example #9
0
    def get_new_objects(lst, fn, *args, **kwargs):
        """
        Collect types and numbers of new objects left over after the given function is called.

        If lst is not empty after the call, this MAY indicate a memory leak, but not necessarily,
        since some functions are intended to create new objects for later use.

        Parameters
        ----------
        lst : list
            List used to collect objects and deltas.
        fn : function
            The function being checked for possible memory leaks.
        *args : tuple
            Positional args passed to fn.
        **kwargs : dict
            Named args to be passed to fn.

        Returns
        -------
        object
            The object returned by the call to fn.
        """
        gc.collect()
        start_objs = objgraph.typestats()
        # Compensate for the objects created by making this call itself.
        # Use .get() so a type name absent from typestats cannot raise
        # KeyError (previously these increments were unguarded).
        for name in ('frame', 'function', 'builtin_function_or_method', 'cell'):
            start_objs[name] = start_objs.get(name, 0) + 1
        ret = fn(*args, **kwargs)
        lst.extend([(str(o), delta) for o, _, delta in objgraph.growth(peak_stats=start_objs)])
        return ret
Example #10
0
def _dump_memory_diff(request: pyramid.request.Request) -> List[Any]:
    """
    Replay the request at ``path`` as a sub-request and return the objgraph
    growth it caused, as up to ``limit`` (type, count, delta) rows.
    """
    auth.auth_view(request)
    limit = int(request.params.get("limit", "30"))
    if "path" in request.matchdict:
        # deprecated
        path = "/" + "/".join(request.matchdict["path"])
    else:
        path = request.params["path"]

    # clone the request and point it at the target path/query string
    sub_request = request.copy()
    split_path = path.split("?")
    sub_request.path_info = split_path[0]
    if len(split_path) > 1:
        sub_request.query_string = split_path[1]

    # warmup run
    try:
        if "no_warmup" not in request.params:
            request.invoke_subrequest(sub_request)
    except Exception:  # nosec  # pylint: disable=broad-except
        pass

    LOG.debug("checking memory growth for %s", path)

    # collect every GC generation before taking the baseline snapshot
    peak_stats: Dict[Any, Any] = {}
    for i in range(3):
        gc.collect(i)

    objgraph.growth(limit=limit, peak_stats=peak_stats, shortnames=False)

    response = None
    try:
        response = request.invoke_subrequest(sub_request)
        LOG.debug("response was %d", response.status_code)

    except HTTPException as ex:
        LOG.debug("response was %s", str(ex))

    del response

    # collect again so only genuinely-retained objects show in the diff
    for i in range(3):
        gc.collect(i)

    return objgraph.growth(limit=limit,
                           peak_stats=peak_stats,
                           shortnames=False)  # type: ignore
Example #11
0
 def wrapper(*args, **kwargs):
     """Call *fn* and print the types of any new objects it left behind."""
     baseline = objgraph.typestats()
     # compensate for objects created by making this call itself
     baseline['frame'] += 1
     baseline['cell'] += 1
     result = fn(*args, **kwargs)
     for type_name, _, delta in objgraph.growth(peak_stats=baseline):
         print(str(fn), "added %s %+d" % (type_name, delta))
     return result
Example #12
0
 def process(self, input_interface):
     """Feed *input_interface* through each submodule in turn, cleaning as we go."""
     logging.info("Start the assertion validation module")
     for module in self._submodules:
         input_interface = module.process(input_interface)
         module.clean()
         # log object growth after each submodule to spot leaks early
         logging.info(objgraph.growth())
         logging.info(objgraph.most_common_types())
     return input_interface
Example #13
0
def test_no_memory_leaks():
    """
    Run the dagster daemon with a schedule and sensor active and poll
    objgraph until dagster-module object counts stop growing; fail if they
    are still growing after 5 minutes.
    """
    with instance_for_test(
            overrides={
                "run_coordinator": {
                    "module": "dagster.core.run_coordinator",
                    "class": "QueuedRunCoordinator",
                }
            }) as instance, get_example_repo() as repo:

        external_schedule = repo.get_external_schedule("always_run_schedule")
        external_sensor = repo.get_external_sensor("always_on_sensor")

        instance.start_schedule_and_update_storage_state(external_schedule)
        instance.start_sensor(external_sensor)

        with daemon_controller_from_instance(instance) as controller:
            start_time = time.time()

            # baseline snapshot; only objects from dagster modules count
            growth = objgraph.growth(
                limit=10,
                filter=lambda obj: inspect.getmodule(obj) and "dagster" in
                inspect.getmodule(obj).__name__,
            )
            while True:
                time.sleep(30)

                controller.check_daemons()

                # growth since the previous call; empty means counts settled
                growth = objgraph.growth(
                    limit=10,
                    filter=lambda obj: inspect.getmodule(obj) and "dagster" in
                    inspect.getmodule(obj).__name__,
                )
                if not growth:
                    print(  # pylint: disable=print-call
                        f"Memory stopped growing after {int(time.time() - start_time)} seconds"
                    )
                    break

                if (time.time() - start_time) > 300:
                    raise Exception(
                        "Memory still growing after 5 minutes. Most recent growth: "
                        + str(growth))

                print("Growth: " + str(growth))  # pylint: disable=print-call
Example #14
0
def _trace_return(frame, arg, stack, context):
    """
    This is called when a matched function returns.

    This only happens if show_return is True when setup() is called.

    Prints an indented "<-- name" line (with timing/memory deltas when
    memory tracking is on), optionally the return value, and any object
    growth recorded since the matching call was entered.
    """
    global time0

    # unpack the shared tracing-context tuple
    (qual_cache, method_counts, class_counts, id2count, verbose, memory, leaks,
     stream, show_ptrs) = context
    funcname = find_qualified_name(frame.f_code.co_filename,
                                   frame.f_code.co_firstlineno, qual_cache)

    self = frame.f_locals['self']
    try:
        pname = "(%s)" % self.pathname
    except AttributeError:
        # not every traced object has a pathname attribute
        pname = ""

    # label: class name, per-instance ordinal, and optional pathname
    sname = "%s#%d%s" % (self.__class__.__name__, id2count[id(self)], pname)

    indent = tab * len(stack)
    if memory is not None:
        current_mem = mem_usage()
        last_mem = memory.pop()
        if current_mem != last_mem:
            delta = current_mem - last_mem
            _printer(
                "%s<-- %s (time: %8.5f) (total: %6.3f MB) (diff: %+.0f KB)" %
                (indent, '.'.join((sname, funcname)), time.time() - time0,
                 current_mem, delta * 1024.))

            # add this delta to all callers so when they calculate their own delta, this
            # delta won't be included
            for i in range(len(memory) - 1, -1, -1):
                memory[i] += delta
        else:
            _printer("%s<-- %s (time: %8.5f) (total: %6.3f MB)" %
                     (indent, '.'.join(
                         (sname, funcname)), time.time() - time0, current_mem))
    else:
        _printer("%s<-- %s" % (indent, '.'.join((sname, funcname))))

    if verbose:
        if arg is not None:
            # show the return value; strip object addresses unless requested
            s = "%s     %s" % (indent, arg)
            if not show_ptrs and ' object at ' in s:
                s = addr_regex.sub('', s)
            _printer(s)

    if leaks is not None:
        # report object-count growth since this call was entered
        last_objs = leaks.pop()
        for name, _, delta_objs in objgraph.growth(peak_stats=last_objs):
            _printer("%s   %s %+d" % (indent, name, delta_objs))

    stream.flush()
Example #15
0
    def check_iter_leaks(niter, func, *args, **kwargs):
        """
        Run func niter times and collect info on new objects left over after each iteration.

        Parameters
        ----------
        niter : int
            Number of times to run func.
        func : function
            A function that takes no arguments.
        *args : tuple
            Positional args passed to func.
        **kwargs : dict
            Named args to be passed to func.

        Returns
        -------
        set
            set of tuples of the form (typename, count)
        """
        if niter < 2:
            raise RuntimeError(
                "Must run the function at least twice, but niter={}".format(
                    niter))
        iters = []
        gc.collect()
        start_objs = objgraph.typestats()
        # Compensate for the objects created by making this call itself.
        # Use .get() so any type name absent from typestats cannot raise
        # KeyError (previously only 'frame' was guarded).
        for name in ('frame', 'function', 'builtin_function_or_method', 'cell'):
            start_objs[name] = start_objs.get(name, 0) + 1
        for i in range(niter):
            func(*args, **kwargs)
            gc.collect()
            lst = [(str(o), delta)
                   for o, _, delta in objgraph.growth(peak_stats=start_objs)]
            iters.append(lst)

        # leftovers unique to the final iteration relative to the one before
        set1 = set(iters[-2])
        set2 = set(iters[-1])

        return set2 - set1
Example #16
0
    def _run(self):
        """
        Main worker loop: first drain any messages left on the processing
        list (recovery), then poll the queue until ``self.running`` is False.
        """
        if self.queue.processing_depth() > 0:
            logging.info('enter recovery mode')
            # BUG FIX: processing_depth was previously compared without being
            # called, so the condition tested a bound method instead of the
            # actual depth
            while self.queue.processing_depth() > 0:
                msg = self.queue.dequeue_recovery()
                if msg is None:
                    break

                self.process(msg[1])
            logging.info('exit recovery mode')
        while self.running:
            self.logger.info('polling {}, queue depth: {}, processing depth: {}'.format(self.queue.get_processing_list_nm(),
                                                                                        self.queue.queue_depth(),
                                                                                        self.queue.processing_depth()))
            self.logger.info('mem growth: {}'.format(str(objgraph.growth(limit=10))))

            msg = self.queue.dequeue(lifo=True)
            if msg is None:
                continue
            self.process(msg)
Example #17
0
    async def growth(self, ctx, limit: int = 10):
        """Send a monospaced, column-aligned table of the top *limit* object growths."""
        g = objgraph.growth(limit)
        # column widths: widest class name, most digits in count and in delta
        max_class = max((len(row[0]) for row in g), default=0)
        max_count = max((int(math.log10(row[1])) + 1 for row in g), default=0)
        max_growth = max((int(math.log10(row[2])) + 1 for row in g), default=0)

        gr = "\n".join((
            f"{i[0]: <{max_class}}     {i[1]: >{max_count}}     {i[2]: >+{max_growth+1}}"
            for i in g))
        await ctx.send(f"```\n{gr}\n```")
Example #18
0
                           methods=[str('GET'), str('POST')])
    # route all paths (GET/POST) to the TUID endpoint
    flask_app.add_url_rule(str('/<path:path>'),
                           None,
                           tuid_endpoint,
                           methods=[str('GET'), str('POST')])

    try:
        config = startup.read_settings(filename=os.environ.get('TUID_CONFIG'))
        constants.set(config.constants)
        Log.start(config.debug)

        service = TUIDService(config.tuid)

        # Log memory info while running
        # two growth() calls back to back: the first primes objgraph, the
        # second captures the baseline into initial_growth -- TODO confirm
        initial_growth = {}
        objgraph.growth(peak_stats={})
        objgraph.growth(peak_stats=initial_growth)
        service.statsdaemon.initial_growth = initial_growth

        Log.note("Started TUID Service")
        Log.note("Current free memory: {{mem}} Mb",
                 mem=service.statsdaemon.get_free_memory())
    except BaseException as e:  # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT
        try:
            Log.error(
                "Serious problem with TUID service construction!  Shutdown!",
                cause=e)
        finally:
            Log.stop()
    if config.flask:
Example #19
0
 def _growth(self):
     """Return all object-count deltas since the tracked peak, filtered by _ignore_object_p."""
     return objgraph.growth(
         limit=None, peak_stats=self.peak_stats, filter=self._ignore_object_p)
Example #20
0
def _log_objects(*_):
    """Emit the current objgraph growth report to the critical log."""
    growth_report = objgraph.growth(limit=100)
    _LOGGER.critical("Memory Growth: %s", growth_report)
Example #21
0
def growth():
    """Return every object-count delta relative to the shared peak stats."""
    return objgraph.growth(peak_stats=peak, limit=None)
Example #22
0
    def test_memory_cleanup_with_signal(self):
        """
        LOOKING FOR A MEMORY LEAK THAT HAPPENS ONLY DURING THREADING

        ACTUALLY, THE PARTICULAR LEAK FOUND CAN BE RECREATED WITHOUT THREADS
        BUT IT IS TOO LATE TO CHANGE THIS TEST
        """
        NUM_CYCLES = 100
        gc.collect()
        start_mem = psutil.Process(os.getpid()).memory_info().rss
        Log.note("Start memory {{mem|comma}}", mem=start_mem)

        queue = mo_threads.Queue("", max=1000000)

        def _consumer(please_stop):
            # drain the queue until asked to stop; log ~1 in 1000 samples
            while not please_stop:
                v = queue.pop(till=please_stop)
                if Random.int(1000) == 0:
                    Log.note("got " + v)

        def _producer(t, please_stop=None):
            # push two small strings, pausing briefly between them
            for i in range(2):
                queue.add(str(t) + ":" + str(i))
                Till(seconds=0.01).wait()

        consumer = Thread.run("", _consumer)

        objgraph.growth(limit=None)  # baseline object counts

        no_change = 0
        for g in range(NUM_CYCLES):
            mid_mem = psutil.Process(os.getpid()).memory_info().rss
            Log.note("{{group}} memory {{mem|comma}}", group=g, mem=mid_mem)
            # run 500 producers per cycle, with either stdlib or mo_threads
            if USE_PYTHON_THREADS:
                threads = [
                    threading.Thread(target=_producer, args=(i, ))
                    for i in range(500)
                ]
                for t in threads:
                    t.start()
            else:
                threads = [Thread.run("", _producer, i) for i in range(500)]

            for t in threads:
                t.join()
            del threads

            gc.collect()
            results = objgraph.growth(limit=3)
            if not results:
                no_change += 1
            else:
                if DEBUG_SHOW_BACKREFS:
                    # dump backref graphs for the types that grew
                    for typ, count, delta in results:
                        Log.note("%-*s%9d %+9d\n" % (18, typ, count, delta))
                        obj_list = objgraph.by_type(typ)
                        if obj_list:
                            obj = obj_list[-1]
                            objgraph.show_backrefs(obj, max_depth=10)
                else:
                    Log.note("growth = \n{{results}}", results=results)

        consumer.please_stop.go()
        consumer.join()

        self.assertGreater(
            no_change, NUM_CYCLES / 2
        )  # IF MOST CYCLES DO NOT HAVE MORE OBJECTS, WE ASSUME THERE IS NO LEAK
Example #23
0
    def objgraph_growth(self, args):
        """
        This function can be used to display the new objects for a running fuglu instance which can
        help finding memory leaks. For now this works best for fuglu with thread backend.

        Fuglu has to be running as a daemon.
        "fuglu_control" is used to communicate with the fuglu instance.

        Examples:
            (1) show fuglu objects after new fuglu start
            ---------------------------------------------

            $ fuglu_control objgraph_growth '{"must_contain": ["fuglu"], "nresults": 5}'

            --------------
            Object growth:
            --------------

            params:
            * nresults: 5
            * lowercase: True
            * dont_startwith:
            * must_startwith:
            * dont_contain:
            * must_contain: fuglu

            fuglu.extensions.filearchives.classproperty        6        +6
            fuglu.connectors.smtpconnector.SMTPServer          2        +2
            fuglu.threadpool.Worker                            2        +2
            fuglu.addrcheck.Default                            1        +1
            fuglu.addrcheck.Addrcheck                          1        +1

            (2) show new fuglu objects after fuglu processed a message
            ------------------------------------------------------------

            $ fuglu_control objgraph_growth '{"must_contain": ["fuglu"], "nresults": 5}'
            --------------
            Object growth:
            --------------

            params:
            * nresults: 5
            * lowercase: True
            * dont_startwith:
            * must_startwith:
            * dont_contain:
            * must_contain: fuglu

            fuglu.extensions.filetype.MIME_types_base        2        +1
            fuglu.plugins.attachment.RulesCache              1        +1
            fuglu.shared.SuspectFilter                       1        +1

        """
        res = u"--------------\n" \
              + u"Object growth:\n" \
              + u"--------------\n\n"

        if OBJGRAPH_EXTENSION_ENABLED:
            # default query parameters; overridden by keys present in args
            defaults = {
                "nresults": 20,
                "lowercase": True,
                "dont_startwith": ["builtins", "_"],
                "dont_contain": [],
                "must_startwith": [],
                "must_contain": []
            }

            if not args:
                args = {}

            # fill filter lists and other vars from dict
            res, inputdict = ControlSession.prepare_objectgraph_list_from_dict(
                args, res, defaults)

            finalfilter = None
            result = None
            try:

                # build filter
                finalfilter = ControlSession.buildfilter(
                    dont_contain=inputdict["dont_contain"],
                    dont_startwith=inputdict["dont_startwith"],
                    must_contain=inputdict["must_contain"],
                    must_startwith=inputdict["must_startwith"],
                    lowercase=inputdict["lowercase"])

                result = objgraph.growth(inputdict["nresults"],
                                         shortnames=False,
                                         filter=finalfilter)

                if result:
                    # left-align names in a column as wide as the longest one
                    width = max(len(name) for name, _, _ in result)
                    for name, count, delta in result:
                        res += u'%-*s%9d %+9d\n' % (width, name, count, delta)
                else:
                    res += u'no growth captured'
            except Exception as e:
                res += force_uString(e)
                self.logger.exception(e)
            finally:
                # presumably dropped so these locals don't show up in later
                # growth reports -- confirm
                if finalfilter:
                    del finalfilter
                if result:
                    del result
        else:
            res = u"please install module 'objgraph'"
        return res
Example #24
0
    # route root and all other paths (GET/POST) to the TUID endpoint
    flask_app.add_url_rule(str('/'), None, tuid_endpoint, defaults={'path': ''}, methods=[str('GET'), str('POST')])
    flask_app.add_url_rule(str('/<path:path>'), None, tuid_endpoint, methods=[str('GET'), str('POST')])


    try:
        config = startup.read_settings(
            filename=os.environ.get('TUID_CONFIG')
        )
        constants.set(config.constants)
        Log.start(config.debug)

        service = TUIDService(config.tuid)

        # Log memory info while running
        # two growth() calls back to back: the first primes objgraph, the
        # second captures the baseline into initial_growth -- TODO confirm
        initial_growth = {}
        objgraph.growth(peak_stats={})
        objgraph.growth(peak_stats=initial_growth)
        service.statsdaemon.initial_growth = initial_growth

        Log.note("Started TUID Service")
        Log.note("Current free memory: {{mem}} Mb", mem=service.statsdaemon.get_free_memory())
    except BaseException as e:  # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT
        try:
            Log.error("Serious problem with TUID service construction!  Shutdown!", cause=e)
        finally:
            Log.stop()

    if config.flask:
        if config.flask.port and config.args.process_num:
            config.flask.port += config.args.process_num
        Log.note("Running Flask...")
Example #25
0
 def _growth(self):
     """Return all object-count deltas since the tracked peak, filtered by _include_object_p."""
     return objgraph.growth(
         limit=None, peak_stats=self.peak_stats, filter=self._include_object_p)