コード例 #1
0
def profile(sort):
    """Generator-based context manager that profiles the wrapped block.

    Intended to be used via ``contextlib.contextmanager`` (or driven manually
    with ``next()``): code executed between entry and exit is profiled with
    cProfile and a stats report sorted by *sort* is printed on exit.

    :param sort: pstats sort column name, or a falsy value to disable
        profiling entirely (the block then runs unprofiled).
    :raises RuntimeError: if *sort* is not a recognized sort column.
    """
    if not sort:
        # Profiling disabled: just run the wrapped block.
        yield
        return
    sort_columns = (
        "calls",
        "cumtime",
        "file",
        "ncalls",
        "pcalls",
        "line",
        "name",
        "nfl",
        "stdname",
        "time",
        "tottime",
    )
    if sort not in sort_columns:
        raise RuntimeError(
            "{} not a valid sort column. Please use one of {}".format(
                sort, ", ".join(sort_columns)))
    try:
        from cProfile import Profile  # type: ignore
    except ImportError:
        from Profile import Profile  # type: ignore
    prof = Profile()
    prof.enable()
    # BUG FIX: without try/finally, an exception raised inside the profiled
    # block would leave the profiler enabled and never print the report.
    # create_stats() implicitly disables the profiler before snapshotting.
    try:
        yield
    finally:
        prof.create_stats()
        prof.print_stats(sort=sort)
コード例 #2
0
ファイル: profilers.py プロジェクト: fourshade/spectra_lexer
 def run(self, func, *args) -> None:
     """ Profile a single call to *func* and keep the resulting profiler. """
     profiler = Profile()
     profiler.enable()
     # Execute the target while the profiler is collecting.
     func(*args)
     profiler.disable()
     # Snapshot the raw timings into .stats before storing the profiler.
     profiler.create_stats()
     self._stats.append(profiler)
コード例 #3
0
 def _call(self, *args, **kw):
     """Run the wrapped call under cProfile and report top cumulative entries."""
     raw = RawProfile()

     def _run():
         # Repeat the measured call with GC disabled so collector pauses do
         # not skew the timings; stash the last result on the closure itself.
         with DisableGc():
             for _ in range(self._iterations):
                 _run.result = super(Profile, self)._call(*args, **kw)

     raw.runctx('_run()', {}, {'_run': _run})
     raw.create_stats()
     # sort_stats() returns the Stats object, so the calls can be chained.
     stats = Stats(raw).sort_stats('cumulative')
     # Truncate the report to the configured number of lines.
     del stats.fcn_list[self._max_lines:]
     self._reporter(stats)
     return _run.result
コード例 #4
0
    def _call(self, *args, **kw):
        """Profile the wrapped call and hand a truncated Stats to the reporter."""
        raw = RawProfile()

        def _run():
            # Loop the real call with garbage collection switched off so GC
            # pauses do not pollute the profile; keep the last return value.
            with DisableGc():
                for _ in range(self._iterations):
                    _run.result = super(Profile, self)._call(*args, **kw)

        raw.runctx('_run()', {}, {'_run': _run})
        raw.create_stats()
        # pstats.Stats.sort_stats returns self, allowing the chained form.
        stats = Stats(raw).sort_stats('cumulative')
        # Keep only the first _max_lines entries of the report.
        del stats.fcn_list[self._max_lines:]
        self._reporter(stats)
        return _run.result
コード例 #5
0
ファイル: middleware.py プロジェクト: temaput/practica.ru
class ProfileMiddleware(object):
    """
    Profiles any view with cProfile and shows the report in the browser.

    Activate it per request by appending ``?prof`` (or ``&prof=``) to the
    query string, e.g.  http://yoursite.com/yourview/?prof — the profiling
    output then replaces the response body.

    WARNING: It uses cProfile

    """

    def is_on(self, request):
        # Profiling is requested via the 'prof' query-string key.
        return 'prof' in request.GET

    def process_request(self, request):
        if self.is_on(request):
            self.prof = Profile()

    def process_view(self, request, callback, callback_args, callback_kwargs):
        if not self.is_on(request):
            return None
        log.debug("%s called", callback)
        # The view callback expects the request as its first positional arg.
        return self.prof.runcall(
            callback, request, *callback_args, **callback_kwargs)

    def process_response(self, request, response):
        if not self.is_on(request):
            return response
        self.prof.create_stats()
        out = StringIO.StringIO()
        stats = Stats(self.prof, stream=out)
        stats.sort_stats(*sort_tuple)
        stats.print_stats()
        stats_str = out.getvalue()
        # Replace the page body with the formatted profiling report.
        if response and response.content and stats_str:
            response.content = "<pre>" + stats_str + "</pre>"
        return response
コード例 #6
0
ファイル: middleware.py プロジェクト: temaput/practica.ru
class ProfileMiddleware(object):
    """
    Profiles any view with cProfile and renders the report in the browser.

    Enable per request by adding ``?prof`` (or ``&prof=``) to the URL's
    query string, e.g.  http://yoursite.com/yourview/?prof — the report
    then replaces the response content.

    WARNING: It uses cProfile

    """

    def is_on(self, request):
        # A 'prof' key anywhere in the query string turns profiling on.
        return 'prof' in request.GET

    def process_request(self, request):
        if self.is_on(request):
            self.prof = Profile()

    def process_view(self, request, callback, callback_args, callback_kwargs):
        if not self.is_on(request):
            return None
        log.debug("%s called", callback)
        # Views take the request as their first positional argument.
        return self.prof.runcall(
            callback, request, *callback_args, **callback_kwargs)

    def process_response(self, request, response):
        if not self.is_on(request):
            return response
        self.prof.create_stats()
        out = StringIO.StringIO()
        stats = Stats(self.prof, stream=out)
        stats.sort_stats(*sort_tuple)
        stats.print_stats()
        stats_str = out.getvalue()
        # Swap the page body for the formatted profiling output.
        if response and response.content and stats_str:
            response.content = "<pre>" + stats_str + "</pre>"
        return response
コード例 #7
0
ファイル: run_info_data.py プロジェクト: wholtz/BASTet
class run_info_dict(dict):
    """
    Simple dictionary class for collecting runtime information

    The typical use is as follows:

    >> my_run_info = run_info_dict()
    >> my_run_info(my_function)(my_parameters)

    With this, all runtime information is automatically collected in my_run_info.
    We can enable time-and-usage and memory profiling simply by calling
    enable_profile_time_and_usage(...) or enable_profile_memory(...), respectively,
    before we run our function.

    We can also use the data structure directly and control the population ourselves,
    however, memory profiling is not supported by default in this case but we need to
    set and run the memory profiler ourselves, since memory_profiler expects that it
    can wrap the function

    """
    # strptime format used for the recorded start_time/end_time strings
    DEFAULT_TIME_FORMAT = '%Y-%m-%d %H:%M:%S.%f'

    def __init__(self, *args, **kwargs):
        """Initialize the dict and the profiling/MPI bookkeeping attributes."""
        super(run_info_dict, self).__init__(*args, **kwargs)
        self.__profile_time_and_usage = False  # cProfile-based profiling requested?
        self.__profile_memory = False          # memory_profiler-based profiling requested?
        self.__time_and_use_profiler = None    # active cProfile.Profile (or None)
        self.__memory_profiler = None          # active memory_profiler.LineProfiler (or None)
        self.mpi_comm = mpi_helper.get_comm_world()
        self.mpi_root = 0
        self.gather_data = True                # gather results across MPI ranks after each run

    def __call__(self, func):
        """
        Wrap *func* so that executing it records runtime information in self.

        :param func: The function to be wrapped for execution
        :return: A wrapped function for which we track the runtime information in self
        """
        from functools import wraps

        @wraps(func)
        def wrapper(*args, **kwargs):
            # Pre-execute recording
            self.clear()  # Clear all runtime information data and profilers
            self.record_preexecute()  # Record system provenance and pre-execution data
            start_time = time.time()  # Start the execution timer
            # Execute the function (with or without memory profiling)
            if not self.get_profile_memory():
                result = func(*args, **kwargs)
            else:
                self.__memory_profiler = memory_profiler.LineProfiler()
                result = self.__memory_profiler(func)(*args, **kwargs)
            # Post-execute recording
            execution_time = time.time() - start_time
            self.record_postexecute(execution_time=execution_time)
            self.clean_up()  # Clean up empty data
            if self.gather_data:
                self.gather()  # Gather the data from all MPI ranks
            # Return the result
            return result

        # Return our wrapped function
        return wrapper

    def clear(self):
        """
        Clear the dictionary and other internal parameters

        Side Effects

            * Remove all key/value pairs from the dict
            * Set self.__time_and_use_profiler to None
            * Set self.__memory_profiler to None
            * Set self.__profile_memory to False if invalid (i.e, if set to True but memory profiling is unavailable)
            * Set self.__profile_time_and_usage to False if invalid (i.e., if set to True but profiling is unavailable)
        """
        # Make sure profiling settings are valid. BUG FIX: these two calls
        # were previously crossed, disabling time-and-usage profiling when
        # the *memory* profiler was missing and vice versa.
        if self.get_profile_memory() and not PROFILE_MEMORY_AVAILABLE:
            self.enable_profile_memory(False)
        if self.get_profile_time_and_usage() and not PROFILE_AVAILABLE:
            self.enable_profile_time_and_usage(False)
        # Remove old profilers
        self.__time_and_use_profiler = None
        self.__memory_profiler = None
        # Clear all data from the dictionary
        return super(run_info_dict, self).clear()

    def enable_profile_memory(self, enable=True):
        """
        Enable/disable profiling of memory usage

        :param enable: boolean to enable (True) or disable (False) memory profiling

        """
        if PROFILE_MEMORY_AVAILABLE:
            # Log only on actual state transitions
            if not enable and self.__profile_memory:
                log_helper.debug(__name__,
                                 "Disabled memory profiling. ",
                                 root=self.mpi_root,
                                 comm=self.mpi_comm)
            if enable and not self.__profile_memory:
                log_helper.debug(__name__,
                                 "Enabled memory profiling. ",
                                 root=self.mpi_root,
                                 comm=self.mpi_comm)
            self.__profile_memory = enable
        else:
            # Backing package missing: memory profiling can never be on
            self.__profile_memory = False
            if enable:
                log_helper.warning(
                    __name__, 'Profiling of memory usage not available.' +
                    ' Missing memory_profiler or StringIO package')

    def enable_profile_time_and_usage(self, enable=True):
        """
        Enable/disable time and usage profiling

        :param enable: boolean to enable (True) or disable (False) time and usage profiling

        """
        if PROFILE_AVAILABLE:
            # Log only on actual state transitions
            if not enable and self.__profile_time_and_usage:
                log_helper.debug(__name__,
                                 "Disabled time and usage profiling. ",
                                 root=self.mpi_root,
                                 comm=self.mpi_comm)
            if enable and not self.__profile_time_and_usage:
                log_helper.debug(__name__,
                                 "Enabled time and usage profiling. ",
                                 root=self.mpi_root,
                                 comm=self.mpi_comm)
            self.__profile_time_and_usage = enable
        else:
            # Backing packages missing: profiling can never be on
            self.__profile_time_and_usage = False
            if enable:
                log_helper.warning(
                    __name__, 'Profiling of time and usage not available.' +
                    ' Missing profile and/or pstats package')

    def get_profile_time_and_usage(self):
        """
        Check whether time and usage profiling is enabled

        :return: Boolean indicating whether time and usage profiling is enabled
        """
        return self.__profile_time_and_usage

    def get_profile_memory(self):
        """
        Check whether profiling of memory usage is enabled

        :return: Boolean indicating whether memory profiling is enabled
        """
        return self.__profile_memory

    def record_preexecute(self):
        """
        Record basic runtime information in this dict before the execution is started.

        Function used to record runtime information prior to executing the process we want to track, e.g.,
        the `execute_analysis(...)` of a standard analysis.

        The function may be overwritten in child classes to add recording of
        additional runtime information. All runtime data should be recorded in the
        main dict (i.e, self). This ensures in the case of standard analysis that
        the data is stored in the HDF5 file. Other data should be stored in separate
        variables that we may add to the object.

        When overwriting the function we should typically call super(...,self).record_preexecute()
        last in the custom version to ensure that the start_time is properly recorded right before
        the execution of the analysis.

        """
        log_helper.debug(__name__,
                         'Recording pre-execution runtime data',
                         root=self.mpi_root,
                         comm=self.mpi_comm)
        # Record basic runtime environment information using the platform module
        try:
            self['architecture'] = unicode(platform.architecture())
            self['java_ver'] = unicode(platform.java_ver())
            self['libc_ver'] = unicode(platform.libc_ver())
            self['linux_distribution'] = unicode(platform.linux_distribution())
            self['mac_ver'] = unicode(platform.mac_ver())
            self['machine'] = unicode(platform.machine())
            self['node'] = unicode(platform.node())
            self['platform'] = unicode(platform.platform())
            self['processor'] = unicode(platform.processor())
            self['python_branch'] = unicode(platform.python_branch())
            self['python_build'] = unicode(platform.python_build())
            self['python_compiler'] = unicode(platform.python_compiler())
            self['python_implementation'] = unicode(
                platform.python_implementation())
            self['python_revision'] = unicode(platform.python_revision())
            self['python_version'] = unicode(platform.python_version())
            self['release'] = unicode(platform.release())
            self['system'] = unicode(platform.system())
            self['uname'] = unicode(platform.uname())
            self['version'] = unicode(platform.version())
            self['win32_ver'] = unicode(platform.win32_ver())
        except Exception:
            # Provenance recording is best effort; never fail the run for it
            warnings.warn(
                "WARNING: Recording of platform provenance failed: " +
                str(sys.exc_info()))

        # Attempt to record the svn version information
        try:
            import subprocess
            # NOTE(review): check_output returns bytes on Python 3, where
            # rstrip('\n') would fail — this assumes Python 2 str; verify.
            self['svn_ver'] = subprocess.check_output('svnversion').rstrip(
                '\n')
        except ImportError:
            log_helper.warning(
                __name__,
                'Recording of svn version not possible. subprocess not installed',
                root=self.mpi_root,
                comm=self.mpi_comm)
        except Exception:
            warnings.warn("Recording of svn version information failed: " +
                          str(sys.exc_info()))

        # Attempt to record software library version
        try:
            import numpy as np
            self['numpy_version_full_version'] = unicode(
                np.version.full_version)
            self['numpy_version_release'] = unicode(np.version.release)
            self['numpy_version_git_revision'] = unicode(
                np.version.git_revision)
        except ImportError:
            log_helper.warning(__name__,
                               'Recording of numpy version not possible.',
                               root=self.mpi_root,
                               comm=self.mpi_comm)

        # Attempt to record psutil data
        try:
            import psutil
            self['logical_cpu_count'] = unicode(psutil.cpu_count())
            self['cpu_count'] = unicode(psutil.cpu_count(logical=False))
            process = psutil.Process()
            self['open_files'] = unicode(process.open_files())
            self['memory_info_before'] = unicode(process.memory_info())
        except ImportError:
            log_helper.warning(
                __name__,
                'psutil not installed. Recording of part of runtime information not possible',
                root=self.mpi_root,
                comm=self.mpi_comm)
        except Exception:
            warnings.warn(
                "Recording of psutil-based runtime information failed: " +
                str(sys.exc_info()))

        # Record the start time for the analysis
        self['start_time'] = unicode(datetime.datetime.now())

        # Enable time and usage profiling if requested; done last so the
        # provenance recording above is not included in the profile
        if self.__profile_time_and_usage:
            self.__time_and_use_profiler = Profile()
            self.__time_and_use_profiler.enable()

    def record_postexecute(self, execution_time=None):
        """
        Function used to record runtime information after the task we want to track is completed, e.g.
        the `execute_analysis(...)` function of a standard analysis.

        The function may be overwritten in child classes to add recording of
        additional runtime information.

        When overwriting the function we should call super(...,self).record_postexecute(execution_time)
        in the custom version to ensure that the execution and end_time are properly
        recorded.

        :param execution_time: The total time it took to execute the analysis. May be None, in which
            case the function will attempt to compute the execution time based on the start_time
            (if available) and the current time.

        """
        log_helper.debug(__name__,
                         'Recording post-execution runtime data',
                         root=self.mpi_root,
                         comm=self.mpi_comm)
        # Finalize recording of post execution provenance
        self['end_time'] = unicode(datetime.datetime.now())
        if execution_time is not None:
            self['execution_time'] = unicode(execution_time)
        elif 'start_time' in self:
            # Fall back to computing the duration from the recorded timestamps
            start_time = run_info_dict.string_to_time(self['start_time'])
            stop_time = run_info_dict.string_to_time(self['end_time'])
            self['execution_time'] = unicode(
                stop_time - start_time
            )  # TODO: This only gives execution time in full seconds right now
        else:
            self['execution_time'] = None
        # Attempt to record psutil data
        try:
            import psutil
            process = psutil.Process()
            self['memory_info_after'] = unicode(process.memory_info())
        except ImportError:
            log_helper.warning(
                __name__,
                'psutil not installed. Recording of part of runtime information not possible',
                root=self.mpi_root,
                comm=self.mpi_comm)
        except Exception:
            warnings.warn(
                "Recording of psutil-based runtime information failed: " +
                str(sys.exc_info()))

        # Record the time and use profiling data if possible
        if self.__time_and_use_profiler is not None:
            self.__time_and_use_profiler.disable()
            self.__time_and_use_profiler.create_stats()
            self['profile'] = unicode(self.__time_and_use_profiler.stats)
            # Save the summary statistics for the profiling data
            stats_io = StringIO.StringIO()
            profiler_stats = pstats.Stats(
                self.__time_and_use_profiler,
                stream=stats_io).sort_stats('cumulative')
            profiler_stats.print_stats()
            self['profile_stats'] = stats_io.getvalue()

        # Record the memory profiling data if possible
        if self.__memory_profiler is not None and self.get_profile_memory():
            log_helper.debug(__name__,
                             'Recording memory profiling data',
                             root=self.mpi_root,
                             comm=self.mpi_comm)
            mem_stats_io = StringIO.StringIO()
            memory_profiler.show_results(self.__memory_profiler,
                                         stream=mem_stats_io)
            self['profile_mem'] = unicode(self.__memory_profiler.code_map)
            self['profile_mem_stats'] = mem_stats_io.getvalue()

    def clean_up(self):
        """
        Clean up the runinfo object. In particular remove empty keys that
        either recorded None or recorded just an empty string.

        This function may be overwritten to also do clean-up needed
        due to additional custom runtime instrumentation.

        When overwriting this function we should call super(..., self).clean_up()
        at the end of the function to ensure that the runinfo dictionary
        is clean, i.e., does not contain any empty entries.

        """
        log_helper.debug(__name__,
                         'Clean up runtime data',
                         root=self.mpi_root,
                         comm=self.mpi_comm)
        # Remove empty items from the run_info dict.
        # BUG FIX: iterate over a snapshot since we mutate the dict while
        # looping (popping during items() iteration raises on Python 3).
        for ri_key, ri_value in list(self.items()):
            try:
                if ri_value is None or len(ri_value) == 0:
                    self.pop(ri_key)
            except Exception:
                # Values without len() (e.g. numbers) are simply kept
                pass

    def gather(self):
        """
        Simple helper function to gather the runtime information---that has been collected on
        multiple processes when running using MPI---on a single root process

        :return: If we have more than one processes then this function returns a
            dictionary with the same keys as usual for the run_info but the
            values are now lists with one entry per mpi processes. If we only have
            a single process, then the run_info object will be returned without
            changes. NOTE: Similar to mpi gather, the function only collects
            information on the root. All other processes will return just their
            own private runtime information.

        """
        if mpi_helper.MPI_AVAILABLE:
            if self.mpi_comm.Get_size() > 1:
                log_helper.debug(__name__,
                                 'Gather runtime data from parallel tasks',
                                 root=self.mpi_root,
                                 comm=self.mpi_comm)
                self['mpi_rank'] = self.mpi_comm.Get_rank()
                run_data = self.mpi_comm.gather(self, self.mpi_root)
                if self.mpi_comm.Get_rank() == self.mpi_root:
                    # Merge the per-rank dicts into key -> list-of-values
                    merged_run_data = {}
                    for run_dict in run_data:
                        for key in run_dict:
                            try:
                                merged_run_data[key].append(run_dict[key])
                            except KeyError:
                                merged_run_data[key] = [run_dict[key]]
                    return merged_run_data
        return self

    def get_profile_stats_object(self, consolidate=True, stream=None):
        """
        Based on the execution profile of the execute_analysis(..) function get
        ``pstats.Stats`` object to help with the interpretation of the data.

        :param consolidate: Boolean flag indicating whether multiple stats (e.g., from multiple cores)
            should be consolidated into a single stats object. Default is True.
        :param stream: The optional stream parameter to be used for the pstats.Stats object.

        :return: A single pstats.Stats object if consolidate is True. Otherwise the function
            returns a list of pstats.Stats objects, one per recorded statistic. None is returned
            in case that the stats objects cannot be created or no profiling data is available.
        """
        from ast import literal_eval
        if stream is None:
            import sys
            stream = sys.stdout

        if 'profile' in self:
            # Parse the profile data (which is stored as a string) or in the case of MPI we may
            # have a list of strings from each MPI processes
            if isinstance(self['profile'], list):
                # Convert the profile from each MPI process independently
                profile_data = [
                    literal_eval(profile) for profile in self['profile']
                ]
            else:
                # If we only have a single stat, then convert our data to a list, so that we can
                # handle the single and multiple statistics case in the same way in the remainder of this function
                profile_data = [
                    literal_eval(self['profile']),
                ]

            # Create a list of profile objects that the pstats.Stats class understands
            profile_dummies = []
            for profile_i in profile_data:
                # Here we are creating for each statistic a dummy class on the fly that holds our
                # profile_data in the stats attributes and has an empty create_stats function.
                # This trick allows us to create a pstats.Stats object without having to write our
                # stats data to file or having to create a cProfile.Profile object first. Writing
                # the data to file involves overhead and is ugly and creating a profiler and
                # overwriting its stats is potentially problematic
                profile_dummies.append(
                    type('Profile', (object, ), {
                        'stats': profile_i,
                        'create_stats': lambda x: None
                    })())

            # Create the statistics object and return it
            if consolidate:
                profile_stats = pstats.Stats(*profile_dummies, stream=stream)
                return profile_stats
            else:
                profile_stats = [
                    pstats.Stats(profile_i, stream=stream)
                    for profile_i in profile_dummies
                ]
                return profile_stats
        else:
            return None

    @staticmethod
    def string_to_structime(time_string, time_format=None):
        """
        Convert a time string to a time.struct_time using time.strptime

        :param time_string: String with the time, e.g, with the start time of a program.
        :param time_format: The time format to be used or None in which case run_info_dict.DEFAULT_TIME_FORMAT
            will be used.

        """
        return time.strptime(
            time_string, time_format
            if time_format is not None else run_info_dict.DEFAULT_TIME_FORMAT)

    @staticmethod
    def string_to_time(time_string, time_format=None):
        """
        Convert a time string to local time object using time.mktime.

        :param time_string: String with the time, e.g, with the start time of a program.
        :param time_format: The time format to be used or None in which case run_info_dict.DEFAULT_TIME_FORMAT
            will be used.

        """
        return time.mktime(
            time.strptime(
                time_string, time_format if time_format is not None else
                run_info_dict.DEFAULT_TIME_FORMAT))
コード例 #8
0
            'saveDB': 'all_dv3',
            'draw': None,
            'saveFile': setting.CONFIG.SAVE_PATH,
            'saveSignal': 'stock_signal_dv3'
        }, filter)


#########################################################
### This file is executed once a day; it only backtests the output result set
### of runEveryWeek and refreshes the latest backtest results
if __name__ == '__main__':
    # Backtest, among all stock instruments, those that produced trades,
    # belong to the CSI 300 index, and meet the dividend-year requirement.
    # Meaning of the pstats columns printed below:
    # ncalls: number of times the corresponding code/function was called;
    # tottime: total time spent executing the corresponding code/function itself
    #   (note: excludes time spent in other code/functions it calls);
    # percall: the ratio of the two above, i.e. tottime / ncalls;
    # cumtime: total time spent executing the corresponding code/function,
    #   including time spent in the other code/functions it calls;
    # cumtime percall: the average, i.e. cumtime divided by ncalls.

    prof = Profile()
    prof.enable()
    RunHS300AndDVYears()
    prof.create_stats()

    p = pstats.Stats(prof)
    # p.print_stats()
    # p.sort_stats('calls').print_stats(20)
    p.sort_stats('cumulative').print_stats(20)
    # p.print_callees()
    # cProfile.run('RunHS300AndDVYears()')
    # RunHS300AndDVYears()
    #ch = msvcrt.getch()
コード例 #9
0
class ResourceMeasure(object):
    """
    Collect and report resource-usage information for the running process.

    Spawns the external ``resource_measure`` shell script to sample system
    statistics at a fixed interval, optionally profiles the process with
    cProfile, and records per-section wall-clock timings that are written
    to a TSV file by :meth:`finish`.
    """

    CMD_BASH = "/bin/bash"
    CMD_MEASURE = os.path.join(HERE, "resource_measure")

    # Singleton state and default configuration; tune via config() BEFORE
    # the first get_instance() call — later changes have no effect.
    _instance = None
    _interval = 5       # sampling interval in seconds for the measure script
    _profiling = False  # whether to also run cProfile on this process
    # NOTE: evaluated once at class-definition (import) time, so all default
    # instances in one process share the same timestamped output directory.
    _outdir = os.path.join(
        HERE,
        "measure_result/{}".format(datetime.now().strftime("%Y%m%d.%H%M%S")))

    @classmethod
    def get_instance(cls):
        """ Return the singleton instance, creating it on first use. """
        if cls._instance is None:
            cls._instance = ResourceMeasure(cls._interval, cls._outdir,
                                            cls._profiling)
        return cls._instance

    @classmethod
    def config(cls, interval=None, profiling=None, outdir=None):
        """ Set the configuration values used when the singleton is created. """
        if interval is not None:
            cls._interval = interval

        if outdir is not None:
            cls._outdir = outdir

        if profiling is not None:
            cls._profiling = profiling

    @classmethod
    @contextmanager
    def cls_measure(cls, title):
        """ Run a measurement (context manager form). Not implemented yet. """
        raise NotImplementedError()

    @classmethod
    def cls_measured(cls, title=None):
        # Decorator form of the class-level measurement API; not implemented.
        raise NotImplementedError()

    def __init__(self, interval, outdir, profiling):
        # :param interval: sampling interval (seconds) for the measure script
        # :param outdir: directory where all result files are written
        # :param profiling: if True, also profile this process with cProfile
        self.interval = interval
        self.outdir = outdir
        self.profiling = profiling
        self.profile = Profile() if self.profiling else None
        # self.iostat_filepath = os.path.join(self.outdir, "iostat.txt")
        # self.vmstat_filepath = os.path.join(self.outdir, "vmstat.txt")
        # self.ndstat_filepath = os.path.join(self.outdir, "ndstat.txt")
        # self.free_filepath = os.path.join(self.outdir, "free.txt")
        # self.ps_filepath = os.path.join(self.outdir, "ps.txt")
        self.section_filepath = os.path.join(self.outdir, "section.tsv")
        self.profile_filepath = os.path.join(self.outdir, "profile.prof")
        self.sections = []  # one (title, start, end, seconds) tuple per measured section
        self.measure_start = time.time()  # measurement start time

        # Launch the external measurement script, pointing it at our PID
        self.proc = Popen([
            self.CMD_BASH, self.CMD_MEASURE,
            str(os.getpid()),
            str(self.interval), self.outdir
        ])
        # presumably gives the script time to start sampling — TODO confirm
        time.sleep(1)

        # Start profiling
        if self.profiling:
            self.profile.enable()

    @contextmanager
    def measure(self, title):
        """ Run a measurement (context manager form): time the with-block
        and record it as a section under *title*. """
        start = datetime.now()
        yield
        end = datetime.now()
        seconds = (end - start).total_seconds()
        self.sections.append((title, start, end, seconds))

    def measured(self, title=None):
        """ Run a measurement (decorator form): time each call of the
        decorated function and record it as a section. """
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                start = datetime.now()
                ret = func(*args, **kwargs)
                end = datetime.now()
                seconds = (end - start).total_seconds()
                # NOTE(review): func.func_name is Python 2 only; on Python 3
                # this would need func.__name__ — confirm target runtime.
                self.sections.append(
                    (func.func_name if title is None else title, start, end,
                     seconds))
                return ret

            return wrapper

        return decorator

    def finish(self):
        """ Stop measuring: write the section file, dump profiling data,
        and terminate the external measurement script. """
        self.output_section_file()

        # Stop profiling and persist the stats
        if self.profiling:
            self.profile.create_stats()
            self.profile.dump_stats(self.profile_filepath)

        # Stop the measurement script
        self.proc.terminate()
        self.proc.wait()

    def __del__(self):
        # NOTE(review): finish() is not idempotent (rewrites files, signals
        # the child again) and may run during interpreter shutdown or after
        # an explicit finish() call — verify this double-invocation is safe.
        self.finish()

    def output_section_file(self):
        """ Write the per-section timing summary as tab-separated lines. """
        tform = "%Y/%m/%d %H:%M:%S"
        with open(self.section_filepath, "w") as fout:
            for title, start, end, seconds in self.sections:
                print("\t".join([
                    title,
                    start.strftime(tform),
                    end.strftime(tform),
                    str(seconds)
                ]),
                      file=fout)
コード例 #10
0
class frame(IFrame):
    # Registry of every frame created in this process; consumed by the
    # interactive console. NOTE(review): entries are never discarded, so
    # frames are kept alive for the lifetime of the process.
    framelist = set()

    def __init__(self,
                 address="http://127.0.0.1:12000/",
                 time_step=500,
                 instrument=False,
                 profiling=False,
                 wire_format="cbor",
                 compress=False):
        """Set up per-frame state for syncing a local dataframe with a server.

        address -- base URL of the spacetime server; a trailing '/' is
            appended when missing.
        time_step -- target loop period in milliseconds (stored in seconds).
        instrument -- when True, collect per-step bytes-sent/received counters.
        profiling -- when True, run the main loop under cProfile and dump
            stats on exit.
        wire_format -- default serialization format used for each host.
        compress -- when True, gzip-compress pushed updates.
        """
        frame.framelist.add(self)
        self.thread = None
        self.__app = None
        self.__appname = ""
        self.__host_typemap = {}
        self.__host_wire_format = {}
        self.__typemap = {}
        self.__name2type = {}
        self.object_store = dataframe()
        self.object_store.start_recording = True
        if not address.endswith('/'):
            address += '/'
        self.__address = address
        self.__default_wire_format = wire_format
        self.__compress = compress
        # Convert the millisecond argument to seconds once, up front.
        self.__time_step = (float(time_step) / 1000)
        self.__new = {}
        self.__mod = {}
        self.__del = {}
        self.__observed_types = set()
        self.__observed_types_new = set()
        self.__observed_types_mod = set()
        self.__curtime = time.time()
        self.__curstep = 0
        self.__start_time = time.strftime("%Y-%m-%d_%H-%M-%S")
        self.__instrumented = instrument
        self.__profiling = profiling
        self.__sessions = {}
        self.__host_to_push_groupkey = {}
        if instrument:
            self._instruments = {}
            self._instrument_headers = []
            self._instrument_headers.append('bytes sent')
            self._instrument_headers.append('bytes received')
    def __register_app(self, app):
        """Register the attached app with every host it declared.

        Builds the per-host type map from the app's declaration map, announces
        the app (typemap + wire format + app id) to each host over HTTP PUT,
        and primes the local object store with every declared type.

        `app` is unused; state is read from self.__app (kept for interface
        compatibility). Returns True on success, False when any host rejects
        the registration or cannot be reached.
        """
        self.logger = self.__setup_logger("spacetime@" + self.__appname)
        self.__host_typemap = {}
        for address, tpmap in self.__app.__declaration_map__.items():
            if address == "default":
                address = self.__address
            fulladdress = address + self.__appname
            if fulladdress not in self.__host_typemap:
                self.__host_typemap[fulladdress] = tpmap
            else:
                # Same host declared twice: merge the mode -> type sets.
                for declaration in tpmap:
                    self.__host_typemap[fulladdress].setdefault(
                        declaration, set()).update(set(tpmap[declaration]))

        self.__default_wire_format = (
            self.__app.__special_wire_format__["default"]
            if "default" in self.__app.__special_wire_format__ else
            self.__default_wire_format)
        for host in self.__host_typemap:
            self.__host_wire_format[host] = (
                self.__app.__special_wire_format__[host]
                if host in self.__app.__special_wire_format__ else
                self.__default_wire_format)
        all_types = set()
        for host in self.__host_typemap:
            wire_format = self.__host_wire_format[host]
            jobj = dict([(k, [tp.__realname__ for tp in v])
                         for k, v in self.__host_typemap[host].items()])
            producing, getting, gettingsetting, deleting, setting, tracking = (
                self.__host_typemap[host].setdefault(Modes.Producing, set()),
                self.__host_typemap[host].setdefault(Modes.Getter, set()),
                self.__host_typemap[host].setdefault(Modes.GetterSetter,
                                                     set()),
                self.__host_typemap[host].setdefault(Modes.Deleter, set()),
                self.__host_typemap[host].setdefault(Modes.Setter, set()),
                self.__host_typemap[host].setdefault(Modes.Tracker, set()))
            self.__typemap.setdefault(Modes.Producing, set()).update(producing)
            self.__typemap.setdefault(Modes.Getter, set()).update(getting)
            self.__typemap.setdefault(Modes.GetterSetter,
                                      set()).update(gettingsetting)
            self.__typemap.setdefault(Modes.Deleter, set()).update(deleting)
            self.__typemap.setdefault(Modes.Setter, set()).update(setting)
            self.__typemap.setdefault(Modes.Tracker, set()).update(tracking)

            all_types_host = tracking.union(producing).union(getting).union(
                gettingsetting).union(deleting).union(setting)
            all_types.update(all_types_host)
            self.__observed_types.update(all_types_host)
            self.__observed_types_new.update(
                self.__host_typemap[host][Modes.Tracker].union(
                    self.__host_typemap[host][Modes.Getter]).union(
                        self.__host_typemap[host][Modes.GetterSetter]))

            self.__observed_types_mod.update(
                self.__host_typemap[host][Modes.Getter].union(
                    self.__host_typemap[host][Modes.GetterSetter]))

            jsonobj = json.dumps({
                "sim_typemap": jobj,
                "wire_format": wire_format,
                "app_id": self.__app.app_id
            })
            try:
                self.__sessions[host] = Session()
                if platform.system() == 'Java':
                    ignoreJavaSSL()
                    self.logger.info("Using custom HTTPAdapter for Jython")
                    self.__sessions[host].mount(host, MyJavaHTTPAdapter())
                    self.__sessions[host].verify = False
                # BUG FIX: register through the per-host session so the
                # Jython adapter / verify settings configured above are
                # actually used (previously bypassed via bare requests.put).
                resp = self.__sessions[host].put(
                    host,
                    data=jsonobj,
                    headers={'content-type': 'application/json'})
            except HTTPError as exc:
                self.__handle_request_errors(resp, exc)
                return False
            except ConnectionError:
                self.logger.exception("Cannot connect to host.")
                self.__disconnected = True
                return False
        self.__name2type = dict([(tp.__realname__, tp) for tp in all_types])
        self.object_store.add_types(all_types)
        for host in self.__host_typemap:
            # Group keys this frame is allowed to push to each host
            # (GetterSetter, Setter, Producing, Deleter modes).
            self.__host_to_push_groupkey[host] = set([
                self.object_store.get_group_key(tp)
                for tp in self.__host_typemap[host][Modes.GetterSetter].union(
                    self.__host_typemap[host][Modes.Setter]).union(
                        self.__host_typemap[host][Modes.Producing]).union(
                            self.__host_typemap[host][Modes.Deleter])
            ])

        return True

    @staticmethod
    def loop():
        """Hand control to the interactive spacetime console (blocking)."""
        console = SpacetimeConsole()
        console.cmdloop()

    def get_instrumented(self):
        """Report whether byte-count instrumentation is enabled (True/False)."""
        return self.__instrumented

    def get_curtime(self):
        """Timestamp (time.time()) recorded at the end of the latest step."""
        return self.__curtime

    def get_curstep(self):
        """Number of completed update steps in the current run."""
        return self.__curstep

    def get_timestep(self):
        """
        Returns the loop period. NOTE: despite the original "milliseconds"
        wording, the value stored by __init__ is in seconds (time_step/1000).
        """
        return self.__time_step

    def get_app(self):
        """The application object currently attached to this frame."""
        return self.__app

    def attach_app(self, app):
        """Attach a spacetime-conformant application to this frame.

        Arguments:
        app : object implementing IApplication

        Exceptions:
        None
        """
        self.__app = app
        # Frame name combines the app's class name with its unique id.
        self.__appname = app.__class__.__name__ + "_" + app.app_id

    def run_async(self):
        """Start the frame loop on a daemon thread and return immediately
        (non-blocking).

        Arguments:
        None

        Exceptions:
        None
        """
        worker = Parallel(target=self.__run)
        worker.daemon = True
        self.thread = worker
        worker.start()

    def run_main(self):
        """Run the frame loop synchronously on the calling thread."""
        self.__run()

    def run(self):
        """Run the frame loop on a daemon thread and block until it finishes.

        Arguments:
        None

        Exceptions:
        None
        """
        worker = Parallel(target=self.__run)
        worker.daemon = True
        self.thread = worker
        worker.start()
        worker.join()

    def __clear(self):
        """Reset connection state, app done-flag, the object store, and the
        per-step change buffers before a (re)run."""
        self.__disconnected = False
        self.__app.done = False
        self.object_store.clear_all()
        self.__new, self.__mod, self.__del = {}, {}, {}

    def __run(self):
        """Main frame loop: register with all hosts, then pull/update/push at
        a fixed cadence until the app flags done; finally shut down, push one
        last time, and deregister. Profiler setup/teardown brackets the loop
        when profiling is enabled."""
        self.__clear()
        if not self.__app:
            raise NotImplementedError("App has not been attached")
        success = self.__register_app(self.__app)
        if success:
            try:
                if self.__profiling:
                    try:
                        from cProfile import Profile  # @UnresolvedImport
                        if not os.path.exists('stats'):
                            os.mkdir('stats')
                        self.__profile = Profile()
                        self.__profile.enable()
                        self.logger.info("starting profiler for %s",
                                         self.__appname)
                    except:
                        # NOTE(review): bare except also hides mkdir errors,
                        # not only the missing-cProfile case it reports.
                        self.logger.error(
                            "Could not import cProfile (not supported in Jython)."
                        )
                        self.__profile = None
                        self.__profiling = None

                # Initial sync: pull server state, let the app initialize,
                # then push its first changes.
                self.__pull()
                self.__app.initialize()
                self.__push()
                while not self.__app.done:
                    st_time = time.time()
                    self.__pull()
                    self.__app.update()
                    self.__push()
                    end_time = time.time()
                    timespent = end_time - st_time
                    self.__curstep += 1
                    self.__curtime = time.time()
                    # time spent on execution loop
                    if timespent < self.__time_step:
                        time.sleep(float(self.__time_step - timespent))
                    else:
                        # NOTE(review): timespent is in seconds even though
                        # the message says "ms".
                        self.logger.info("loop exceeded maximum time: %s ms",
                                         timespent)

                    # Writes down total time spent in spacetime methods
                    if self.__instrumented:
                        si.record_instruments(timespent, self)
                # One last time, because _shutdown may delete objects from the store
                self.__pull()
                self._shutdown()
                self.__push()
                self.__unregister_app()
            except ConnectionError as cerr:
                # NOTE(review): exception .message is Python 2 only -- confirm
                # the intended runtime before porting.
                self.logger.error("A connection error occurred: %s",
                                  cerr.message)
            except HTTPError as herr:
                self.logger.error(
                    "A fatal error has occurred while communicating with the server: %s",
                    herr.message)
            except:
                self.logger.exception("An unknown error occurred.")
                raise
            finally:
                # Always flush profiler stats, even on error paths.
                if self.__profiling:
                    self.__profile.disable()
                    self.__profile.create_stats()
                    self.__profile.dump_stats(
                        os.path.join(
                            'stats', "%s_stats_%s.ps" %
                            (self.__start_time, self.__appname)))
        else:
            self.logger.info("Could not register, exiting run loop...")

    def app_done(self):
        """
        app_done

        Tell whether the attached application has finished its run loop.
        """
        return self.__app.done

    def get(self, tp, oid=None):
        """
        Retrieves objects from local data storage. If oid is provided, returns
        the object identified by oid. Otherwise, returns the list of all
        objects matching type tp.

        Arguments:
        tp : PCC set type being fetched
        oid : primary key of an individual object.

        Exceptions:
        - ID does not exist in store
        - Application does not annotate that type
        """
        if tp not in self.__observed_types:
            raise Exception("Application %s does not annotate type %s" %
                            (self.__appname, tp))
        # BUG FIX: explicit None check so falsy primary keys (0, "") still
        # select the single-object path (previously `if oid:`).
        if oid is not None:
            return self.object_store.get(tp, oid)
        return self.object_store.get(tp)

    def add(self, obj):
        """
        Store a new PCC object so spacetime tracks and pushes it.

        Arguments:
        obj : PCC object to stored

        Exceptions:
        - Application is not annotated as a producer
        """
        tp = obj.__class__
        if tp not in self.__typemap[Modes.Producing]:
            raise Exception("Application %s is not a producer of type %s" %
                            (self.__appname, tp))
        self.object_store.append(tp, obj)

    def delete(self, tp, obj):
        """
        Remove an object currently stored and tracked by spacetime.

        Arguments:
        tp: PCC type of object to be deleted
        obj : PCC object to be deleted

        Exceptions:
        - Application is not annotated as a Deleter
        """
        if tp not in self.__typemap[Modes.Deleter]:
            raise Exception("Application %s is not registered to delete %s" %
                            (self.__appname, tp))
        self.object_store.delete(tp, obj)

    def get_new(self, tp):
        """
        Retrieves new objects of type 'tp' retrieved in last pull (i.e. since
        last tick).

        Arguments:
        tp: PCC type for retrieving list of new objects

        Exceptions:
        None

        Note:
        Application should be annotated as a Getter, GetterSetter, or Tracker,
        otherwise result is always an empty list.
        """
        if tp in self.__observed_types_new:
            return self.object_store.get_new(tp)
        # Logger.warn is a deprecated alias of Logger.warning. Returning []
        # keeps callers poll-safe even for unobserved types.
        self.logger.warning(
            ("Checking for new objects of type %s, but not "
             "a Getter, GetterSetter, or Tracker of type. Empty list "
             "always returned"), tp)
        return []

    def get_mod(self, tp):
        """
        Retrieves objects of type 'tp' that were modified since last pull
        (i.e. since last tick).

        Arguments:
        tp: PCC type for retrieving list of modified objects

        Exceptions:
        None

        Note:
        Application should be annotated as a Getter,or GetterSetter, otherwise
        result is always an empty list.
        """
        if tp in self.__observed_types_mod:
            return self.object_store.get_mod(tp)
        # Logger.warn is a deprecated alias of Logger.warning.
        self.logger.warning(("Checking for modifications in objects of type "
                             "%s, but not a Getter or GetterSetter of type. "
                             "Empty list always returned"), tp)
        return []

    def get_deleted(self, tp):
        """
        Retrieves objects of type 'tp' that were deleted since last pull
        (i.e. since last tick).

        Arguments:
        tp: PCC type for retrieving list of deleted objects

        Exceptions:
        None

        Note:
        Application should be annotated as a Getter, GetterSetter, or Tracker,
        otherwise result is always an empty list.
        """
        # Deletions are visible to the same modes as new objects, hence the
        # shared __observed_types_new set.
        if tp in self.__observed_types_new:
            return self.object_store.get_deleted(tp)
        # Logger.warn is a deprecated alias of Logger.warning.
        self.logger.warning(
            ("Checking for deleted objects of type %s, but "
             "not a Getter, GetterSetter, or Tracker of type. Empty list "
             "always returned"), tp)
        return []

    def __handle_request_errors(self, resp, exc):
        """Handle a failed HTTP response.

        resp : the requests Response that failed
        exc : the HTTPError caught by the caller (unused here; kept for
            interface compatibility)

        A 401 means this app is unknown to the server: the bare `raise`
        re-raises the exception currently being handled, so this method must
        only be called from inside an except block. Any other status is
        logged as a warning and execution continues.
        """
        if resp.status_code == 401:
            self.logger.error(
                "This application is not registered at the server. Stopping..."
            )
            raise
        # Logger.warn is a deprecated alias of Logger.warning.
        self.logger.warning("Non-success code received from server: %s %s",
                            resp.status_code, resp.reason)

    @timethis
    def __process_pull_resp(self, resp):
        """Apply a pulled change-set to the object store, then reset the
        local change record."""
        if not resp or "gc" not in resp:
            return
        self.object_store.apply_changes(resp)
        self.object_store.clear_record()

    @timethis
    def __pull(self):
        """Fetch pending updates from every host and apply them locally.

        No-op when already disconnected. On a connection failure the frame is
        marked disconnected and the app is stopped. (Removed an unused local,
        `type_dict`, and dead commented-out debug dumps.)
        """
        if self.__disconnected:
            return
        if self.__instrumented:
            self._instruments['bytes received'] = 0
        updates = DataframeChanges_Base()
        try:
            for host in self.__host_typemap:
                # Need to give mechanism to selectively ask for some changes.
                # Very hard to implement in current dataframe scheme.
                resp = self.__sessions[host].get(host + "/updated", data={})
                try:
                    resp.raise_for_status()
                    if self.__instrumented:
                        self._instruments['bytes received'] = len(resp.content)
                    data = resp.content
                    DF_CLS, content_type = FORMATS[
                        self.__host_wire_format[host]]
                    dataframe_change = DF_CLS()
                    dataframe_change.ParseFromString(data)
                    # NOTE(review): CopyFrom replaces rather than merges, so
                    # with multiple hosts only the last response survives --
                    # confirm this is intended.
                    updates.CopyFrom(dataframe_change)
                except HTTPError as exc:
                    self.__handle_request_errors(resp, exc)
            self.__process_pull_resp(updates)
        except ConnectionError:
            self.logger.exception("Disconnected from host.")
            self.__disconnected = True
            self._stop()

    @timethis
    def __push(self):
        """Serialize local changes and POST them to every host, filtered to
        the group keys each host is entitled to receive; finally clear the
        local change record and buffer."""
        if self.__disconnected:
            return
        if self.__instrumented:
            self._instruments['bytes sent'] = 0
        changes = self.object_store.get_record()
        #json.dump(changes, open("push_" + self.__appname + ".json", "a") , sort_keys = True, separators = (',', ': '), indent = 4)

        for host in self.__host_typemap:
            try:
                DF_CLS, content_type = FORMATS[self.__host_wire_format[host]]
                changes_for_host = DF_CLS()
                # Only forward group changes this host subscribes to.
                changes_for_host["gc"] = RecursiveDictionary([
                    (gck, gc) for gck, gc in changes["gc"].items()
                    if gck in self.__host_to_push_groupkey[host]
                ])
                if "types" in changes:
                    changes_for_host["types"] = changes["types"]
                dictmsg = changes_for_host.SerializeToString()
                #update_dict = {"update_dict": protomsg}
                if self.__instrumented:
                    self._instruments['bytes sent'] = sys.getsizeof(dictmsg)
                headers = {'content-type': content_type}
                if self.__compress:
                    headers['content-encoding'] = 'gzip'
                    dictmsg = zlib.compress(dictmsg)
                resp = self.__sessions[host].post(host + "/updated",
                                                  data=dictmsg,
                                                  headers=headers)
            except TypeError:
                self.logger.exception("error encoding obj. Object: %s",
                                      changes_for_host)
            except HTTPError as exc:
                self.__handle_request_errors(resp, exc)
            except ConnectionError:
                self.logger.exception("Disconnected from host.")
                self.__disconnected = True
                self._stop()

        self.object_store.clear_record()
        self.object_store.clear_buffer()

    def _shutdown(self):
        """
        _shutdown

        Invoked once after the frame execution loop stops, during the final
        pull/push iteration; delegates to the app's shutdown hook.
        """
        self.__app.shutdown()

    def _stop(self):
        """
        _stop

        Invoked by frame's command prompt on quit/exit; flags the app as done
        so the run loop terminates.
        """
        self.__app.done = True

    def __unregister_app(self):
        """Deregister this app from every host it registered with.

        Logs the actual outcome per host; a failed DELETE is reported as a
        warning instead of being unconditionally claimed as a success.
        """
        for host in self.__host_typemap:
            resp = requests.delete(host)
            if resp.ok:
                self.logger.info("Successfully deregistered from %s", host)
            else:
                self.logger.warning("Failed to deregister from %s: %s %s",
                                    host, resp.status_code, resp.reason)

    def __setup_logger(self, name, file_path=None):
        """Create and return a DEBUG-level logger named *name*.

        file_path is accepted for interface compatibility but currently
        unused.
        """
        new_logger = logging.getLogger(name)
        # NullHandler avoids "No handler found" warnings when the embedding
        # application configures no logging.
        new_logger.addHandler(NullHandler())
        new_logger.setLevel(logging.DEBUG)
        new_logger.debug("Starting logger for %s", name)
        return new_logger
コード例 #11
0
ファイル: run_info_data.py プロジェクト: biorack/BASTet
class run_info_dict(dict):
    """
    Simple dictionary class for collecting runtime information

    The typical use is as follows:

    >> my_run_info = run_info_dict()
    >> my_run_info(my_function)(my_parameters)

    With this, all runtime information is automatically collected in my_run_info.
    We can enable time-and-usage and memory profiling simply by calling
    enable_profile_time_and_usage(...) or  enable_profile_memory(...), respectively,
    before we run our function.

    We can also use the data structure directly and control the population ourselves,
    however, memory profiling is not supported by default in this case but we need to
    set and run the memory profiler ourselves, since memory_profiler expects that it
    can wrap the function

    """
    # strftime/strptime pattern used for the recorded start/end timestamps.
    DEFAULT_TIME_FORMAT = '%Y-%m-%d %H:%M:%S.%f'

    def __init__(self, *args, **kwargs):
        """Initialize the dict plus the profiler and MPI bookkeeping state."""
        super(run_info_dict, self).__init__(*args, **kwargs)
        self.__profile_time_and_usage = False  # collect cProfile data when True
        self.__profile_memory = False  # collect memory_profiler data when True
        self.__time_and_use_profiler = None  # active Profile instance, if any
        self.__memory_profiler = None  # active memory LineProfiler, if any
        self.mpi_comm = mpi_helper.get_comm_world()  # communicator for logging/gather
        self.mpi_root = 0  # root rank used for log messages
        self.gather_data = True  # gather results across MPI ranks after each run

    def __call__(self, func):
        """Wrap *func* so that executing it populates this dict with runtime
        (and optionally memory-profiling) data.

        :param func: The function to be wrapped for execution
        :return: A wrapped function for which we track the runtime information in self
        """
        from functools import wraps

        @wraps(func)
        def wrapper(*args, **kwargs):
            # Pre-execute recording
            self.clear()                # Clear all runtime information data and profilers
            self.record_preexecute()    # Record system provenance and pre-execution data
            start_time = time.time()    # Start the execution timer
            # Execute the function
            if not self.get_profile_memory():
                result = func(*args, **kwargs)  # Execute the function without memory profiling
            else:
                # memory_profiler must wrap the function itself to trace it
                self.__memory_profiler = memory_profiler.LineProfiler()
                result = self.__memory_profiler(func)(*args, **kwargs)  # Execute the function with memory profiling
            # Post-execute recording
            execution_time = time.time() - start_time                   # Compute the execution time
            self.record_postexecute(execution_time=execution_time)      # Record post-execution data
            self.clean_up()                                             # Clean up empty data
            if self.gather_data:
                self.gather()                                           # Gather the data from all MPI ranks
            # Return the result
            return result

        # Return our wrapped function
        return wrapper

    def clear(self):
        """
        Clear the dictionary and other internal parameters

        Side Effects

            * Remove all key/value pairs from the dict
            * Set self.__time_and_use_profiler to None
            * Set self.__memory_profiler to None
            * Set self.__profile_memory to False if invalid (i.e, if set to True but memory profiling is unavailable)
            * Set self.__profile_time_and_usage to False if invalid (i.e., if set to True but profiling is unavailable)
        """
        # Make sure profiling settings are valid.
        # BUG FIX: the two disable calls were swapped -- memory-profiler
        # unavailability must disable *memory* profiling and cProfile
        # unavailability must disable *time/usage* profiling, matching the
        # docstring above.
        if self.get_profile_memory() and not PROFILE_MEMORY_AVAILABLE:
            self.enable_profile_memory(False)
        if self.get_profile_time_and_usage() and not PROFILE_AVAILABLE:
            self.enable_profile_time_and_usage(False)
        # Remove old profilers
        self.__time_and_use_profiler = None
        self.__memory_profiler = None
        # Clear all data from the dictionary
        return super(run_info_dict, self).clear()

    def enable_profile_memory(self, enable=True):
        """
        Enable/disable profiling of memory usage

        :param enable: boolean to enable (True) or disable (False) memory profiling

        """
        if not PROFILE_MEMORY_AVAILABLE:
            # Memory profiling cannot be turned on without its dependencies.
            self.__profile_memory = False
            if enable:
                log_helper.warning(__name__, 'Profiling of memory usage not available.' +
                                   ' Missing memory_profiler or StringIO package')
            return
        # Log only on actual state transitions (the two cases are exclusive).
        if not enable and self.__profile_memory:
            log_helper.debug(__name__, "Disabled memory profiling. ",
                             root=self.mpi_root, comm=self.mpi_comm)
        elif enable and not self.__profile_memory:
            log_helper.debug(__name__, "Enabled memory profiling. ",
                             root=self.mpi_root, comm=self.mpi_comm)
        self.__profile_memory = enable

    def enable_profile_time_and_usage(self, enable=True):
        """
        Enable/disable time and usage profiling

        :param enable: boolean to enable (True) or disable (False) time and usage profiling

        """
        if not PROFILE_AVAILABLE:
            # Time/usage profiling cannot be turned on without its dependencies.
            self.__profile_time_and_usage = False
            if enable:
                log_helper.warning(__name__, 'Profiling of time and usage not available.' +
                                   ' Missing profile and/or pstats package')
            return
        # Log only on actual state transitions (the two cases are exclusive).
        if not enable and self.__profile_time_and_usage:
            log_helper.debug(__name__, "Disabled time and usage profiling. ",
                             root=self.mpi_root, comm=self.mpi_comm)
        elif enable and not self.__profile_time_and_usage:
            log_helper.debug(__name__, "Enabled time and usage profiling. ",
                             root=self.mpi_root, comm=self.mpi_comm)
        self.__profile_time_and_usage = enable

    def get_profile_time_and_usage(self):
        """
        Report whether time-and-usage profiling is currently enabled

        :return: Boolean indicating whether time and usage profiling is enabled
        """
        return self.__profile_time_and_usage

    def get_profile_memory(self):
        """
        Report whether profiling of memory usage is currently enabled

        :return: Boolean indicating whether memory profiling is enabled
        """
        return self.__profile_memory

    def record_preexecute(self):
        """
        Record basic runtime information in this dict before the execution is started.


        Function used to record runtime information prior to executing the process we want to track, e.g.,
        the `execute_analysis(...)` of a standard analysis.

        The function may be overwritten in child classes to add recording of
        additional runtime information. All runtime data should be recorded in the
        main dict (i.e, self). This ensures in the case of standard analysis that
        the data is stored in the HDF5 file. Other data should be stored in separate
        variables that we may add to the object.

        When overwriting the function we should typically call super(...,self).record_preexecute()
        last in the custom version to ensure that the start_time is properly recorded right before
        the execution of the analysis.

        """
        log_helper.debug(__name__, 'Recording pre-execution runtime data', root=self.mpi_root, comm=self.mpi_comm)
        # Record basic runtime environment information using the platform module.
        # NOTE(review): unicode() is Python 2 only -- this module predates Python 3.
        try:
            self['architecture'] = unicode(platform.architecture())
            self['java_ver'] = unicode(platform.java_ver())
            self['libc_ver'] = unicode(platform.libc_ver())
            self['linux_distribution'] = unicode(platform.linux_distribution())
            self['mac_ver'] = unicode(platform.mac_ver())
            self['machine'] = unicode(platform.machine())
            self['node'] = unicode(platform.node())
            self['platform'] = unicode(platform.platform())
            self['processor'] = unicode(platform.processor())
            self['python_branch'] = unicode(platform.python_branch())
            self['python_build'] = unicode(platform.python_build())
            self['python_compiler'] = unicode(platform.python_compiler())
            self['python_implementation'] = unicode(platform.python_implementation())
            self['python_revision'] = unicode(platform.python_revision())
            self['python_version'] = unicode(platform.python_version())
            self['release'] = unicode(platform.release())
            self['system'] = unicode(platform.system())
            self['uname'] = unicode(platform.uname())
            self['version'] = unicode(platform.version())
            self['win32_ver'] = unicode(platform.win32_ver())
        except:
            # Provenance recording is best-effort; never abort the run for it.
            warnings.warn("WARNING: Recording of platform provenance failed: " + str(sys.exc_info()))

        # Attempt to record the svn version information
        try:
            import subprocess
            self['svn_ver'] = subprocess.check_output('svnversion').rstrip('\n')
        except ImportError:
            log_helper.warning(__name__, 'Recording of svn version not possible. subprocess not installed',
                               root=self.mpi_root, comm=self.mpi_comm)
        except:
            warnings.warn("Recording of svn version information failed: "+str(sys.exc_info()))

        # Attempt to record software library version
        try:
            import numpy as np
            self['numpy_version_full_version'] = unicode(np.version.full_version)
            self['numpy_version_release'] = unicode(np.version.release)
            self['numpy_version_git_revision'] = unicode(np.version.git_revision)
        except ImportError:
            log_helper.warning(__name__, 'Recording of numpy version not possible.',
                               root=self.mpi_root, comm=self.mpi_comm)

        # Attempt to record psutil data (CPU counts, open files, memory)
        try:
            import psutil
            self['logical_cpu_count'] = unicode(psutil.cpu_count())
            self['cpu_count'] = unicode(psutil.cpu_count(logical=False))
            process = psutil.Process()
            self['open_files'] = unicode(process.open_files())
            self['memory_info_before'] = unicode(process.memory_info())
        except ImportError:
            log_helper.warning(__name__, 'psutil not installed. Recording of part of runtime information not possible',
                               root=self.mpi_root, comm=self.mpi_comm)
        except:
            warnings.warn("Recording of psutil-based runtime information failed: "+str(sys.exc_info()))

        # Record the start time for the analysis
        self['start_time'] = unicode(datetime.datetime.now())

        # Enable time and usage profiling if requested; record_postexecute
        # disables this profiler and stores its stats.
        if self.__profile_time_and_usage:
            self.__time_and_use_profiler = Profile()
            self.__time_and_use_profiler.enable()

    def record_postexecute(self, execution_time=None):
        """
        Record runtime information after the tracked task has completed, e.g.,
        after the `execute_analysis(...)` function of a standard analysis.

        Records the end time, the execution time, post-execution memory usage
        (via psutil, if available), and — if profiling was enabled — the
        cProfile time/usage statistics and the memory_profiler results.

        The function may be overwritten in child classes to add recording of
        additional runtime information.

        When overwriting the function we should call
        super(..., self).record_postexecute(execution_time) in the custom version
        to ensure that the execution and end_time are properly recorded.

        :param execution_time: The total time it took to execute the analysis. May be None, in which
            case the function will attempt to compute the execution time based on the start_time
            (if available) and the current time.

        """
        log_helper.debug(__name__, 'Recording post-execution runtime data', root=self.mpi_root, comm=self.mpi_comm)
        # Finalize recording of post execution provenance
        self['end_time'] = unicode(datetime.datetime.now())
        if execution_time is not None:
            # Caller supplied the execution time directly
            self['execution_time'] = unicode(execution_time)
        elif 'start_time' in self:
            # Derive the execution time from the recorded start/end timestamps.
            # string_to_time uses time.mktime, so resolution is full seconds.
            start_time = run_info_dict.string_to_time(self['start_time'])
            stop_time = run_info_dict.string_to_time(self['end_time'])
            self['execution_time'] = unicode(stop_time - start_time)    # TODO: This only gives execution time in full seconds right now
        else:
            # No start time was recorded, so the execution time cannot be computed
            self['execution_time'] = None
        # Attempt to record psutil data (best-effort; never abort the task on failure)
        try:
            import psutil
            process = psutil.Process()
            self['memory_info_after'] = unicode(process.memory_info())
        except ImportError:
            log_helper.warning(__name__, 'psutil not installed. Recording of part of runtime information not possible',
                               root=self.mpi_root, comm=self.mpi_comm)
        except:
            # Deliberately broad: recording runtime info must not fail the analysis itself
            warnings.warn("Recording of psutil-based runtime information failed: "+str(sys.exc_info()))

        # Record the time and use profiling data if possible
        if self.__time_and_use_profiler is not None:
            self.__time_and_use_profiler.disable()
            self.__time_and_use_profiler.create_stats()
            # Store the raw stats dict as a string; get_profile_stats_object()
            # can reconstruct a pstats.Stats object from it via literal_eval
            self['profile'] = unicode(self.__time_and_use_profiler.stats)
            # Save the human-readable summary statistics for the profiling data
            stats_io = StringIO.StringIO()
            profiler_stats = pstats.Stats(self.__time_and_use_profiler, stream=stats_io).sort_stats('cumulative')
            profiler_stats.print_stats()
            self['profile_stats'] = stats_io.getvalue()

        # Record the memory profiling data if possible
        if self.__memory_profiler is not None and self.get_profile_memory():
            log_helper.debug(__name__, 'Recording memory profiling data', root=self.mpi_root, comm=self.mpi_comm)
            mem_stats_io = StringIO.StringIO()
            memory_profiler.show_results(self.__memory_profiler, stream=mem_stats_io)
            # code_map holds the raw line-by-line memory data; the stats string is the formatted report
            self['profile_mem'] = unicode(self.__memory_profiler.code_map)
            self['profile_mem_stats'] = mem_stats_io.getvalue()

    def clean_up(self):
        """
        Clean up the runinfo object. In particular remove empty keys that
        either recorded None or recorded just an empty string.

        This function may be overwritten to also do clean-up needed
        due to additional custom runtime instrumentation.

        When overwriting this function we should call super(..., self).clean_up()
        at the end of the function to ensure that the runinfo dictionary
        is clean, i.e., does not contain any empty entries.

        """
        log_helper.debug(__name__, 'Clean up runtime data', root=self.mpi_root, comm=self.mpi_comm)
        # Remove empty items from the run_info dict.
        # Iterate over a snapshot of the items: popping entries while iterating
        # a live dict view raises RuntimeError on Python 3 (and is fragile in general).
        for ri_key, ri_value in list(self.items()):
            try:
                if ri_value is None or len(ri_value) == 0:
                    self.pop(ri_key)
            except TypeError:
                # Value has no len() (e.g., an int or float); it is not "empty", so keep it
                pass

    def gather(self):
        """
        Simple helper function to gather the runtime information---that has been collected on
        multiple processes when running using MPI---on a single root process

        :return: If we have more than one processes then this function returns a
            dictionary with the same keys as usual for the run_info but the
            values are now lists with one entry per mpi processes. If we only have
            a single process, then the run_info object will be returned without
            changes. NOTE: Similar to mpi gather, the function only collects
            information on the root. All other processes will return just their
            own private runtime information.

        """
        # Without MPI (or with a single process) there is nothing to collect
        if not mpi_helper.MPI_AVAILABLE:
            return self
        if self.mpi_comm.Get_size() <= 1:
            return self
        log_helper.debug(__name__, 'Gather runtime data from parallel tasks',
                         root=self.mpi_root, comm=self.mpi_comm)
        # Tag our data with the rank it came from, then collect on the root
        self['mpi_rank'] = self.mpi_comm.Get_rank()
        all_run_data = self.mpi_comm.gather(self, self.mpi_root)
        if self.mpi_comm.Get_rank() != self.mpi_root:
            # Non-root ranks only keep their own private runtime information
            return self
        # Merge the per-rank dicts into one dict of lists (one entry per rank)
        merged_run_data = {}
        for rank_data in all_run_data:
            for data_key in rank_data:
                merged_run_data.setdefault(data_key, []).append(rank_data[data_key])
        return merged_run_data

    def get_profile_stats_object(self, consolidate=True, stream=None):
        """
        Based on the execution profile of the execute_analysis(..) function get
        ``pstats.Stats`` object to help with the interpretation of the data.

        :param consolidate: Boolean flag indicating whether multiple stats (e.g., from multiple cores)
            should be consolidated into a single stats object. Default is True.
        :param stream: The optional stream parameter to be used fo the pstats.Stats object.

        :return: A single pstats.Stats object if consolidate is True. Otherwise the function
            returns a list of pstats.Stats objects, one per recorded statistic. None is returned
            in case that the stats objects cannot be created or no profiling data is available.
        """
        from ast import literal_eval
        if stream is None:
            import sys
            stream = sys.stdout

        # No profiling data recorded
        if 'profile' not in self:
            return None

        # The profile is stored as the string repr of the raw cProfile stats dict.
        # With MPI we may have one such string per process; normalize to a list
        # so the single- and multi-statistics cases are handled uniformly.
        raw_profiles = self['profile']
        if not isinstance(raw_profiles, list):
            raw_profiles = [raw_profiles, ]
        parsed_profiles = [literal_eval(raw_profile) for raw_profile in raw_profiles]

        class _StatsHolder(object):
            """
            Minimal stand-in that satisfies the interface pstats.Stats expects
            from a profiler: a `stats` attribute and a no-op `create_stats()`.
            This lets us build a pstats.Stats object without writing the data
            to file or creating (and overwriting) a real cProfile.Profile.
            """
            def __init__(self, stats_data):
                self.stats = stats_data

            def create_stats(self):
                pass

        stat_holders = [_StatsHolder(parsed) for parsed in parsed_profiles]

        # Build either one consolidated Stats object or one per recorded statistic
        if consolidate:
            return pstats.Stats(*stat_holders, stream=stream)
        return [pstats.Stats(holder, stream=stream) for holder in stat_holders]

    @staticmethod
    def string_to_structime(time_string, time_format=None):
        """
        Covert a time string to a time.struct_time using time.strptime

        :param time_string: String with the time, e.g, with the start time of a program.
        :param time_format: The time format to be used or None in which case run_info_dict.DEFAULT_TIME_FORMAT
            will be used.

        """
        return time.strptime(time_string,
                             time_format if time_format is not None else run_info_dict.DEFAULT_TIME_FORMAT)

    @staticmethod
    def string_to_time(time_string, time_format=None):
        """
        Convert a time string to local time object using time.mktime.

        :param time_string: String with the time, e.g, with the start time of a program.
        :param time_format: The time format to be used or None in which case run_info_dict.DEFAULT_TIME_FORMAT
            will be used.

        """
        return time.mktime(time.strptime(time_string,
                                         time_format if time_format is not None else run_info_dict.DEFAULT_TIME_FORMAT))