def profile(sort):
    """Generator to be used as a profiling context manager around a block of code.

    Yields exactly once; when resumed (i.e. when the ``with`` block exits, if
    wrapped with ``contextlib.contextmanager``), profile statistics collected
    while suspended are printed, sorted by *sort*.

    :param sort: pstats sort column name, or a falsy value to disable
        profiling entirely (the generator then yields once and does nothing).
    :raises RuntimeError: if *sort* is not a recognized sort column.
    """
    if not sort:
        # Profiling disabled: just hand control back once.
        yield
        return
    sort_columns = (
        "calls", "cumtime", "file", "ncalls", "pcalls", "line",
        "name", "nfl", "stdname", "time", "tottime",
    )
    if sort not in sort_columns:
        raise RuntimeError(
            "{} not a valid sort column. Please use one of {}".format(
                sort, ", ".join(sort_columns)))
    try:
        from cProfile import Profile  # type: ignore
    except ImportError:
        # BUG FIX: the pure-Python fallback profiler is the lowercase
        # ``profile`` stdlib module; ``from Profile import Profile`` could
        # never succeed.  NOTE(review): profile.Profile has no enable()/
        # disable(), so this fallback path is still best-effort only.
        from profile import Profile  # type: ignore
    prof = Profile()
    prof.enable()
    yield
    # create_stats() stops collection and snapshots the data before printing.
    prof.create_stats()
    prof.print_stats(sort=sort)
def run(self, func, *args) -> None:
    """Profile a single invocation of ``func(*args)`` with cProfile.

    The profiler — with its statistics already created — is appended to
    ``self._stats`` so the collected data can be inspected or merged later.
    The return value of ``func`` is discarded.
    """
    profiler = Profile()
    profiler.enable()
    func(*args)
    profiler.disable()
    profiler.create_stats()
    self._stats.append(profiler)
def _call(self, *args, **kw):
    """Run the wrapped call under the raw profiler and report trimmed stats.

    The underlying call is repeated ``self._iterations`` times with garbage
    collection disabled; cumulative-time statistics, truncated to
    ``self._max_lines`` entries, are handed to ``self._reporter``.  The
    result of the final repetition is returned.
    """
    profiler = RawProfile()

    def _bench():
        # The last return value is stashed on the function object so it can
        # escape the exec-style runctx() invocation below.
        with DisableGc():
            for _ in range(self._iterations):
                _bench.result = super(Profile, self)._call(*args, **kw)

    profiler.runctx('_bench()', {}, {'_bench': _bench})
    profiler.create_stats()
    report = Stats(profiler)
    report.sort_stats('cumulative')
    # Truncate the function list so only the top entries are reported.
    report.fcn_list = report.fcn_list[:self._max_lines]
    self._reporter(report)
    return _bench.result
class ProfileMiddleware(object):
    """Middleware that profiles any view with cProfile on request.

    http://yoursite.com/yourview/?prof

    Appending the "prof" key to the query string (``?prof`` or ``&prof=``)
    makes the profiling results for that view appear in the browser.

    WARNING: It uses cProfile
    """

    def is_on(self, request):
        # Profiling is toggled purely by the presence of the query-string key.
        return 'prof' in request.GET

    def process_request(self, request):
        if self.is_on(request):
            self.prof = Profile()

    def process_view(self, request, callback, callback_args, callback_kwargs):
        if not self.is_on(request):
            return None
        log.debug("%s called", callback)
        # The request is prepended so the view receives it as first argument.
        callback_args = (request,) + callback_args
        return self.prof.runcall(callback, *callback_args, **callback_kwargs)

    def process_response(self, request, response):
        if self.is_on(request):
            self.prof.create_stats()
            out = StringIO.StringIO()
            stats = Stats(self.prof, stream=out)
            stats.sort_stats(*sort_tuple)
            stats.print_stats()
            stats_str = out.getvalue()
            # Only replace the body when there is something to show.
            if response and response.content and stats_str:
                response.content = "<pre>" + stats_str + "</pre>"
        return response
class run_info_dict(dict):
    """
    Simple dictionary class for collecting runtime information

    The typical use is as follows:

    >> my_run_info = run_info_dict()
    >> my_run_info(my_function)(my_parameters)

    With this, all runtime information is automatically collected in
    my_run_info. We can enable time-and-usage and memory profiling simply by
    calling enable_profile_time_and_usage(...) or enable_profile_memory(...),
    respectively, before we run our function.

    We can also use the data structure directly and control the population
    ourselves, however, memory profiling is not supported by default in this
    case but we need to set and run the memory profiler ourselves, since
    memory_profiler expects that it can wrap the function
    """
    # strftime/strptime format used for the start_time / end_time entries
    DEFAULT_TIME_FORMAT = '%Y-%m-%d %H:%M:%S.%f'

    def __init__(self, *args, **kwargs):
        """Initialize the dict and the profiling / MPI bookkeeping attributes."""
        super(run_info_dict, self).__init__(*args, **kwargs)
        self.__profile_time_and_usage = False  # cProfile-style profiling enabled?
        self.__profile_memory = False          # memory_profiler profiling enabled?
        self.__time_and_use_profiler = None    # active Profile instance, or None
        self.__memory_profiler = None          # active LineProfiler instance, or None
        self.mpi_comm = mpi_helper.get_comm_world()  # communicator used for logging and gather
        self.mpi_root = 0                      # rank on which gathered data is merged
        self.gather_data = True                # gather runtime data across MPI ranks after execution?

    def __call__(self, func):
        """
        :param func: The function to be wrapped for execution
        :return: A wrapped function for which we track the runtime information
            in self
        """
        from functools import wraps

        @wraps(func)
        def wrapper(*args, **kwargs):
            # Pre-execute recording
            self.clear()  # Clear all runtime information data and profilers
            self.record_preexecute()  # Record system provenance and pre-execution data
            start_time = time.time()  # Start the execution timer
            # Execute the function
            if not self.get_profile_memory():
                # Execute the function without memory profiling
                result = func(*args, **kwargs)
            else:
                # Execute the function with memory profiling
                self.__memory_profiler = memory_profiler.LineProfiler()
                result = self.__memory_profiler(func)(*args, **kwargs)
            # Post-execute recording
            execution_time = time.time() - start_time  # Compute the execution time
            self.record_postexecute(execution_time=execution_time)  # Record post-execution data
            self.clean_up()  # Clean up empty data
            if self.gather_data:
                self.gather()  # Gather the data from all MPI ranks
            # Return the result
            return result

        # Return our wrapped function
        return wrapper

    def clear(self):
        """
        Clear the dictionary and other internal parameters

        Side Effects

            * Remove all key/value pairs from the dict
            * Set self.__time_and_use_profiler to None
            * Set self.__memory_profiler to None
            * Set self.__profile_memory to False if invalid (i.e, if set to
              True but memory profiling is unavailable)
            * Set self.__profile_time_and_usage to False if invalid (i.e., if
              set to True but profiling is unavailable)
        """
        # Make sure profiling settings are valid
        # NOTE(review): the two calls below look swapped -- a missing
        # memory profiler disables time-and-usage profiling and vice
        # versa. Confirm against the intent documented in the docstring.
        if self.get_profile_memory() and not PROFILE_MEMORY_AVAILABLE:
            self.enable_profile_time_and_usage(False)
        if self.get_profile_time_and_usage() and not PROFILE_AVAILABLE:
            self.enable_profile_memory(False)
        # Remove old profilers
        self.__time_and_use_profiler = None
        self.__memory_profiler = None
        # Clear all data from the dictionary
        return super(run_info_dict, self).clear()

    def enable_profile_memory(self, enable=True):
        """
        Enable/disable profiling of memory usage

        :param enable: boolean to enable (True) or disable (False) memory
            profiling
        """
        if PROFILE_MEMORY_AVAILABLE:
            # Log only on actual state transitions.
            if not enable and self.__profile_memory:
                log_helper.debug(__name__, "Disabled memory profiling. ",
                                 root=self.mpi_root, comm=self.mpi_comm)
            if enable and not self.__profile_memory:
                log_helper.debug(__name__, "Enabled memory profiling. ",
                                 root=self.mpi_root, comm=self.mpi_comm)
            self.__profile_memory = enable
        else:
            # Memory profiling cannot be enabled; warn if the caller asked for it.
            self.__profile_memory = False
            if enable:
                log_helper.warning(
                    __name__, 'Profiling of memory usage not available.' +
                    ' Missing memory_profiler or StringIO package')

    def enable_profile_time_and_usage(self, enable=True):
        """
        Enable/disable time and usage profiling

        :param enable: boolean to enable (True) or disable (False) time and
            usage profiling
        """
        if PROFILE_AVAILABLE:
            # Log only on actual state transitions.
            if not enable and self.__profile_time_and_usage:
                log_helper.debug(__name__, "Disabled time and usage profiling. ",
                                 root=self.mpi_root, comm=self.mpi_comm)
            if enable and not self.__profile_time_and_usage:
                log_helper.debug(__name__, "Enabled time and usage profiling. ",
                                 root=self.mpi_root, comm=self.mpi_comm)
            self.__profile_time_and_usage = enable
        else:
            # Profiling cannot be enabled; warn if the caller asked for it.
            self.__profile_time_and_usage = False
            if enable:
                log_helper.warning(
                    __name__, 'Profiling of time and usage not available.' +
                    ' Missing profile and/or pstats package')

    def get_profile_time_and_usage(self):
        """
        Check whether time and usage profiling is enabled

        :return: Boolean indicating whether time and usage profiling is enabled
        """
        return self.__profile_time_and_usage

    def get_profile_memory(self):
        """
        Check whether profiling of memory usage is enabled

        :return: Boolean indicating whether memory profiling is enabled
        """
        return self.__profile_memory

    def record_preexecute(self):
        """
        Record basic runtime information in this dict before the execution is
        started.

        Function used to record runtime information prior to executing the
        process we want to track, e.g., the `execute_analysis(...)` of a
        standard analysis.

        The function may be overwritten in child classes to add recording of
        additional runtime information. All runtime data should be recorded in
        the main dict (i.e, self). This ensures in the case of standard
        analysis that the data is stored in the HDF5 file. Other data should
        be stored in separate variables that we may add to the object.

        When overwriting the function we should typically call
        super(...,self).runinfo_record_pretexecute() last in the custom
        version to ensure that the start_time is properly recorded right
        before the execution of the analysis.
        """
        log_helper.debug(__name__, 'Recording pre-execution runtime data',
                         root=self.mpi_root, comm=self.mpi_comm)
        # Record basic runtime environment information using the platform module
        try:
            self['architecture'] = unicode(platform.architecture())
            self['java_ver'] = unicode(platform.java_ver())
            self['libc_ver'] = unicode(platform.libc_ver())
            self['linux_distribution'] = unicode(platform.linux_distribution())
            self['mac_ver'] = unicode(platform.mac_ver())
            self['machine'] = unicode(platform.machine())
            self['node'] = unicode(platform.node())
            self['platform'] = unicode(platform.platform())
            self['processor'] = unicode(platform.processor())
            self['python_branch'] = unicode(platform.python_branch())
            self['python_build'] = unicode(platform.python_build())
            self['python_compiler'] = unicode(platform.python_compiler())
            self['python_implementation'] = unicode(
                platform.python_implementation())
            self['python_revision'] = unicode(platform.python_revision())
            self['python_version'] = unicode(platform.python_version())
            self['release'] = unicode(platform.release())
            self['system'] = unicode(platform.system())
            self['uname'] = unicode(platform.uname())
            self['version'] = unicode(platform.version())
            self['win32_ver'] = unicode(platform.win32_ver())
        except:
            # Provenance recording is best-effort; never fail the run for it.
            warnings.warn(
                "WARNING: Recording of platform provenance failed: " +
                str(sys.exc_info()))
        # Attempt to record the svn version information
        try:
            import subprocess
            self['svn_ver'] = subprocess.check_output('svnversion').rstrip(
                '\n')
        except ImportError:
            log_helper.warning(
                __name__,
                'Recording of svn version not possible. subprocess not installed',
                root=self.mpi_root, comm=self.mpi_comm)
        except:
            warnings.warn("Recording of svn version information failed: " +
                          str(sys.exc_info()))
        # Attempt to record software library version
        try:
            import numpy as np
            self['numpy_version_full_version'] = unicode(
                np.version.full_version)
            self['numpy_version_release'] = unicode(np.version.release)
            self['numpy_version_git_revision'] = unicode(
                np.version.git_revision)
        except ImportError:
            log_helper.warning(__name__,
                               'Recording of numpy version not possible.',
                               root=self.mpi_root, comm=self.mpi_comm)
        # Attempt to record psutil data
        try:
            import psutil
            self['logical_cpu_count'] = unicode(psutil.cpu_count())
            self['cpu_count'] = unicode(psutil.cpu_count(logical=False))
            process = psutil.Process()
            self['open_files'] = unicode(process.open_files())
            self['memory_info_before'] = unicode(process.memory_info())
        except ImportError:
            log_helper.warning(
                __name__,
                'psutil not installed. Recording of part of runtime information not possible',
                root=self.mpi_root, comm=self.mpi_comm)
        except:
            warnings.warn(
                "Recording of psutil-based runtime information failed: " +
                str(sys.exc_info()))
        # Record the start time for the analysis
        self['start_time'] = unicode(datetime.datetime.now())
        # Enable time and usage profiling if requested
        if self.__profile_time_and_usage:
            self.__time_and_use_profiler = Profile()
            self.__time_and_use_profiler.enable()

    def record_postexecute(self, execution_time=None):
        """
        Function used to record runtime information after the task we want to
        track is completed, e.g. the `execute_analysis(...)` function of a
        standard analysis.

        The function may be overwritten in child classes to add recording of
        additional runtime information. When overwriting the function we
        should call super(...,self).runinfo_record_postexecute(execution_time)
        in the custom version to ensure that the execution and end_time are
        properly recorded.

        :param execution_time: The total time it took to execute the analysis.
            May be None, in which case the function will attempt to compute
            the execution time based on the start_time (if available) and the
            the current time.
        :param comm: Used for logging only. The MPI communicator to be used.
            Default value is None, in which case MPI.COMM_WORLD is used.
        """
        log_helper.debug(__name__, 'Recording post-execution runtime data',
                         root=self.mpi_root, comm=self.mpi_comm)
        # Finalize recording of post execution provenance
        self['end_time'] = unicode(datetime.datetime.now())
        if execution_time is not None:
            self['execution_time'] = unicode(execution_time)
        elif 'start_time' in self:
            # Fall back to computing the duration from the recorded timestamps.
            start_time = run_info_dict.string_to_time(self['start_time'])
            stop_time = run_info_dict.string_to_time(self['end_time'])
            self['execution_time'] = unicode(
                stop_time - start_time
            )  # TODO: This only gives execution time in full seconds right now
        else:
            self['execution_time'] = None
        # Attempt to record psutil data
        try:
            import psutil
            process = psutil.Process()
            self['memory_info_after'] = unicode(process.memory_info())
        except ImportError:
            log_helper.warning(
                __name__,
                'psutil not installed. Recording of part of runtime information not possible',
                root=self.mpi_root, comm=self.mpi_comm)
        except:
            warnings.warn(
                "Recording of psutil-based runtime information failed: " +
                str(sys.exc_info()))
        # Record the time and use profiling data if possible
        if self.__time_and_use_profiler is not None:
            self.__time_and_use_profiler.disable()
            self.__time_and_use_profiler.create_stats()
            # Raw stats dict, serialized as text (parsed back by
            # get_profile_stats_object via ast.literal_eval).
            self['profile'] = unicode(self.__time_and_use_profiler.stats)
            # Save the summary statistics for the profiling data
            stats_io = StringIO.StringIO()
            profiler_stats = pstats.Stats(
                self.__time_and_use_profiler,
                stream=stats_io).sort_stats('cumulative')
            profiler_stats.print_stats()
            self['profile_stats'] = stats_io.getvalue()
        # Record the memory profiling data if possible
        if self.__memory_profiler is not None and self.get_profile_memory():
            log_helper.debug(__name__, 'Recording memory profiling data',
                             root=self.mpi_root, comm=self.mpi_comm)
            mem_stats_io = StringIO.StringIO()
            memory_profiler.show_results(self.__memory_profiler,
                                         stream=mem_stats_io)
            self['profile_mem'] = unicode(self.__memory_profiler.code_map)
            self['profile_mem_stats'] = mem_stats_io.getvalue()

    def clean_up(self):
        """
        Clean up the runinfo object. In particular remove empty keys that
        either recorded None or recorded just an empty string.

        This function may be overwritten to also do clean-up needed due to
        additional custom runtime instrumentation.

        When overwriting this function we should call
        super(..., self).runinfo_clean_up() at the end of the function to
        ensure that the runinfo dictionary is clean, i.e., does not contain
        any empty entries.
        """
        log_helper.debug(__name__, 'Clean up runtime data',
                         root=self.mpi_root, comm=self.mpi_comm)
        # Remove empty items from the run_info dict
        # NOTE(review): popping while iterating .items() relies on Python 2
        # semantics where items() returns a list snapshot.
        for ri_key, ri_value in self.items():
            try:
                if ri_value is None or len(ri_value) == 0:
                    self.pop(ri_key)
            except:
                # Values without len() (e.g. numbers) are simply kept.
                pass

    def gather(self):
        """
        Simple helper function to gather the runtime information---that has
        been collected on multiple processes when running using MPI---on a
        single root process

        :return: If we have more than one processes then this function returns
            a dictionary with the same keys as usual for the run_info but the
            values are now lists with one entry per mpi processes. If we only
            have a single process, then the run_info object will be returned
            without changes. NOTE: Similar to mpi gather, the function only
            collects information on the root. All other processes will return
            just their own private runtime information.
        """
        if mpi_helper.MPI_AVAILABLE:
            if self.mpi_comm.Get_size() > 1:
                log_helper.debug(__name__,
                                 'Gather runtime data from parallel tasks',
                                 root=self.mpi_root, comm=self.mpi_comm)
                self['mpi_rank'] = self.mpi_comm.Get_rank()
                run_data = self.mpi_comm.gather(self, self.mpi_root)
                if self.mpi_comm.Get_rank() == self.mpi_root:
                    # Merge per-rank dicts into key -> list-of-values.
                    merged_run_data = {}
                    for run_dict in run_data:
                        for key in run_dict:
                            try:
                                merged_run_data[key].append(run_dict[key])
                            except KeyError:
                                merged_run_data[key] = [run_dict[key]]
                    return merged_run_data
        return self

    def get_profile_stats_object(self, consolidate=True, stream=None):
        """
        Based on the execution profile of the execute_analysis(..) function
        get ``pstats.Stats`` object to help with the interpretation of the
        data.

        :param consolidate: Boolean flag indicating whether multiple stats
            (e.g., from multiple cores) should be consolidated into a single
            stats object. Default is True.
        :param stream: The optional stream parameter to be used fo the
            pstats.Stats object.

        :return: A single pstats.Stats object if consolidate is True.
            Otherwise the function returns a list of pstats.Stats objects, one
            per recorded statistic. None is returned in case that the stats
            objects cannot be created or no profiling data is available.
        """
        from ast import literal_eval
        if stream is None:
            import sys
            stream = sys.stdout
        if 'profile' in self:
            # Parse the profile data (which is stored as a string) or in the
            # case of MPI we may have a list of strings from each MPI processes
            if isinstance(self['profile'], list):
                # Convert the profile from each MPI process independently
                profile_data = [
                    literal_eval(profile) for profile in self['profile']
                ]
            else:
                # If we only have a single stat, then convert our data to a
                # list, so that we can handle the single and multiple
                # statistics case in the same way in the remainder of this
                # function
                profile_data = [
                    literal_eval(self['profile']),
                ]
            # Create a list of profile objects that the pstats.Stats class
            # understands
            profile_dummies = []
            for profile_i in profile_data:
                # Here we are creating for each statistic a dummy class on the
                # fly that holds our profile_data in the stats attributes and
                # has an empty create_stats function. This trick allows us to
                # create a pstats.Stats object without having to write our
                # stats data to file or having to create a cProfile.Profile
                # object first. Writing the data to file involves overhead and
                # is ugly and creating a profiler and overwriting its stats is
                # potentially problematic
                profile_dummies.append(
                    type('Profile', (object, ), {
                        'stats': profile_i,
                        'create_stats': lambda x: None
                    })())
            # Create the statistics object and return it
            if consolidate:
                profile_stats = pstats.Stats(*profile_dummies, stream=stream)
                return profile_stats
            else:
                profile_stats = [
                    pstats.Stats(profile_i, stream=stream)
                    for profile_i in profile_dummies
                ]
                return profile_stats
        else:
            return None

    @staticmethod
    def string_to_structime(time_string, time_format=None):
        """
        Covert a time string to a time.struct_time using time.strptime

        :param time_string: String with the time, e.g, with the start time of
            a program.
        :param time_format: The time format to be used or None in which case
            run_info_dict.DEFAULT_TIME_FORMAT will be used.
        """
        return time.strptime(
            time_string,
            time_format if time_format is not None
            else run_info_dict.DEFAULT_TIME_FORMAT)

    @staticmethod
    def string_to_time(time_string, time_format=None):
        """
        Convert a time string to local time object using time.mktime.

        :param time_string: String with the time, e.g, with the start time of
            a program.
        :param time_format: The time format to be used or None in which case
            run_info_dict.DEFAULT_TIME_FORMAT will be used.
        """
        return time.mktime(
            time.strptime(
                time_string,
                time_format if time_format is not None
                else run_info_dict.DEFAULT_TIME_FORMAT))
'saveDB': 'all_dv3',
'draw': None,
'saveFile': setting.CONFIG.SAVE_PATH,
'saveSignal': 'stock_signal_dv3'
}, filter)  # NOTE(review): tail of a call whose opening lies outside this chunk

#########################################################
# This file is executed once per day; it only backtests the result set
# produced by runEveryWeek and refreshes the latest backtest results.
if __name__ == '__main__':
    # Backtest, among all stock instruments, those that have produced trades,
    # belong to the CSI 300 index, and meet the dividend-years requirement.
    #
    # cProfile column reference:
    # ncalls: number of times the corresponding code/function was called
    # tottime: total time spent executing the code/function itself
    #          (excluding time spent in the code/functions it calls)
    # percall: the two above divided, i.e. tottime / ncalls
    # cumtime: total time spent in the code/function including the time
    #          spent in the code/functions it calls
    # cumtime percall: the average, i.e. cumtime / ncalls
    prof = Profile()
    prof.enable()
    RunHS300AndDVYears()
    prof.create_stats()
    p = pstats.Stats(prof)
    # p.print_stats()
    # p.sort_stats('calls').print_stats(20)
    p.sort_stats('cumulative').print_stats(20)
    # p.print_callees()
    # cProfile.run('RunHS300AndDVYears()')
    # RunHS300AndDVYears()
    # ch = msvcrt.getch()
class ResourceMeasure(object):
    """ Records and outputs resource-usage information.

    Spawns the external ``resource_measure`` shell script to sample system
    state while the process runs, optionally profiles the process with
    cProfile, and records per-section wall-clock timings.
    """
    CMD_BASH = "/bin/bash"
    CMD_MEASURE = os.path.join(HERE, "resource_measure")
    _instance = None      # singleton instance (see get_instance)
    _interval = 5         # sampling interval passed to the measure script
    _profiling = False    # whether to run cProfile alongside the measurements
    _outdir = os.path.join(
        HERE,
        "measure_result/{}".format(datetime.now().strftime("%Y%m%d.%H%M%S")))

    @classmethod
    def get_instance(cls):
        """ Return the singleton instance, creating it on first use. """
        if cls._instance is None:
            cls._instance = ResourceMeasure(cls._interval, cls._outdir,
                                            cls._profiling)
        return cls._instance

    @classmethod
    def config(cls, interval=None, profiling=None, outdir=None):
        """ Set configuration values (effective before the first get_instance call). """
        if interval is not None:
            cls._interval = interval
        if outdir is not None:
            cls._outdir = outdir
        if profiling is not None:
            cls._profiling = profiling

    @classmethod
    @contextmanager
    def cls_measure(cls, title):
        """ Run a measurement (context manager). Not implemented at class level. """
        raise NotImplementedError()

    @classmethod
    def cls_measured(cls, title=None):
        # Decorator counterpart of cls_measure; not implemented at class level.
        raise NotImplementedError()

    def __init__(self, interval, outdir, profiling):
        self.interval = interval
        self.outdir = outdir
        self.profiling = profiling
        self.profile = Profile() if self.profiling else None
        # self.iostat_filepath = os.path.join(self.outdir, "iostat.txt")
        # self.vmstat_filepath = os.path.join(self.outdir, "vmstat.txt")
        # self.ndstat_filepath = os.path.join(self.outdir, "ndstat.txt")
        # self.free_filepath = os.path.join(self.outdir, "free.txt")
        # self.ps_filepath = os.path.join(self.outdir, "ps.txt")
        self.section_filepath = os.path.join(self.outdir, "section.tsv")
        self.profile_filepath = os.path.join(self.outdir, "profile.prof")
        # One (title, start, end, seconds) tuple per measured section.
        self.sections = []
        self.measure_start = time.time()  # measurement start time
        # Launch the external measurement script against our own PID.
        self.proc = Popen([
            self.CMD_BASH, self.CMD_MEASURE,
            str(os.getpid()),
            str(self.interval), self.outdir
        ])
        time.sleep(1)  # give the script a moment to start sampling
        # Start profiling
        if self.profiling:
            self.profile.enable()

    @contextmanager
    def measure(self, title):
        """ Run a measurement (context manager): times the enclosed block. """
        start = datetime.now()
        yield
        end = datetime.now()
        seconds = (end - start).total_seconds()
        self.sections.append((title, start, end, seconds))

    def measured(self, title=None):
        """ Run a measurement (decorator): times each call of the wrapped function. """
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                start = datetime.now()
                ret = func(*args, **kwargs)
                end = datetime.now()
                seconds = (end - start).total_seconds()
                # func.func_name is Python 2 only (func.__name__ in Python 3).
                self.sections.append(
                    (func.func_name if title is None else title, start, end,
                     seconds))
                return ret
            return wrapper
        return decorator

    def finish(self):
        """ Finish measuring: write results and stop the helper script. """
        self.output_section_file()
        # Stop profiling
        if self.profiling:
            self.profile.create_stats()
            self.profile.dump_stats(self.profile_filepath)
        # Stop the measurement script
        self.proc.terminate()
        self.proc.wait()

    def __del__(self):
        # Best-effort cleanup; finish() is idempotent only per process exit.
        self.finish()

    def output_section_file(self):
        """ Write the per-section elapsed-time summary file (TSV). """
        tform = "%Y/%m/%d %H:%M:%S"
        with open(self.section_filepath, "w") as fout:
            for title, start, end, seconds in self.sections:
                print("\t".join([
                    title,
                    start.strftime(tform),
                    end.strftime(tform),
                    str(seconds)
                ]), file=fout)
class frame(IFrame):
    """Client-side frame connecting a spacetime application to its server.

    Registers the application's declared types with one or more hosts over
    HTTP, then drives a pull -> update -> push loop at a fixed time step,
    synchronizing the local object store (dataframe) with the server.
    """
    # All frames ever created in this process (used by the console/shutdown).
    framelist = set()

    def __init__(self, address="http://127.0.0.1:12000/", time_step=500,
                 instrument=False, profiling=False, wire_format="cbor",
                 compress=False):
        frame.framelist.add(self)
        self.thread = None
        self.__app = None
        self.__appname = ""
        self.__host_typemap = {}
        self.__host_wire_format = {}
        self.__typemap = {}
        self.__name2type = {}
        self.object_store = dataframe()
        self.object_store.start_recording = True
        if not address.endswith('/'):
            address += '/'
        self.__address = address
        self.__default_wire_format = wire_format
        self.__compress = compress
        # time_step is given in milliseconds; store it in seconds.
        self.__time_step = (float(time_step) / 1000)
        self.__new = {}
        self.__mod = {}
        self.__del = {}
        self.__observed_types = set()
        self.__observed_types_new = set()
        self.__observed_types_mod = set()
        self.__curtime = time.time()
        self.__curstep = 0
        self.__start_time = time.strftime("%Y-%m-%d_%H-%M-%S")
        self.__instrumented = instrument
        self.__profiling = profiling
        self.__sessions = {}
        self.__host_to_push_groupkey = {}
        if instrument:
            self._instruments = {}
            self._instrument_headers = []
            self._instrument_headers.append('bytes sent')
            self._instrument_headers.append('bytes received')

    def __register_app(self, app):
        """Register the attached app's type declarations with each host.

        Returns True on success, False if any host could not be reached or
        rejected the registration.
        """
        self.logger = self.__setup_logger("spacetime@" + self.__appname)
        self.__host_typemap = {}
        # Resolve "default" addresses and merge declarations per full address.
        for address, tpmap in self.__app.__declaration_map__.items():
            if address == "default":
                address = self.__address
            fulladdress = address + self.__appname
            if fulladdress not in self.__host_typemap:
                self.__host_typemap[fulladdress] = tpmap
            else:
                for declaration in tpmap:
                    self.__host_typemap[fulladdress].setdefault(
                        declaration, set()).update(set(tpmap[declaration]))
        self.__default_wire_format = (
            self.__app.__special_wire_format__["default"]
            if "default" in self.__app.__special_wire_format__ else
            self.__default_wire_format)
        for host in self.__host_typemap:
            self.__host_wire_format[host] = (
                self.__app.__special_wire_format__[host]
                if host in self.__app.__special_wire_format__ else
                self.__default_wire_format)
        all_types = set()
        for host in self.__host_typemap:
            wire_format = self.__host_wire_format[host]
            jobj = dict([(k, [tp.__realname__ for tp in v])
                         for k, v in self.__host_typemap[host].items()])
            # Ensure every mode key exists for this host (setdefault), and
            # accumulate the per-host sets into the frame-wide typemap.
            producing, getting, gettingsetting, deleting, setting, tracking = (
                self.__host_typemap[host].setdefault(Modes.Producing, set()),
                self.__host_typemap[host].setdefault(Modes.Getter, set()),
                self.__host_typemap[host].setdefault(Modes.GetterSetter, set()),
                self.__host_typemap[host].setdefault(Modes.Deleter, set()),
                self.__host_typemap[host].setdefault(Modes.Setter, set()),
                self.__host_typemap[host].setdefault(Modes.Tracker, set()))
            self.__typemap.setdefault(Modes.Producing, set()).update(producing)
            self.__typemap.setdefault(Modes.Getter, set()).update(getting)
            self.__typemap.setdefault(Modes.GetterSetter,
                                      set()).update(gettingsetting)
            self.__typemap.setdefault(Modes.Deleter, set()).update(deleting)
            self.__typemap.setdefault(Modes.Setter, set()).update(setting)
            self.__typemap.setdefault(Modes.Tracker, set()).update(tracking)
            all_types_host = tracking.union(producing).union(getting).union(
                gettingsetting).union(deleting).union(setting)
            all_types.update(all_types_host)
            self.__observed_types.update(all_types_host)
            self.__observed_types_new.update(
                self.__host_typemap[host][Modes.Tracker].union(
                    self.__host_typemap[host][Modes.Getter]).union(
                        self.__host_typemap[host][Modes.GetterSetter]))
            self.__observed_types_mod.update(
                self.__host_typemap[host][Modes.Getter].union(
                    self.__host_typemap[host][Modes.GetterSetter]))
            jsonobj = json.dumps({
                "sim_typemap": jobj,
                "wire_format": wire_format,
                "app_id": self.__app.app_id
            })
            try:
                self.__sessions[host] = Session()
                if platform.system() == 'Java':
                    # Jython needs a custom adapter with SSL checks relaxed.
                    ignoreJavaSSL()
                    self.logger.info("Using custom HTTPAdapter for Jython")
                    self.__sessions[host].mount(host, MyJavaHTTPAdapter())
                    self.__sessions[host].verify = False
                resp = requests.put(
                    host,
                    data=jsonobj,
                    headers={'content-type': 'application/json'})
            except HTTPError as exc:
                self.__handle_request_errors(resp, exc)
                return False
            except ConnectionError:
                self.logger.exception("Cannot connect to host.")
                self.__disconnected = True
                return False
        self.__name2type = dict([(tp.__realname__, tp) for tp in all_types])
        self.object_store.add_types(all_types)
        # Precompute, per host, the group keys this frame is allowed to push.
        for host in self.__host_typemap:
            self.__host_to_push_groupkey[host] = set([
                self.object_store.get_group_key(tp)
                for tp in self.__host_typemap[host][Modes.GetterSetter].union(
                    self.__host_typemap[host][Modes.Setter]).union(
                        self.__host_typemap[host][Modes.Producing]).union(
                            self.__host_typemap[host][Modes.Deleter])
            ])
        return True

    @staticmethod
    def loop():
        # Hand control to the interactive spacetime console.
        SpacetimeConsole().cmdloop()

    def get_instrumented(self):
        """ Returns if frame is running instrumentation. (True/False) """
        return self.__instrumented

    def get_curtime(self):
        """ Returns the timestamp of the current step. """
        return self.__curtime

    def get_curstep(self):
        """ Returns the current step value of the simulation. """
        return self.__curstep

    def get_timestep(self):
        """ Returns the time-step value in milliseconds. """
        # NOTE(review): __time_step is stored in seconds (see __init__);
        # the docstring's "milliseconds" reflects the constructor argument.
        return self.__time_step

    def get_app(self):
        """ Returns a reference to the application. """
        return self.__app

    def attach_app(self, app):
        """ Receives reference to application (implementing IApplication).

        Arguments:
        app : spacetime-conformant Application

        Exceptions:
        None
        """
        self.__app = app
        self.__appname = app.__class__.__name__ + "_" + self.__app.app_id

    def run_async(self):
        """ Starts application in non-blocking mode.

        Arguments:
        None

        Exceptions:
        None
        """
        self.thread = Parallel(target=self.__run)
        self.thread.daemon = True
        self.thread.start()

    def run_main(self):
        # Run the frame loop directly on the calling thread.
        self.__run()

    def run(self):
        """ Starts application in blocking mode.

        Arguments:
        None

        Exceptions:
        None
        """
        self.thread = Parallel(target=self.__run)
        self.thread.daemon = True
        self.thread.start()
        self.thread.join()

    def __clear(self):
        # Reset per-run state before (re)starting the loop.
        self.__disconnected = False
        self.__app.done = False
        self.object_store.clear_all()
        self.__new = {}
        self.__mod = {}
        self.__del = {}

    def __run(self):
        """Main frame loop: register, then pull/update/push until app.done."""
        self.__clear()
        if not self.__app:
            raise NotImplementedError("App has not been attached")
        success = self.__register_app(self.__app)
        if success:
            try:
                if self.__profiling:
                    try:
                        from cProfile import Profile  # @UnresolvedImport
                        if not os.path.exists('stats'):
                            os.mkdir('stats')
                        self.__profile = Profile()
                        self.__profile.enable()
                        self.logger.info("starting profiler for %s",
                                         self.__appname)
                    except:
                        self.logger.error(
                            "Could not import cProfile (not supported in Jython)."
                        )
                        self.__profile = None
                        self.__profiling = None
                self.__pull()
                self.__app.initialize()
                self.__push()
                while not self.__app.done:
                    st_time = time.time()
                    self.__pull()
                    self.__app.update()
                    self.__push()
                    end_time = time.time()
                    timespent = end_time - st_time
                    self.__curstep += 1
                    self.__curtime = time.time()
                    # time spent on execution loop
                    if timespent < self.__time_step:
                        time.sleep(float(self.__time_step - timespent))
                    else:
                        self.logger.info("loop exceeded maximum time: %s ms",
                                         timespent)
                    # Writes down total time spent in spacetime methods
                    if self.__instrumented:
                        si.record_instruments(timespent, self)
                # One last time, because _shutdown may delete objects from
                # the store
                self.__pull()
                self._shutdown()
                self.__push()
                self.__unregister_app()
            except ConnectionError as cerr:
                self.logger.error("A connection error occurred: %s",
                                  cerr.message)
            except HTTPError as herr:
                self.logger.error(
                    "A fatal error has occurred while communicating with the server: %s",
                    herr.message)
            except:
                self.logger.exception("An unknown error occurred.")
                raise
            finally:
                # Always flush profiling data, even on error paths.
                if self.__profiling:
                    self.__profile.disable()
                    self.__profile.create_stats()
                    self.__profile.dump_stats(
                        os.path.join(
                            'stats', "%s_stats_%s.ps" %
                            (self.__start_time, self.__appname)))
        else:
            self.logger.info("Could not register, exiting run loop...")

    def app_done(self):
        """ app_done

        Returns whether app has finished running or not
        """
        return self.__app.done

    def get(self, tp, oid=None):
        """ Retrieves objects from local data storage. If id is provided,
        returns the object identified by id. Otherwise, returns the list of
        all objects matching type tp.

        Arguments:
        tp : PCC set type being fetched
        oid : primary key of an individual object.

        Exceptions:
        - ID does not exist in store
        - Application does not annotate that type
        """
        if tp in self.__observed_types:
            if oid:
                # Have to get this to work
                return self.object_store.get(tp, oid)
            return self.object_store.get(tp)
        else:
            raise Exception("Application %s does not annotate type %s" %
                            (self.__appname, tp))

    def add(self, obj):
        """ Adds an object to be stored and tracked by spacetime.

        Arguments:
        obj : PCC object to stored

        Exceptions:
        - Application is not annotated as a producer
        """
        if obj.__class__ in self.__typemap[Modes.Producing]:
            self.object_store.append(obj.__class__, obj)
        else:
            raise Exception("Application %s is not a producer of type %s" %
                            (self.__appname, obj.__class__))

    def delete(self, tp, obj):
        """ Deletes an object currently stored and tracked by spacetime.

        Arguments:
        tp: PCC type of object to be deleted
        obj : PCC object to be deleted

        Exceptions:
        - Application is not annotated as a Deleter
        """
        if tp in self.__typemap[Modes.Deleter]:
            self.object_store.delete(tp, obj)
        else:
            raise Exception("Application %s is not registered to delete %s" %
                            (self.__appname, tp))

    def get_new(self, tp):
        """ Retrieves new objects of type 'tp' retrieved in last pull
        (i.e. since last tick).

        Arguments:
        tp: PCC type for retrieving list of new objects

        Exceptions:
        None

        Note:
        Application should be annotated as a Getter, GetterSetter, or
        Tracker, otherwise result is always an empty list.
        """
        if tp in self.__observed_types_new:
            return self.object_store.get_new(tp)
        else:
            self.logger.warn(
                ("Checking for new objects of type %s, but not "
                 "a Getter, GetterSetter, or Tracker of type. Empty list "
                 "always returned"), tp)
            return []

    def get_mod(self, tp):
        """ Retrieves objects of type 'tp' that were modified since last pull
        (i.e. since last tick).

        Arguments:
        tp: PCC type for retrieving list of modified objects

        Exceptions:
        None

        Note:
        Application should be annotated as a Getter,or GetterSetter,
        otherwise result is always an empty list.
        """
        if tp in self.__observed_types_mod:
            return self.object_store.get_mod(tp)
        else:
            self.logger.warn(("Checking for modifications in objects of type "
                              "%s, but not a Getter or GetterSetter of type. "
                              "Empty list always returned"), tp)
            return []

    def get_deleted(self, tp):
        """ Retrieves objects of type 'tp' that were deleted since last pull
        (i.e. since last tick).

        Arguments:
        tp: PCC type for retrieving list of deleted objects

        Exceptions:
        None

        Note:
        Application should be annotated as a Getter, GetterSetter, or
        Tracker, otherwise result is always an empty list.
        """
        if tp in self.__observed_types_new:
            return self.object_store.get_deleted(tp)
        else:
            self.logger.warn(
                ("Checking for deleted objects of type %s, but "
                 "not a Getter, GetterSetter, or Tracker of type. Empty list "
                 "always returned"), tp)
            return []

    def __handle_request_errors(self, resp, exc):
        # 401 is fatal (not registered); everything else is logged and ignored.
        if resp.status_code == 401:
            self.logger.error(
                "This application is not registered at the server. Stopping..."
            )
            raise
        else:
            self.logger.warn("Non-success code received from server: %s %s",
                             resp.status_code, resp.reason)

    @timethis
    def __process_pull_resp(self, resp):
        # Apply server-side changes to the local store, then forget the
        # record of changes that the apply itself produced.
        if resp and "gc" in resp:
            self.object_store.apply_changes(resp)
            self.object_store.clear_record()

    @timethis
    def __pull(self):
        """Fetch pending changes from every host and apply them locally."""
        if self.__disconnected:
            return
        if self.__instrumented:
            self._instruments['bytes received'] = 0
        updates = DataframeChanges_Base()
        try:
            for host in self.__host_typemap:
                type_dict = {}
                # Need to give mechanism to selectively ask for some changes.
                # Very hard to implement in current dataframe scheme.
                resp = self.__sessions[host].get(host + "/updated", data={})
                try:
                    resp.raise_for_status()
                    if self.__instrumented:
                        self._instruments['bytes received'] = len(resp.content)
                    data = resp.content
                    #print data
                    DF_CLS, content_type = FORMATS[
                        self.__host_wire_format[host]]
                    dataframe_change = DF_CLS()
                    dataframe_change.ParseFromString(data)
                    updates.CopyFrom(dataframe_change)
                except HTTPError as exc:
                    self.__handle_request_errors(resp, exc)
            #json.dump(updates, open("pull_" + self.__appname + ".json", "a") , sort_keys = True, separators = (',', ': '), indent = 4)
            self.__process_pull_resp(updates)
        except ConnectionError:
            self.logger.exception("Disconnected from host.")
            self.__disconnected = True
            self._stop()

    @timethis
    def __push(self):
        """Serialize locally recorded changes and send them to each host."""
        if self.__disconnected:
            return
        if self.__instrumented:
            self._instruments['bytes sent'] = 0
        changes = self.object_store.get_record()
        #json.dump(changes, open("push_" + self.__appname + ".json", "a") , sort_keys = True, separators = (',', ': '), indent = 4)
        for host in self.__host_typemap:
            try:
                DF_CLS, content_type = FORMATS[self.__host_wire_format[host]]
                changes_for_host = DF_CLS()
                # Only push group keys this frame is allowed to write to host.
                changes_for_host["gc"] = RecursiveDictionary([
                    (gck, gc) for gck, gc in changes["gc"].items()
                    if gck in self.__host_to_push_groupkey[host]
                ])
                if "types" in changes:
                    changes_for_host["types"] = changes["types"]
                dictmsg = changes_for_host.SerializeToString()
                #update_dict = {"update_dict": protomsg}
                if self.__instrumented:
                    self._instruments['bytes sent'] = sys.getsizeof(dictmsg)
                headers = {'content-type': content_type}
                if self.__compress:
                    headers['content-encoding'] = 'gzip'
                    dictmsg = zlib.compress(dictmsg)
                resp = self.__sessions[host].post(host + "/updated",
                                                  data=dictmsg,
                                                  headers=headers)
            except TypeError:
                self.logger.exception("error encoding obj. Object: %s",
                                      changes_for_host)
            except HTTPError as exc:
                self.__handle_request_errors(resp, exc)
            except ConnectionError:
                self.logger.exception("Disconnected from host.")
                self.__disconnected = True
                self._stop()
        self.object_store.clear_record()
        self.object_store.clear_buffer()

    def _shutdown(self):
        """ _shutdown

        Called after the frame execution loop stops, in the last pull/push
        iteration
        """
        self.__app.shutdown()

    def _stop(self):
        """ _stop

        Called by frame's command prompt on quit/exit
        """
        self.__app.done = True

    def __unregister_app(self):
        # Deregister from every host; assumes the DELETE succeeds.
        for host in self.__host_typemap:
            resp = requests.delete(host)
            self.logger.info("Successfully deregistered from %s", host)

    def __setup_logger(self, name, file_path=None):
        """Create a debug-level logger with a NullHandler attached."""
        logger = logging.getLogger(name)
        # Set default logging handler to avoid "No handler found" warnings.
        logger.addHandler(NullHandler())
        logger.setLevel(logging.DEBUG)
        logger.debug("Starting logger for %s", name)
        return logger
class run_info_dict(dict):
    """
    Simple dictionary class for collecting runtime information

    The typical use is as follows:

    >> my_run_info = run_info_dict()
    >> my_run_info(my_function)(my_parameters)

    With this, all runtime information is automatically collected in
    my_run_info. We can enable time-and-usage and memory profiling simply by
    calling enable_profile_time_and_usage(...) or enable_profile_memory(...),
    respectively, before we run our function.

    We can also use the data structure directly and control the population
    ourselves, however, memory profiling is not supported by default in this
    case but we need to set and run the memory profiler ourselves, since
    memory_profiler expects that it can wrap the function.
    """
    # Format used by string_to_structime/string_to_time to parse the
    # timestamps recorded in 'start_time'/'end_time'.
    DEFAULT_TIME_FORMAT = '%Y-%m-%d %H:%M:%S.%f'

    def __init__(self, *args, **kwargs):
        """
        Initialize the dict and the profiling state.

        Profiling is disabled by default; MPI communication defaults to
        COMM_WORLD with rank 0 as root, and gathering of per-rank data
        after execution is enabled.
        """
        super(run_info_dict, self).__init__(*args, **kwargs)
        self.__profile_time_and_usage = False   # cProfile-based profiling off
        self.__profile_memory = False           # memory_profiler off
        self.__time_and_use_profiler = None
        self.__memory_profiler = None
        self.mpi_comm = mpi_helper.get_comm_world()
        self.mpi_root = 0
        self.gather_data = True

    def __call__(self, func):
        """
        :param func: The function to be wrapped for execution
        :return: A wrapped function for which we track the runtime
            information in self
        """
        from functools import wraps

        @wraps(func)
        def wrapper(*args, **kwargs):
            # Pre-execute recording
            self.clear()                 # Clear all runtime information data and profilers
            self.record_preexecute()     # Record system provenance and pre-execution data
            start_time = time.time()     # Start the execution timer
            # Execute the function, optionally under the memory profiler
            if not self.get_profile_memory():
                result = func(*args, **kwargs)
            else:
                self.__memory_profiler = memory_profiler.LineProfiler()
                result = self.__memory_profiler(func)(*args, **kwargs)
            # Post-execute recording
            execution_time = time.time() - start_time
            self.record_postexecute(execution_time=execution_time)
            self.clean_up()              # Clean up empty data
            if self.gather_data:
                self.gather()            # Gather the data from all MPI ranks
            return result

        # Return our wrapped function
        return wrapper

    def clear(self):
        """
        Clear the dictionary and other internal parameters

        Side Effects

            * Remove all key/value pairs from the dict
            * Set self.__time_and_use_profiler to None
            * Set self.__memory_profiler to None
            * Set self.__profile_memory to False if invalid (i.e., if set
              to True but memory profiling is unavailable)
            * Set self.__profile_time_and_usage to False if invalid (i.e.,
              if set to True but profiling is unavailable)
        """
        # Make sure profiling settings are valid.
        # BUGFIX: the original code crossed these calls -- an unavailable
        # memory profiler disabled time-and-usage profiling and vice versa,
        # contradicting the documented side effects above.
        if self.get_profile_memory() and not PROFILE_MEMORY_AVAILABLE:
            self.enable_profile_memory(False)
        if self.get_profile_time_and_usage() and not PROFILE_AVAILABLE:
            self.enable_profile_time_and_usage(False)
        # Remove old profilers
        self.__time_and_use_profiler = None
        self.__memory_profiler = None
        # Clear all data from the dictionary
        return super(run_info_dict, self).clear()

    def enable_profile_memory(self, enable=True):
        """
        Enable/disable profiling of memory usage

        :param enable: boolean to enable (True) or disable (False)
            memory profiling
        """
        if PROFILE_MEMORY_AVAILABLE:
            # Log only on actual state transitions
            if not enable and self.__profile_memory:
                log_helper.debug(__name__, "Disabled memory profiling. ",
                                 root=self.mpi_root, comm=self.mpi_comm)
            if enable and not self.__profile_memory:
                log_helper.debug(__name__, "Enabled memory profiling. ",
                                 root=self.mpi_root, comm=self.mpi_comm)
            self.__profile_memory = enable
        else:
            # Required package missing: force the flag off and warn if the
            # caller asked to enable
            self.__profile_memory = False
            if enable:
                log_helper.warning(__name__,
                                   'Profiling of memory usage not available.' +
                                   ' Missing memory_profiler or StringIO package')

    def enable_profile_time_and_usage(self, enable=True):
        """
        Enable/disable time and usage profiling

        :param enable: boolean to enable (True) or disable (False) time
            and usage profiling
        """
        if PROFILE_AVAILABLE:
            # Log only on actual state transitions
            if not enable and self.__profile_time_and_usage:
                log_helper.debug(__name__, "Disabled time and usage profiling. ",
                                 root=self.mpi_root, comm=self.mpi_comm)
            if enable and not self.__profile_time_and_usage:
                log_helper.debug(__name__, "Enabled time and usage profiling. ",
                                 root=self.mpi_root, comm=self.mpi_comm)
            self.__profile_time_and_usage = enable
        else:
            # Required package missing: force the flag off and warn if the
            # caller asked to enable
            self.__profile_time_and_usage = False
            if enable:
                log_helper.warning(__name__,
                                   'Profiling of time and usage not available.' +
                                   ' Missing profile and/or pstats package')

    def get_profile_time_and_usage(self):
        """
        Check whether time and usage profiling is enabled

        :return: Boolean indicating whether time and usage profiling is enabled
        """
        return self.__profile_time_and_usage

    def get_profile_memory(self):
        """
        Check whether profiling of memory usage is enabled

        :return: Boolean indicating whether memory profiling is enabled
        """
        return self.__profile_memory

    def record_preexecute(self):
        """
        Record basic runtime information in this dict before the execution
        is started.

        Function used to record runtime information prior to executing the
        process we want to track, e.g., the `execute_analysis(...)` of a
        standard analysis.

        The function may be overwritten in child classes to add recording of
        additional runtime information. All runtime data should be recorded
        in the main dict (i.e., self). This ensures in the case of standard
        analysis that the data is stored in the HDF5 file. Other data should
        be stored in separate variables that we may add to the object.

        When overwriting the function we should typically call
        super(..., self).record_preexecute() last in the custom version to
        ensure that the start_time is properly recorded right before the
        execution of the analysis.
        """
        log_helper.debug(__name__, 'Recording pre-execution runtime data',
                         root=self.mpi_root, comm=self.mpi_comm)
        # Record basic runtime environment information using the platform module.
        # Best-effort: any failure is reduced to a warning.
        try:
            self['architecture'] = unicode(platform.architecture())
            self['java_ver'] = unicode(platform.java_ver())
            self['libc_ver'] = unicode(platform.libc_ver())
            self['linux_distribution'] = unicode(platform.linux_distribution())
            self['mac_ver'] = unicode(platform.mac_ver())
            self['machine'] = unicode(platform.machine())
            self['node'] = unicode(platform.node())
            self['platform'] = unicode(platform.platform())
            self['processor'] = unicode(platform.processor())
            self['python_branch'] = unicode(platform.python_branch())
            self['python_build'] = unicode(platform.python_build())
            self['python_compiler'] = unicode(platform.python_compiler())
            self['python_implementation'] = unicode(platform.python_implementation())
            self['python_revision'] = unicode(platform.python_revision())
            self['python_version'] = unicode(platform.python_version())
            self['release'] = unicode(platform.release())
            self['system'] = unicode(platform.system())
            self['uname'] = unicode(platform.uname())
            self['version'] = unicode(platform.version())
            self['win32_ver'] = unicode(platform.win32_ver())
        except:
            warnings.warn("WARNING: Recording of platform provenance failed: " +
                          str(sys.exc_info()))
        # Attempt to record the svn version information (best-effort)
        try:
            import subprocess
            self['svn_ver'] = subprocess.check_output('svnversion').rstrip('\n')
        except ImportError:
            log_helper.warning(__name__,
                               'Recording of svn version not possible. subprocess not installed',
                               root=self.mpi_root, comm=self.mpi_comm)
        except:
            warnings.warn("Recording of svn version information failed: " +
                          str(sys.exc_info()))
        # Attempt to record software library version (best-effort)
        try:
            import numpy as np
            self['numpy_version_full_version'] = unicode(np.version.full_version)
            self['numpy_version_release'] = unicode(np.version.release)
            self['numpy_version_git_revision'] = unicode(np.version.git_revision)
        except ImportError:
            log_helper.warning(__name__, 'Recording of numpy version not possible.',
                               root=self.mpi_root, comm=self.mpi_comm)
        # Attempt to record psutil data (best-effort)
        try:
            import psutil
            self['logical_cpu_count'] = unicode(psutil.cpu_count())
            self['cpu_count'] = unicode(psutil.cpu_count(logical=False))
            process = psutil.Process()
            self['open_files'] = unicode(process.open_files())
            self['memory_info_before'] = unicode(process.memory_info())
        except ImportError:
            log_helper.warning(
                __name__,
                'psutil not installed. Recording of part of runtime information not possible',
                root=self.mpi_root, comm=self.mpi_comm)
        except:
            warnings.warn("Recording of psutil-based runtime information failed: " +
                          str(sys.exc_info()))
        # Record the start time for the analysis
        self['start_time'] = unicode(datetime.datetime.now())
        # Enable time and usage profiling if requested
        if self.__profile_time_and_usage:
            self.__time_and_use_profiler = Profile()
            self.__time_and_use_profiler.enable()

    def record_postexecute(self, execution_time=None):
        """
        Function used to record runtime information after the task we want
        to track is completed, e.g. the `execute_analysis(...)` function of
        a standard analysis.

        The function may be overwritten in child classes to add recording of
        additional runtime information.

        When overwriting the function we should call
        super(..., self).record_postexecute(execution_time) in the custom
        version to ensure that the execution_time and end_time are properly
        recorded.

        :param execution_time: The total time it took to execute the
            analysis. May be None, in which case the function will attempt
            to compute the execution time based on the start_time (if
            available) and the current time.
        """
        log_helper.debug(__name__, 'Recording post-execution runtime data',
                         root=self.mpi_root, comm=self.mpi_comm)
        # Finalize recording of post execution provenance
        self['end_time'] = unicode(datetime.datetime.now())
        if execution_time is not None:
            self['execution_time'] = unicode(execution_time)
        elif 'start_time' in self:
            start_time = run_info_dict.string_to_time(self['start_time'])
            stop_time = run_info_dict.string_to_time(self['end_time'])
            self['execution_time'] = unicode(stop_time - start_time)
            # TODO: This only gives execution time in full seconds right now
        else:
            self['execution_time'] = None
        # Attempt to record psutil data (best-effort)
        try:
            import psutil
            process = psutil.Process()
            self['memory_info_after'] = unicode(process.memory_info())
        except ImportError:
            log_helper.warning(
                __name__,
                'psutil not installed. Recording of part of runtime information not possible',
                root=self.mpi_root, comm=self.mpi_comm)
        except:
            warnings.warn("Recording of psutil-based runtime information failed: " +
                          str(sys.exc_info()))
        # Record the time and use profiling data if possible
        if self.__time_and_use_profiler is not None:
            self.__time_and_use_profiler.disable()
            self.__time_and_use_profiler.create_stats()
            self['profile'] = unicode(self.__time_and_use_profiler.stats)
            # Save the summary statistics for the profiling data
            stats_io = StringIO.StringIO()
            profiler_stats = pstats.Stats(
                self.__time_and_use_profiler, stream=stats_io).sort_stats('cumulative')
            profiler_stats.print_stats()
            self['profile_stats'] = stats_io.getvalue()
        # Record the memory profiling data if possible
        if self.__memory_profiler is not None and self.get_profile_memory():
            log_helper.debug(__name__, 'Recording memory profiling data',
                             root=self.mpi_root, comm=self.mpi_comm)
            mem_stats_io = StringIO.StringIO()
            memory_profiler.show_results(self.__memory_profiler,
                                         stream=mem_stats_io)
            self['profile_mem'] = unicode(self.__memory_profiler.code_map)
            self['profile_mem_stats'] = mem_stats_io.getvalue()

    def clean_up(self):
        """
        Clean up the runinfo object. In particular remove empty keys that
        either recorded None or recorded just an empty string.

        This function may be overwritten to also do clean-up needed due to
        additional custom runtime instrumentation.

        When overwriting this function we should call
        super(..., self).clean_up() at the end of the function to ensure
        that the runinfo dictionary is clean, i.e., does not contain any
        empty entries.
        """
        log_helper.debug(__name__, 'Clean up runtime data',
                         root=self.mpi_root, comm=self.mpi_comm)
        # Remove empty items from the run_info dict.
        # ROBUSTNESS FIX: snapshot the items before popping -- mutating a
        # dict while iterating its live items view raises RuntimeError on
        # Python 3 (it only worked on Python 2 where items() returns a list).
        for ri_key, ri_value in list(self.items()):
            try:
                if ri_value is None or len(ri_value) == 0:
                    self.pop(ri_key)
            except:
                # Values without len() (e.g. numbers) are simply kept
                pass

    def gather(self):
        """
        Simple helper function to gather the runtime information---that has
        been collected on multiple processes when running using MPI---on a
        single root process.

        :return: If we have more than one process then this function returns
            a dictionary with the same keys as usual for the run_info but
            the values are now lists with one entry per mpi process. If we
            only have a single process, then the run_info object will be
            returned without changes. NOTE: Similar to mpi gather, the
            function only collects information on the root. All other
            processes will return just their own private runtime
            information.
        """
        if mpi_helper.MPI_AVAILABLE:
            if self.mpi_comm.Get_size() > 1:
                log_helper.debug(__name__, 'Gather runtime data from parallel tasks',
                                 root=self.mpi_root, comm=self.mpi_comm)
                self['mpi_rank'] = self.mpi_comm.Get_rank()
                run_data = self.mpi_comm.gather(self, self.mpi_root)
                if self.mpi_comm.Get_rank() == self.mpi_root:
                    # Merge the per-rank dicts into one dict of lists
                    merged_run_data = {}
                    for run_dict in run_data:
                        for key in run_dict:
                            try:
                                merged_run_data[key].append(run_dict[key])
                            except KeyError:
                                merged_run_data[key] = [run_dict[key]]
                    return merged_run_data
        return self

    def get_profile_stats_object(self, consolidate=True, stream=None):
        """
        Based on the execution profile of the execute_analysis(..) function
        get a ``pstats.Stats`` object to help with the interpretation of the
        data.

        :param consolidate: Boolean flag indicating whether multiple stats
            (e.g., from multiple cores) should be consolidated into a single
            stats object. Default is True.
        :param stream: The optional stream parameter to be used for the
            pstats.Stats object.

        :return: A single pstats.Stats object if consolidate is True.
            Otherwise the function returns a list of pstats.Stats objects,
            one per recorded statistic. None is returned in case that the
            stats objects cannot be created or no profiling data is
            available.
        """
        from ast import literal_eval
        if stream is None:
            import sys
            stream = sys.stdout
        if 'profile' in self:
            # Parse the profile data (which is stored as a string) or in the
            # case of MPI we may have a list of strings from each MPI process
            if isinstance(self['profile'], list):
                # Convert the profile from each MPI process independently
                profile_data = [literal_eval(profile) for profile in self['profile']]
            else:
                # If we only have a single stat, then convert our data to a
                # list, so that we can handle the single and multiple
                # statistics case in the same way in the remainder of this
                # function
                profile_data = [literal_eval(self['profile']), ]
            # Create a list of profile objects that the pstats.Stats class understands
            profile_dummies = []
            for profile_i in profile_data:
                # Here we are creating for each statistic a dummy class on
                # the fly that holds our profile_data in the stats attribute
                # and has an empty create_stats function. This trick allows
                # us to create a pstats.Stats object without having to write
                # our stats data to file or having to create a
                # cProfile.Profile object first. Writing the data to file
                # involves overhead and is ugly and creating a profiler and
                # overwriting its stats is potentially problematic
                profile_dummies.append(
                    type('Profile',
                         (object,),
                         {'stats': profile_i,
                          'create_stats': lambda x: None})())
            # Create the statistics object and return it
            if consolidate:
                profile_stats = pstats.Stats(*profile_dummies, stream=stream)
                return profile_stats
            else:
                profile_stats = [pstats.Stats(profile_i, stream=stream)
                                 for profile_i in profile_dummies]
                return profile_stats
        else:
            return None

    @staticmethod
    def string_to_structime(time_string, time_format=None):
        """
        Convert a time string to a time.struct_time using time.strptime

        :param time_string: String with the time, e.g, with the start time
            of a program.
        :param time_format: The time format to be used or None in which case
            run_info_dict.DEFAULT_TIME_FORMAT will be used.
        """
        return time.strptime(time_string,
                             time_format if time_format is not None
                             else run_info_dict.DEFAULT_TIME_FORMAT)

    @staticmethod
    def string_to_time(time_string, time_format=None):
        """
        Convert a time string to local time object using time.mktime.

        :param time_string: String with the time, e.g, with the start time
            of a program.
        :param time_format: The time format to be used or None in which case
            run_info_dict.DEFAULT_TIME_FORMAT will be used.
        """
        return time.mktime(time.strptime(time_string,
                                         time_format if time_format is not None
                                         else run_info_dict.DEFAULT_TIME_FORMAT))