def tearDown(self):
    """Sort and optionally dump the profile collected during the test."""
    p = Stats(self.prof)
    p.sort_stats("cumtime")
    if self.verbose:
        # NOTE: the profiles/ directory must already exist; dump_stats will not create it.
        p.dump_stats("profiles/test_graph.py.prof")
    p.strip_dirs()
def handle(self, *args, **options):
    profile_file = options.pop('profile', None)
    if profile_file:
        profiler = Profile()
        profiler.runcall(self._handle, *args, **options)
        stats = Stats(profiler)
        stats.dump_stats(profile_file)
    else:
        self._handle(*args, **options)
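# Hedged usage sketch for the handle() pattern above, assuming it belongs to a
# Django BaseCommand subclass: a matching add_arguments() must expose the
# "--profile" option that handle() pops. The option name comes from the
# snippet; the help text is an assumption.
def add_arguments(self, parser):
    parser.add_argument(
        "--profile",
        default=None,
        help="Write cProfile statistics to this file instead of running normally.",
    )
# The resulting dump can then be inspected with: python -m pstats <profile_file>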
def build_document(self, file_name):
    """This is the entry point for the NetcfBuilders from the ingestManager.
    These documents are id'd by fcstValidEpoch. The data section is an array,
    each element of which contains variable data and a station name. To
    process this file we need to iterate the document by recNum and process
    the station name along with all the other variables in the variableList.
    Args:
        file_name (str): the name of the file being processed
    Returns:
        [dict]: document
    """
    # noinspection PyBroadException
    try:
        # stash the file_name so that it can be used later
        self.file_name = os.path.basename(file_name)
        # pylint: disable=no-member
        self.ncdf_data_set = nc.Dataset(file_name)
        if len(self.station_names) == 0:
            result = self.cluster.query(
                """SELECT raw name FROM mdata
                WHERE type = 'MD'
                AND docType = 'station'
                AND subset = 'METAR'
                AND version = 'V01';
                """
            )
            self.station_names = list(result)
        self.initialize_document_map()
        logging.info(
            "%s building documents for file %s", self.__class__.__name__, file_name
        )
        if self.do_profiling:
            with cProfile.Profile() as _pr:
                self.handle_document()
                with open("profiling_stats.txt", "w") as stream:
                    stats = Stats(_pr, stream=stream)
                    stats.strip_dirs()
                    stats.sort_stats("time")
                    stats.dump_stats("profiling_stats.prof")
                    stats.print_stats()
        else:
            self.handle_document()
        # pylint: disable=assignment-from-no-return
        document_map = self.get_document_map()
        data_file_id = self.create_data_file_id(file_name=file_name)
        data_file_doc = self.build_datafile_doc(
            file_name=file_name,
            data_file_id=data_file_id,
        )
        document_map[data_file_doc["id"]] = data_file_doc
        return document_map
    except Exception as _e:  # pylint:disable=broad-except
        logging.error(
            "%s: Exception with builder build_document: error: %s",
            self.__class__.__name__,
            str(_e),
        )
        return {}
def concat(pattern, outfile, mpi=None):
    """Merge all pstats dumps matching pattern into outfile, then delete them."""
    if mpi:
        from mpi4py import MPI
        # under MPI, each rank merges only its own per-rank dumps
        pattern = pattern % MPI.COMM_WORLD.rank
        outfile = outfile % MPI.COMM_WORLD.rank
    files = glob(pattern)
    if files:
        s = Stats(files[0])
        for f in files[1:]:
            s.add(f)
        s.dump_stats(outfile)
        for f in files:
            os.remove(f)
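# Hedged usage sketch for concat() above; the filename patterns are
# illustrative assumptions, not part of the original snippet. With mpi=True
# each rank first substitutes its rank number into both patterns:
concat("rank%d_step*.prof", "rank%d_merged.prof", mpi=True)
# Without MPI, any glob pattern merges everything it matches into one file:
concat("profile_*.prof", "merged.prof")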
# contextmanager is required for the with-statement usage of this generator;
# the decorator and import were implied by the yield but missing here.
from contextlib import contextmanager

@contextmanager
def profiler(enable, outfile):
    try:
        if enable:
            profiler = Profile()
            profiler.enable()
        yield
    finally:
        if enable:
            profiler.disable()
            stats = Stats(profiler)
            stats.sort_stats('tottime')
            stats.dump_stats(outfile)
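# Hedged usage sketch for the profiler() context manager above: the first
# argument toggles profiling, so the same call site can run unprofiled.
# expensive_work() and the output path are hypothetical.
def expensive_work():
    sum(i * i for i in range(1_000_000))

with profiler(True, "expensive_work.prof"):
    expensive_work()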
def save_stats(self):
    with io.StringIO() as stream:
        stats = Stats(self.profiler, stream=stream).strip_dirs().sort_stats(self.order)
        stats.print_stats(self.top_results)
        self.logger.info(stream.getvalue())
    try:
        if self.path is not None:
            stats.dump_stats(self.path)
    finally:
        self.profiler.enable()
def profile(fun, filename: str):
    '''
    Runs the function passed as the first argument and profiles it with the
    cProfile module. The resulting statistics are printed to the file named
    by the second argument, and a binary dump is also written to '.prof_stats'.
    '''
    with cProfile.Profile() as pr:
        fun()
    with open(filename, 'w') as stream:
        stats = Stats(pr, stream=stream)
        stats.strip_dirs()
        stats.sort_stats('time')
        stats.dump_stats('.prof_stats')
        stats.print_stats()
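# Hedged usage sketch for profile() above; slow() and the report path are
# hypothetical. The text report lands in the named file, while the binary
# dump always goes to ".prof_stats" (hard-coded in profile()) and can be
# reloaded later via pstats.Stats(".prof_stats").
def slow():
    sorted(range(1_000_000), key=lambda i: -i)

profile(slow, "slow_report.txt")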
def profile_request(path, cookie, f):
    a = app.configured_app()
    pr = cProfile.Profile()
    headers = {'Cookie': cookie}
    with a.test_request_context(path, headers=headers):
        pr.enable()
        f(7)
        pr.disable()
    pr.create_stats()
    s = Stats(pr).sort_stats('cumulative')
    s.dump_stats('profile.pstat')
    s.print_stats('.*Anfield.*')
def process_view(self, request, view_func, view_args, view_kwargs):
    from cProfile import Profile
    from pstats import Stats
    # view_func.func_name is Python 2 only; __name__ works on both versions.
    full_name = "{v.__module__}.{v.__name__}".format(v=view_func)
    if self.regex.match(full_name):
        profile = Profile()
        response = profile.runcall(view_func, request, *view_args, **view_kwargs)
        stats = Stats(profile)
        if os.path.exists(self.filename):
            stats.add(self.filename)
        stats.strip_dirs()
        stats.dump_stats(self.filename)
        return response
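# The middleware above accumulates across requests: Stats.add() folds the
# previously dumped file back in before the new dump overwrites it, so
# self.filename holds a running total. A hedged standalone sketch of the same
# merge idiom, with a hypothetical path and workload:
from cProfile import Profile
from pstats import Stats
import os

profile = Profile()
profile.runcall(sorted, range(100_000))
stats = Stats(profile)
if os.path.exists("accumulated.prof"):
    stats.add("accumulated.prof")  # merge stats from earlier runs
stats.strip_dirs()
stats.dump_stats("accumulated.prof")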
def profile_request(path, cookie, f):
    a = app.configured_app()
    pr = cProfile.Profile()
    headers = {'Cookie': cookie}
    with a.test_request_context(path, headers=headers):
        pr.enable()
        f(1)
        pr.disable()
    pr.create_stats()
    s = Stats(pr).sort_stats('cumulative')
    s.dump_stats('user_detail.pstat')
    s.print_stats('.*pyForum.*')
def test_make_stub_files_issues(tmp_path, pytestconfig):
    # Deal with some files having issues
    source = pytestconfig.rootpath / "tests/data/stubs-issues"
    dest = tmp_path / "stubs"
    shutil.copytree(source, dest)
    PROBLEMATIC = 1  # number of files with issues
    if do_profiling:
        with cProfile.Profile() as pr:
            result = utils.generate_pyi_files(dest)
            with open("profiling_stats.txt", "w") as stream:
                stats = Stats(pr, stream=stream)
                stats.strip_dirs()
                stats.sort_stats("time")
                stats.dump_stats(".prof_stats")
                stats.print_stats()
    else:
        result = utils.generate_pyi_files(dest)
    py_count = len(list(Path(dest).glob("**/*.py")))
    pyi_count = len(list(Path(dest).glob("**/*.pyi")))
    assert py_count == pyi_count + PROBLEMATIC, "1:1 py:pyi"
    # for py missing pyi:
    py_files = list(dest.rglob("*.py"))
    pyi_files = list(dest.rglob("*.pyi"))
    for pyi in pyi_files:
        # remove all py files that have been stubbed successfully
        try:
            py_files.remove(pyi.with_suffix(".py"))
        except ValueError:
            pass
    assert len(py_files) == PROBLEMATIC, "py and pyi files should match 1:1 and stored in the same folder"
class Stat:
    """manage interface between pstat data and GUI item"""

    def __init__(self, pstat=None, path=None):
        # pStat profile statistics instance
        self.pstat = None
        self.load(pstat, path)
        self.itemArray = {}
        self.itemArray[TAB_FUNCTIONSTAT] = {}
        self.itemArray[TAB_SOURCE] = {}
        # Reference from pstat key to Qt object in the GUI
        self.pStatArray = {}
        self.pStatArray[TAB_FUNCTIONSTAT] = {}
        self.pStatArray[TAB_SOURCE] = {}

    def getTotalTime(self):
        if self.pstat:
            return self.pstat.total_tt
        else:
            return 0

    def getCallNumber(self):
        if self.pstat:
            return self.pstat.total_calls
        else:
            return 0

    def getPrimitiveCallRatio(self):
        if self.pstat:
            return 100.0 * float(self.pstat.prim_calls) / float(self.pstat.total_calls)
        else:
            return 0

    def getStatNumber(self):
        if self.pstat:
            return len(self.pstat.stats)
        else:
            return 0

    def getStatItems(self):
        if self.pstat:
            return list(self.pstat.stats.items())

    def getStatKeys(self):
        if self.pstat:
            return list(self.pstat.stats.keys())

    def getCalleesItems(self):
        if self.pstat:
            return list(self.pstat.all_callees.items())

    def getStatTotalTime(self, pstatTriplet):
        if self.pstat:
            try:
                return self.pstat.stats[pstatTriplet][2]
            except KeyError:
                return 0
        else:
            return 0

    def getStatCumulativeTime(self, pstatTriplet):
        if self.pstat:
            try:
                return self.pstat.stats[pstatTriplet][3]
            except KeyError:
                return 0
        else:
            return 0

    def load(self, pstat=None, path=None):
        if pstat and path:
            print('Warning: both pstat and path parameters given; path overrides pstat!')
        if pstat:
            self.pstat = pstat
        if path:
            self.pstat = Stats(str(path))
        if self.pstat:
            self.pstat.calc_callees()

    def save(self, path):
        try:
            self.pstat.dump_stats(path)
        except Exception:
            pass

    def setStatLink(self, guiItem, pstatTriplet, target):
        self.itemArray[target][guiItem] = pstatTriplet
        self.pStatArray[target][pstatTriplet] = guiItem

    def getPstatFromGui(self, guiItem, target):
        try:
            return self.itemArray[target][guiItem]
        except KeyError:
            return None

    def getGuiFromPstat(self, pStatTriplet, target):
        try:
            return self.pStatArray[target][pStatTriplet]
        except KeyError:
            return None

    def finish(self):
        'clear when finished'
        # NOTE: self.process is assumed to be set elsewhere on this object.
        self.process.kill()
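# Hedged usage sketch for the Stat wrapper above, assuming the
# TAB_FUNCTIONSTAT / TAB_SOURCE constants are defined elsewhere in the GUI
# module and a pstats dump exists at the hypothetical path "run.prof":
stat = Stat(path="run.prof")
print(stat.getTotalTime())           # total time, from Stats.total_tt
print(stat.getPrimitiveCallRatio())  # primitive calls as % of total calls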
class Profiler(object):

    def __init__(self, name=None):
        self._lock = RLock()
        self._name = name or settings.PROFILE_DEFAULT_NAME
        self._location = settings.PROFILE_FILENAME_TEMPLATE % self._name
        self._stats = None
        self._local = ProfilerLocal()
        self._notch = time() - 1
        self._updates = False

    def _set_status(self, value):
        settings.PROFILING = value

    status = property(lambda self: settings.PROFILING, _set_status)

    def __enter__(self):
        if not settings.PROFILING:
            return self
        if not self._local.counter:
            self._local.profile = Profile()
            self._local.profile.enable()
        self._local.counter += 1
        return self

    def __exit__(self, extype, exvalue, extraceback):
        if not settings.PROFILING:
            return
        self._local.counter -= 1
        if not self._local.counter:
            self._local.profile.disable()
            self.aggregate(self._local.profile)
            self._local.profile = None

    def aggregate(self, profile_or_stats):
        with self._lock:
            if self._stats is None:
                self._stats = Stats(profile_or_stats)
            else:
                self._stats.add(profile_or_stats)
            self._updates = True

    def clear(self):
        with self._lock:
            if self._stats is not None:
                self._stats = None

    @contextmanager
    def hold(self, location=None):
        if not settings.PROFILING:
            yield None
        else:
            if location is None:
                location = self._location
            with self._lock:
                self.save(location=location, force=True)
                yield location

    def load(self, location=None):
        if not settings.PROFILING:
            return None
        if location is None:
            location = self._location
        with self._lock:
            self.save(location=location, force=True)
            try:
                with open(location, "rb") as file:
                    return file.read()
            except Exception as error:
                if isinstance(error, IOError) and error.errno == errno.ENOENT:
                    return None
                else:
                    raise

    def save(self, location=None, force=False):
        if not (settings.PROFILING and self._updates):
            return
        now = time()
        if not force and now < self._notch:
            return
        if location is None:
            location = self._location
        with self._lock:
            if self._stats:
                self._stats.dump_stats(location)
                if not force:
                    server_log.write("Save profiling statistics to \"%s\"" % os.path.basename(location))
            self._notch = now + settings.PROFILING_SAVE_PERIODICITY
            self._updates = False

    def autosave(self):
        self.save()
        # itervalues() was Python 2 only; _profilers is assumed to be a
        # registry defined elsewhere.
        for profiler in self._profilers.values():
            profiler.save()
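# Hedged usage sketch for the Profiler above: nested with-blocks on the same
# thread share one cProfile.Profile via the counter in ProfilerLocal, and
# stats aggregate across threads under the lock. settings and ProfilerLocal
# are this codebase's own objects; the names below are only illustrative.
prof = Profiler("api")
prof.status = True          # flips settings.PROFILING on via the property
with prof:
    with prof:              # inner block re-uses the outer profile
        sum(range(10_000))
prof.save(force=True)       # dump aggregated stats to the template location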
def build_document(self, file_name):
    # pylint:disable=too-many-statements, disable=too-many-locals
    """
    This is the entry point for the gribBuilders from the ingestManager.
    The ingest manager is giving us a grib file to process from the queue.
    These documents are id'd by time and fcstLen. The data section is an
    array, each element of which contains variable data and a station name.
    To process this file we need to iterate the domain_stations list and
    process the station name along with all the required variables.
    1) get the first epoch - if none was specified get the latest one from the db
    2) transform the projection from the grib file
    3) determine the stations for this domain, adding gridpoints to each station - build a station list
    4) enable profiling if requested
    5) handle_document - iterate the template and process all the keys and values
    6) build a datafile document to record that this file has been processed
    """
    try:
        # translate the projection from the grib file
        logging.getLogger().setLevel(logging.INFO)
        self.projection = gg.getGrid(file_name)
        self.grbs = pygrib.open(file_name)  # pylint:disable=no-member
        self.grbm = self.grbs.message(1)
        self.spacing, max_x, max_y = gg.getAttributes(file_name)
        # Set the two projections to be used during the transformation
        # (nearest neighbor method, what we use for everything with METARs)
        self.in_proj = pyproj.Proj(proj="latlon")
        self.out_proj = self.projection
        self.transformer = pyproj.Transformer.from_proj(
            proj_from=self.in_proj, proj_to=self.out_proj)
        self.transformer_reverse = pyproj.Transformer.from_proj(
            proj_from=self.out_proj, proj_to=self.in_proj)
        # reset the builder's document_map for a new file
        self.initialize_document_map()
        # get stations from couchbase and filter them so that we retain only
        # the ones for this model's domain, which is derived from the projection.
        # NOTE: this is not about regions, this is about models.
        self.domain_stations = []
        limit_clause = ""
        if self.number_stations != sys.maxsize:
            limit_clause = " limit {l}".format(l=self.number_stations)
        result = self.cluster.query(
            """SELECT mdata.geo.lat, mdata.geo.lon, name
                from mdata
                where type='MD'
                and docType='station'
                and subset='METAR'
                and version='V01' {limit_clause}
            """.format(limit_clause=limit_clause))
        for row in result:
            if row["lat"] == -90 and row["lon"] == 180:
                # TODO need to fix this
                continue  # don't know how to transform that station
            _x, _y = self.transformer.transform(row["lon"], row["lat"], radians=False)
            x_gridpoint, y_gridpoint = _x / self.spacing, _y / self.spacing
            try:
                # pylint: disable=c-extension-no-member
                if (math.floor(x_gridpoint) < 0
                        or math.ceil(x_gridpoint) >= max_x
                        or math.floor(y_gridpoint) < 0
                        or math.ceil(y_gridpoint) >= max_y):
                    continue
            except Exception as _e:  # pylint: disable=broad-except
                logging.error(
                    "%s: Exception with builder build_document processing station: error: %s",
                    self.__class__.__name__,
                    str(_e),
                )
                continue
            station = copy.deepcopy(row)
            station["x_gridpoint"] = x_gridpoint
            station["y_gridpoint"] = y_gridpoint
            self.domain_stations.append(station)
        # if we have asked for profiling go ahead and do it
        if self.do_profiling:
            with cProfile.Profile() as _pr:
                self.handle_document()
                with open("profiling_stats.txt", "w") as stream:
                    stats = Stats(_pr, stream=stream)
                    stats.strip_dirs()
                    stats.sort_stats("time")
                    stats.dump_stats("profiling_stats.prof")
                    stats.print_stats()
        else:
            self.handle_document()
        # pylint: disable=assignment-from-no-return
        document_map = self.get_document_map()
        data_file_id = self.create_data_file_id(
            model=self.template["model"],
            file_name=file_name)
        if data_file_id is None:
            logging.error("%s: Failed to create DataFile ID:", self.__class__.__name__)
        data_file_doc = self.build_datafile_doc(
            model=self.template["model"],
            file_name=file_name,
            data_file_id=data_file_id,
        )
        document_map[data_file_doc["id"]] = data_file_doc
        return document_map
    except Exception as _e:  # pylint:disable=broad-except
        logging.error(
            "%s: Exception with builder build_document: file_name: %s error: %s",
            self.__class__.__name__,
            file_name,
            str(_e),
        )
        return {}
# First layer is the input.
model.add_layer(784, activation.sigmoid)
model.add_layer(20, activation.sigmoid)
model.add_layer(10, activation.sigmoid)

import pandas as pd
label_series = pd.Series(train_labels)
train_labels = np.array(pd.get_dummies(label_series).values.tolist())
data = np.array(list(zip(train_data, train_labels)))
model.train(epochs=1, training_data=data, learning_rate=0.01, min_precision=0.1)
# Saving network to file.
save_network(model)

if __name__ == '__main__':
    do_profiling = False
    if do_profiling:
        import cProfile
        from pstats import Stats, SortKey
        with cProfile.Profile() as pr:
            main()
        with open('profiling_stats.txt', 'w') as stream:
            stats = Stats(pr, stream=stream)
            stats.strip_dirs()
            stats.sort_stats('time')
            stats.dump_stats('.prof_stats')
            stats.print_stats()
    else:
        main()
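# Hedged note on the dump above: '.prof_stats' (binary) and
# 'profiling_stats.txt' (text report) hold the same data. A minimal sketch
# for re-reading the binary dump in a later session:
from pstats import Stats, SortKey

stats = Stats('.prof_stats')
stats.sort_stats(SortKey.CUMULATIVE).print_stats(10)  # top 10 by cumulative time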
def build_document(self):
    """
    This is the entry point for the ctcBuilders from the ingestManager.
    These documents are id'd by fcstValidEpoch and fcstLen. The data section
    is an array, each element of which contains a map keyed by thresholds.
    The values are the hits, misses, false_alarms, and correct_negatives for
    the stations in the region that is specified in the ingest_document.
    To process this file we need to iterate the list of valid fcstValidEpochs
    and process the region station list for each fcstValidEpoch and fcstLen.
    1) get stations from couchbase and filter them so that we retain only the ones for this model's region
    2) get the latest fcstValidEpoch for the ctc's for this model and region.
    3) get the intersection of the fcstValidEpochs that correspond for this model
       and the obs for all fcstValidEpochs greater than the first ctc.
    4) if we have asked for profiling go ahead and do it
    5) iterate the fcstValidEpochs and get the models and obs for each fcstValidEpoch
    6) Within the fcstValidEpoch loop iterate the model fcstLen's and handle a
       document for each fcstValidEpoch and fcstLen. This will result in a
       document for each fcstLen within a fcstValidEpoch.
    5) and 6) are enclosed in handle_document()
    """
    # noinspection PyBroadException
    try:
        logging.getLogger().setLevel(logging.INFO)
        # reset the builder's document_map for a new file
        self.initialize_document_map()
        self.not_found_station_count = 0
        try:
            self.domain_stations = self.get_legacy_stations_for_region(self.region)
        except Exception as _e:  # pylint: disable=broad-except
            logging.error(
                "%s: Exception with builder build_document: error: %s",
                self.__class__.__name__,
                str(_e),
            )
        # First get the latest fcstValidEpoch for the ctc's for this model and region.
        result = self.cluster.query(
            """SELECT RAW MAX(mdata.fcstValidEpoch)
            FROM mdata
            WHERE type='DD'
            AND docType='CTC'
            AND subDocType=$subDocType
            AND model=$model
            AND region=$region
            AND version='V01'
            AND subset='METAR'""",
            model=self.model,
            region=self.region,
            subDocType=self.sub_doc_type,
            read_only=True,
        )
        max_ctc_fcst_valid_epochs = self.load_spec["first_last_params"]["first_epoch"]
        if list(result)[0] is not None:
            max_ctc_fcst_valid_epochs = list(result)[0]
        # Second get the intersection of the fcstValidEpochs that correspond for this
        # model and the obs for all fcstValidEpochs greater than the first_epoch ctc
        # and less than the last_epoch. This could be done with an implicit join
        # but this seems to be faster when the results are large.
        result = self.cluster.query(
            """SELECT fve.fcstValidEpoch, fve.fcstLen, meta().id
            FROM mdata fve
            WHERE fve.type='DD'
            AND fve.docType='model'
            AND fve.model=$model
            AND fve.version='V01'
            AND fve.subset='METAR'
            AND fve.fcstValidEpoch >= $first_epoch
            AND fve.fcstValidEpoch <= $last_epoch
            ORDER BY fve.fcstValidEpoch, fcstLen""",
            model=self.model,
            first_epoch=self.load_spec["first_last_params"]["first_epoch"],
            last_epoch=self.load_spec["first_last_params"]["last_epoch"],
        )
        _tmp_model_fve = list(result)
        result1 = self.cluster.query(
            """SELECT raw obs.fcstValidEpoch
            FROM mdata obs
            WHERE obs.type='DD'
            AND obs.docType='obs'
            AND obs.version='V01'
            AND obs.subset='METAR'
            AND obs.fcstValidEpoch >= $max_fcst_epoch
            AND obs.fcstValidEpoch <= $last_epoch
            ORDER BY obs.fcstValidEpoch""",
            max_fcst_epoch=max_ctc_fcst_valid_epochs,
            last_epoch=self.load_spec["first_last_params"]["last_epoch"],
        )
        _tmp_obs_fve = list(result1)
        # this will give us a list of {fcstValidEpoch:fve, fcstLen:fl, id:an_id}
        # where we know that each entry has a corresponding valid observation
        for fve in _tmp_model_fve:
            if fve["fcstValidEpoch"] in _tmp_obs_fve:
                self.model_fcst_valid_epochs.append(fve)
        # if we have asked for profiling go ahead and do it
        # pylint: disable=no-member
        if self.do_profiling:
            with cProfile.Profile() as _pr:
                self.handle_fcstValidEpochs()
                with open("profiling_stats.txt", "w") as stream:
                    stats = Stats(_pr, stream=stream)
                    stats.strip_dirs()
                    stats.sort_stats("time")
                    stats.dump_stats("profiling_stats.prof")
                    stats.print_stats()
        else:
            self.handle_fcstValidEpochs()
        # pylint: disable=assignment-from-no-return
        logging.info("There were %s stations not found", self.not_found_station_count)
        document_map = self.get_document_map()
        return document_map
    except Exception as _e:  # pylint: disable=broad-except
        logging.error(
            "%s: Exception with builder build_document: error: %s",
            self.__class__.__name__,
            str(_e),
        )
        return {}
def close(self):
    self.base.disable()
    stats = Stats(self.base)
    stats.dump_stats(self.path)
class ProfileAnalyzer:

    def __init__(self, obj: Union[str, Stats, cProfile.Profile], path_replace=None):
        if isinstance(obj, cProfile.Profile):
            self._stats = Stats(obj)
            # TODO this produces a different order of entries
            # self._entries = obj.getstats()
        elif isinstance(obj, Stats):
            self._stats = obj
        else:
            self._stats = Stats(obj)
        self._entries = pstats2entries(self._stats)
        self._path_replace = path_replace
        self._df = None
        self._graph = None

    def save(self, filename):
        self._stats.dump_stats(filename)

    @property
    def entries(self):
        return self._entries[:]

    def __getitem__(self, entry):
        return self._entries[entry]

    @property
    def graph(self):
        if self._graph is None:
            self._graph = create_graph(self._entries, self._path_replace)
        return self._graph

    @property
    def df(self):
        if self._df is None:
            self._df = entries_to_df(self._entries, self._path_replace)
        return self._df

    def filter_df(self, method=None, path=None):
        f = np.array([True for _ in range(self.df.shape[0])])
        if method is not None:
            f = np.logical_and(f, self.df.method.str.contains(method))
        if path is not None:
            f = np.logical_and(f, self.df.path.str.contains(path))
        return self.df[f]

    def get_root(self, single=True):
        nodes = [n for n, d in self.graph.in_degree() if d == 0]
        if not single:
            return nodes
        return self.df.loc[nodes].totaltime.idxmax()

    def get_total_time(self):
        nodes = self.get_root(single=False)
        return self.df.loc[nodes].totaltime.sum()

    def get_subgraph(self, root=None, depth=10, min_rel=1e-3, min_abs=1e-3,
                     remove_cycles=False) -> nx.DiGraph:
        """Return a subgraph, centred around a root node."""
        root = root or self.get_root()
        g = self.graph.copy()
        g.remove_edges_from([
            (u, v) for u, v, d in g.edges(data=True)
            if (d["fraction"] < min_rel) or (d["totaltime"] < min_abs)
        ])
        g = nx.ego_graph(g, root, depth)
        if remove_cycles:
            g = break_cycles(g, copy=False)
        return g

    def guess_longest_path(self, root=None, edge_length="totaltime",
                           ignore_path="~", skip_nodes=()):
        return guess_longest_path(
            self,
            root=root,
            edge_length=edge_length,
            ignore_path=ignore_path,
            skip_nodes=skip_nodes,
        )

    def shortest_path(self, node, root=None, edge_length="totaltime"):
        root = root or self.get_root()
        return self.df.loc[nx.shortest_path(self.graph, root, node, weight=edge_length)]
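# Hedged usage sketch for ProfileAnalyzer above; it accepts a dump path, a
# pstats.Stats, or a live cProfile.Profile. The path "app.prof" is
# hypothetical, and the helpers (pstats2entries, create_graph, entries_to_df,
# break_cycles, guess_longest_path) are assumed to come from the same module.
pa = ProfileAnalyzer("app.prof")
print(pa.get_total_time())                 # summed totaltime over root frames
hot = pa.filter_df(method="dump_stats")    # rows whose method name matches
sub = pa.get_subgraph(depth=3)             # call graph trimmed to 3 hops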
from cProfile import Profile
from pstats import Stats
import sys

from janome.tokenizer import Tokenizer

repeat = 10
mmap = True
dump_file = 'tokenizer.profile'
if len(sys.argv) > 1 and sys.argv[1] == '-nommap':
    mmap = False
    dump_file = 'tokenizer_nommap.profile'

t = Tokenizer(mmap=mmap)
with open('text_lemon.txt') as f:
    s = f.read()

profiler = Profile()
profiler.runcall(lambda: [list(t.tokenize(s)) for i in range(repeat)])

stats = Stats(profiler)
stats.strip_dirs()
stats.sort_stats('tottime')
stats.print_stats()
stats.dump_stats(dump_file)
print(f'Result was dumped to {dump_file}.')
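# Hedged follow-up sketch: the two dumps the script above can produce (mmap
# and -nommap runs) are comparable later without re-running the benchmark,
# assuming both files exist:
from pstats import Stats

for dump in ('tokenizer.profile', 'tokenizer_nommap.profile'):
    print(f'--- {dump} ---')
    Stats(dump).sort_stats('tottime').print_stats(5)  # top 5 hotspots each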