def get_metric_by_function(inpfilename):
    """
    Rank the call-tree entries found below ``funcname`` by summed ``metric``.

    A progress line is printed, then the call tree of ``inpfilename`` is
    read and the first node whose function name equals the module-level
    ``funcname`` is located. The nodes yielded by
    ``ct.iterate_on_call_tree(node, 1)`` are turned into
    ``"<fname>,<cnode_id>"`` labels (the ``1`` presumably limits the
    traversal depth -- confirm against ``ct``). The measurements are then
    loaded with ``mg.process_cubex``, re-indexed by ``Short Callpath``,
    restricted to those labels and summed per callpath.

    The module-level names ``funcname``, ``metric`` and ``exclincl`` are
    read by this function.

    Parameters
    ----------
    inpfilename : str
        File handed to ``ct.get_call_tree`` and ``mg.process_cubex``.

    Returns
    -------
    res : pandas.Series
        The ``metric`` column summed per ``Short Callpath``, sorted in
        descending order and limited to at most 11 entries.

    Raises
    ------
    StopIteration
        If no node of the call tree is named ``funcname``.
    """
    print(f"Processing {inpfilename}...")

    tree_root = ct.get_call_tree(inpfilename)
    subtree_root = next(
        candidate
        for candidate in ct.iterate_on_call_tree(tree_root)
        if candidate.fname == funcname
    )
    wanted_callpaths = [
        ",".join((entry.fname, str(entry.cnode_id)))
        for entry in ct.iterate_on_call_tree(subtree_root, 1)
    ]

    parsed = mg.process_cubex(inpfilename, exclusive=exclincl)

    # The labels built above are short callpaths, so the data must be
    # indexed the same way before they can be used for selection.
    by_callpath = ic.convert_index(
        parsed.df, parsed.ctree_df, target='Short Callpath'
    )
    selection = by_callpath.loc[wanted_callpaths]

    kept_columns = ['Short Callpath', 'Thread ID', metric]
    per_callpath = selection.reset_index()[kept_columns].groupby('Short Callpath').sum()
    ranked = per_callpath.sort_values([metric], ascending=False)[metric]

    # ``head`` already returns everything when fewer than 11 rows exist.
    return ranked.head(11)
def convert_df_to_inclusive(df_convertible, call_tree, tree_df=None):
    """
    Converts a DataFrame from exclusive to inclusive. A level named
    ``Cnode ID``, ``Full Callpath`` or ``Short Callpath`` must be in the index.

    Every call-tree node gets the sum of its own row and of the rows of all
    its descendants, as given by ``call_tree``.

    Parameters
    ----------
    df_convertible : pandas.DataFrame
        A DataFrame containing only metrics that can be converted safely from
        exclusive to inclusive.
    call_tree: CallTreeNode
        A recursive representation of the call tree.
    tree_df : pandas.DataFrame or None
        In case ``df_convertible`` is not indexed by ``Cnode ID``, a dataframe
        that can be used to retrieve the ``Cnode ID`` from the index of
        ``df_convertible``.

    Returns
    -------
    res : DataFrame
        The inclusive values, indexed again by the call-tree level that
        ``df_convertible`` used, with the remaining index levels stacked
        back into the index.
    """
    old_index_name = ic.find_index_col(df_convertible)

    # dfcr = df_convertible_reindexed
    dfcr = ic.convert_index(df_convertible, tree_df, target="Cnode ID")

    # Work on the re-indexed frame from here on: the tree nodes are looked up
    # by ``cnode_id``, so "Cnode ID" must be the only index level left after
    # unstacking, whatever call-tree level the input was indexed by.
    levels_to_unstack = [
        name for name in dfcr.index.names if name != "Cnode ID"
    ]
    df_transposed = dfcr.unstack(levels_to_unstack)

    # Inclusive row of every node already visited, keyed by Cnode ID, so each
    # subtree is summed only once.
    inclusive_rows = {}

    def aggregate(root):
        cnode_id = root.cnode_id
        if cnode_id not in inclusive_rows:
            value = df_transposed.loc[cnode_id, :]
            for child in root.children:
                # Deliberately not ``+=``: the row returned by ``.loc`` may be
                # a view of ``df_transposed``, and an in-place addition would
                # then overwrite the exclusive data used by later lookups.
                value = value + aggregate(child)
            inclusive_rows[cnode_id] = value
        return inclusive_rows[cnode_id]

    names = df_transposed.columns.names
    nodes = list(ct.iterate_on_call_tree(call_tree))

    return (
        pd.concat(
            objs=[aggregate(node) for node in nodes],
            keys=[node.cnode_id for node in nodes],
        )
        .rename_axis(mapper=["Cnode ID"] + names, axis="index")
        .unstack(names)
        # Restore the call-tree level the caller originally indexed by.
        .pipe(ic.convert_index, tree_df, old_index_name)
        .stack(levels_to_unstack)
    )
def convert_series_to_inclusive(series, call_tree):
    """
    Converts a series having Cnode IDs as index from exclusive to inclusive.
    Takes as input a CallTreeNode object (hopefully the root).

    *Notice: The results may be nonsensical unless the metric acted upon is
    "INCLUSIVE convertible"*

    Parameters
    ----------
    series : Series
        A series representing exclusive measurements
    call_tree : CallTreeNode
        A recursive representation of the call tree.

    Returns
    -------
    res : Series
        A series named ``metric`` and indexed by ``Cnode ID``, with one entry
        per node yielded by ``ct.iterate_on_call_tree(call_tree)``, holding
        the data summed over following the hierarchy given by the
        ``call_tree`` object.

    Raises
    ------
    NotImplementedError
        If ``series`` has a MultiIndex with more than one level.
    AssertionError
        If the index of ``series`` is not named ``Cnode ID``.
    """
    if isinstance(series.index, pd.MultiIndex) and series.index.nlevels > 1:
        raise NotImplementedError("MultiIndex not supported for series")
    if series.index.name != "Cnode ID":
        # Raised explicitly rather than through ``assert`` so the check is
        # not stripped under ``python -O``; the exception type is unchanged.
        raise AssertionError('series must be indexed by "Cnode ID"')

    # ``functools.lru_cache`` cannot be used on the nodes themselves
    # (TypeError: unhashable type: 'list'), so the inclusive value of each
    # subtree is cached by Cnode ID instead.
    inclusive_values = {}

    def aggregate(root):
        cnode_id = root.cnode_id
        if cnode_id not in inclusive_values:
            value = series.loc[cnode_id]
            for child in root.children:
                value = value + aggregate(child)
            inclusive_values[cnode_id] = value
        return inclusive_values[cnode_id]

    return (
        pd.DataFrame(
            data=[
                (node.cnode_id, aggregate(node))
                for node in ct.iterate_on_call_tree(call_tree)
            ],
            columns=["Cnode ID", "metric"],
        )
        .set_index("Cnode ID")
        .metric
    )
# Script settings: the profile to read and the quantities to extract from it.
data_dir = "../test_data"
inpfilename = os.path.join(data_dir, "profile-5m-nproc40-nsteps10.cubex")
metric = "time"
exclincl = False  # forwarded as ``exclusive=`` to mg.process_cubex below
rootfuncname = "ns3d_"  # function name of the call-tree node to start from

### Processing
# Reading, parsing and loading data in the cubex file
output_i = mg.process_cubex(inpfilename, exclusive=exclincl)

call_tree = output_i.ctree

# First node of the call tree whose function name equals ``rootfuncname``;
# ``next`` raises StopIteration when there is no such node.
func_node = next(
    node for node in ct.iterate_on_call_tree(call_tree) if node.fname == rootfuncname
)

# "<function name>,<cnode id>" labels for the nodes yielded from ``func_node``.
# NOTE(review): the second argument ``1`` presumably limits the traversal
# depth -- confirm against ct.iterate_on_call_tree.
children_info = [
    node.fname + "," + str(node.cnode_id)
    for node in ct.iterate_on_call_tree(func_node, 1)
]

# We convert the Cnode IDs to short callpaths in the dataframe.
df_i = ic.convert_index(output_i.df, output_i.ctree_df, target="Short Callpath")
# Keep only the rows labelled by the entries of ``children_info``.
res_df = df_i.loc[children_info]

res = (res_df.reset_index()[[
# Move the "run" column level of ``excommon`` into the index and take the
# ``time`` entry of the result.
extime = excommon.stack("run").time
# Mean per (callpath, thread) pair -- presumably an average over runs, since
# "run" is the level stacked above; verify against how ``excommon`` is built.
avgextime_rank = extime.groupby(["Short Callpath", "Thread ID"]).mean()
# Sum of the per-thread means for every callpath.
avgextime = avgextime_rank.groupby("Short Callpath").sum()

# total_time == avgintime.max(), only if all the data is considered.
total_time = avgextime.sum()

# A node counts as significant when its value exceeds this fraction (1%) of
# ``total_time``. ``avgintime`` is defined outside this excerpt.
threshold = 0.01
significant_nodes_ex = avgextime > total_time * threshold
significant_nodes_in = avgintime > total_time * threshold

# focusing now on significant, exclusive statistics

## initialisation, known to be in function 'initia_'
# ``next`` raises StopIteration if no node of ``exctree`` is named "initia_".
initialisation_node = next(
    n for n in ct.iterate_on_call_tree(exctree) if n.fname == "initia_"
)

cnode_index = exnoncommon.unstack("Thread ID").index

# Boolean mask over ``cnode_index``: starts all True, then every node yielded
# from ``initialisation_node`` is switched to False. The mask is addressed
# with "<function name>,<cnode id>" labels.
not_initialisation = pd.Series(data=True, index=cnode_index)
for n in ct.iterate_on_call_tree(initialisation_node):
    not_initialisation[n.fname + "," + str(n.cnode_id)] = False

## MPI functions
# True for every label of ``cnode_index`` that contains the substring "MPI".
mpi_cnodes = pd.Series(data=cnode_index.str.contains("MPI"), index=cnode_index)

## Taking only data that
## - is significant
## - is not initialisation
## - is not mpi calls
import os

# Script settings: the profile to read and the quantities to extract from it.
data_dir = "../test_data"
inpfilename = os.path.join(data_dir, "profile-5m-nproc40-nsteps10.cubex")
metric = "time"
exclincl = False  # forwarded as ``exclusive=`` to mg.process_cubex below
callpathid = 56  # not referenced in the visible part of this script
funcname = "initia_"  # function name of the call-tree node to start from

# Read the profile; the result carries the call tree (``ctree``), the data
# (``df``) and the call-tree table (``ctree_df``) used below.
output_i = mg.process_cubex(inpfilename, exclusive=exclincl)

call_tree = output_i.ctree

# First node of the call tree whose function name equals ``funcname``;
# ``next`` raises StopIteration when there is no such node.
func_node = next(
    node for node in ct.iterate_on_call_tree(call_tree) if node.fname == funcname
)

# and selecting the names of the functions in the subtree that start with "MPI"
# Each entry is a "<function name>,<cnode id>" label.
# NOTE(review): the second argument ``1`` presumably limits the traversal
# depth -- confirm against ct.iterate_on_call_tree.
children_names = [
    node.fname + "," + str(node.cnode_id)
    for node in ct.iterate_on_call_tree(func_node, 1)
    if node.fname.startswith("MPI")
]

# We convert the Cnode IDs to short callpaths in the dataframe.
df_i = ic.convert_index(output_i.df, output_i.ctree_df, target="Short Callpath")
# Keep only the rows labelled by the entries of ``children_names``.
res_df = df_i.loc[children_names]

res = (