Example #1
0
    def get_metric_by_function(inpfilename):
        '''
        Return the largest per-callpath totals of ``metric`` below ``funcname``.

        ``funcname``, ``metric`` and ``exclincl`` are not parameters: they are
        free variables looked up in the surrounding scope.

        Parameters
        ----------
        inpfilename : str
            Path of the profile file, passed to ``ct.get_call_tree`` and
            ``mg.process_cubex``.

        Returns
        -------
        res : pandas.Series
            Values of ``metric`` summed per ``Short Callpath``, sorted in
            descending order and truncated to at most 11 entries.

        Raises
        ------
        StopIteration
            If no node of the call tree has ``fname == funcname``.
        '''
        print(f"Processing {inpfilename}...")

        call_tree = ct.get_call_tree(inpfilename)

        # First node whose function name matches; ``next`` has no default, so
        # a missing function surfaces as StopIteration.
        func_node = next(node for node in ct.iterate_on_call_tree(call_tree)
                         if node.fname == funcname)

        # "<function name>,<cnode id>" labels, used below to select rows of
        # the dataframe indexed by short callpath.
        # NOTE(review): the second argument of ``iterate_on_call_tree`` is
        # presumably a maximum depth (node + direct children) -- confirm.
        children_info = [
            node.fname + "," + str(node.cnode_id)
            for node in ct.iterate_on_call_tree(func_node, 1)
        ]

        output_i = mg.process_cubex(inpfilename, exclusive=exclincl)

        # We convert the Cnode IDs to short callpaths in the dataframe.
        df_i = ic.convert_index(output_i.df,
                                output_i.ctree_df,
                                target='Short Callpath')

        res_df = df_i.loc[children_info]

        res = res_df.reset_index()[[
            'Short Callpath', 'Thread ID', metric
        ]].groupby('Short Callpath').sum().sort_values([metric],
                                                       ascending=False)[metric]

        # ``head`` already returns everything when fewer than 11 rows exist.
        return res.head(11)
Example #2
0
def convert_df_to_inclusive(df_convertible, call_tree, tree_df=None):
    """
    Converts a DataFrame from exclusive to inclusive. A level named
    ``Cnode ID``, ``Full Callpath`` or ``Short Callpath`` must be in the index.

    Parameters
    ----------
    df_convertible : pandas.DataFrame
        A DataFrame containing only metrics that can be converted safely from
        exclusive to inclusive.
    call_tree: CallTreeNode
        A recursive representation of the call tree.
    tree_df : pandas.DataFrame or None
        In case ``df_convertible`` is not indexed by ``Cnode ID``, a dataframe
        that can be used to retrieve the ``Cnode ID`` from the index of
        ``df_convertible``.

    Returns
    -------
    res : pandas.DataFrame
        A DataFrame using the same kind of call tree index as
        ``df_convertible``, where the values of each call tree node are the
        sum of its own values and those of all the nodes below it.

    """

    old_index_name = ic.find_index_col(df_convertible)

    # dfcr = df_convertible_reindexed
    dfcr = ic.convert_index(df_convertible, tree_df, target="Cnode ID")

    # Work on the re-indexed frame: the rows are looked up by ``cnode_id``
    # below, so "Cnode ID" must be the only index level left after unstacking.
    levels_to_unstack = [
        name for name in dfcr.index.names if name != "Cnode ID"
    ]
    df_transposed = dfcr.unstack(levels_to_unstack)

    # Inclusive row of every node already visited, keyed by Cnode ID, so that
    # each sub-tree is summed only once.
    inclusive_rows = {}

    def aggregate(root):
        cnode_id = root.cnode_id
        if cnode_id not in inclusive_rows:
            value = df_transposed.loc[cnode_id, :]
            for child in root.children:
                # Not ``+=``: the row may be a view on ``df_transposed`` and
                # an in-place update would corrupt the exclusive data that is
                # still needed for the other nodes.
                value = value + aggregate(child)
            inclusive_rows[cnode_id] = value
        return inclusive_rows[cnode_id]

    names = df_transposed.columns.names

    nodes = list(ct.iterate_on_call_tree(call_tree))

    return (pd.concat(
        objs=[aggregate(n) for n in nodes],
        keys=[n.cnode_id for n in nodes],
    ).rename_axis(mapper=["Cnode ID"] + names,
                  axis="index").unstack(names).pipe(
                      ic.convert_index, tree_df,
                      old_index_name).stack(levels_to_unstack))
Example #3
0
def convert_series_to_inclusive(series, call_tree):
    '''
    Converts a series having Cnode IDs as index from exclusive to inclusive.
    Takes as input a CallTreeNode object (hopefully the root).

    *Notice: The results may be nonsensical unless the metric acted upon is
    "INCLUSIVE convertible"*

    Parameters
    ----------
    series : Series
        A series representing exclusive measurements
    call_tree : CallTreeNode
        A recursive representation of the call tree.

    Returns
    -------
    res : Series
        A series named ``metric`` and indexed by ``Cnode ID``, with data
        summed over following the hierarchy given by the call_tree object.

    Raises
    ------
    NotImplementedError
        If ``series`` is indexed by a MultiIndex with more than one level.
    AssertionError
        If the index of ``series`` is not named ``Cnode ID``.

    '''
    if isinstance(series.index, pd.MultiIndex) and len(series.index.levels) > 1:
        raise NotImplementedError("MultiIndex not supported for series")

    assert series.index.name == "Cnode ID", (
        'The index of the series must be named "Cnode ID"')

    # Manual memoisation keyed by Cnode ID (``functools.lru_cache`` cannot be
    # used here: the nodes are not hashable), so that each sub-tree is summed
    # only once instead of once per ancestor.
    inclusive_values = {}

    def aggregate(root):
        cnode_id = root.cnode_id
        if cnode_id not in inclusive_values:
            value = series.loc[cnode_id]
            for child in root.children:
                value = value + aggregate(child)
            inclusive_values[cnode_id] = value
        return inclusive_values[cnode_id]

    return (pd.DataFrame(
        data=[(node.cnode_id, aggregate(node))
              for node in ct.iterate_on_call_tree(call_tree)],
        columns=["Cnode ID", "metric"],
    ).set_index("Cnode ID").metric)
Example #4
0
    # Input selection: profile file, metric of interest and root function.
    data_dir = "../test_data"
    inpfilename = os.path.join(data_dir, "profile-5m-nproc40-nsteps10.cubex")
    metric = "time"
    # Passed as the ``exclusive`` keyword of ``mg.process_cubex`` below.
    exclincl = False

    # Name of the function whose sub-tree is inspected.
    rootfuncname = "ns3d_"

    ### Processing

    # Reading, parsing and loading data in the cubex file
    output_i = mg.process_cubex(inpfilename, exclusive=exclincl)

    call_tree = output_i.ctree

    # First node of the call tree whose function name is ``rootfuncname``;
    # ``next`` has no default, so StopIteration is raised if there is none.
    func_node = next(node for node in ct.iterate_on_call_tree(call_tree)
                     if node.fname == rootfuncname)

    # "<function name>,<cnode id>" labels for the nodes yielded by the
    # traversal starting at ``func_node``.
    # NOTE(review): the second argument is presumably a maximum depth
    # (node + direct children) -- confirm against ``ct``.
    children_info = [
        node.fname + "," + str(node.cnode_id)
        for node in ct.iterate_on_call_tree(func_node, 1)
    ]

    # We convert the Cnode IDs to short callpaths in the dataframe.
    df_i = ic.convert_index(output_i.df,
                            output_i.ctree_df,
                            target="Short Callpath")

    # Keep only the rows labelled by the entries of ``children_info``.
    res_df = df_i.loc[children_info]

    res = (res_df.reset_index()[[
Example #5
0
    # Exclusive time: mean per ("Short Callpath", "Thread ID") group
    # (presumably an average over the stacked "run" level -- confirm), then
    # summed per "Short Callpath".
    extime = excommon.stack("run").time
    avgextime_rank = extime.groupby(["Short Callpath", "Thread ID"]).mean()
    avgextime = avgextime_rank.groupby("Short Callpath").sum()

    # total_time == avgintime.max(), only if all the data is considered.
    total_time = avgextime.sum()
    # Boolean masks: entries whose time exceeds ``threshold`` (1%) of
    # ``total_time``, for the exclusive and the inclusive averages.
    threshold = 0.01
    significant_nodes_ex = avgextime > total_time * threshold
    significant_nodes_in = avgintime > total_time * threshold

    # focusing now on significant, exclusive statistics
    ## initialisation, known to be in function 'initia_'

    # First node of ``exctree`` whose function name is "initia_";
    # StopIteration is raised if there is none.
    initialisation_node = next(
        n for n in ct.iterate_on_call_tree(exctree) if n.fname == "initia_"
    )

    # Mask initialised to True for every entry of ``cnode_index``, then set to
    # False for every node yielded when iterating from the initialisation node.
    cnode_index = exnoncommon.unstack("Thread ID").index
    not_initialisation = pd.Series(data=True, index=cnode_index)

    for n in ct.iterate_on_call_tree(initialisation_node):
        # Label format: "<function name>,<cnode id>".
        not_initialisation[n.fname + "," + str(n.cnode_id)] = False

    ## MPI functions
    # True where the index label contains the substring "MPI".
    mpi_cnodes = pd.Series(data=cnode_index.str.contains("MPI"), index=cnode_index)

    ## Taking only data that
    ## - is significant
    ## - is not initialisation
    ## - is not mpi calls
Example #6
0
    import os

    # Input selection: profile file, metric of interest and target function.
    data_dir = "../test_data"
    inpfilename = os.path.join(data_dir, "profile-5m-nproc40-nsteps10.cubex")
    metric = "time"
    # Passed as the ``exclusive`` keyword of ``mg.process_cubex`` below.
    exclincl = False
    # Not used in the visible part of this snippet.
    callpathid = 56

    funcname = "initia_"

    output_i = mg.process_cubex(inpfilename, exclusive=exclincl)

    call_tree = output_i.ctree

    # First node of the call tree whose function name is ``funcname``;
    # ``next`` has no default, so StopIteration is raised if there is none.
    func_node = next(
        node for node in ct.iterate_on_call_tree(call_tree) if node.fname == funcname
    )

    # and selecting the names of the functions in the subtree that start with "MPI"
    # (labels have the form "<function name>,<cnode id>").
    # NOTE(review): the second argument of ``iterate_on_call_tree`` is
    # presumably a maximum depth -- confirm against ``ct``.
    children_names = [
        node.fname + "," + str(node.cnode_id)
        for node in ct.iterate_on_call_tree(func_node, 1)
        if node.fname.startswith("MPI")
    ]

    # We convert the Cnode IDs to short callpaths in the dataframe.
    df_i = ic.convert_index(output_i.df, output_i.ctree_df, target="Short Callpath")

    # Keep only the rows labelled by the entries of ``children_names``.
    res_df = df_i.loc[children_names]

    res = (