Beispiel #1
0
    def get_metric_by_function(inpfilename):
        """
        Sum the chosen metric over threads for the children of *funcname*.

        Looks up ``funcname`` in the call tree of ``inpfilename``, gathers
        its direct children, aggregates the metric per short callpath and
        returns at most the 11 largest values, sorted descending.

        Relies on the enclosing scope for ``funcname``, ``exclincl`` and
        ``metric``.
        """
        print(f"Processing {inpfilename}...")

        tree = ct.get_call_tree(inpfilename)

        # Locate the node whose function name matches the requested one.
        root = next(n for n in ct.iterate_on_call_tree(tree)
                    if n.fname == funcname)

        # Short-callpath labels ("fname,cnode_id") of the node and its
        # direct children (iteration depth 1).
        labels = [f"{n.fname},{n.cnode_id}"
                  for n in ct.iterate_on_call_tree(root, 1)]

        output = mg.process_cubex(inpfilename, exclusive=exclincl)

        # Re-index the metric dataframe by short callpath.
        data = ic.convert_index(output.df,
                                output.ctree_df,
                                target='Short Callpath')

        selected = data.loc[labels]

        # Aggregate the metric over threads per callpath, largest first.
        grouped = (selected
                   .reset_index()[['Short Callpath', 'Thread ID', metric]]
                   .groupby('Short Callpath')
                   .sum()
                   .sort_values([metric], ascending=False)[metric])

        # Keep at most the 11 largest entries.
        return grouped.head(11 if len(grouped) > 11 else len(grouped))
Beispiel #2
0
def test_convert_index():
    """Round-trip the index between Cnode ID and callpath representations.

    Converts the common and non-common metric frames through every target
    form and back, asserting after each step that only the first index
    level changes while the column structure is preserved.
    """
    input_files = glob(f"{tu.SCALASCA_OUTPUT}/*/profile.cubex")

    output = mg.process_multi(input_files)

    common = output.common
    assert common.index.names == ['Cnode ID', 'Thread ID']
    assert common.columns.names == ['run', 'metric']

    noncommon = output.noncommon
    assert noncommon.index.names == ['Cnode ID', 'Thread ID']
    assert noncommon.columns.names == ['metric']

    tree_df = output.ctree_df

    # Check all conversions in a circle: one way, then back again.
    targets = ['Full Callpath', 'Short Callpath', 'Cnode ID',
               'Short Callpath', 'Full Callpath', 'Cnode ID']

    for target in targets:
        print(f"Target : {target}")

        # Common metrics.
        common = ic.convert_index(common, tree_df, target)
        print(common.index.names)
        print(common.columns.names)

        assert common.index.names == [target, 'Thread ID']
        assert common.columns.names == ['run', 'metric']

        print("Column names are as expected for the 'common' dataframe.")

        # Non-common metrics.
        noncommon = ic.convert_index(noncommon, tree_df, target)
        print(noncommon.index.names)
        print(noncommon.columns.names)

        assert noncommon.index.names == [target, 'Thread ID']
        assert noncommon.columns.names == ['metric']

        print("Column names are as expected for the 'non-common' dataframe.")
    def get_metric_by_level(inpfilename):
        """
        Collect the chosen metric for every function in the call tree of
        ``inpfilename`` and return the rows at depth level 2, sorted by
        level and time, descending.

        Relies on the enclosing scope for ``exclincl`` (exclusive vs.
        inclusive metrics) and ``metric`` (the metric column to extract).

        Returns
        -------
        pandas.DataFrame
            Columns ``time``, ``fname``, ``Cnode ID`` and ``level``,
            restricted to ``level == 2``.
        """
        print(f"Processing {inpfilename}")

        # This gives us a number of outputs
        # (see https://cupybelib.readthedocs.io/en/latest/merger.html)
        output_i = mg.process_cubex(inpfilename, exclusive=exclincl)

        # Parent Cnode ID of every node, indexed by Cnode ID.
        parent_series = output_i.ctree_df.set_index(
            "Cnode ID").loc[:, "Parent Cnode ID"]

        # get depth level for each function in the call tree
        levels = ct.get_level(parent_series)

        # We convert the Cnode IDs to short callpaths in the dataframe.
        df_i = ic.convert_index(output_i.df,
                                output_i.ctree_df,
                                target="Short Callpath")

        # Aggregate the metric over threads for each short callpath.
        res = (df_i.reset_index()[[
            "Short Callpath", "Thread ID", metric
        ]].groupby("Short Callpath").sum().sort_values([metric])[metric])

        # Flatten once instead of calling reset_index() three times.
        res_df = pd.DataFrame(res).reset_index()

        # A short callpath looks like "fname,cnode_id"; run the regex
        # extraction once instead of twice.
        parts = res_df["Short Callpath"].str.extract(r"(\w+),([0-9]+)")
        # NOTE(review): hard-coded "time" column — assumes metric == "time";
        # confirm against callers.
        time = res_df["time"]
        fname = parts[0]
        cnode_id = parts[1].astype(int)

        # Attach the depth level of each function via its Cnode ID.
        combined = pd.merge(
            left=(pd.concat([time, fname, cnode_id], axis="columns").rename(
                {
                    "time": "time",
                    0: "fname",
                    1: "Cnode ID"
                }, axis="columns")),
            right=levels.reset_index().rename({0: "level"}, axis="columns"),
            on="Cnode ID",
        )

        # to extract functions called only at the 3rd level (level index 2)
        time_data = combined[combined["level"] == 2].sort_values(
            by=["level", "time"], ascending=False)

        return time_data
Beispiel #4
0
def convert_df_to_inclusive(df_convertible, call_tree, tree_df=None):
    """
    Converts a DataFrame from exclusive to inclusive. A level named
    ``Cnode ID``, ``Full Callpath`` or ``Short Callpath`` must be in the index.

    Parameters
    ----------
    df_convertible : pandas.DataFrame
        A DataFrame containing only metrics that can be converted safely from
        exclusive to inclusive.
    call_tree: CallTreeNode
        A recursive representation of the call tree.
    tree_df : pandas.DataFrame or None
        In case ``df_convertible`` is not indexed by ``Cnode ID``, a dataframe
        that can be used to retrieve the ``Cnode ID`` from the index of
        ``df_convertible``.

    Returns
    -------
    res : DataFrame
        A DataFrame of inclusive metrics, indexed like ``df_convertible``.

    """

    old_index_name = ic.find_index_col(df_convertible)

    # Reindex by Cnode ID so that aggregate() below can look up rows by
    # node.cnode_id regardless of how the caller's dataframe was indexed.
    # dfcr = df_convertible_reindexed
    dfcr = ic.convert_index(df_convertible, tree_df, target="Cnode ID")

    # BUG FIX: the level list and the unstack must use the reindexed
    # frame (dfcr); the original code built them from df_convertible, so
    # dfcr was dead and .loc[root.cnode_id] failed whenever the input was
    # indexed by callpath instead of Cnode ID.
    levels_to_unstack = [
        name for name in dfcr.index.names if name != "Cnode ID"
    ]
    df_transposed = dfcr.unstack(levels_to_unstack)

    def aggregate(root):
        # Inclusive value of a node = its own exclusive value plus the
        # inclusive values of all of its children (recursive).
        value = df_transposed.loc[root.cnode_id, :]
        for child in root.children:
            value += aggregate(child)
        return value

    names = df_transposed.columns.names

    # Materialize the node list once instead of walking the tree twice.
    nodes = list(ct.iterate_on_call_tree(call_tree))

    return (pd.concat(
        objs=[aggregate(node) for node in nodes],
        keys=[node.cnode_id for node in nodes],
    ).rename_axis(mapper=["Cnode ID"] + names,
                  axis="index").unstack(names).pipe(
                      ic.convert_index, tree_df,
                      old_index_name).stack(levels_to_unstack))
Beispiel #5
0
    # NOTE(review): fragment of a larger function — `inpfilename`,
    # `exclincl`, `rootfuncname`, `metric` and `plt` are defined outside
    # this view; the final plt.xlabel(...) call is cut off below.

    # Reading, parsing and loading data in the cubex file
    output_i = mg.process_cubex(inpfilename, exclusive=exclincl)

    call_tree = output_i.ctree

    # Find the node of the requested root function in the call tree.
    func_node = next(node for node in ct.iterate_on_call_tree(call_tree)
                     if node.fname == rootfuncname)

    # Short-callpath labels ("fname,cnode_id") of the root function and
    # its direct children (iteration depth 1).
    children_info = [
        node.fname + "," + str(node.cnode_id)
        for node in ct.iterate_on_call_tree(func_node, 1)
    ]

    # We convert the Cnode IDs to short callpaths in the dataframe.
    df_i = ic.convert_index(output_i.df,
                            output_i.ctree_df,
                            target="Short Callpath")

    res_df = df_i.loc[children_info]

    # Sum the metric over threads per callpath, largest value first.
    res = (res_df.reset_index()[[
        "Short Callpath", "Thread ID", metric
    ]].groupby("Short Callpath").sum().sort_values([metric],
                                                   ascending=False)[metric])

    # Keep at most 11 rows, then drop the first (largest) one —
    # presumably the root function itself — leaving up to 10 children.
    # NOTE(review): head(n) already caps at len(res), so the conditional
    # arguments are redundant but harmless.
    res = res.head(11 if len(res) > 11 else len(res)).tail(
        10 if len(res) > 10 else len(res) - 1)

    res.plot(kind="bar")

    plt.xlabel("Function name: " + rootfuncname,
Beispiel #6
0
    # NOTE(review): fragment of a larger function/script — `argv`, `mg`,
    # `ic` and `pd` come from outside this view; the final expression is
    # cut off below.
    # (see https://cupybe.readthedocs.io/en/latest/merger.html)

    # Default to the bundled test data unless a path was given on the
    # command line.
    if len(argv) == 1:
        input_file = "../test_data/profile.cubex"
    else:
        input_file = argv[1]
        print("Opening file", input_file)

    output_i = mg.process_cubex(input_file, exclusive=False)

    df_i = output_i.df  # Dataframes with the metrics

    tree = output_i.ctree_df  # Dataframe containing info on the calltree

    # We convert the Cnode IDs to short callpaths in the dataframe.
    df_i = ic.convert_index(df_i, tree, target="Short Callpath")

    # We calculate the mean of the time
    times_mean = df_i.time.groupby("Short Callpath").mean()

    # We do a merge (=join) on the tree dataframe to find the parent-child relation
    # (a self-join: each row pairs a node with its parent node).
    parent_child = (
        pd.merge(
            left=tree,
            right=tree,
            left_on="Cnode ID",
            right_on="Parent Cnode ID",
            suffixes=("-Parent", ""),
        )
        # we select the two columns we're interested in
        .loc[:, ["Short Callpath", "Short Callpath-Parent"]].set_index(
Beispiel #7
0
        # NOTE(review): fragment starting inside a try block whose header
        # is above this view; `data`, `files` and `pickle_archive` are
        # defined outside it.
        inoutput, exoutput, files_check = data
        if files_check != files:
            raise Exception("Data  does not contain the expected files.")
    except:
        # NOTE(review): bare `except:` swallows everything, including
        # KeyboardInterrupt/SystemExit — a narrower type would be safer.
        # Cache miss or mismatch: recompute the results and re-pickle them.
        inoutput = mg.process_multi(files, exclusive=False)
        exoutput = mg.process_multi(files, exclusive=True)
        with open(pickle_archive, "wb") as f:
            # NOTE(review): pickle.dump() returns None, so assigning the
            # result to `data` here is a no-op.
            data = pickle.dump((inoutput, exoutput, files), f)

    # renaming variables for convenience
    inctree_df = inoutput.ctree_df
    inctree = inoutput.ctree
    inncmetrics = inoutput.ncmetrics

    # index from Cnode ID to short callpath for readability
    incommon = ic.convert_index(inoutput.common, inctree_df, target="Short Callpath")
    innoncommon = ic.convert_index(
        inoutput.noncommon, inctree_df, target="Short Callpath"
    )

    # renaming variables for convenience
    exctree_df = exoutput.ctree_df
    exctree = exoutput.ctree
    exncmetrics = exoutput.ncmetrics

    # index from Cnode ID to short callpath for readability
    excommon = ic.convert_index(exoutput.common, exctree_df, target="Short Callpath")
    exnoncommon = ic.convert_index(
        exoutput.noncommon, exctree_df, target="Short Callpath"
    )
# NOTE(review): fragment cut at both edges — `common`, `noncommon`,
# `output` and `ic` are defined above this view, and the loop body
# continues below it.
assert noncommon.columns.names == ['metric']

tree_df = output.ctree_df

# Check all conversions in a circle: one way and back again.

for target in ['Full Callpath',   # one way...
               'Short Callpath',
               'Cnode ID',
               'Short Callpath',  # .. and back.
               'Full Callpath',
               'Cnode ID' ]:

    print(f"Target : {target}")
    # Common metrics
    common = ic.convert_index(common,tree_df,target)
    print(common.index.names   )
    print(common.columns.names )
    
    # After each conversion only the first index level changes; the
    # thread level and the column structure must be preserved.
    assert common.index.names == [target, 'Thread ID']
    assert common.columns.names == ['run', 'metric']
    
    print("Column names are as expected for the 'common' dataframe.")
    
    # Non common metrics 
    
    noncommon = ic.convert_index(noncommon,tree_df,target)
    print(noncommon.index.names  )    
    print(noncommon.columns.names)
    
    assert noncommon.index.names == [target, 'Thread ID']