def test_graphframe(calc_pi_hpct_db):
    """Sanity test a GraphFrame object with known data."""
    # Use the GraphFrame.from_hpctoolkit classmethod constructor, consistent
    # with the rest of this file, instead of the legacy pattern of building an
    # empty GraphFrame and calling from_hpctoolkit on the instance.
    gf = GraphFrame.from_hpctoolkit(str(calc_pi_hpct_db))

    # Known column cardinalities for the calc-pi database.
    assert len(gf.dataframe.groupby("module")) == 5
    assert len(gf.dataframe.groupby("file")) == 11
    assert len(gf.dataframe.groupby("name")) == 20
def test_graphframe_to_literal_with_threads(data_dir, osu_allgather_hpct_db):
    """Round-trip a threaded HPCToolkit GraphFrame through its literal form."""
    original = GraphFrame.from_hpctoolkit(str(osu_allgather_hpct_db))
    rebuilt = GraphFrame.from_literal(original.to_literal())

    # The literal round trip must preserve the node count.
    assert len(original.graph) == len(rebuilt.graph)
def test_apply_indices(calc_pi_hpct_db):
    """Check that query matches are unaffected by dropping index levels."""
    gf = GraphFrame.from_hpctoolkit(str(calc_pi_hpct_db))
    main = gf.graph.roots[0].children[0]

    # MPI_* call, any chain of non-MPID frames, then an MPID* frame.
    path = [
        {"name": "[0-9]*:?MPI_.*"},
        ("*", {"name": "^((?!MPID).)*"}),
        {"name": "[0-9]*:?MPID.*"},
    ]

    # The expected chains descend through main's first and second children.
    first = main.children[0]
    second = main.children[1]
    matches = [
        [
            first,
            first.children[0],
            first.children[0].children[0],
            first.children[0].children[0].children[0],
        ],
        [
            second,
            second.children[0],
            second.children[0].children[0],
        ],
    ]

    query = QueryMatcher(path)
    assert query.apply(gf) == matches

    # Dropping the rank/thread index levels must not change the match set.
    gf.drop_index_levels()
    assert query.apply(gf) == matches
def test_unify_hpctoolkit_data(calc_pi_hpct_db):
    """Unifying two reads of the same database should end up sharing one graph."""
    left = GraphFrame.from_hpctoolkit(str(calc_pi_hpct_db))
    right = GraphFrame.from_hpctoolkit(str(calc_pi_hpct_db))

    # Separate reads construct distinct graph objects.
    assert left.graph is not right.graph
    # Reading the same dataset twice yields identical dataframe indexes.
    assert all(left.dataframe.index == right.dataframe.index)

    left.unify(right)
    assert left.graph is right.graph

    # After unify the indexes should still agree; sort both before comparing.
    for frame in (left.dataframe, right.dataframe):
        frame.sort_index(inplace=True)
    assert all(left.dataframe.index == right.dataframe.index)
def test_graphframe_to_literal(data_dir, calc_pi_hpct_db):
    """Sanity test a GraphFrame object with known data."""
    source = GraphFrame.from_hpctoolkit(str(calc_pi_hpct_db))
    literal_form = source.to_literal()
    rebuilt = GraphFrame.from_literal(literal_form)

    # Converting to a literal and back must preserve the node count.
    assert len(source.graph) == len(rebuilt.graph)
def test_unify_hpctoolkit_data(calc_pi_hpct_db):
    """Unify should make two reads of one database share graph and node objects."""
    gf_a = GraphFrame.from_hpctoolkit(str(calc_pi_hpct_db))
    gf_b = GraphFrame.from_hpctoolkit(str(calc_pi_hpct_db))

    # Separate reads build distinct graphs.
    assert gf_a.graph is not gf_b.graph

    with pytest.raises(ValueError):
        # this is an invalid comparison because the indexes are different at this point
        gf_a.dataframe["node"].apply(id) != gf_b.dataframe["node"].apply(id)
    assert all(gf_a.dataframe.index != gf_b.dataframe.index)

    gf_a.unify(gf_b)

    # indexes are now the same.
    assert gf_a.graph is gf_b.graph
    assert all(
        gf_a.dataframe["node"].apply(id) == gf_b.dataframe["node"].apply(id)
    )
    assert all(gf_a.dataframe.index == gf_b.dataframe.index)
def test_drop_index_levels(calc_pi_hpct_db):
    """After dropping extra index levels there is one dataframe row per node."""
    gf = GraphFrame.from_hpctoolkit(str(calc_pi_hpct_db))
    node_count = len(gf.graph)

    gf.drop_index_levels()

    # Each graph node should now map to exactly one row.
    assert node_count == len(gf.dataframe.index)
def test_inclusive_time_calculation(data_dir, calc_pi_hpct_db):
    """Validate update_inclusive_columns() on known dataset."""
    gf = GraphFrame.from_hpctoolkit(str(calc_pi_hpct_db))

    # Stash the reader-provided inclusive times, then drop the column so that
    # update_inclusive_columns() is forced to regenerate it from scratch.
    gf.dataframe["orig_inc_time"] = gf.dataframe["time (inc)"]
    del gf.dataframe["time (inc)"]

    gf.update_inclusive_columns()

    # The regenerated column must match the original values element-wise.
    regenerated = gf.dataframe["time (inc)"].values
    expected = gf.dataframe["orig_inc_time"].values
    assert all(regenerated == expected)
def test_graphframe(calc_pi_hpct_db):
    """Sanity test a GraphFrame object with known data."""
    gf = GraphFrame.from_hpctoolkit(str(calc_pi_hpct_db))

    # Known column cardinalities for the calc-pi database.
    assert len(gf.dataframe.groupby("module")) == 5
    assert len(gf.dataframe.groupby("file")) == 11
    assert len(gf.dataframe.groupby("name")) == 20

    # Verify the dtype of every recognized column.
    for col in gf.dataframe.columns:
        if col in ("time (inc)", "time"):
            assert gf.dataframe[col].dtype == np.float64
        elif col in ("nid", "rank", "line"):
            assert gf.dataframe[col].dtype == np.int64
        elif col in ("name", "type", "file", "module", "node"):
            # np.object was deprecated in NumPy 1.20 and removed in 1.24; the
            # builtin object compares identically against an object dtype.
            assert gf.dataframe[col].dtype == object
def test_allgather(osu_allgather_hpct_db):
    """Sanity-check the osu_allgather database's columns and dtypes."""
    gf = GraphFrame.from_hpctoolkit(str(osu_allgather_hpct_db))

    # Known column cardinalities for the allgather database.
    assert len(gf.dataframe.groupby("module")) == 9
    assert len(gf.dataframe.groupby("file")) == 41
    assert len(gf.dataframe.groupby("name")) == 383
    assert len(gf.dataframe.groupby("type")) == 3

    # Verify the dtype of every recognized column (this dataset also has a
    # per-thread index level).
    for col in gf.dataframe.columns:
        if col in ("time (inc)", "time"):
            assert gf.dataframe[col].dtype == np.float64
        elif col in ("nid", "rank", "thread", "line"):
            assert gf.dataframe[col].dtype == np.int64
        elif col in ("name", "type", "file", "module", "node"):
            # np.object was deprecated in NumPy 1.20 and removed in 1.24; the
            # builtin object compares identically against an object dtype.
            assert gf.dataframe[col].dtype == object
def test_tree(calc_pi_hpct_db):
    """Spot-check the rendered tree output for known nodes and metrics."""
    gf = GraphFrame.from_hpctoolkit(str(calc_pi_hpct_db))

    # Exclusive time: tree starts at the program root with zero self-time.
    exclusive_tree = gf.tree(metric="time", color=False)
    assert exclusive_tree.startswith("0.000 <program root> <unknown file>")
    assert (
        "0.000 198:MPIR_Init_thread /tmp/dpkg-mkdeb.gouoc49UG7/src/mvapich/src/build/../src/mpi/init/initthread.c"
        in exclusive_tree
    )

    # Inclusive time: known interpreter and PSM-finalize nodes are present.
    inclusive_tree = gf.tree(metric="time (inc)", color=False)
    assert "17989.000 interp.c:0 interp.c" in inclusive_tree
    assert (
        "999238.000 230:psm_dofinalize /tmp/dpkg-mkdeb.gouoc49UG7/src/mvapich/src/build/../src/mpid/ch3/channels/psm/src/psm_exit.c"
        in inclusive_tree
    )

    # Thresholding should still keep the dominant MPID_Finalize node visible.
    thresholded_tree = gf.tree(metric="time (inc)", color=False, threshold=0.5)
    assert (
        "999238.000 294:MPID_Finalize /tmp/dpkg-mkdeb.gouoc49UG7/src/mvapich/src/build/../src/mpid/ch3/src/mpid_finalize.c"
        in thresholded_tree
    )
def test_tree(calc_pi_hpct_db):
    """Spot-check ConsoleRenderer output for known nodes under both metrics."""
    gf = GraphFrame.from_hpctoolkit(str(calc_pi_hpct_db))

    def render(metric):
        # Render the full tree (rank 0, thread 0) for the requested metric,
        # with all options other than the metric held fixed.
        return ConsoleRenderer(unicode=True, color=False).render(
            gf.graph.roots,
            gf.dataframe,
            metric_column=metric,
            precision=3,
            name_column="name",
            expand_name=False,
            context_column="file",
            rank=0,
            thread=0,
            depth=10000,
            highlight_name=False,
            invert_colormap=False,
        )

    exclusive_output = render("time")
    assert "0.000 <program root> <unknown file>" in exclusive_output
    assert (
        "0.000 198:MPIR_Init_thread /tmp/dpkg-mkdeb.gouoc49UG7/src/mvapich/src/build/../src/mpi/init/initthread.c"
        in exclusive_output
    )

    inclusive_output = render("time (inc)")
    assert "17989.000 interp.c:0 interp.c" in inclusive_output
    assert (
        "999238.000 230:psm_dofinalize /tmp/dpkg-mkdeb.gouoc49UG7/src/mvapich/src/build/../src/mpid/ch3/channels/psm/src/psm_exit.c"
        in inclusive_output
    )
def test_high_level_node_id_index_levels(calc_pi_hpct_db):
    """Query by node_id with range, exact, and invalid filters."""
    gf = GraphFrame.from_hpctoolkit(str(calc_pi_hpct_db))
    root = gf.graph.roots[0]
    child = root.children[0]
    grandchild = child.children[0]

    # A wildcard query bounded by node_id <= 2 matches every chain drawn from
    # the first three nodes of the graph.
    bounded_query = QueryMatcher([("*", {"node_id": "<= 2"})])
    expected = [
        [root, child],
        [child],
        [root, child, grandchild],
        [child, grandchild],
        [grandchild],
    ]
    assert sorted(bounded_query.apply(gf)) == sorted(expected)

    # node_id == 0 matches only the root.
    exact_query = QueryMatcher([("*", {"node_id": 0})])
    assert exact_query.apply(gf) == [[root]]

    # A non-numeric node_id filter string is rejected.
    with pytest.raises(InvalidQueryFilter):
        bad_query = QueryMatcher([{"node_id": "hello"}])
        bad_query.apply(gf)
def test_graphframe(data_dir, calc_pi_hpct_db):
    """Sanity test a GraphFrame object with known data."""
    gf = GraphFrame.from_hpctoolkit(str(calc_pi_hpct_db))

    # Known column cardinalities for the calc-pi database.
    assert len(gf.dataframe.groupby("module")) == 5
    assert len(gf.dataframe.groupby("file")) == 11
    assert len(gf.dataframe.groupby("name")) == 20

    # Verify the dtype of every recognized column.
    for col in gf.dataframe.columns:
        if col in ("time (inc)", "time"):
            assert gf.dataframe[col].dtype == np.float64
        elif col in ("nid", "rank", "line"):
            assert gf.dataframe[col].dtype == np.int64
        elif col in ("name", "type", "file", "module", "node"):
            # np.object was deprecated in NumPy 1.20 and removed in 1.24; the
            # builtin object compares identically against an object dtype.
            assert gf.dataframe[col].dtype == object

    # add tests to confirm values in dataframe
    df = pd.read_csv(str(os.path.join(data_dir, "hpctoolkit-cpi-graphframe.csv")))

    # Align row order between the read database and the reference CSV before
    # comparing metric values element-wise.
    gf.dataframe.reset_index(inplace=True)
    df.reset_index(inplace=True)
    gf.dataframe.sort_values(by=["nid", "rank"], inplace=True)
    df.sort_values(by=["nid", "rank"], inplace=True)

    t1 = gf.dataframe["time"].values
    t2 = df["time"].values
    ti1 = gf.dataframe["time (inc)"].values
    ti2 = df["time (inc)"].values

    for v1, v2 in zip(t1, t2):
        assert v1 == v2
    for v1, v2 in zip(ti1, ti2):
        assert v1 == v2
def test_allgather(data_dir, osu_allgather_hpct_db):
    """Sanity-check the osu_allgather database against a reference CSV."""
    gf = GraphFrame.from_hpctoolkit(str(osu_allgather_hpct_db))

    # Known column cardinalities for the allgather database.
    assert len(gf.dataframe.groupby("module")) == 9
    assert len(gf.dataframe.groupby("file")) == 41
    assert len(gf.dataframe.groupby("name")) == 383
    assert len(gf.dataframe.groupby("type")) == 3

    # Verify the dtype of every recognized column (this dataset also has a
    # per-thread index level).
    for col in gf.dataframe.columns:
        if col in ("time (inc)", "time"):
            assert gf.dataframe[col].dtype == np.float64
        elif col in ("nid", "rank", "thread", "line"):
            assert gf.dataframe[col].dtype == np.int64
        elif col in ("name", "type", "file", "module", "node"):
            # np.object was deprecated in NumPy 1.20 and removed in 1.24; the
            # builtin object compares identically against an object dtype.
            assert gf.dataframe[col].dtype == object

    # add tests to confirm values in dataframe
    df = pd.read_csv(
        str(os.path.join(data_dir, "hpctoolkit-allgather-graphframe.csv"))
    )

    # Align row order between the read database and the reference CSV before
    # comparing metric values element-wise.
    gf.dataframe.reset_index(inplace=True)
    df.reset_index(inplace=True)
    gf.dataframe.sort_values(by=["nid", "rank", "thread"], inplace=True)
    df.sort_values(by=["nid", "rank", "thread"], inplace=True)

    t1 = gf.dataframe["time"].values
    t2 = df["time"].values
    ti1 = gf.dataframe["time (inc)"].values
    ti2 = df["time (inc)"].values

    for v1, v2 in zip(t1, t2):
        assert v1 == v2
    for v1, v2 in zip(ti1, ti2):
        assert v1 == v2
#!/usr/bin/env python # # Copyright 2017-2020 Lawrence Livermore National Security, LLC and other # Hatchet Project Developers. See the top-level LICENSE file for details. # # SPDX-License-Identifier: MIT from __future__ import print_function import argparse from hatchet import GraphFrame if __name__ == "__main__": parser = argparse.ArgumentParser( description="print timings for reading an HPCToolkit database") parser.add_argument("directory", metavar="DIRECTORY", action="store", help="directory to read") args = parser.parse_args() reader = GraphFrame.from_hpctoolkit(args.directory)
def test_graph_not_equal(mock_graph_literal, calc_pi_hpct_db):
    """Graphs built from unrelated sources must compare unequal."""
    literal_gf = GraphFrame.from_literal(mock_graph_literal)
    hpct_gf = GraphFrame.from_hpctoolkit(str(calc_pi_hpct_db))

    assert literal_gf.graph != hpct_gf.graph