def read_json_sections(self):
    """Load the Caliper JSON document and cache its sections on the reader.

    If ``self.filename_ext`` is ``".cali"``, ``cali-query`` is run with
    ``self.query`` on the input and its standard output replaces
    ``self.filename_or_stream``. A ``str`` input is opened as a file path;
    anything else is treated as a file-like object and read in full. The
    object may yield either text or UTF-8 encoded bytes.

    Sets ``json_data``, ``json_cols``, ``json_cols_mdata``, ``json_nodes``,
    ``path_col_name`` and ``metric_columns`` on ``self``, and renames
    well-known columns of ``json_cols`` in place.

    Raises:
        ValueError: if the input is a ``.cali`` file and ``which`` cannot
            find ``cali-query``.
        SystemExit: if the input has neither a callpath nor a ``path``
            column.
    """
    # if cali-query exists, extract data from .cali to a file-like object
    query_proc = None
    if self.filename_ext == ".cali":
        cali_query = which("cali-query")
        if not cali_query:
            raise ValueError("from_caliper() needs cali-query to query .cali file")
        query_proc = subprocess.Popen(
            [cali_query, "-q", self.query, self.filename_or_stream],
            stdout=subprocess.PIPE,
        )
        self.filename_or_stream = query_proc.stdout

    # if filename_or_stream is a str, then open the file, otherwise
    # directly load the file-like object
    if isinstance(self.filename_or_stream, str):
        with open(self.filename_or_stream) as cali_json:
            json_obj = json.load(cali_json)
    else:
        try:
            raw = self.filename_or_stream.read()
        finally:
            # Always release the pipe and reap the child we spawned, even
            # if reading fails, so no zombie process or open fd is left.
            if query_proc is not None:
                query_proc.stdout.close()
                query_proc.wait()
        # Pipes and binary files yield bytes; decode explicitly instead of
        # relying on the json module accepting bytes.
        if isinstance(raw, bytes):
            raw = raw.decode("utf-8")
        json_obj = json.loads(raw)

    # read various sections of the Caliper JSON file
    self.json_data = json_obj["data"]
    self.json_cols = json_obj["columns"]
    self.json_cols_mdata = json_obj["column_metadata"]
    self.json_nodes = json_obj["nodes"]

    # decide which column to use as the primary path hierarchy
    # first preference to callpath if available
    if "source.function#callpath.address" in self.json_cols:
        self.path_col_name = "source.function#callpath.address"
    elif "path" in self.json_cols:
        self.path_col_name = "path"
    else:
        sys.exit("No hierarchy column in input file")

    # change column names: the path column is just a pointer into the nodes
    # section, the others are made consistent with other readers
    renames = {
        self.path_col_name: self.nid_col_name,
        "mpi.rank": "rank",
        "module#cali.sampler.pc": "module",
        "sum#time.duration": "time",
        "inclusive#sum#time.duration": "time (inc)",
    }
    # slice-assign so the existing list object is updated in place
    self.json_cols[:] = [renames.get(col, col) for col in self.json_cols]

    # make list of metric columns: every value column except the rank
    self.metric_columns = [
        col
        for col, mdata in zip(self.json_cols, self.json_cols_mdata)
        if col != "rank" and mdata["is_value"] is True
    ]
def test_lulesh_json_stream(lulesh_caliper_cali):
    """Sanity check the Caliper reader ingesting JSON from a cali-query stream."""
    cali_query = which("cali-query")
    if not cali_query:
        # Same precondition the other cali-query based tests declare; without
        # it Popen would be handed None as the executable.
        pytest.skip("needs cali-query to be in path")

    grouping_attribute = "function"
    default_metric = "sum(sum#time.duration),inclusive_sum(sum#time.duration)"
    query = "select function,%s group by %s format json-split" % (
        default_metric,
        grouping_attribute,
    )

    cali_json = subprocess.Popen(
        [cali_query, "-q", query, lulesh_caliper_cali], stdout=subprocess.PIPE
    )
    try:
        gf = GraphFrame.from_caliper_json(cali_json.stdout)
    finally:
        # Close the pipe and reap the child so the test leaks neither a file
        # descriptor nor a zombie process.
        cali_json.stdout.close()
        cali_json.wait()

    assert len(gf.dataframe.groupby("name")) == 18
def read_json_sections(self):
    """Load the Caliper JSON document and cache its sections on the reader.

    If ``self.filename_ext`` is ``".cali"``, ``cali-query`` is run with
    ``self.query`` on the input and its standard output replaces
    ``self.filename_or_stream``. A ``str`` input is opened as a file path;
    anything else is treated as a file-like object and read in full. The
    object may yield either text or UTF-8 encoded bytes.

    Sets ``json_data`` (without rows whose path entry is null),
    ``json_cols``, ``json_cols_mdata``, ``json_nodes``, ``path_col_name``,
    ``node_type`` and ``metric_columns`` on ``self``, and renames well-known
    columns of ``json_cols`` in place.

    Raises:
        ValueError: if the input is a ``.cali`` file and ``which`` cannot
            find ``cali-query``.
        SystemExit: if the input has neither a callpath nor a ``path``
            column.
    """
    # if cali-query exists, extract data from .cali to a file-like object
    query_proc = None
    if self.filename_ext == ".cali":
        cali_query = which("cali-query")
        if not cali_query:
            raise ValueError("from_caliper() needs cali-query to query .cali file")
        query_proc = subprocess.Popen(
            [cali_query, "-q", self.query, self.filename_or_stream],
            stdout=subprocess.PIPE,
        )
        self.filename_or_stream = query_proc.stdout

    # if filename_or_stream is a str, then open the file, otherwise
    # directly load the file-like object
    if isinstance(self.filename_or_stream, str):
        with open(self.filename_or_stream) as cali_json:
            json_obj = json.load(cali_json)
    else:
        try:
            raw = self.filename_or_stream.read()
        finally:
            # Always release the pipe and reap the child we spawned, even
            # if reading fails, so no zombie process or open fd is left.
            if query_proc is not None:
                query_proc.stdout.close()
                query_proc.wait()
        # Text-mode streams already yield str; only bytes need decoding.
        if isinstance(raw, bytes):
            raw = raw.decode("utf-8")
        json_obj = json.loads(raw)

    # read various sections of the Caliper JSON file
    self.json_data = json_obj["data"]
    self.json_cols = json_obj["columns"]
    self.json_cols_mdata = json_obj["column_metadata"]
    self.json_nodes = json_obj["nodes"]

    # decide which column to use as the primary path hierarchy
    # first preference to callpath if available
    if "source.function#callpath.address" in self.json_cols:
        self.path_col_name = "source.function#callpath.address"
        self.node_type = "function"
    elif "path" in self.json_cols:
        self.path_col_name = "path"
        self.node_type = "region"
    else:
        sys.exit("No hierarchy column in input file")

    # remove data entries containing None in the path column (null in the
    # json file); a single filtering pass keeps the remaining rows in order
    path_col = self.json_cols.index(self.path_col_name)
    self.json_data = [row for row in self.json_data if row[path_col] is not None]

    # change column names: the path column is just a pointer into the nodes
    # section, the others are made consistent with other readers
    renames = {
        self.path_col_name: self.nid_col_name,
        "mpi.rank": "rank",
        "module#cali.sampler.pc": "module",
        "sum#time.duration": "time",
        "sum#avg#sum#time.duration": "time",
        "inclusive#sum#time.duration": "time (inc)",
        "sum#avg#inclusive#sum#time.duration": "time (inc)",
    }
    # slice-assign so the existing list object is updated in place
    self.json_cols[:] = [renames.get(col, col) for col in self.json_cols]

    # make list of metric columns: every value column except the rank
    self.metric_columns = [
        col
        for col, mdata in zip(self.json_cols, self.json_cols_mdata)
        if col != "rank" and mdata["is_value"] is True
    ]
# NOTE(review): the statements up to `test_calc_pi_json` look like the tail of
# a test whose `def` line lies outside this view; `reader` and `annotations`
# are not defined here. Re-indent them under that definition when restoring
# the file's layout.
assert len(reader.json_cols) == 4
assert len(reader.json_cols_mdata) == 4
assert len(reader.json_nodes) == 24

reader.create_graph()
assert all(an in reader.idx_to_label.values() for an in annotations)


def test_calc_pi_json(calc_pi_caliper_json):
    """Sanity test a GraphFrame object with known data."""
    # The fixture value is converted to str before being handed to the reader.
    gf = GraphFrame.from_caliper_json(str(calc_pi_caliper_json))

    # Grouping the dataframe by "name" is expected to yield 100 groups.
    assert len(gf.dataframe.groupby("name")) == 100


@pytest.mark.skipif(not which("cali-query"), reason="needs cali-query to be in path")
def test_lulesh_cali(lulesh_caliper_cali):
    """Sanity check the Caliper reader ingesting a .cali file."""
    # Build the query string passed to from_caliper: the two time metrics,
    # grouped by function, in json-split format.
    grouping_attribute = "function"
    default_metric = "sum(sum#time.duration),inclusive_sum(sum#time.duration)"
    query = "select function,%s group by %s format json-split" % (
        default_metric,
        grouping_attribute,
    )

    gf = GraphFrame.from_caliper(str(lulesh_caliper_cali), query)

    # Grouping the dataframe by "name" is expected to yield 18 groups.
    assert len(gf.dataframe.groupby("name")) == 18
def test_which(mock_cali_query):
    """Check that `which` returns a truthy result for "cali-query".

    Runs with the `mock_cali_query` fixture requested.
    """
    located = which("cali-query")
    assert located
# NOTE(review): the name says "calc_pi" but the fixture used is
# `lulesh_caliper_json` — confirm which dataset this test is meant to cover.
def test_read_calc_pi_database(lulesh_caliper_json):
    """Sanity check the Caliper reader by examining a known input."""
    reader = CaliperReader(str(lulesh_caliper_json))
    reader.read_json_sections()

    # Expected sizes of the sections loaded by read_json_sections().
    assert len(reader.json_data) == 192
    assert len(reader.json_cols) == 4
    assert len(reader.json_cols_mdata) == 4
    assert len(reader.json_nodes) == 24

    reader.create_graph()
    # `annotations` is defined outside this view; every entry must appear
    # among the values of reader.idx_to_label after the graph is created.
    assert all(an in reader.idx_to_label.values() for an in annotations)


@pytest.mark.skipif(not which("cali-query"), reason="needs cali-query to be in path")
def test_sample_cali(sample_caliper_raw_cali):
    """Sanity check the Caliper reader ingesting a .cali file."""
    # Build the query string passed to from_caliper: the two time metrics,
    # grouped by function, in json-split format.
    grouping_attribute = "function"
    default_metric = "sum(sum#time.duration),inclusive_sum(sum#time.duration)"
    query = "select function,%s group by %s format json-split" % (
        default_metric,
        grouping_attribute,
    )

    gf = GraphFrame.from_caliper(str(sample_caliper_raw_cali), query)

    # Grouping the dataframe by "name" is expected to yield 18 groups.
    assert len(gf.dataframe.groupby("name")) == 18


# NOTE(review): the decorator below belongs to a definition that starts past
# the last visible line; it is kept unchanged.
@pytest.mark.skipif(not which("cali-query"), reason="needs cali-query to be in path")