Example #1
0
    def read_json_sections(self):
        """Load the Caliper JSON document and cache its sections on the reader.

        The input is taken from ``self.filename_or_stream``:

        * when ``self.filename_ext`` is ``".cali"``, ``cali-query`` is run
          with ``self.query`` on that file and its stdout becomes the stream;
        * when it is a ``str``, it is opened as a file path;
        * otherwise it is treated as a file-like object and read directly.

        On return the following attributes are set: ``json_data``,
        ``json_cols`` (renamed to match the other readers), ``json_cols_mdata``,
        ``json_nodes``, ``path_col_name`` and ``metric_columns``.

        Raises:
            ValueError: if a ``.cali`` input is given and ``which("cali-query")``
                returns a falsy value.
            SystemExit: if neither hierarchy column is present in the input.
        """
        # if cali-query exists, extract data from .cali to a file-like object
        cali_proc = None
        if self.filename_ext == ".cali":
            cali_query = which("cali-query")
            if not cali_query:
                raise ValueError(
                    "from_caliper() needs cali-query to query .cali file")
            cali_proc = subprocess.Popen(
                [cali_query, "-q", self.query, self.filename_or_stream],
                stdout=subprocess.PIPE,
            )
            self.filename_or_stream = cali_proc.stdout

        # if filename_or_stream is a str, then open the file, otherwise
        # directly load the file-like object
        if isinstance(self.filename_or_stream, str):
            with open(self.filename_or_stream) as cali_json:
                json_obj = json.load(cali_json)
        else:
            try:
                json_obj = json.load(self.filename_or_stream)
            finally:
                # Only clean up the process we started ourselves; a stream
                # handed in by the caller stays under the caller's control.
                # Closing the pipe first guarantees wait() cannot block on a
                # child that still has output to write.
                if cali_proc is not None:
                    cali_proc.stdout.close()
                    cali_proc.wait()

        # read various sections of the Caliper JSON file
        self.json_data = json_obj["data"]
        self.json_cols = json_obj["columns"]
        self.json_cols_mdata = json_obj["column_metadata"]
        self.json_nodes = json_obj["nodes"]

        # decide which column to use as the primary path hierarchy
        # first preference to callpath if available
        if "source.function#callpath.address" in self.json_cols:
            self.path_col_name = "source.function#callpath.address"
        elif "path" in self.json_cols:
            self.path_col_name = "path"
        else:
            sys.exit("No hierarchy column in input file")

        # change column names: the hierarchy column is just a pointer into the
        # nodes section, the rest are made consistent with other readers
        renames = {
            self.path_col_name: self.nid_col_name,
            "mpi.rank": "rank",
            "module#cali.sampler.pc": "module",
            "sum#time.duration": "time",
            "inclusive#sum#time.duration": "time (inc)",
        }
        # rename in place so the list object held by json_cols is preserved
        for idx, col in enumerate(self.json_cols):
            self.json_cols[idx] = renames.get(col, col)

        # make list of metric columns: every value column except the rank
        self.metric_columns = [
            self.json_cols[idx]
            for idx, mdata in enumerate(self.json_cols_mdata)
            if self.json_cols[idx] != "rank" and mdata["is_value"] is True
        ]
Example #2
0
def test_lulesh_json_stream(lulesh_caliper_cali):
    """Sanity check the Caliper reader ingesting JSON streamed from cali-query.

    ``cali-query`` is run on the ``lulesh_caliper_cali`` fixture and its stdout
    pipe is handed to ``GraphFrame.from_caliper_json`` as a file-like object.
    """
    cali_query = which("cali-query")
    grouping_attribute = "function"
    default_metric = "sum(sum#time.duration),inclusive_sum(sum#time.duration)"
    query = "select function,%s group by %s format json-split" % (
        default_metric,
        grouping_attribute,
    )

    # str() keeps the argv entry a plain string, as the sibling tests do for
    # their fixture paths.
    cali_json = subprocess.Popen(
        [cali_query, "-q", query, str(lulesh_caliper_cali)],
        stdout=subprocess.PIPE,
    )
    try:
        gf = GraphFrame.from_caliper_json(cali_json.stdout)
    finally:
        # Close our end of the pipe and reap the child so the test does not
        # leave an open descriptor or an unwaited process behind.
        cali_json.stdout.close()
        cali_json.wait()

    assert len(gf.dataframe.groupby("name")) == 18
Example #3
0
    def read_json_sections(self):
        """Load the Caliper JSON document and cache its sections on the reader.

        The input is taken from ``self.filename_or_stream``:

        * when ``self.filename_ext`` is ``".cali"``, ``cali-query`` is run
          with ``self.query`` on that file and its stdout becomes the stream;
        * when it is a ``str``, it is opened as a file path;
        * otherwise it is treated as a file-like object; its content may be
          bytes (decoded as UTF-8) or text.

        On return the following attributes are set: ``json_data`` (without the
        rows whose hierarchy column is null), ``json_cols`` (renamed to match
        the other readers), ``json_cols_mdata``, ``json_nodes``,
        ``path_col_name``, ``node_type`` and ``metric_columns``.

        Raises:
            ValueError: if a ``.cali`` input is given and ``which("cali-query")``
                returns a falsy value.
            SystemExit: if neither hierarchy column is present in the input.
        """
        # if cali-query exists, extract data from .cali to a file-like object
        cali_proc = None
        if self.filename_ext == ".cali":
            cali_query = which("cali-query")
            if not cali_query:
                raise ValueError(
                    "from_caliper() needs cali-query to query .cali file")
            cali_proc = subprocess.Popen(
                [cali_query, "-q", self.query, self.filename_or_stream],
                stdout=subprocess.PIPE,
            )
            self.filename_or_stream = cali_proc.stdout

        # if filename_or_stream is a str, then open the file, otherwise
        # directly load the file-like object
        if isinstance(self.filename_or_stream, str):
            with open(self.filename_or_stream) as cali_json:
                json_obj = json.load(cali_json)
        else:
            try:
                raw = self.filename_or_stream.read()
            finally:
                # Only clean up the process we started ourselves; a stream
                # handed in by the caller stays under the caller's control.
                # Closing the pipe first guarantees wait() cannot block on a
                # child that still has output to write.
                if cali_proc is not None:
                    cali_proc.stdout.close()
                    cali_proc.wait()
            # binary streams (e.g. a subprocess pipe) yield bytes; text
            # streams already yield decoded text and must not be decoded again
            if isinstance(raw, bytes):
                raw = raw.decode("utf-8")
            json_obj = json.loads(raw)

        # read various sections of the Caliper JSON file
        self.json_data = json_obj["data"]
        self.json_cols = json_obj["columns"]
        self.json_cols_mdata = json_obj["column_metadata"]
        self.json_nodes = json_obj["nodes"]

        # decide which column to use as the primary path hierarchy
        # first preference to callpath if available
        if "source.function#callpath.address" in self.json_cols:
            self.path_col_name = "source.function#callpath.address"
            self.node_type = "function"
        elif "path" in self.json_cols:
            self.path_col_name = "path"
            self.node_type = "region"
        else:
            sys.exit("No hierarchy column in input file")

        # remove data entries containing None in the hierarchy column (null in
        # the json file); a single filtering pass keeps the remaining rows in
        # their original order
        path_col = self.json_cols.index(self.path_col_name)
        self.json_data = [
            row for row in self.json_data if row[path_col] is not None
        ]

        # change column names: the hierarchy column is just a pointer into the
        # nodes section, the rest are made consistent with other readers
        renames = {
            self.path_col_name: self.nid_col_name,
            "mpi.rank": "rank",
            "module#cali.sampler.pc": "module",
            "sum#time.duration": "time",
            "sum#avg#sum#time.duration": "time",
            "inclusive#sum#time.duration": "time (inc)",
            "sum#avg#inclusive#sum#time.duration": "time (inc)",
        }
        # rename in place so the list object held by json_cols is preserved
        for idx, col in enumerate(self.json_cols):
            self.json_cols[idx] = renames.get(col, col)

        # make list of metric columns: every value column except the rank
        self.metric_columns = [
            self.json_cols[idx]
            for idx, mdata in enumerate(self.json_cols_mdata)
            if self.json_cols[idx] != "rank" and mdata["is_value"] is True
        ]
Example #4
0
    assert len(reader.json_cols) == 4
    assert len(reader.json_cols_mdata) == 4
    assert len(reader.json_nodes) == 24

    reader.create_graph()
    assert all(an in reader.idx_to_label.values() for an in annotations)


def test_calc_pi_json(calc_pi_caliper_json):
    """Build a GraphFrame from a known Caliper JSON file and count its names."""
    json_path = str(calc_pi_caliper_json)
    graphframe = GraphFrame.from_caliper_json(json_path)

    # one group per distinct value in the "name" column
    name_groups = graphframe.dataframe.groupby("name")
    assert len(name_groups) == 100


@pytest.mark.skipif(
    not which("cali-query"), reason="needs cali-query to be in path"
)
def test_lulesh_cali(lulesh_caliper_cali):
    """Read a .cali file through ``GraphFrame.from_caliper`` and count names."""
    metrics = "sum(sum#time.duration),inclusive_sum(sum#time.duration)"
    group_by = "function"
    cali_query_text = "select function,%s group by %s format json-split" % (
        metrics,
        group_by,
    )

    cali_path = str(lulesh_caliper_cali)
    graphframe = GraphFrame.from_caliper(cali_path, cali_query_text)

    # one group per distinct value in the "name" column
    name_groups = graphframe.dataframe.groupby("name")
    assert len(name_groups) == 18

Example #5
0
def test_which(mock_cali_query):
    """Check that ``which`` returns a truthy result for ``cali-query``."""
    located = which("cali-query")
    assert located
Example #6
0
def test_read_calc_pi_database(lulesh_caliper_json):
    """Check section sizes and node labels read from a known Caliper input."""
    json_path = str(lulesh_caliper_json)
    reader = CaliperReader(json_path)
    reader.read_json_sections()

    # sizes of the four sections cached by read_json_sections()
    n_rows = len(reader.json_data)
    n_cols = len(reader.json_cols)
    n_cols_mdata = len(reader.json_cols_mdata)
    n_nodes = len(reader.json_nodes)
    assert n_rows == 192
    assert n_cols == 4
    assert n_cols_mdata == 4
    assert n_nodes == 24

    reader.create_graph()

    # every expected annotation must appear among the graph's labels
    labels = reader.idx_to_label.values()
    for annotation in annotations:
        assert annotation in labels


@pytest.mark.skipif(
    not which("cali-query"),
    reason="needs cali-query to be in path",
)
def test_sample_cali(sample_caliper_raw_cali):
    """Read a raw .cali sample via ``GraphFrame.from_caliper`` and count names."""
    group_key = "function"
    metric_exprs = "sum(sum#time.duration),inclusive_sum(sum#time.duration)"
    query_args = (metric_exprs, group_key)
    cali_query_text = (
        "select function,%s group by %s format json-split" % query_args
    )

    graphframe = GraphFrame.from_caliper(
        str(sample_caliper_raw_cali), cali_query_text
    )

    # one group per distinct value in the "name" column
    distinct_names = graphframe.dataframe.groupby("name")
    assert len(distinct_names) == 18


@pytest.mark.skipif(not which("cali-query"), reason="needs cali-query to be in path")