Example #1
    def read_cluster_stats(dir_name):
        """Read cluster results from disk.

    If the output directory is empty, or all bin files are empty, an empty dict
    will be returned. There are no checks for duplicate values.

    Malformed files will be skipped by the parser.

    To write the results object to disk, use append_cluster_stats.

    Args:
      dir_name (str): Top-level directory from which to read the results. Can
        be absolute or relative to the current working directory.
    """
        results_map = {}
        for node_id in os.listdir(dir_name):
            results_map[node_id] = []
            host_output_dir = os.path.join(dir_name, str(node_id))
            bin_file_paths = glob.glob(os.path.join(host_output_dir, "*.bin"))
            for file_path in bin_file_paths:
                existing_metric = CurieMetric()
                with open(file_path, "r") as f:
                    try:
                        existing_metric.ParseFromString(f.read())
                    except message.DecodeError:
                        log.warning("Failed to decode %s", file_path)
                    else:
                        results_map[node_id].append(existing_metric)
        return MetricsUtil.sorted_results_map(results_map)
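
A minimal usage sketch pairing read_cluster_stats with append_cluster_stats (Example #16). CurieMetric (the protobuf message) and ScenarioUtil are assumed to be importable from the curie package, as in the tests below; the temporary directory is illustrative only.

    # Hedged usage sketch; import locations and paths are assumptions, not
    # confirmed by the examples on this page.
    import os
    import tempfile

    metric = CurieMetric(name=CurieMetric.kCpuUsage,
                         description="Average CPU usage for all cores.",
                         instance="Aggregated",
                         type=CurieMetric.kGauge,
                         consolidation=CurieMetric.kAvg,
                         unit=CurieMetric.kPercent)
    metric.timestamps.extend([1454092320, 1454092321])
    metric.values.extend([1, 2])

    output_dir = os.path.join(tempfile.mkdtemp(), "cluster_stats")
    ScenarioUtil.append_cluster_stats({"node_0": [metric]}, output_dir)

    # read_cluster_stats returns a {node_id: [CurieMetric, ...]} map, sorted by
    # MetricsUtil.sorted_results_map.
    results_map = ScenarioUtil.read_cluster_stats(output_dir)
    assert results_map.keys() == ["node_0"]   # Python 2 dict.keys() semantics
    assert list(results_map["node_0"][0].values) == [1, 2]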
Example #2
    def test_cluster_results_update(self):
        self.scenario.cluster.collect_performance_stats.return_value = {
            "Node 1": [
                CurieMetric(name=CurieMetric.kCpuUsage,
                            description="Average CPU usage for all cores.",
                            instance="Aggregated",
                            type=CurieMetric.kGauge,
                            consolidation=CurieMetric.kAvg,
                            unit=CurieMetric.kPercent,
                            experimental=True)
            ],
            "Node 2": [
                CurieMetric(name=CurieMetric.kCpuUsage,
                            description="Average CPU usage for all cores.",
                            instance="Aggregated",
                            type=CurieMetric.kGauge,
                            consolidation=CurieMetric.kAvg,
                            unit=CurieMetric.kPercent,
                            experimental=True)
            ],
        }

        self.scenario._cluster_results_update()

        self.assertTrue(
            os.path.isdir(
                os.path.join(self.scenario.output_directory, "cluster_stats")))
        csv_path = os.path.join(self.scenario.output_directory,
                                "cluster_stats", "cluster_stats.csv")
        self.assertTrue(os.path.isfile(csv_path))

        # Execute again to test the appending code path.
        self.scenario._cluster_results_update()
Example #3
 def test_to_csv_no_header(self):
     metric = CurieMetric()
     metric.CopyFrom(self.counter_template)
     metric.timestamps.extend([1454092320, 1454092321])
     metric.values.extend([1, 2])
     new_results = {"node_0": [metric]}
     csv = ScenarioUtil.results_map_to_csv(new_results, header=False)
     self.assertEqual(
         csv, "1454092320,node_0,CpuUsage.Avg.Percent,Aggregated,1\n" +
         "1454092321,node_0,CpuUsage.Avg.Percent,Aggregated,2\n")
Example #4
 def test_append_cluster_stats_corrupt(self):
     output_dir = os.path.join(environment.test_output_dir(self),
                               "test_append_cluster_stats_corrupt")
     if os.path.isdir(output_dir):
         shutil.rmtree(output_dir)
     metric_to_append = CurieMetric()
     metric_to_append.CopyFrom(self.counter_template)
     metric_to_append.timestamps.extend([1454092320, 1454092321])
     metric_to_append.values.extend([1, 2])
     new_results = {"node_0": [metric_to_append]}
     ScenarioUtil.append_cluster_stats(new_results, output_dir)
     # Corrupt the file.
     filename = ("%s_%s" % (self._counter_template_name(),
                            self.counter_template.instance)).replace(
                                ".", "_")
     bin_path = os.path.join(output_dir, "node_0", filename + ".bin")
     assert os.path.isfile(bin_path)
     with open(bin_path, "w") as f:
         f.write("Cela ne veut pas un protobuf.")
     metric_to_append = CurieMetric()
     metric_to_append.CopyFrom(self.counter_template)
     metric_to_append.timestamps.extend([1454092322, 1454092323])
     metric_to_append.values.extend([3, 4])
     new_results = {"node_0": [metric_to_append]}
     ScenarioUtil.append_cluster_stats(new_results, output_dir)
     expected_metric = CurieMetric()
     expected_metric.CopyFrom(self.counter_template)
     expected_metric.timestamps.extend([1454092322, 1454092323])
     expected_metric.values.extend([3, 4])
     results = ScenarioUtil.read_cluster_stats(output_dir)
     self.assertEqual(results.keys(), ["node_0"])
     self.assertEqual(len(results["node_0"]), 1)
     self.assertEqual(results["node_0"][0], expected_metric)
Example #5
 def test_append_cluster_stats_duplicates(self):
     output_dir = os.path.join(environment.test_output_dir(self),
                               "test_append_cluster_stats_duplicates")
     if os.path.isdir(output_dir):
         shutil.rmtree(output_dir)
     metric_to_append = CurieMetric()
     metric_to_append.CopyFrom(self.counter_template)
     metric_to_append.timestamps.extend(
         [1454092320, 1454092321, 1454092322])
     metric_to_append.values.extend([1, 2, 3])
     new_results = {"node_0": [metric_to_append]}
     ScenarioUtil.append_cluster_stats(new_results, output_dir)
     metric_to_append = CurieMetric()
     metric_to_append.CopyFrom(self.counter_template)
     metric_to_append.timestamps.extend([1454092322, 1454092323])
     metric_to_append.values.extend([3, 4])
     new_results = {"node_0": [metric_to_append]}
     ScenarioUtil.append_cluster_stats(new_results, output_dir)
     expected_metric = CurieMetric()
     expected_metric.CopyFrom(self.counter_template)
     expected_metric.timestamps.extend(
         [1454092320, 1454092321, 1454092322, 1454092323])
     expected_metric.values.extend([1, 2, 3, 4])
     results = ScenarioUtil.read_cluster_stats(output_dir)
     self.assertEqual(results.keys(), ["node_0"])
     self.assertEqual(len(results["node_0"]), 1)
     self.assertEqual(results["node_0"][0], expected_metric)
Example #6
 def test_to_csv_newline(self):
     metric = CurieMetric()
     metric.CopyFrom(self.counter_template)
     metric.timestamps.extend([1454092320, 1454092321])
     metric.values.extend([1, 2])
     new_results = {"node_0": [metric]}
     csv = ScenarioUtil.results_map_to_csv(new_results, newline="\r\n")
     self.assertEqual(
         csv, "timestamp,node_id,metric_name,instance,value\r\n" +
         "1454092320,node_0,CpuUsage.Avg.Percent,Aggregated,1\r\n" +
         "1454092321,node_0,CpuUsage.Avg.Percent,Aggregated,2\r\n")
Example #7
 def __produce_curie_metrics(self, stats_specific_responses, node):
     responses_by_counter_name = {}
     for metric in stats_specific_responses:
         responses_by_counter_name[metric["metric"]] = metric
     results = []
     for curie_metric in self.metrics():
         ahv_counter_name = self._curie_metric_to_metric_name(curie_metric)
         metric = responses_by_counter_name[ahv_counter_name]
         start_time_secs = int(metric["startTimeInUsecs"] / 1e6)
         interval_secs = int(metric["intervalInSecs"])
         values = metric["values"]
         offsets = [index * interval_secs for index in range(len(values))]
         timestamps = [start_time_secs + offset for offset in offsets]
         # If any values are None, remove them and their corresponding timestamps.
         timestamp_value_tuples = [
             tup for tup in zip(timestamps, values) if tup[1] is not None
         ]
         if timestamp_value_tuples:
             timestamps, values = zip(*timestamp_value_tuples)
         else:
             timestamps, values = [], []
         result = CurieMetric()
         result.CopyFrom(curie_metric)
         # TODO(ryan.hardin): Generalize unit conversion, move to utility module.
         if result.rate == CurieMetric.kPerSecond:
             # Convert units per interval into units per second.
             values = [(value / float(interval_secs)) for value in values]
         if result.unit == CurieMetric.kPercent:
             # Assume metric in ppm (parts per million) - convert to percentage.
             values = [(value / 1e4) for value in values]
         elif result.unit == CurieMetric.kKilobytes:
             # Assume metric in bytes - convert to kilobytes.
             values = [(value / float(2**10)) for value in values]
         elif (result.unit == CurieMetric.kMegahertz
               and result.name == CurieMetric.kCpuUsage):
             # Assume metric in ppm (parts per million) - convert to total megahertz.
             # TODO(ryan.hardin): Should node.cpu_capacity_in_hz ever return None?
             if node.cpu_capacity_in_hz is None:
                 log.debug("node.cpu_capacity_in_hz returned None")
                 timestamps, values = [], []
             else:
                 values = [(cpu_ppm * node.cpu_capacity_in_hz / 1e12)
                           for cpu_ppm in values]
         CHECK(len(result.timestamps) == 0)
         result.timestamps.extend(timestamps)
         CHECK(len(result.values) == 0)
         result.values.extend([int(value) for value in values])
         results.append(result)
     return results
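
The unit conversions above are easier to follow with concrete numbers. Below is a standalone sketch of the same arithmetic; the interval, CPU capacity, and raw counter values are made-up inputs for illustration, not values taken from the API.

    # Illustrative conversion math only; all constants below are assumed inputs.
    interval_secs = 20
    cpu_capacity_in_hz = 48 * 2.4e9          # e.g. 48 cores at 2.4 GHz (assumed)

    # kPercent counters arrive in parts per million: 250000 ppm -> 25.0 percent.
    cpu_ppm = 250000
    cpu_percent = cpu_ppm / 1e4              # 25.0

    # kMegahertz CPU usage: ppm * capacity_in_hz / 1e12 -> total MHz in use.
    cpu_megahertz = cpu_ppm * cpu_capacity_in_hz / 1e12     # 28800.0 MHz

    # kKilobytes + kPerSecond counters arrive as bytes per interval:
    # divide by the interval for bytes/s, then by 2**10 for KB/s.
    bytes_per_interval = 10485760            # 10 MiB during the 20 s interval
    kb_per_second = (bytes_per_interval / float(interval_secs)) / 2**10    # 512.0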
Example #8
 def setUp(self):
     self.counter_template = CurieMetric(name=CurieMetric.kCpuUsage,
                                         description="foo",
                                         instance="Aggregated",
                                         type=CurieMetric.kGauge,
                                         consolidation=CurieMetric.kAvg,
                                         unit=CurieMetric.kPercent)

 def test_collect_performance_stats_unsupported_metric(self,
                                                       mock_hosts_stats):
   mock_hosts_stats.return_value = {
       "statsSpecificResponses": [
         {
           "successful": True,
           "message": None,
           "startTimeInUsecs": 1476739262143200,
           "intervalInSecs": 20,
           "metric": "hypervisor_memory_usage_ppm",
           "values": [12345, 12345]
         }
       ]
     }
   cluster = AcropolisCluster(self.cluster_metadata)
   nodes = [mock.Mock(spec=AcropolisNode) for _ in xrange(4)]
   for id, node in enumerate(nodes):
     node.node_id.return_value = id
     node.cpu_capacity_in_hz = 12345
   with mock.patch.object(cluster, "nodes") as mock_nodes:
     mock_nodes.return_value = nodes
     with mock.patch.object(cluster, "metrics") as mock_metrics:
       mock_metrics.return_value = [
         CurieMetric(name=CurieMetric.kDatastoreRead,
                      description="This should not be supported.",
                      instance="*",
                      type=CurieMetric.kGauge,
                      consolidation=CurieMetric.kAvg,
                      unit=CurieMetric.kKilobytes,
                      rate=CurieMetric.kPerSecond)]
       with self.assertRaises(CurieTestException):
         cluster.collect_performance_stats()
Example #10
 def metrics(cls):
     metrics = [
         CurieMetric(name=CurieMetric.kCpuUsage,
                     description="Average CPU usage for all cores.",
                     instance="Aggregated",
                     type=CurieMetric.kGauge,
                     consolidation=CurieMetric.kAvg,
                     unit=CurieMetric.kPercent,
                     experimental=True),
         # TODO(ryan.hardin): Use kHertz here instead of kMegahertz.
         CurieMetric(name=CurieMetric.kCpuUsage,
                     description="Average CPU usage for all cores.",
                     instance="Aggregated",
                     type=CurieMetric.kGauge,
                     consolidation=CurieMetric.kAvg,
                     unit=CurieMetric.kMegahertz,
                     experimental=True),
         CurieMetric(name=CurieMetric.kMemUsage,
                     description="Average memory usage.",
                     instance="Aggregated",
                     type=CurieMetric.kGauge,
                     consolidation=CurieMetric.kAvg,
                     unit=CurieMetric.kPercent,
                     experimental=True),
         CurieMetric(name=CurieMetric.kNetReceived,
                     description="Average network data received across all "
                     "interfaces.",
                     instance="Aggregated",
                     type=CurieMetric.kGauge,
                     consolidation=CurieMetric.kAvg,
                     unit=CurieMetric.kKilobytes,
                     rate=CurieMetric.kPerSecond),
         CurieMetric(
             name=CurieMetric.kNetTransmitted,
             description="Average network data transmitted across all "
             "interfaces.",
             instance="Aggregated",
             type=CurieMetric.kGauge,
             consolidation=CurieMetric.kAvg,
             unit=CurieMetric.kKilobytes,
             rate=CurieMetric.kPerSecond),
     ]
     return metrics
Example #11
 def metrics(cls):
     metrics = [
         CurieMetric(name=CurieMetric.kMemActive,
                     description="Average active memory.",
                     instance="Aggregated",
                     type=CurieMetric.kGauge,
                     consolidation=CurieMetric.kAvg,
                     unit=CurieMetric.kKilobytes),
         CurieMetric(
             name=CurieMetric.kDatastoreRead,
             description="Average number of read commands issued per "
             "second to the datastore.",
             instance="*",
             type=CurieMetric.kGauge,
             consolidation=CurieMetric.kAvg,
             unit=CurieMetric.kOperations,
             rate=CurieMetric.kPerSecond),
         CurieMetric(
             name=CurieMetric.kDatastoreWrite,
             description="Average number of write commands issued per "
             "second to the datastore.",
             instance="*",
             type=CurieMetric.kGauge,
             consolidation=CurieMetric.kAvg,
             unit=CurieMetric.kOperations,
             rate=CurieMetric.kPerSecond),
         CurieMetric(
             name=CurieMetric.kDatastoreRead,
             description="Average rate of reading from the datastore.",
             instance="*",
             type=CurieMetric.kGauge,
             consolidation=CurieMetric.kAvg,
             unit=CurieMetric.kKilobytes,
             rate=CurieMetric.kPerSecond),
         CurieMetric(
             name=CurieMetric.kDatastoreWrite,
             description="Average rate of writing to the datastore.",
             instance="*",
             type=CurieMetric.kGauge,
             consolidation=CurieMetric.kAvg,
             unit=CurieMetric.kKilobytes,
             rate=CurieMetric.kPerSecond),
         CurieMetric(
             name=CurieMetric.kDatastoreReadLatency,
             description="Average time a read from the datastore takes.",
             instance="*",
             type=CurieMetric.kGauge,
             consolidation=CurieMetric.kAvg,
             unit=CurieMetric.kMilliseconds),
         CurieMetric(
             name=CurieMetric.kDatastoreWriteLatency,
             description="Average time a write to the datastore takes.",
             instance="*",
             type=CurieMetric.kGauge,
             consolidation=CurieMetric.kAvg,
             unit=CurieMetric.kMilliseconds),
     ]
     return super(VsphereCluster, cls).metrics() + metrics
Example #12
 def test_to_csv_rate(self):
     metric = CurieMetric()
     metric.CopyFrom(self.counter_template)
     metric.timestamps.extend([1454092320, 1454092321])
     metric.values.extend([1, 2])
     metric.name = CurieMetric.kNetTransmitted
     metric.unit = CurieMetric.kKilobytes
     metric.rate = CurieMetric.kPerSecond
     new_results = {"node_0": [metric]}
     csv = ScenarioUtil.results_map_to_csv(new_results, newline="\r\n")
     self.assertEqual(
         csv, "timestamp,node_id,metric_name,instance,value\r\n" +
         "1454092320,node_0,NetTransmitted.Avg.KilobytesPerSecond,Aggregated,1\r\n" +
         "1454092321,node_0,NetTransmitted.Avg.KilobytesPerSecond,Aggregated,2\r\n")
Example #13
 def test_parse_experimental_metric(self, mock_metrics):
     mock_metrics.return_value = [
         CurieMetric(name=CurieMetric.kCpuUsage,
                     description="This is a fake experimental metric",
                     instance="Aggregated",
                     type=CurieMetric.kGauge,
                     consolidation=CurieMetric.kAvg,
                     unit=CurieMetric.kMegahertz,
                     experimental=True)
     ]
     result = ClusterResult.parse(self.scenario, "Experimental Metric", {
         "metric": "CpuUsage.Avg.Megahertz",
         "aggregate": "sum"
     })
     self.assertIsNone(result)
     self.scenario.enable_experimental_metrics = True
     result = ClusterResult.parse(self.scenario, "Experimental Metric", {
         "metric": "CpuUsage.Avg.Megahertz",
         "aggregate": "sum"
     })
     self.assertIsInstance(result, ClusterResult)
     self.assertEqual(result.metric_name, "CpuUsage.Avg.Megahertz")
Example #14
 def test_get_result_pbs_partial(self):
     result = ClusterResult(self.scenario, "fake_result",
                            "NetReceived.Avg.KilobytesPerSecond")
     metric = CurieMetric(
         name=CurieMetric.kNetReceived,
         description="Average network data received across all "
         "interfaces.",
         instance="Aggregated",
         type=CurieMetric.kGauge,
         consolidation=CurieMetric.kAvg,
         unit=CurieMetric.kKilobytes,
         rate=CurieMetric.kPerSecond,
         experimental=True)
     ScenarioUtil.append_cluster_stats(
         {
             "169.254.0.0": [metric],
             "169.254.0.1": [metric],
             "169.254.0.2": [],
             "169.254.0.3": [],
         }, self.scenario.cluster_stats_dir())
     pbs = result.get_result_pbs()
     self.assertEqual(2, len(pbs))
     self.assertIsInstance(pbs[0], CurieTestResult)
     self.assertIsInstance(pbs[1], CurieTestResult)
Example #15
 def test_append_read_cluster_stats_empty(self):
     output_dir = os.path.join(environment.test_output_dir(self),
                               "test_append_read_cluster_stats_empty")
     if os.path.isdir(output_dir):
         shutil.rmtree(output_dir)
     empty_metric = CurieMetric()
     empty_metric.CopyFrom(self.counter_template)
     del empty_metric.timestamps[:]
     del empty_metric.values[:]
     self.assertEqual(empty_metric.timestamps, [])
     self.assertEqual(empty_metric.values, [])
     # Write empty.
     new_results = {"node_0": [empty_metric]}
     ScenarioUtil.append_cluster_stats(new_results, output_dir)
     results = ScenarioUtil.read_cluster_stats(output_dir)
     self.assertEqual(results.keys(), ["node_0"])
     self.assertEqual(len(results["node_0"]), 1)
     self.assertEqual(results["node_0"][0], empty_metric)
     # Append empty.
     new_results = {"node_0": [empty_metric]}
     ScenarioUtil.append_cluster_stats(new_results, output_dir)
     results = ScenarioUtil.read_cluster_stats(output_dir)
     self.assertEqual(results.keys(), ["node_0"])
     self.assertEqual(len(results["node_0"]), 1)
     self.assertEqual(results["node_0"][0], empty_metric)
     # Append non-empty.
     non_empty_metric = CurieMetric()
     non_empty_metric.CopyFrom(self.counter_template)
     non_empty_metric.timestamps.extend([1454092320, 1454092321])
     non_empty_metric.values.extend([1, 2])
     new_results = {"node_0": [non_empty_metric]}
     ScenarioUtil.append_cluster_stats(new_results, output_dir)
     results = ScenarioUtil.read_cluster_stats(output_dir)
     self.assertEqual(results.keys(), ["node_0"])
     self.assertEqual(len(results["node_0"]), 1)
     self.assertEqual(results["node_0"][0], non_empty_metric)
     # Append empty again.
     new_results = {"node_0": [empty_metric]}
     ScenarioUtil.append_cluster_stats(new_results, output_dir)
     results = ScenarioUtil.read_cluster_stats(output_dir)
     self.assertEqual(results.keys(), ["node_0"])
     self.assertEqual(len(results["node_0"]), 1)
     self.assertEqual(results["node_0"][0], non_empty_metric)
Example #16
    def append_cluster_stats(results_map, dir_name):
        """Write cluster results to disk, appending to any that already exist.

    If the output directory is empty, a new subdirectory will be created for
    each node. For each node, a bin (serialized protobuf) file will be created
    for each counter.

    If previously-collected results already exist, the new results will be
    appended to any existing bin files.

    A simple check for duplicate samples is performed, based on the epoch time
    of the last sample in the existing bin file. If a sample to be appended has
    an epoch time less than or equal to the last epoch in the file, it will be
    ignored.

    To read the results files back into a Python object, use
    read_cluster_stats.

    Args:
      results_map (dict): Results from Cluster.collect_performance_stats.
      dir_name (str): Top-level directory in which subdirectories and results
        are written. If it does not exist, it will be created. Can be absolute
        or relative to the current working directory.

    Returns:
      (int) Epoch time of latest appended sample. If no samples were appended,
        returns None.
    """

        max_appended_epoch_time = None
        for node_id in results_map:
            host_results = results_map[node_id]
            if host_results is None:
                # Error message already logged during query call.
                continue
            elif not host_results:
                # Call succeeded, but results list is empty.
                log.warning("No new stats data collected for '%s'", node_id)
                continue
            host_output_dir = os.path.join(dir_name, str(node_id))
            if not os.path.isdir(host_output_dir):
                os.makedirs(host_output_dir)
            for new_metric in host_results:
                # Pull the new timestamps and values out of the new metric.
                counter_name = MetricsUtil.metric_name(new_metric)
                new_t_v_pairs = zip(new_metric.timestamps, new_metric.values)
                # Read the existing metric from disk.
                file_name = ("%s_%s" %
                             (counter_name, new_metric.instance)).replace(
                                 ".", "_")
                file_path = os.path.join(host_output_dir, "%s.bin" % file_name)
                # Initialize a metric with empty repeated values.
                existing_metric = CurieMetric()
                existing_metric.CopyFrom(new_metric)
                del existing_metric.timestamps[:]
                del existing_metric.values[:]
                if os.path.isfile(file_path):
                    try:
                        with open(file_path, "r") as f:
                            existing_metric.ParseFromString(f.read())
                    except (message.DecodeError, IOError):
                        log.warning(
                            "Failed to decode %s - file will be overwritten",
                            file_path)
                    else:
                        # Remove duplicates.
                        latest_existing_timestamp = -1
                        if len(existing_metric.timestamps) > 0:
                            latest_existing_timestamp = max(
                                existing_metric.timestamps)
                        new_t_v_pairs = [
                            t_v_pair for t_v_pair in new_t_v_pairs
                            if t_v_pair[0] > latest_existing_timestamp
                        ]
                if not new_t_v_pairs:
                    log.debug("No new %s data available for %s", counter_name,
                              node_id)
                    new_timestamps, new_values = [], []
                else:
                    new_timestamps, new_values = zip(*new_t_v_pairs)
                existing_metric.timestamps.extend(new_timestamps)
                existing_metric.values.extend(new_values)
                try:
                    serialized = existing_metric.SerializeToString()
                except message.EncodeError:
                    # This can happen if the protobuf read from disk was invalid, but
                    # did not throw a DecodeError when it was parsed.
                    log.warning(
                        "Failed to serialize appended results to %s - file will "
                        "be overwritten", file_path)
                    serialized = new_metric.SerializeToString()
                OsUtil.write_and_rename(file_path, serialized)
                if existing_metric.timestamps:
                    max_appended_epoch_time = max(
                        max_appended_epoch_time,
                        max(existing_metric.timestamps))
        return max_appended_epoch_time
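
As a footnote to the file handling above, each bin file name is derived from the counter name and instance, with dots replaced by underscores; this is the same construction the corrupt-file test in Example #4 rebuilds. A tiny sketch of the resulting on-disk layout, using an illustrative counter and path:

    # For a CpuUsage.Avg.Percent counter with instance "Aggregated" on node_0,
    # append_cluster_stats writes to (illustrative path):
    #   <dir_name>/node_0/CpuUsage_Avg_Percent_Aggregated.bin
    counter_name = "CpuUsage.Avg.Percent"
    instance = "Aggregated"
    file_name = ("%s_%s" % (counter_name, instance)).replace(".", "_")
    assert file_name == "CpuUsage_Avg_Percent_Aggregated"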