def testPercentileCalculator(self):
    numbers = list(range(0, 1001))
    percentiles = sample.PercentileCalculator(
        numbers, percentiles=[0, 1, 99.9, 100])

    self.assertEqual(percentiles['p0'], 0)
    self.assertEqual(percentiles['p1'], 10)
    self.assertEqual(percentiles['p99.9'], 999)
    self.assertEqual(percentiles['p100'], 1000)
    self.assertEqual(percentiles['average'], 500)

    # 4 percentiles we requested, plus average and stddev
    self.assertEqual(len(percentiles), 6)

def testWrongTypePercentile(self):
    with self.assertRaises(ValueError):
        sample.PercentileCalculator([3], percentiles=['a'])

def testOutOfRangePercentile(self):
    with self.assertRaises(ValueError):
        sample.PercentileCalculator([3], percentiles=[-1])

def testNoNumbers(self):
    with self.assertRaises(ValueError):
        sample.PercentileCalculator([], percentiles=[0, 1, 99])
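# The tests above pin down the contract of sample.PercentileCalculator: it
# returns a dict with one 'pN' key per requested percentile plus 'average' and
# 'stddev', and it raises ValueError for non-numeric or out-of-range
# percentiles and for empty input. A minimal sketch with that contract (an
# illustration only, not the library's actual implementation; the default
# percentile list here is arbitrary) could look like this:
import math


def PercentileCalculatorSketch(numbers, percentiles=(50, 90, 99)):
    """Hypothetical stand-in matching the behavior exercised above."""
    numbers = sorted(numbers)  # works for any iterable, e.g. a pd.Series
    if not numbers:
        raise ValueError('Can not compute percentiles of an empty sequence.')
    result = {}
    for p in percentiles:
        if not isinstance(p, (int, float)) or p < 0 or p > 100:
            raise ValueError('Invalid percentile: %s' % p)
        # Nearest-rank style index: 'p0' is the minimum, 'p100' the maximum.
        idx = int(math.floor(p / 100.0 * (len(numbers) - 1)))
        result['p%s' % str(p)] = numbers[idx]
    mean = sum(numbers) / float(len(numbers))
    result['average'] = mean
    result['stddev'] = math.sqrt(
        sum((x - mean) ** 2 for x in numbers) / float(len(numbers)))
    return result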
def RunNetperf(vm, benchmark_name, server_ip, num_streams):
  """Spawns netperf on a remote VM, parses results.

  Args:
    vm: The VM that the netperf TCP_RR benchmark will be run upon.
    benchmark_name: The netperf benchmark to run, see the documentation.
    server_ip: A machine that is running netserver.
    num_streams: The number of netperf client threads to run.

  Returns:
    A list of sample.Sample objects with the results.
  """
  enable_latency_histograms = FLAGS.netperf_enable_histograms or num_streams > 1
  # Throughput benchmarks don't have latency histograms
  enable_latency_histograms = enable_latency_histograms and \
      benchmark_name != 'TCP_STREAM'
  # Flags:
  # -o specifies keys to include in CSV output.
  # -j keeps additional latency numbers
  # -v sets the verbosity level so that netperf will print out histograms
  # -I specifies the confidence % and width - here 99% confidence that the true
  #    value is within +/- 2.5% of the reported value
  # -i specifies the maximum and minimum number of iterations.
  confidence = ('-I 99,5 -i {0},3'.format(FLAGS.netperf_max_iter)
                if FLAGS.netperf_max_iter else '')
  verbosity = '-v2 ' if enable_latency_histograms else ''
  netperf_cmd = ('{netperf_path} -p {{command_port}} -j {verbosity} '
                 '-t {benchmark_name} -H {server_ip} -l {length} {confidence}'
                 ' -- '
                 '-P ,{{data_port}} '
                 '-o THROUGHPUT,THROUGHPUT_UNITS,P50_LATENCY,P90_LATENCY,'
                 'P99_LATENCY,STDDEV_LATENCY,'
                 'MIN_LATENCY,MAX_LATENCY,'
                 'CONFIDENCE_ITERATION,THROUGHPUT_CONFID').format(
                     netperf_path=netperf.NETPERF_PATH,
                     benchmark_name=benchmark_name,
                     server_ip=server_ip,
                     length=FLAGS.netperf_test_length,
                     confidence=confidence, verbosity=verbosity)
  if FLAGS.netperf_thinktime != 0:
    netperf_cmd += (' -X {thinktime},{thinktime_array_size},'
                    '{thinktime_run_length} ').format(
                        thinktime=FLAGS.netperf_thinktime,
                        thinktime_array_size=FLAGS.netperf_thinktime_array_size,
                        thinktime_run_length=FLAGS.netperf_thinktime_run_length)

  # Run all of the netperf processes and collect their stdout
  # TODO: Record process start delta of netperf processes on the remote machine

  # Give the remote script the max possible test length plus 5 minutes to
  # complete
  remote_cmd_timeout = \
      FLAGS.netperf_test_length * (FLAGS.netperf_max_iter or 1) + 300
  remote_cmd = ('./%s --netperf_cmd="%s" --num_streams=%s --port_start=%s' %
                (REMOTE_SCRIPT, netperf_cmd, num_streams, PORT_START))
  remote_stdout, _ = vm.RemoteCommand(remote_cmd,
                                      timeout=remote_cmd_timeout)

  # Decode stdouts, stderrs, and return codes from remote command's stdout
  json_out = json.loads(remote_stdout)
  stdouts = json_out[0]

  # Metadata to attach to samples
  metadata = {'netperf_test_length': FLAGS.netperf_test_length,
              'max_iter': FLAGS.netperf_max_iter or 1,
              'sending_thread_count': num_streams}

  parsed_output = [ParseNetperfOutput(stdout, metadata, benchmark_name,
                                      enable_latency_histograms)
                   for stdout in stdouts]

  if len(parsed_output) == 1:
    # Only 1 netperf thread
    throughput_sample, latency_samples, histogram = parsed_output[0]
    return [throughput_sample] + latency_samples
  else:
    # Multiple netperf threads

    samples = []

    # Unzip parsed output
    # Note that latency_samples are invalid with multiple threads because stats
    # are computed per-thread by netperf, so we don't use them here.
    throughput_samples, _, latency_histograms = [list(t)
                                                 for t in zip(*parsed_output)]
    # They should all have the same units
    throughput_unit = throughput_samples[0].unit
    # Extract the throughput values from the samples
    throughputs = [s.value for s in throughput_samples]
    # Compute some stats on the throughput values
    throughput_stats = sample.PercentileCalculator(throughputs, [50, 90, 99])
    throughput_stats['min'] = min(throughputs)
    throughput_stats['max'] = max(throughputs)
    # Calculate aggregate throughput
    throughput_stats['total'] = throughput_stats['average'] * len(throughputs)
    # Create samples for throughput stats
    for stat, value in throughput_stats.items():
      samples.append(
          sample.Sample('%s_Throughput_%s' % (benchmark_name, stat),
                        float(value),
                        throughput_unit, metadata))
    if enable_latency_histograms:
      # Combine all of the latency histogram dictionaries
      latency_histogram = Counter()
      for histogram in latency_histograms:
        latency_histogram.update(histogram)
      # Create a sample for the aggregate latency histogram
      hist_metadata = {'histogram': json.dumps(latency_histogram)}
      hist_metadata.update(metadata)
      samples.append(sample.Sample(
          '%s_Latency_Histogram' % benchmark_name, 0, 'us', hist_metadata))
      # Calculate stats on aggregate latency histogram
      latency_stats = _HistogramStatsCalculator(latency_histogram, [50, 90, 99])
      # Create samples for the latency stats
      for stat, value in latency_stats.items():
        samples.append(
            sample.Sample('%s_Latency_%s' % (benchmark_name, stat),
                          float(value),
                          'us', metadata))
    return samples
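# _HistogramStatsCalculator is used above but not shown in this snippet. A
# minimal sketch of a compatible helper, assuming the histogram is a
# Counter/dict mapping a latency bucket value (in microseconds) to an
# observation count (a sketch under that assumption, not the benchmark's
# actual code):
def _HistogramStatsCalculatorSketch(histogram, percentiles=(50, 90, 99)):
  """Returns a dict of 'pN' -> latency derived from a {latency: count} map."""
  stats = {}
  total_count = sum(histogram.values())
  if not total_count:
    return stats
  cumulative = 0
  # Walk the buckets in ascending latency order, tracking the cumulative count.
  for latency in sorted(histogram):
    cumulative += histogram[latency]
    for p in percentiles:
      key = 'p%s' % str(p)
      if key not in stats and cumulative >= (p / 100.0) * total_count:
        stats[key] = latency
  return stats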
def ParseSysbenchOutput(sysbench_output, results, metadata):
    """Parses sysbench output.

  Extract relevant TPS and latency numbers, and populate the final result
  collection with these information.

  Specifically, we are interested in tps numbers reported by each reporting
  interval, and the summary latency numbers printed at the end of the run in
  "General Statistics" -> "Response Time".

  Example Sysbench output:

  sysbench 0.5:  multi-threaded system evaluation benchmark
  <... lots of output we don't care here ...>
  Threads started!

  [   2s] threads: 16, tps: 526.38, reads: 7446.79, writes: 2105.52, response
  time: 210.67ms (99%), errors: 0.00, reconnects:  0.00
  < .... lots of tps output every 2 second, we need all those>

  < ... lots of other output we don't care for now...>
  General statistics:
      total time:                          17.0563s
      total number of events:              10000
      total time taken by event execution: 272.6053s
      response time:
           min:                                 18.31ms
           avg:                                 27.26ms
           max:                                313.50ms
           approx.  99 percentile:              57.15ms
  < We care about the response time section above, these are latency numbers>
  < then there are some outputs after this, we don't care either>

  Args:
    sysbench_output: The output from sysbench.
    results: The dictionary to store results based on sysbench output.
    metadata: The metadata to be passed along to the Samples class.
  """
    all_tps = []
    seen_general_statistics = False
    seen_response_time = False

    response_times = {}

    sysbench_output_io = StringIO.StringIO(sysbench_output)
    for line in sysbench_output_io.readlines():
        if re.match(r'^\[', line):
            tps = re.findall('tps: (.*?),', line)
            all_tps.append(float(tps[0]))
            continue

        if line.startswith('General statistics:'):
            seen_general_statistics = True
            continue

        if seen_general_statistics:
            if re.match('^ +response time:.*', line):
                seen_response_time = True
                continue

        if seen_general_statistics and seen_response_time:
            for token in RESPONSE_TIME_TOKENS:
                search_string = '.*%s: +(.*)ms' % token
                if re.findall(search_string, line):
                    response_times[token] = float(
                        re.findall(search_string, line)[0])

    tps_line = ', '.join(map(str, all_tps))
    # Log all tps data points for reference, and report percentiles of these
    # tps values in the final result set.
    logging.info('All TPS numbers: \n %s', tps_line)

    tps_percentile = sample.PercentileCalculator(all_tps)
    for percentile in sample.PERCENTILES_LIST:
        percentile_string = 'p%s' % str(percentile)
        logging.info('%s tps %f', percentile_string,
                     tps_percentile[percentile_string])
        metric_name = ('%s %s') % (SYSBENCH_RESULT_NAME_TPS, percentile_string)
        results.append(
            sample.Sample(metric_name, tps_percentile[percentile_string],
                          NA_UNIT, metadata))

    # Also report average, stddev, and coefficient of variation
    for token in ['average', 'stddev']:
        logging.info('tps %s %f', token, tps_percentile[token])
        metric_name = ('%s %s') % (SYSBENCH_RESULT_NAME_TPS, token)
        results.append(
            sample.Sample(metric_name, tps_percentile[token], NA_UNIT,
                          metadata))

    if tps_percentile['average'] > 0:
        cv = tps_percentile['stddev'] / tps_percentile['average']
        logging.info('tps coefficient of variation %f', cv)
        metric_name = ('%s %s') % (SYSBENCH_RESULT_NAME_TPS, 'cv')
        results.append(sample.Sample(metric_name, cv, NA_UNIT, metadata))

    # Now, report the latency numbers.
    for token in RESPONSE_TIME_TOKENS:
        logging.info('%s_response_time is %f', token, response_times[token])
        metric_name = '%s %s' % (SYSBENCH_RESULT_NAME_LATENCY, token)

        if token == 'percentile':
            metric_name = '%s %s' % (metric_name,
                                     FLAGS.sysbench_latency_percentile)

        results.append(
            sample.Sample(metric_name, response_times[token], MS_UNIT,
                          metadata))
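# For reference, the per-interval tps extraction above operates on lines like
# the one shown in the docstring; a quick standalone check of the same two
# regular expressions (illustration only):
import re

interval_line = ('[   2s] threads: 16, tps: 526.38, reads: 7446.79, '
                 'writes: 2105.52, response time: 210.67ms (99%), '
                 'errors: 0.00, reconnects:  0.00')
assert re.match(r'^\[', interval_line)
assert float(re.findall('tps: (.*?),', interval_line)[0]) == 526.38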
def RunNetperf(primary_vm, secondary_vm, benchmark_name, num_streams,
               iteration, results):
    """Spawns netperf on a remote VM, parses results.

  Args:
    primary_vm: The VM that the netperf benchmark will be run upon.
    secondary_vm: The VM that the netperf server is running on.
    benchmark_name: The netperf benchmark to run, see the documentation.
    num_streams: The number of netperf client threads to run.
    iteration: The iteration to prefix the metrics with, as well as the index
      into the array where the results will be stored.
    results: The results variable shared by all threads.  The iteration-th
      element holds a tuple of
      (Samples[], begin_starting_processes, end_starting_processes)
  """
    # Flags:
    # -o specifies keys to include in CSV output.
    # -j keeps additional latency numbers
    netperf_cmd = ('{netperf_path} -p {{command_port}} -j '
                   '-t {benchmark_name} -H {server_ip} -l {length}'
                   ' -- '
                   '-P ,{{data_port}} '
                   '-o THROUGHPUT,THROUGHPUT_UNITS,P50_LATENCY,P90_LATENCY,'
                   'P99_LATENCY,STDDEV_LATENCY,'
                   'MIN_LATENCY,MAX_LATENCY,'
                   'CONFIDENCE_ITERATION,THROUGHPUT_CONFID').format(
                       netperf_path=netperf.NETPERF_PATH,
                       benchmark_name=benchmark_name,
                       server_ip=secondary_vm.internal_ip,
                       length=FLAGS.bidirectional_network_test_length)

    # Run all of the netperf processes and collect their stdout

    # Give the remote script the test length plus 5 minutes to complete
    remote_cmd_timeout = FLAGS.bidirectional_network_test_length + 300
    remote_cmd = ('./%s --netperf_cmd="%s" --num_streams=%s --port_start=%s' %
                  (REMOTE_SCRIPT, netperf_cmd, num_streams, PORT_START))
    remote_stdout, _ = primary_vm.RemoteCommand(remote_cmd,
                                                timeout=remote_cmd_timeout)

    # Decode the remote command's stdout, which contains the stdouts, stderrs,
    # and return codes from each sub-invocation of netperf (one per stream)
    json_out = json.loads(remote_stdout)
    stdouts = json_out[0]
    # unused_stderrs = json_out[1]
    # unused_return_codes = json_out[2]
    begin_starting_processes = json_out[3]
    end_starting_processes = json_out[4]

    local_results = []

    # Metadata to attach to samples
    metadata = {
        'bidirectional_network_test_length':
        FLAGS.bidirectional_network_test_length,
        'bidirectional_stream_num_streams': num_streams,
        'ip_type': 'internal',
        'primary_machine_type': primary_vm.machine_type,
        'primary_zone': primary_vm.zone,
        'secondary_machine_type': secondary_vm.machine_type,
        'secondary_zone': secondary_vm.zone,
    }

    stream_start_delta = end_starting_processes - begin_starting_processes
    local_results.append(
        sample.Sample('%s_%s_start_delta' % (iteration, benchmark_name),
                      float(stream_start_delta), SEC, metadata))

    throughput_samples = [
        _ParseNetperfOutput(stdout, metadata, benchmark_name, iteration)
        for stdout in stdouts
    ]

    # They should all have the same units
    throughput_unit = throughput_samples[0].unit
    # Extract the throughput values from the samples
    throughputs = [s.value for s in throughput_samples]
    # Compute some stats on the throughput values
    throughput_stats = sample.PercentileCalculator(throughputs, [50, 90, 99])
    throughput_stats['min'] = min(throughputs)
    throughput_stats['max'] = max(throughputs)
    # Calculate aggregate throughput
    throughput_stats['total'] = throughput_stats['average'] * len(throughputs)
    # Create samples for throughput stats
    for stat, value in throughput_stats.items():
        local_results.append(
            sample.Sample(
                '%s_%s_Throughput_%s' % (iteration, benchmark_name, stat),
                float(value), throughput_unit, metadata))
    results[iteration] = (local_results, begin_starting_processes,
                          end_starting_processes)
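# The remote script's stdout, as decoded above, is a JSON list indexed as
# [stdouts, stderrs, return_codes, begin_starting_processes,
# end_starting_processes]. A tiny illustration of that decoding step with a
# fabricated payload (values are for illustration only; the timestamp unit is
# assumed to be seconds, matching the SEC unit of the start_delta sample):
import json

example_remote_stdout = json.dumps([
    ['<netperf csv for stream 0>', '<netperf csv for stream 1>'],  # stdouts
    ['', ''],         # stderrs (unused above)
    [0, 0],           # return codes (unused above)
    1600000000.0,     # begin_starting_processes
    1600000000.5,     # end_starting_processes
])
stdouts, _, _, begin, end = json.loads(example_remote_stdout)
assert end - begin == 0.5  # this delta becomes the *_start_delta sample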
def testPandasSeries(self):
    percentiles = sample.PercentileCalculator(pd.Series([1, 2, 3]),
                                              percentiles=[50])
    self.assertEqual(percentiles['p50'], 2)
def RunNetperf(vm, benchmark_name, server_ip, num_streams):
    """Spawns netperf on a remote VM, parses results.

  Args:
    vm: The VM that the netperf TCP_RR benchmark will be run upon.
    benchmark_name: The netperf benchmark to run, see the documentation.
    server_ip: A machine that is running netserver.
    num_streams: The number of netperf client threads to run.

  Returns:
    A sample.Sample object with the result.
  """
    enable_latency_histograms = FLAGS.netperf_enable_histograms or num_streams > 1
    # Throughput benchmarks don't have latency histograms
    enable_latency_histograms = (enable_latency_histograms
                                 and (benchmark_name
                                      not in ['TCP_STREAM', 'UDP_STREAM']))
    # Flags:
    # -o specifies keys to include in CSV output.
    # -j keeps additional latency numbers
    # -v sets the verbosity level so that netperf will print out histograms
    # -I specifies the confidence % and width - here 99% confidence that the true
    #    value is within +/- 2.5% of the reported value
    # -i specifies the maximum and minimum number of iterations.
    confidence = (f'-I 99,5 -i {FLAGS.netperf_max_iter},3'
                  if FLAGS.netperf_max_iter else '')
    verbosity = '-v2 ' if enable_latency_histograms else ''

    remote_cmd_timeout = (FLAGS.netperf_test_length *
                          (FLAGS.netperf_max_iter or 1) + 300)

    metadata = {
        'netperf_test_length': FLAGS.netperf_test_length,
        'sending_thread_count': num_streams,
        'max_iter': FLAGS.netperf_max_iter or 1
    }

    netperf_cmd = (f'{netperf.NETPERF_PATH} '
                   f'-p {{command_port}} '
                   f'-j {verbosity} '
                   f'-t {benchmark_name} '
                   f'-H {server_ip} '
                   f'-l {FLAGS.netperf_test_length} {confidence}'
                   ' -- '
                   f'-P ,{{data_port}} '
                   f'-o {OUTPUT_SELECTOR}')

    if benchmark_name.upper() == 'UDP_STREAM':
        send_size = FLAGS.netperf_udp_stream_send_size_in_bytes
        netperf_cmd += f' -R 1 -m {send_size} -M {send_size} '
        metadata['netperf_send_size_in_bytes'] = (
            FLAGS.netperf_udp_stream_send_size_in_bytes)

    elif benchmark_name.upper() == 'TCP_STREAM':
        send_size = FLAGS.netperf_tcp_stream_send_size_in_bytes
        netperf_cmd += f' -m {send_size} -M {send_size} '
        metadata['netperf_send_size_in_bytes'] = (
            FLAGS.netperf_tcp_stream_send_size_in_bytes)

    if FLAGS.netperf_thinktime != 0:
        netperf_cmd += (' -X '
                        f'{FLAGS.netperf_thinktime},'
                        f'{FLAGS.netperf_thinktime_array_size},'
                        f'{FLAGS.netperf_thinktime_run_length} ')

    if FLAGS.netperf_mss and 'TCP' in benchmark_name.upper():
        netperf_cmd += f' -G {FLAGS.netperf_mss}b'
        metadata['netperf_mss_requested'] = FLAGS.netperf_mss

    # Run all of the netperf processes and collect their stdout
    # TODO(dlott): Analyze process start delta of netperf processes on the remote
    #              machine

    # Give the remote script the max possible test length plus 5 minutes to
    # complete
    remote_cmd_timeout = \
        FLAGS.netperf_test_length * (FLAGS.netperf_max_iter or 1) + 300
    remote_cmd = (f'./{REMOTE_SCRIPT} --netperf_cmd="{netperf_cmd}" '
                  f'--num_streams={num_streams} --port_start={PORT_START}')
    remote_stdout, _ = vm.RobustRemoteCommand(remote_cmd,
                                              should_log=True,
                                              timeout=remote_cmd_timeout)

    # Decode stdouts, stderrs, and return codes from remote command's stdout
    json_out = json.loads(remote_stdout)
    stdouts = json_out[0]

    parsed_output = [
        ParseNetperfOutput(stdout, metadata, benchmark_name,
                           enable_latency_histograms) for stdout in stdouts
    ]

    if len(parsed_output) == 1:
        # Only 1 netperf thread
        throughput_sample, latency_samples, histogram = parsed_output[0]
        return [throughput_sample] + latency_samples
    else:
        # Multiple netperf threads

        samples = []

        # Unzip parsed output
        # Note that latency_samples are invalid with multiple threads because stats
        # are computed per-thread by netperf, so we don't use them here.
        throughput_samples, _, latency_histograms = [
            list(t) for t in zip(*parsed_output)
        ]
        # They should all have the same units
        throughput_unit = throughput_samples[0].unit
        # Extract the throughput values from the samples
        throughputs = [s.value for s in throughput_samples]
        # Compute some stats on the throughput values
        throughput_stats = sample.PercentileCalculator(throughputs,
                                                       [50, 90, 99])
        throughput_stats['min'] = min(throughputs)
        throughput_stats['max'] = max(throughputs)
        # Calculate aggregate throughput
        throughput_stats['total'] = throughput_stats['average'] * len(
            throughputs)
        # Create samples for throughput stats
        for stat, value in throughput_stats.items():
            samples.append(
                sample.Sample(f'{benchmark_name}_Throughput_{stat}',
                              float(value), throughput_unit, metadata))
        if enable_latency_histograms:
            # Combine all of the latency histogram dictionaries
            latency_histogram = collections.Counter()
            for histogram in latency_histograms:
                latency_histogram.update(histogram)
            # Create a sample for the aggregate latency histogram
            hist_metadata = {'histogram': json.dumps(latency_histogram)}
            hist_metadata.update(metadata)
            samples.append(
                sample.Sample(f'{benchmark_name}_Latency_Histogram', 0, 'us',
                              hist_metadata))
            # Calculate stats on aggregate latency histogram
            latency_stats = _HistogramStatsCalculator(latency_histogram,
                                                      [50, 90, 99])
            # Create samples for the latency stats
            for stat, value in latency_stats.items():
                samples.append(
                    sample.Sample(f'{benchmark_name}_Latency_{stat}',
                                  float(value), 'us', metadata))
        return samples
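# The per-thread latency histograms above are merged with collections.Counter,
# which sums the counts of matching latency buckets across threads. A small
# standalone illustration with made-up bucket values (microseconds -> count):
import collections

thread_histograms = [{100: 3, 200: 1}, {100: 2, 300: 4}]
merged = collections.Counter()
for histogram in thread_histograms:
    merged.update(histogram)
assert merged == {100: 5, 200: 1, 300: 4}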