def test_aggregated_query_metadata_passing(self):
    """Checks per-query metadata aggregated over two iterations.

    The expected dictionaries carry each metadata key prefixed with the id of
    the iteration that supplied it. Iteration '2' records no metadata for Q2,
    so only the iteration '1' entry is expected for that query.
    """
    benchmark = agg.EdwBenchmarkPerformance(
        total_iterations=2, expected_queries=[Q1_NAME, Q2_NAME])
    iteration_runs = (
        ('1', ((Q1_NAME, 1.0, {'job_id': 'q1_i1_job_id'}),
               (Q2_NAME, 2.0, {'job_id': 'q2_i1_job_id'}))),
        ('2', ((Q1_NAME, 3.0, {'job_id': 'q1_i2_job_id'}),
               (Q2_NAME, 4.0, {}))),
    )
    for iteration_id, query_runs in iteration_runs:
        iteration = agg.EdwPowerIterationPerformance(iteration_id, 2)
        for query_name, execution_time, query_metadata in query_runs:
            iteration.add_query_performance(query_name, execution_time,
                                            query_metadata)
        benchmark.add_power_iteration_performance(iteration)

    q1_metadata = benchmark.aggregated_query_metadata(Q1_NAME)
    self.assertDictEqual(
        {'1_job_id': 'q1_i1_job_id', '2_job_id': 'q1_i2_job_id'},
        q1_metadata)

    q2_metadata = benchmark.aggregated_query_metadata(Q2_NAME)
    self.assertDictEqual({'1_job_id': 'q2_i1_job_id'}, q2_metadata)
    def test_get_queries_geomean_performance_samples_passing(self):
        """Checks the geomean samples reported for two iterations.

        Expects three samples: one 'edw_iteration_geomean_time' per iteration
        and a single 'edw_aggregated_geomean' whose value is the geometric
        mean of the per-query averages.
        """
        benchmark = agg.EdwBenchmarkPerformance(
            total_iterations=2, expected_queries=[Q1_NAME, Q2_NAME])
        iteration_runs = (
            ('1', ((Q1_NAME, 1.0, {'job_id': 'q1_i1_job_id'}),
                   (Q2_NAME, 2.0, {'job_id': 'q2_i1_job_id'}))),
            ('2', ((Q1_NAME, 3.0, {'job_id': 'q1_i2_job_id'}),
                   (Q2_NAME, 4.0, {}))),
        )
        for iteration_id, query_runs in iteration_runs:
            iteration = agg.EdwPowerIterationPerformance(iteration_id, 2)
            for query_name, execution_time, query_metadata in query_runs:
                iteration.add_query_performance(query_name, execution_time,
                                                query_metadata)
            benchmark.add_power_iteration_performance(iteration)

        samples = benchmark.get_queries_geomean_performance_samples(
            {'benchmark_name': 'b_name'})
        self.assertEqual(len(samples), 3)
        self.assertSameElements(
            [sample.metric for sample in samples],
            ['edw_iteration_geomean_time', 'edw_iteration_geomean_time',
             'edw_aggregated_geomean'])

        # Per-iteration geomean samples.
        iteration_samples = [
            sample for sample in samples
            if sample.metric == 'edw_iteration_geomean_time'
        ]
        iteration_values = [sample.value for sample in iteration_samples]
        expected_iteration_values = [
            agg.geometric_mean([1.0, 2.0]),
            agg.geometric_mean([3.0, 4.0]),
        ]
        self.assertSameElements(iteration_values, expected_iteration_values)

        # Benchmark-level geomean sample.
        aggregated = [
            sample for sample in samples
            if sample.metric == 'edw_aggregated_geomean'
        ][0]
        self.assertEqual(
            aggregated.value,
            agg.geometric_mean([(1.0 + 3.0) / 2, (2.0 + 4.0) / 2]))
 def test_add_power_iteration_performance(self):
     """Checks that added iterations are stored under their iteration ids."""
     benchmark = agg.EdwBenchmarkPerformance(
         total_iterations=2, expected_queries=[Q1_NAME])
     first_iteration = agg.EdwPowerIterationPerformance('1', 1)
     second_iteration = agg.EdwPowerIterationPerformance('2', 1)
     first_iteration.add_query_performance(
         Q1_NAME, Q1_PERFORMANCE, METADATA_EMPTY)
     second_iteration.add_query_performance(
         Q2_NAME, Q2_PERFORMANCE, METADATA_EMPTY)
     for iteration in (first_iteration, second_iteration):
         benchmark.add_power_iteration_performance(iteration)

     self.assertEqual(len(benchmark.iteration_performances), 2)
     self.assertSameElements(
         benchmark.iteration_performances.keys(), ['1', '2'])
 def test_is_successful(self):
     """Checks is_successful for a passing and a failing iteration.

     The first iteration records every expected query and must report
     success. The second iteration records only the QFAIL query and must
     report failure against an expectation of Q1, Q2 and QFAIL.
     """
     i_p = agg.EdwPowerIterationPerformance('1', 2)
     i_p.add_query_performance(Q1_NAME, Q1_PERFORMANCE, METADATA_EMPTY)
     i_p.add_query_performance(Q2_NAME, Q2_PERFORMANCE, METADATA_EMPTY)
     self.assertTrue(i_p.is_successful(expected_queries=[Q1_NAME, Q2_NAME]))

     i2_p = agg.EdwPowerIterationPerformance('2', 2)
     # Record the failing query on the iteration that is asserted on below.
     # Adding it to `i_p` instead would leave `i2_p` empty, so the negative
     # assertion would never exercise an iteration holding a failed query.
     i2_p.add_query_performance(QFAIL_NAME, QFAIL_PERFORMANCE,
                                METADATA_EMPTY)
     self.assertFalse(
         i2_p.is_successful(
             expected_queries=[Q1_NAME, Q2_NAME, QFAIL_NAME]))
 def test_aggregated_query_status_look_for_failing_query(self):
     """Checks aggregated status when one query fails in one iteration.

     Q1 is recorded with Q1_PERFORMANCE in both iterations and is expected
     to aggregate as successful. Q2 is recorded with QFAIL_PERFORMANCE in
     iteration '2' and is expected to aggregate as unsuccessful.
     """
     benchmark = agg.EdwBenchmarkPerformance(
         total_iterations=2, expected_queries=[Q1_NAME, Q2_NAME])
     iteration_runs = (
         ('1', ((Q1_NAME, Q1_PERFORMANCE), (Q2_NAME, Q2_PERFORMANCE))),
         ('2', ((Q1_NAME, Q1_PERFORMANCE), (Q2_NAME, QFAIL_PERFORMANCE))),
     )
     for iteration_id, query_runs in iteration_runs:
         iteration = agg.EdwPowerIterationPerformance(iteration_id, 2)
         for query_name, execution_time in query_runs:
             iteration.add_query_performance(query_name, execution_time,
                                             METADATA_EMPTY)
         benchmark.add_power_iteration_performance(iteration)

     self.assertTrue(benchmark.aggregated_query_status(Q1_NAME))
     self.assertFalse(benchmark.aggregated_query_status(Q2_NAME))
 def test_aggregated_query_metadata_failing_query(self):
     """Checks that aggregating metadata of a failing query raises.

     Q2 is recorded with QFAIL_PERFORMANCE in iteration '2'; requesting its
     aggregated metadata is expected to raise
     EdwPerformanceAggregationError.
     """
     benchmark = agg.EdwBenchmarkPerformance(
         total_iterations=2, expected_queries=[Q1_NAME, Q2_NAME])
     iteration_runs = (
         ('1', ((Q1_NAME, Q1_PERFORMANCE), (Q2_NAME, Q2_PERFORMANCE))),
         ('2', ((Q1_NAME, Q1_PERFORMANCE), (Q2_NAME, QFAIL_PERFORMANCE))),
     )
     for iteration_id, query_runs in iteration_runs:
         iteration = agg.EdwPowerIterationPerformance(iteration_id, 2)
         for query_name, execution_time in query_runs:
             iteration.add_query_performance(query_name, execution_time,
                                             METADATA_EMPTY)
         benchmark.add_power_iteration_performance(iteration)

     with self.assertRaises(agg.EdwPerformanceAggregationError):
         benchmark.aggregated_query_metadata(Q2_NAME)
 def test_add_power_iteration_performance_duplicate_iteration(self):
     """Checks that adding a second iteration with a reused id raises."""
     recorded_queries = ((Q1_NAME, Q1_PERFORMANCE),
                         (Q2_NAME, Q2_PERFORMANCE))
     # Benchmark-level accumulator that receives the iterations.
     benchmark = agg.EdwBenchmarkPerformance(
         total_iterations=2, expected_queries=[Q1_NAME, Q2_NAME])

     original = agg.EdwPowerIterationPerformance('1', 1)
     for query_name, execution_time in recorded_queries:
         original.add_query_performance(query_name, execution_time,
                                        METADATA_EMPTY)
     benchmark.add_power_iteration_performance(original)

     # Same iteration id ('1') as the iteration already added above.
     duplicate = agg.EdwPowerIterationPerformance('1', 1)
     for query_name, execution_time in recorded_queries:
         duplicate.add_query_performance(query_name, execution_time,
                                         METADATA_EMPTY)

     # The duplicate iteration id is expected to be rejected.
     with self.assertRaises(agg.EdwPerformanceAggregationError):
         benchmark.add_power_iteration_performance(duplicate)
 def test_aggregated_query_execution_time_passing(self):
     """Checks the aggregated execution time of each query.

     The expected values, (1.0 + 3.0) / 2 and (2.0 + 4.0) / 2, assume that
     Q1_PERFORMANCE and Q2_PERFORMANCE are 1.0 and 2.0 respectively.
     """
     benchmark = agg.EdwBenchmarkPerformance(
         total_iterations=2, expected_queries=[Q1_NAME, Q2_NAME])
     iteration_runs = (
         ('1', ((Q1_NAME, Q1_PERFORMANCE), (Q2_NAME, Q2_PERFORMANCE))),
         ('2', ((Q1_NAME, 3.0), (Q2_NAME, 4.0))),
     )
     for iteration_id, query_runs in iteration_runs:
         iteration = agg.EdwPowerIterationPerformance(iteration_id, 2)
         for query_name, execution_time in query_runs:
             iteration.add_query_performance(query_name, execution_time,
                                             METADATA_EMPTY)
         benchmark.add_power_iteration_performance(iteration)

     q1_time = benchmark.aggregated_query_execution_time(Q1_NAME)
     self.assertEqual(q1_time, (1.0 + 3.0) / 2)
     q2_time = benchmark.aggregated_query_execution_time(Q2_NAME)
     self.assertEqual(q2_time, (2.0 + 4.0) / 2)
 def test_has_query_performance(self):
     """Checks has_query_performance before and after a query is recorded."""
     iteration = agg.EdwPowerIterationPerformance('1', 2)

     iteration.add_query_performance(
         Q1_NAME, Q1_PERFORMANCE, METADATA_EMPTY)
     self.assertTrue(iteration.has_query_performance(Q1_NAME))
     # Q2 has not been recorded yet.
     self.assertFalse(iteration.has_query_performance(Q2_NAME))

     iteration.add_query_performance(
         Q2_NAME, Q2_PERFORMANCE, METADATA_EMPTY)
     self.assertTrue(iteration.has_query_performance(Q2_NAME))
 def test_get_all_query_performance_samples_passing(self):
     """Checks the metrics of all query samples for two iterations.

     Six samples are expected: four raw query samples and two aggregated
     query samples.
     """
     benchmark = agg.EdwBenchmarkPerformance(
         total_iterations=2, expected_queries=[Q1_NAME, Q2_NAME])
     iteration_runs = (
         ('1', ((Q1_NAME, 1.0, {'job_id': 'q1_i1_job_id'}),
                (Q2_NAME, 2.0, {'job_id': 'q2_i1_job_id'}))),
         ('2', ((Q1_NAME, 3.0, {'job_id': 'q1_i2_job_id'}),
                (Q2_NAME, 4.0, {}))),
     )
     for iteration_id, query_runs in iteration_runs:
         iteration = agg.EdwPowerIterationPerformance(iteration_id, 2)
         for query_name, execution_time, query_metadata in query_runs:
             iteration.add_query_performance(query_name, execution_time,
                                             query_metadata)
         benchmark.add_power_iteration_performance(iteration)

     samples = benchmark.get_all_query_performance_samples({})
     self.assertEqual(len(samples), 6)
     expected_metrics = (['edw_raw_query_time'] * 4 +
                         ['edw_aggregated_query_time'] * 2)
     self.assertSameElements([sample.metric for sample in samples],
                             expected_metrics)
 def test_get_queries_geomean_performance(self):
     """Checks the geomean sample produced by a single iteration.

     Verifies the sample's metric name, its value (the geometric mean of the
     two recorded performances), its unit and its metadata.
     """
     iteration = agg.EdwPowerIterationPerformance('1', 2)
     for query_name, execution_time in ((Q1_NAME, Q1_PERFORMANCE),
                                        (Q2_NAME, Q2_PERFORMANCE)):
         iteration.add_query_performance(query_name, execution_time,
                                         METADATA_EMPTY)

     geomean_sample = iteration.get_queries_geomean_performance_sample(
         expected_queries=[Q1_NAME, Q2_NAME], metadata=METADATA_EMPTY)

     self.assertEqual(geomean_sample.metric, 'edw_iteration_geomean_time')
     self.assertEqual(
         geomean_sample.value,
         agg.geometric_mean([Q1_PERFORMANCE, Q2_PERFORMANCE]))
     self.assertEqual(geomean_sample.unit, SECS)
     self.assertDictEqual(geomean_sample.metadata, METADATA_EMPTY)
 def test_get_aggregated_query_performance_sample_passing(self):
     """Checks the aggregated sample of each query over two iterations.

     For both queries the metric name, mean value, unit and metadata are
     verified. The Q1 request passes extra benchmark metadata, which is
     expected to appear in the sample's metadata; the Q2 request passes none.
     """
     benchmark = agg.EdwBenchmarkPerformance(
         total_iterations=2, expected_queries=[Q1_NAME, Q2_NAME])
     iteration_runs = (
         ('1', ((Q1_NAME, 1.0, {'job_id': 'q1_i1_job_id'}),
                (Q2_NAME, 2.0, {'job_id': 'q2_i1_job_id'}))),
         ('2', ((Q1_NAME, 3.0, {'job_id': 'q1_i2_job_id'}),
                (Q2_NAME, 4.0, {}))),
     )
     for iteration_id, query_runs in iteration_runs:
         iteration = agg.EdwPowerIterationPerformance(iteration_id, 2)
         for query_name, execution_time, query_metadata in query_runs:
             iteration.add_query_performance(query_name, execution_time,
                                             query_metadata)
         benchmark.add_power_iteration_performance(iteration)

     # Q1: metadata recorded in both iterations, plus benchmark metadata.
     q1_sample = benchmark.get_aggregated_query_performance_sample(
         Q1_NAME, {'benchmark_name': 'b_name'})
     self.assertEqual(q1_sample.metric, 'edw_aggregated_query_time')
     self.assertEqual(q1_sample.value, (1.0 + 3.0) / 2)
     self.assertEqual(q1_sample.unit, 'seconds')
     q1_expected_metadata = {
         '1_job_id': 'q1_i1_job_id',
         '2_job_id': 'q1_i2_job_id',
         'query': Q1_NAME,
         'aggregation_method': 'mean',
         'execution_status': agg.EdwQueryExecutionStatus.SUCCESSFUL,
         'benchmark_name': 'b_name',
     }
     self.assertDictEqual(q1_sample.metadata, q1_expected_metadata)

     # Q2: metadata recorded only in iteration '1', no benchmark metadata.
     q2_sample = benchmark.get_aggregated_query_performance_sample(
         Q2_NAME, {})
     self.assertEqual(q2_sample.metric, 'edw_aggregated_query_time')
     self.assertEqual(q2_sample.value, (2.0 + 4.0) / 2)
     self.assertEqual(q2_sample.unit, 'seconds')
     q2_expected_metadata = {
         '1_job_id': 'q2_i1_job_id',
         'query': Q2_NAME,
         'aggregation_method': 'mean',
         'execution_status': agg.EdwQueryExecutionStatus.SUCCESSFUL,
     }
     self.assertDictEqual(q2_sample.metadata, q2_expected_metadata)
Esempio n. 13
0
def Run(benchmark_spec):
    """Run phase executes the SQL scripts on the EDW cluster and collects duration.

    Every query named in the `edw_power_queries` flag is executed once per
    suite iteration, and the per-iteration results are accumulated into a
    benchmark-level aggregator.

    Args:
      benchmark_spec: Benchmark specification whose `edw_service` attribute
        provides the client interface and the service metadata.

    Returns:
      A list holding the per-query samples followed by the geomean samples
      produced by the aggregator, or an empty list when the aggregator reports
      the benchmark as unsuccessful.
    """
    results = []

    edw_service_instance = benchmark_spec.edw_service
    client_interface = edw_service_instance.GetClientInterface()

    # Run a warm up query in case there are cold start issues.
    client_interface.WarmUpQuery()

    # Default to executing just the sample query if no queries are provided.
    # The flag value itself is tested because `''.split(',')` yields `['']`,
    # which is truthy, so an `or` fallback on the split result never fires.
    requested_queries = FLAGS.edw_power_queries
    if requested_queries:
        all_queries = requested_queries.split(',')
    else:
        all_queries = [os.path.basename(edw_service.SAMPLE_QUERY_PATH)]

    # Accumulator for the entire benchmark's performance
    benchmark_performance = results_aggregator.EdwBenchmarkPerformance(
        total_iterations=FLAGS.edw_suite_iterations,
        expected_queries=all_queries)

    # Multiple iterations of the suite are performed to avoid cold start penalty
    for i in range(1, FLAGS.edw_suite_iterations + 1):
        iteration = str(i)
        # Accumulator for the current suite's performance
        iteration_performance = results_aggregator.EdwPowerIterationPerformance(
            iteration_id=iteration, total_queries=len(all_queries))

        for query in all_queries:
            execution_time, metadata = client_interface.ExecuteQuery(query)
            iteration_performance.add_query_performance(
                query, execution_time, metadata)
        # NOTE(review): other callers of the aggregator in this file use
        # `add_power_iteration_performance`; confirm that
        # `add_iteration_performance` exists in the aggregator version in use.
        benchmark_performance.add_iteration_performance(iteration_performance)

    # Execution complete, generate results only if the benchmark was successful.
    benchmark_metadata = {}
    benchmark_metadata.update(edw_service_instance.GetMetadata())
    if benchmark_performance.is_successful():
        query_samples = benchmark_performance.get_all_query_performance_samples(
            metadata=benchmark_metadata)
        results.extend(query_samples)

        geomean_samples = (
            benchmark_performance.get_queries_geomean_performance_samples(
                metadata=benchmark_metadata))
        results.extend(geomean_samples)
    else:
        logging.error(
            'At least one query failed, so not reporting any results.')
    return results
 def test_get_aggregated_geomean_performance_sample_passing(self):
     """Checks the benchmark-level aggregated geomean sample.

     Verifies the metric name, the value (geometric mean of the per-query
     averages), the unit and the metadata of the returned sample.
     """
     benchmark = agg.EdwBenchmarkPerformance(
         total_iterations=2, expected_queries=[Q1_NAME, Q2_NAME])
     iteration_runs = (
         ('1', ((Q1_NAME, 1.0, {'job_id': 'q1_i1_job_id'}),
                (Q2_NAME, 2.0, {'job_id': 'q2_i1_job_id'}))),
         ('2', ((Q1_NAME, 3.0, {'job_id': 'q1_i2_job_id'}),
                (Q2_NAME, 4.0, {}))),
     )
     for iteration_id, query_runs in iteration_runs:
         iteration = agg.EdwPowerIterationPerformance(iteration_id, 2)
         for query_name, execution_time, query_metadata in query_runs:
             iteration.add_query_performance(query_name, execution_time,
                                             query_metadata)
         benchmark.add_power_iteration_performance(iteration)

     geomean_sample = benchmark.get_aggregated_geomean_performance_sample(
         {'benchmark_name': 'b_name'})

     self.assertEqual(geomean_sample.metric, 'edw_aggregated_geomean')
     self.assertEqual(
         geomean_sample.value,
         agg.geometric_mean([(1.0 + 3.0) / 2, (2.0 + 4.0) / 2]))
     self.assertEqual(geomean_sample.unit, 'seconds')
     self.assertDictEqual(
         geomean_sample.metadata,
         {
             'benchmark_name': 'b_name',
             'intra_query_aggregation_method': 'mean',
             'inter_query_aggregation_method': 'geomean',
         })
 def test_get_query_metadata(self):
     """Checks that get_query_metadata returns what was recorded per query."""
     iteration = agg.EdwPowerIterationPerformance('1', 4)
     iteration.add_query_performance(
         Q1_NAME, Q1_PERFORMANCE, {'job_id': 'q1_i1_job_id'})
     iteration.add_query_performance(Q2_NAME, Q2_PERFORMANCE, {})

     # Q1 was recorded with a job id.
     q1_metadata = iteration.get_query_metadata(Q1_NAME)
     self.assertDictEqual(q1_metadata, {'job_id': 'q1_i1_job_id'})

     # Q2 was recorded with empty metadata.
     q2_metadata = iteration.get_query_metadata(Q2_NAME)
     self.assertDictEqual(q2_metadata, {})
 def test_add_query_performance(self):
     """Checks the state of an iteration after two queries are recorded.

     The stored entries are compared, by performance and metadata, against
     directly constructed EdwQueryPerformance objects; the total and
     successful counters are both expected to be 2.
     """
     iteration = agg.EdwPowerIterationPerformance('1', 2)
     expected_q1 = agg.EdwQueryPerformance(
         Q1_NAME, Q1_PERFORMANCE, METADATA_EMPTY)
     expected_q2 = agg.EdwQueryPerformance(
         Q2_NAME, Q2_PERFORMANCE, METADATA_EMPTY)
     iteration.add_query_performance(
         Q1_NAME, Q1_PERFORMANCE, METADATA_EMPTY)
     iteration.add_query_performance(
         Q2_NAME, Q2_PERFORMANCE, METADATA_EMPTY)

     recorded = iteration.performance
     expected = {Q1_NAME: expected_q1, Q2_NAME: expected_q2}
     self.assertSameElements(recorded.keys(), expected.keys())
     for query_name in (Q1_NAME, Q2_NAME):
         self.assertEqual(recorded[query_name].performance,
                          expected[query_name].performance)
         self.assertDictEqual(recorded[query_name].metadata,
                              expected[query_name].metadata)
     self.assertEqual(iteration.total_count, 2)
     self.assertEqual(iteration.successful_count, 2)
 def test_get_all_query_performance_samples(self):
     """Checks the raw query samples produced by a single iteration.

     Two samples are expected, one per recorded query; their metric, value,
     unit, 'query' metadata and 'execution_status' metadata are verified.
     """
     iteration = agg.EdwPowerIterationPerformance('1', 10)
     for query_name, execution_time in ((Q1_NAME, Q1_PERFORMANCE),
                                        (Q2_NAME, Q2_PERFORMANCE)):
         iteration.add_query_performance(query_name, execution_time,
                                         METADATA_EMPTY)

     samples = iteration.get_all_query_performance_samples(METADATA_EMPTY)

     self.assertEqual(len(samples), 2)
     self.assertListEqual(
         [sample.metric for sample in samples],
         ['edw_raw_query_time', 'edw_raw_query_time'])
     self.assertSameElements(
         [sample.value for sample in samples],
         [Q1_PERFORMANCE, Q2_PERFORMANCE])
     self.assertListEqual(
         [sample.unit for sample in samples], [SECS, SECS])
     self.assertSameElements(
         [sample.metadata['query'] for sample in samples],
         [Q1_NAME, Q2_NAME])
     self.assertSameElements(
         [sample.metadata['execution_status'] for sample in samples],
         [
             agg.EdwQueryExecutionStatus.SUCCESSFUL,
             agg.EdwQueryExecutionStatus.SUCCESSFUL,
         ])
 def test_get_query_performance(self):
     """Checks that a recorded query's performance is returned unchanged."""
     iteration = agg.EdwPowerIterationPerformance('1', 2)
     iteration.add_query_performance(
         Q1_NAME, Q1_PERFORMANCE, METADATA_EMPTY)

     recorded_performance = iteration.get_query_performance(Q1_NAME)

     self.assertEqual(recorded_performance, Q1_PERFORMANCE)