def run_tests(config):
    """Launch the stress tests described by `config` and report the outcome.

    The function performs these steps in order:
      1. If requested, build every docker image and push it to the GKE
         registry.
      2. Derive a timestamp-based run id and a per-run dataset id, then
         initialize the BigQuery helper.
      3. Launch the server pods, wait for the warm-up period, launch the
         client pods and wait for the warm-up period again.
      4. Poll BigQuery until the test duration has elapsed or a failure is
         reported.
      5. Print the QPS and summary tables.
      6. Delete the server and client pods, but only when nothing failed.

    Args:
      config: Object exposing `global_settings`, `docker_images_dict`,
        `server_pod_specs_dict` and `client_pod_specs_dict`.

    Returns:
      False if an image fails to build or push, if any server or client pod
      spec fails to launch, or if BigQuery reports a failed test; True
      otherwise.
    """
    settings = config.global_settings

    # Build docker images and push to GKE registry. Only the image objects are
    # needed here, so iterate over the values rather than (name, image) pairs.
    if settings.build_docker_images:
        for docker_image in config.docker_images_dict.values():
            if not (docker_image.build_image() and
                    docker_image.push_to_gke_registry()):
                return False

    # Create a unique id for this run (Note: Using timestamp instead of UUID to
    # make it easier to deduce the date/time of the run just by looking at the
    # run id. This is useful in debugging when looking at records in BigQuery)
    run_id = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    dataset_id = '%s_%s' % (settings.dataset_id_prefix, run_id)

    bq_helper = BigQueryHelper(run_id, '', '', settings.gcp_project_id,
                               dataset_id, settings.summary_table_id,
                               settings.qps_table_id)
    bq_helper.initialize()

    gke = Gke(settings.gcp_project_id, run_id, dataset_id,
              settings.summary_table_id, settings.qps_table_id,
              settings.kubernetes_proxy_port)

    is_success = True

    try:
        # Every print below passes a single, fully formatted string inside
        # parentheses so the statement means the same thing whether `print`
        # is the Python 2 statement or the Python 3 function.
        print('Launching servers..')
        for server_pod_spec in config.server_pod_specs_dict.values():
            if not gke.launch_servers(server_pod_spec):
                is_success = False  # checked in the 'finally' block
                return False

        print('Launched servers. Waiting for %d seconds for the server pods '
              'to be fully online' % settings.pod_warmup_secs)
        time.sleep(settings.pod_warmup_secs)

        for client_pod_spec in config.client_pod_specs_dict.values():
            if not gke.launch_clients(client_pod_spec):
                is_success = False  # checked in the 'finally' block
                return False

        print('Launched all clients. Waiting for %d seconds for the client '
              'pods to be fully online' % settings.pod_warmup_secs)
        time.sleep(settings.pod_warmup_secs)

        start_time = datetime.datetime.now()
        end_time = start_time + datetime.timedelta(
            seconds=settings.test_duration_secs)
        print('Running the test until %s' % end_time.isoformat())

        while True:
            if datetime.datetime.now() > end_time:
                print('Test was run for %d seconds' %
                      settings.test_duration_secs)
                break

            # Check if either stress server or clients have failed (the
            # bq_helper is asked whether any test has a failure status).
            if bq_helper.check_if_any_tests_failed():
                is_success = False
                print('Some tests failed.')
                # Don't 'return' here. We still want to call bq_helper to
                # print the qps/summary tables.
                break

            # Tests running fine. Wait until next poll time to check the
            # status.
            print('Sleeping for %d seconds..' %
                  settings.test_poll_interval_secs)
            time.sleep(settings.test_poll_interval_secs)

        # Print BigQuery tables
        bq_helper.print_qps_records()
        bq_helper.print_summary_records()

    finally:
        # If there was a test failure, we should not delete the pods since they
        # would contain useful debug information (logs, core dumps etc).
        # Note: when an exception escapes the 'try' body, is_success is still
        # True, so the pods are deleted before the exception propagates.
        if is_success:
            for server_pod_spec in config.server_pod_specs_dict.values():
                gke.delete_servers(server_pod_spec)
            for client_pod_spec in config.client_pod_specs_dict.values():
                gke.delete_clients(client_pod_spec)

    return is_success
def run_test_main(test_settings, gke_settings, stress_server_settings,
                  stress_client_clients):
    """Build (optionally), launch, monitor and clean up one stress test run.

    Args:
      test_settings: Object exposing `build_docker_image`,
        `kubernetes_proxy_port`, `test_duration_secs` and
        `test_poll_interval_secs`.
      gke_settings: Object exposing `docker_image_name` and `tag_name`; it is
        also handed to `_launch_server_and_client`.
      stress_server_settings: Server settings handed to
        `_launch_server_and_client` and `_delete_server_and_client`.
      stress_client_clients: Client settings handed to the same two helpers.

    Returns:
      False if building or pushing the docker image fails or if BigQuery
      reports a failed test. If launching fails, the falsy result of
      `_launch_server_and_client` short-circuits to `return False`. Otherwise
      the (truthy) launch result is returned.
    """
    # The fourth parameter is named `stress_client_clients`; bind it to the
    # name used throughout the body so the client settings supplied by the
    # caller are the ones launched and deleted, instead of depending on a
    # module-level `stress_client_settings` happening to exist.
    stress_client_settings = stress_client_clients

    is_success = True

    if test_settings.build_docker_image:
        is_success = _build_docker_image(gke_settings.docker_image_name,
                                         gke_settings.tag_name)
        if not is_success:
            return False

        is_success = _push_docker_image_to_gke_registry(gke_settings.tag_name)
        if not is_success:
            return False

    # Create a unique id for this run (Note: Using timestamp instead of UUID to
    # make it easier to deduce the date/time of the run just by looking at the
    # run id. This is useful in debugging when looking at records in BigQuery)
    run_id = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    dataset_id = '%s_%s' % (_DATASET_ID_PREFIX, run_id)

    # Big Query settings (common for both Stress Server and Client)
    bq_settings = BigQuerySettings(run_id, dataset_id, _SUMMARY_TABLE_ID,
                                   _QPS_TABLE_ID)

    # NOTE(review): `args` is not a parameter of this function, so it has to be
    # available as a module-level name when this runs - confirm with callers.
    bq_helper = BigQueryHelper(run_id, '', '', args.project_id, dataset_id,
                               _SUMMARY_TABLE_ID, _QPS_TABLE_ID)
    bq_helper.initialize()

    try:
        is_success = _launch_server_and_client(
            gke_settings, stress_server_settings, stress_client_settings,
            bq_settings, test_settings.kubernetes_proxy_port)
        if not is_success:
            return False

        start_time = datetime.datetime.now()
        end_time = start_time + datetime.timedelta(
            seconds=test_settings.test_duration_secs)

        # Every print below passes a single, fully formatted string inside
        # parentheses so the statement means the same thing whether `print`
        # is the Python 2 statement or the Python 3 function.
        print('Running the test until %s' % end_time.isoformat())

        while True:
            if datetime.datetime.now() > end_time:
                print('Test was run for %d seconds' %
                      test_settings.test_duration_secs)
                break

            # Check if either stress server or clients have failed
            if bq_helper.check_if_any_tests_failed():
                is_success = False
                print('Some tests failed.')
                break

            # Things seem to be running fine. Wait until next poll time to
            # check the status
            print('Sleeping for %d seconds..' %
                  test_settings.test_poll_interval_secs)
            time.sleep(test_settings.test_poll_interval_secs)

        # Print BigQuery tables
        bq_helper.print_summary_records()
        bq_helper.print_qps_records()

    finally:
        # If is_success is False at this point, either the launch failed or
        # the stress tests were started successfully but failed while running.
        # In both cases we should not delete the pods (since they contain all
        # the failure information).
        if is_success:
            _delete_server_and_client(stress_server_settings,
                                      stress_client_settings,
                                      test_settings.kubernetes_proxy_port)

    return is_success
def run_tests(config):
    """Launch the stress tests described by `config` and report the outcome.

    The function performs these steps in order:
      1. If requested, build every docker image and push it to the GKE
         registry.
      2. Derive a timestamp-based run id and a per-run dataset id, print both,
         then initialize the BigQuery helper.
      3. Launch the server pods, wait for the warm-up period, launch the
         client pods and wait for the warm-up period again.
      4. Poll BigQuery until the test duration has elapsed or a failure is
         reported.
      5. Print the QPS and summary tables.
      6. Delete the server and client pods, but only when nothing failed.

    Args:
      config: Object exposing `global_settings`, `docker_images_dict`,
        `server_pod_specs_dict` and `client_pod_specs_dict`.

    Returns:
      False if an image fails to build or push, if any server or client pod
      spec fails to launch, or if BigQuery reports a failed test; True
      otherwise.
    """
    settings = config.global_settings

    # Build docker images and push to GKE registry. Only the image objects are
    # needed here, so iterate over the values rather than (name, image) pairs.
    if settings.build_docker_images:
        for docker_image in config.docker_images_dict.values():
            if not (docker_image.build_image() and
                    docker_image.push_to_gke_registry()):
                return False

    # Create a unique id for this run (Note: Using timestamp instead of UUID to
    # make it easier to deduce the date/time of the run just by looking at the
    # run id. This is useful in debugging when looking at records in BigQuery)
    run_id = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    dataset_id = '%s_%s' % (settings.dataset_id_prefix, run_id)

    # Every print in this function passes a single, fully formatted string
    # inside parentheses so the statement means the same thing whether `print`
    # is the Python 2 statement or the Python 3 function.
    print('Run id: %s' % run_id)
    print('Dataset id: %s' % dataset_id)

    bq_helper = BigQueryHelper(run_id, '', '', settings.gcp_project_id,
                               dataset_id, settings.summary_table_id,
                               settings.qps_table_id)
    bq_helper.initialize()

    gke = Gke(settings.gcp_project_id, run_id, dataset_id,
              settings.summary_table_id, settings.qps_table_id,
              settings.kubernetes_proxy_port)

    is_success = True

    try:
        print('Launching servers..')
        for server_pod_spec in config.server_pod_specs_dict.values():
            if not gke.launch_servers(server_pod_spec):
                is_success = False  # checked in the 'finally' block
                return False

        print('Launched servers. Waiting for %d seconds for the server pods '
              'to be fully online' % settings.pod_warmup_secs)
        time.sleep(settings.pod_warmup_secs)

        for client_pod_spec in config.client_pod_specs_dict.values():
            if not gke.launch_clients(client_pod_spec):
                is_success = False  # checked in the 'finally' block
                return False

        print('Launched all clients. Waiting for %d seconds for the client '
              'pods to be fully online' % settings.pod_warmup_secs)
        time.sleep(settings.pod_warmup_secs)

        start_time = datetime.datetime.now()
        end_time = start_time + datetime.timedelta(
            seconds=settings.test_duration_secs)
        print('Running the test until %s' % end_time.isoformat())

        while True:
            if datetime.datetime.now() > end_time:
                print('Test was run for %d seconds' %
                      settings.test_duration_secs)
                break

            # Check if either stress server or clients have failed (the
            # bq_helper is asked whether any test has a failure status).
            if bq_helper.check_if_any_tests_failed():
                is_success = False
                print('Some tests failed.')
                # Don't 'return' here. We still want to call bq_helper to
                # print the qps/summary tables.
                break

            # Tests running fine. Wait until next poll time to check the
            # status.
            print('Sleeping for %d seconds..' %
                  settings.test_poll_interval_secs)
            time.sleep(settings.test_poll_interval_secs)

        # Print BigQuery tables
        bq_helper.print_qps_records()
        bq_helper.print_summary_records()

    finally:
        # If there was a test failure, we should not delete the pods since they
        # would contain useful debug information (logs, core dumps etc).
        # Note: when an exception escapes the 'try' body, is_success is still
        # True, so the pods are deleted before the exception propagates.
        if is_success:
            for server_pod_spec in config.server_pod_specs_dict.values():
                gke.delete_servers(server_pod_spec)
            for client_pod_spec in config.client_pod_specs_dict.values():
                gke.delete_clients(client_pod_spec)

    return is_success
def run_test_main(test_settings, gke_settings, stress_server_settings,
                  stress_client_clients):
    """Build (optionally), launch, monitor and clean up one stress test run.

    Args:
      test_settings: Object exposing `build_docker_image`,
        `kubernetes_proxy_port`, `test_duration_secs` and
        `test_poll_interval_secs`.
      gke_settings: Object exposing `docker_image_name` and `tag_name`; it is
        also handed to `_launch_server_and_client`.
      stress_server_settings: Server settings handed to
        `_launch_server_and_client` and `_delete_server_and_client`.
      stress_client_clients: Client settings handed to the same two helpers.

    Returns:
      False if building or pushing the docker image fails or if BigQuery
      reports a failed test. If launching fails, the falsy result of
      `_launch_server_and_client` short-circuits to `return False`. Otherwise
      the (truthy) launch result is returned.
    """
    # The fourth parameter is named `stress_client_clients`; bind it to the
    # name used throughout the body so the client settings supplied by the
    # caller are the ones launched and deleted, instead of depending on a
    # module-level `stress_client_settings` happening to exist.
    stress_client_settings = stress_client_clients

    is_success = True

    if test_settings.build_docker_image:
        is_success = _build_docker_image(gke_settings.docker_image_name,
                                         gke_settings.tag_name)
        if not is_success:
            return False

        is_success = _push_docker_image_to_gke_registry(gke_settings.tag_name)
        if not is_success:
            return False

    # Create a unique id for this run (Note: Using timestamp instead of UUID to
    # make it easier to deduce the date/time of the run just by looking at the
    # run id. This is useful in debugging when looking at records in BigQuery)
    run_id = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    dataset_id = '%s_%s' % (_DATASET_ID_PREFIX, run_id)

    # Big Query settings (common for both Stress Server and Client)
    bq_settings = BigQuerySettings(run_id, dataset_id, _SUMMARY_TABLE_ID,
                                   _QPS_TABLE_ID)

    # NOTE(review): `args` is not a parameter of this function, so it has to be
    # available as a module-level name when this runs - confirm with callers.
    bq_helper = BigQueryHelper(run_id, '', '', args.project_id, dataset_id,
                               _SUMMARY_TABLE_ID, _QPS_TABLE_ID)
    bq_helper.initialize()

    try:
        is_success = _launch_server_and_client(
            gke_settings, stress_server_settings, stress_client_settings,
            bq_settings, test_settings.kubernetes_proxy_port)
        if not is_success:
            return False

        start_time = datetime.datetime.now()
        end_time = start_time + datetime.timedelta(
            seconds=test_settings.test_duration_secs)

        # Every print below passes a single, fully formatted string inside
        # parentheses so the statement means the same thing whether `print`
        # is the Python 2 statement or the Python 3 function.
        print('Running the test until %s' % end_time.isoformat())

        while True:
            if datetime.datetime.now() > end_time:
                print('Test was run for %d seconds' %
                      test_settings.test_duration_secs)
                break

            # Check if either stress server or clients have failed
            if bq_helper.check_if_any_tests_failed():
                is_success = False
                print('Some tests failed.')
                break

            # Things seem to be running fine. Wait until next poll time to
            # check the status
            print('Sleeping for %d seconds..' %
                  test_settings.test_poll_interval_secs)
            time.sleep(test_settings.test_poll_interval_secs)

        # Print BigQuery tables
        bq_helper.print_summary_records()
        bq_helper.print_qps_records()

    finally:
        # If is_success is False at this point, either the launch failed or
        # the stress tests were started successfully but failed while running.
        # In both cases we should not delete the pods (since they contain all
        # the failure information).
        if is_success:
            _delete_server_and_client(stress_server_settings,
                                      stress_client_settings,
                                      test_settings.kubernetes_proxy_port)

    return is_success