Example #1
    if not jobs:
        print('No jobs to run.')
        for image in docker_images.itervalues():
            dockerjob.remove_image(image, skip_nonexistent=True)
        sys.exit(1)

    num_failures, resultset = jobset.run(jobs,
                                         newline_on_success=True,
                                         maxjobs=args.jobs)
    if num_failures:
        jobset.message('FAILED', 'Some tests failed', do_newline=True)
    else:
        jobset.message('SUCCESS', 'All tests passed', do_newline=True)

    report_utils.render_junit_xml_report(resultset, 'report.xml')

    for name, job in resultset.items():
        if "http2" in name:
            job[0].http2results = aggregate_http2_results(job[0].message)

    report_utils.render_interop_html_report(
        set([str(l)
             for l in languages]), servers, _TEST_CASES, _AUTH_TEST_CASES,
        _HTTP2_TEST_CASES, resultset, num_failures, args.cloud_to_prod_auth
        or args.cloud_to_prod, args.prod_servers, args.http2_interop)

finally:
    # Check if servers are still running.
    for server, job in server_jobs.items():
        if not job.is_running():
            print('Server "%s" has exited prematurely.' % server)

Example #2

      merged_resultset = dict(itertools.chain(six.iteritems(merged_resultset),
                                              six.iteritems(resultset)))
    finally:
      # Consider qps workers that need to be killed as failures
      qps_workers_killed += finish_qps_workers(scenario.workers)

    if perf_cmd and scenario_failures == 0 and not args.skip_generate_flamegraphs:
      workers_and_base_names = {}
      for worker in scenario.workers:
        if not worker.perf_file_base_name:
          raise Exception('using perf but perf report filename is unspecified')
        workers_and_base_names[worker.host_and_port] = worker.perf_file_base_name
      perf_report_failures += run_collect_perf_profile_jobs(workers_and_base_names, scenario.name)


# Still write the index.html even if some scenarios failed.
# 'profile_output_files' will only have names for scenarios that passed
if perf_cmd and not args.skip_generate_flamegraphs:
  # write the index file to the output dir, with all profiles from all scenarios/workers
  report_utils.render_perf_profiling_results('%s/index.html' % args.flame_graph_reports, profile_output_files)

report_utils.render_junit_xml_report(merged_resultset, args.xml_report,
                                     suite_name='benchmarks')

if total_scenario_failures > 0 or qps_workers_killed > 0:
  print('%s scenarios failed and %s qps worker jobs killed' % (total_scenario_failures, qps_workers_killed))
  sys.exit(1)

if perf_report_failures > 0:
  print('%s perf profile collection jobs failed' % perf_report_failures)
  sys.exit(1)

Example #3

def main():
  argp = argparse.ArgumentParser(description='Run performance tests.')
  argp.add_argument('-l', '--language',
                    choices=['all'] + sorted(scenario_config.LANGUAGES.keys()),
                    nargs='+',
                    required=True,
                    help='Languages to benchmark.')
  argp.add_argument('--remote_driver_host',
                    default=None,
                    help='Run QPS driver on given host. By default, QPS driver is run locally.')
  argp.add_argument('--remote_worker_host',
                    nargs='+',
                    default=[],
                    help='Worker hosts where to start QPS workers.')
  argp.add_argument('--dry_run',
                    default=False,
                    action='store_const',
                    const=True,
                    help='Just list scenarios to be run, but don\'t run them.')
  argp.add_argument('-r', '--regex', default='.*', type=str,
                    help='Regex to select scenarios to run.')
  argp.add_argument('--bq_result_table', default=None, type=str,
                    help='Bigquery "dataset.table" to upload results to.')
  argp.add_argument('--category',
                    choices=['smoketest','all','scalable','sweep'],
                    default='all',
                    help='Select a category of tests to run.')
  argp.add_argument('--netperf',
                    default=False,
                    action='store_const',
                    const=True,
                    help='Run netperf benchmark as one of the scenarios.')
  argp.add_argument('--server_cpu_load',
                    default=0, type=int,
                    help='Select a targeted server cpu load to run. 0 means ignore this flag')
  argp.add_argument('-x', '--xml_report', default='report.xml', type=str,
                    help='Name of XML report file to generate.')
  argp.add_argument('--perf_args',
                    help=('Example usage: "--perf_args=record -F 99 -g". '
                          'Wrap QPS workers in a perf command '
                          'with the arguments to perf specified here. '
                          '".svg" flame graph profiles will be '
                          'created for each QPS worker on each scenario. '
                          'Files will be output to the "<repo_root>/<args.flame_graph_reports>" '
                          'directory. Output files from running the worker '
                          'under perf are saved in the repo root where it is run. '
                          'Note that the perf "-g" flag is necessary for '
                          'flame graph generation to work (assuming the binary '
                          'being profiled uses frame pointers; otherwise check out '
                          'the "--call-graph dwarf" option, which uses libunwind). '
                          'Also note that the entire "--perf_args=<arg(s)>" must '
                          'be wrapped in quotes as in the example usage. '
                          'If "--perf_args" is unspecified, "perf" will '
                          'not be used at all. '
                          'See http://www.brendangregg.com/perf.html '
                          'for more general perf examples.'))
  argp.add_argument('--skip_generate_flamegraphs',
                    default=False,
                    action='store_const',
                    const=True,
                    help=('Turn flame graph generation off. '
                          'May be useful if "perf_args" arguments do not make sense for '
                          'generating flamegraphs (e.g., "--perf_args=stat ...")'))
  argp.add_argument('-f', '--flame_graph_reports', default='perf_reports', type=str,
                    help='Name of directory to output flame graph profiles to, if any are created.')

  args = argp.parse_args()

  languages = set(scenario_config.LANGUAGES[l]
                  for l in itertools.chain.from_iterable(
                        six.iterkeys(scenario_config.LANGUAGES) if x == 'all'
                        else [x] for x in args.language))


  # Put together set of remote hosts where to run and build
  remote_hosts = set()
  if args.remote_worker_host:
    for host in args.remote_worker_host:
      remote_hosts.add(host)
  if args.remote_driver_host:
    remote_hosts.add(args.remote_driver_host)

  if not args.dry_run:
    if remote_hosts:
      archive_repo(languages=[str(l) for l in languages])
      prepare_remote_hosts(remote_hosts, prepare_local=True)
    else:
      prepare_remote_hosts([], prepare_local=True)

  build_local = False
  if not args.remote_driver_host:
    build_local = True
  if not args.dry_run:
    build_on_remote_hosts(remote_hosts, languages=[str(l) for l in languages], build_local=build_local)

  perf_cmd = None
  if args.perf_args:
    print('Running workers under perf profiler')
    # Expect /usr/bin/perf to be installed here, as is usual
    perf_cmd = ['/usr/bin/perf']
    perf_cmd.extend(re.split(r'\s+', args.perf_args))

  qpsworker_jobs = create_qpsworkers(languages, args.remote_worker_host, perf_cmd=perf_cmd)

  # get list of worker addresses for each language.
  workers_by_lang = dict([(str(language), []) for language in languages])
  for job in qpsworker_jobs:
    workers_by_lang[str(job.language)].append(job)

  scenarios = create_scenarios(languages,
                             workers_by_lang=workers_by_lang,
                             remote_host=args.remote_driver_host,
                             regex=args.regex,
                             category=args.category,
                             bq_result_table=args.bq_result_table,
                             netperf=args.netperf,
                             netperf_hosts=args.remote_worker_host,
                             server_cpu_load=args.server_cpu_load)

  if not scenarios:
    raise Exception('No scenarios to run')

  total_scenario_failures = 0
  qps_workers_killed = 0
  merged_resultset = {}
  perf_report_failures = 0

  for scenario in scenarios:
    if args.dry_run:
      print(scenario.name)
    else:
      scenario_failures = 0
      try:
        for worker in scenario.workers:
          worker.start()
        jobs = [scenario.jobspec]
        if scenario.workers:
          jobs.append(create_quit_jobspec(scenario.workers, remote_host=args.remote_driver_host))
        scenario_failures, resultset = jobset.run(jobs, newline_on_success=True, maxjobs=1)
        total_scenario_failures += scenario_failures
        merged_resultset = dict(itertools.chain(six.iteritems(merged_resultset),
                                                six.iteritems(resultset)))
      finally:
        # Consider qps workers that need to be killed as failures
        qps_workers_killed += finish_qps_workers(scenario.workers, qpsworker_jobs)

      if perf_cmd and scenario_failures == 0 and not args.skip_generate_flamegraphs:
        workers_and_base_names = {}
        for worker in scenario.workers:
          if not worker.perf_file_base_name:
            raise Exception('using perf but perf report filename is unspecified')
          workers_and_base_names[worker.host_and_port] = worker.perf_file_base_name
        perf_report_failures += run_collect_perf_profile_jobs(workers_and_base_names, scenario.name, args.flame_graph_reports)


  # Still write the index.html even if some scenarios failed.
  # 'profile_output_files' will only have names for scenarios that passed
  if perf_cmd and not args.skip_generate_flamegraphs:
    # write the index file to the output dir, with all profiles from all scenarios/workers
    report_utils.render_perf_profiling_results('%s/index.html' % args.flame_graph_reports, profile_output_files)

  report_utils.render_junit_xml_report(merged_resultset, args.xml_report,
                                       suite_name='benchmarks')

  if total_scenario_failures > 0 or qps_workers_killed > 0:
    print('%s scenarios failed and %s qps worker jobs killed' % (total_scenario_failures, qps_workers_killed))
    sys.exit(1)

  if perf_report_failures > 0:
    print('%s perf profile collection jobs failed' % perf_report_failures)
    sys.exit(1)
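
The snippets above share one reporting skeleton: run a batch of jobs with jobset.run, render the resultset as JUnit XML, and exit non-zero on failure. A minimal sketch of that pattern follows; it assumes jobset.run returns a (num_failures, resultset) pair and that report_utils.render_junit_xml_report accepts the resultset plus a file name, as the examples suggest. The run_and_report helper and the import path are illustrative, not part of the gRPC scripts.

# Minimal sketch of the recurring "run jobs, write JUnit XML, exit" pattern.
# Assumed import path, as used by gRPC's tools/run_tests scripts; adjust if needed.
import sys

import python_utils.jobset as jobset
import python_utils.report_utils as report_utils


def run_and_report(jobs, xml_report='report.xml', suite_name='tests', max_jobs=8):
    # jobset.run is assumed to return (num_failures, resultset).
    num_failures, resultset = jobset.run(jobs,
                                         newline_on_success=True,
                                         maxjobs=max_jobs)
    report_utils.render_junit_xml_report(resultset, xml_report,
                                         suite_name=suite_name)
    if num_failures:
        jobset.message('FAILED', 'Some tests failed', do_newline=True)
        sys.exit(1)
    jobset.message('SUCCESS', 'All tests passed', do_newline=True)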
Example #4
  num_failures, resultset = jobset.run(jobs, newline_on_success=True,
                                       maxjobs=args.jobs,
                                       skip_jobs=args.manual_run)
  if num_failures:
    jobset.message('FAILED', 'Some tests failed', do_newline=True)
  else:
    jobset.message('SUCCESS', 'All tests passed', do_newline=True)

  write_cmdlog_maybe(server_manual_cmd_log, 'interop_server_cmds.sh')
  write_cmdlog_maybe(client_manual_cmd_log, 'interop_client_cmds.sh')

  xml_report_name = _XML_REPORT
  if args.internal_ci:
    xml_report_name = _INTERNAL_CL_XML_REPORT
  report_utils.render_junit_xml_report(resultset, xml_report_name)

  for name, job in resultset.items():
    if "http2" in name:
      job[0].http2results = aggregate_http2_results(job[0].message)

  http2_server_test_cases = (
      _HTTP2_SERVER_TEST_CASES if args.http2_server_interop else [])

  report_utils.render_interop_html_report(
      set([str(l) for l in languages]), servers, _TEST_CASES, _AUTH_TEST_CASES,
      _HTTP2_TEST_CASES, http2_server_test_cases, resultset, num_failures,
      args.cloud_to_prod_auth or args.cloud_to_prod, args.prod_servers,
      args.http2_interop)
  
  if num_failures:
Example #5
        if perf_cmd and scenario_failures == 0 and not args.skip_generate_flamegraphs:
            workers_and_base_names = {}
            for worker in scenario.workers:
                if not worker.perf_file_base_name:
                    raise Exception(
                        'using perf but perf report filename is unspecified')
                workers_and_base_names[
                    worker.host_and_port] = worker.perf_file_base_name
            perf_report_failures += run_collect_perf_profile_jobs(
                workers_and_base_names, scenario.name)

# Still write the index.html even if some scenarios failed.
# 'profile_output_files' will only have names for scenarios that passed
if perf_cmd and not args.skip_generate_flamegraphs:
    # write the index file to the output dir, with all profiles from all scenarios/workers
    report_utils.render_perf_profiling_results(
        '%s/index.html' % args.flame_graph_reports, profile_output_files)

report_utils.render_junit_xml_report(merged_resultset,
                                     args.xml_report,
                                     suite_name='benchmarks')

if total_scenario_failures > 0 or qps_workers_killed > 0:
    print('%s scenarios failed and %s qps worker jobs killed' %
          (total_scenario_failures, qps_workers_killed))
    sys.exit(1)

if perf_report_failures > 0:
    print('%s perf profile collection jobs failed' % perf_report_failures)
    sys.exit(1)
Example #6
        else:
            print('  %s' % job.shortname)
    print

    if args.dry_run:
        print('--dry_run was used, exiting')
        sys.exit(1)

    jobset.message('START', 'Running test matrix.', do_newline=True)
    num_failures, resultset = jobset.run(jobs,
                                         newline_on_success=True,
                                         travis=True,
                                         maxjobs=args.jobs)
    # Merge skipped tests into results to show skipped tests on report.xml
    if skipped_jobs:
        skipped_results = jobset.run(skipped_jobs, skip_jobs=True)
        resultset.update(skipped_results)
    report_utils.render_junit_xml_report(resultset,
                                         'report.xml',
                                         suite_name='aggregate_tests')

    if num_failures == 0:
        jobset.message('SUCCESS',
                       'All run_tests.py instances finished successfully.',
                       do_newline=True)
    else:
        jobset.message('FAILED',
                       'Some run_tests.py instances have failed.',
                       do_newline=True)
        sys.exit(1)
def run_one_scenario(scenario_config):
    jobset.message('START', 'Run scenario: %s' % scenario_config['name'])
    server_jobs = {}
    server_addresses = {}
    suppress_server_logs = True
    try:
        backend_addrs = []
        fallback_ips = []
        grpclb_ips = []
        shortname_prefix = scenario_config['name']
        # Start backends
        for i in xrange(len(scenario_config['backend_configs'])):
            backend_config = scenario_config['backend_configs'][i]
            backend_shortname = shortname(shortname_prefix, 'backend_server',
                                          i)
            backend_spec = backend_server_jobspec(
                backend_config['transport_sec'], backend_shortname)
            backend_job = dockerjob.DockerJob(backend_spec)
            server_jobs[backend_shortname] = backend_job
            backend_addrs.append(
                '%s:%d' % (backend_job.ip_address(), _BACKEND_SERVER_PORT))
        # Start fallbacks
        for i in xrange(len(scenario_config['fallback_configs'])):
            fallback_config = scenario_config['fallback_configs'][i]
            fallback_shortname = shortname(shortname_prefix, 'fallback_server',
                                           i)
            fallback_spec = fallback_server_jobspec(
                fallback_config['transport_sec'], fallback_shortname)
            fallback_job = dockerjob.DockerJob(fallback_spec)
            server_jobs[fallback_shortname] = fallback_job
            fallback_ips.append(fallback_job.ip_address())
        # Start balancers
        for i in xrange(len(scenario_config['balancer_configs'])):
            balancer_config = scenario_config['balancer_configs'][i]
            grpclb_shortname = shortname(shortname_prefix, 'grpclb_server', i)
            grpclb_spec = grpclb_jobspec(balancer_config['transport_sec'],
                                         balancer_config['short_stream'],
                                         backend_addrs, grpclb_shortname)
            grpclb_job = dockerjob.DockerJob(grpclb_spec)
            server_jobs[grpclb_shortname] = grpclb_job
            grpclb_ips.append(grpclb_job.ip_address())
        # Start DNS server
        dns_server_shortname = shortname(shortname_prefix, 'dns_server', 0)
        dns_server_spec = dns_server_in_docker_jobspec(
            grpclb_ips, fallback_ips, dns_server_shortname,
            scenario_config['cause_no_error_no_data_for_balancer_a_record'])
        dns_server_job = dockerjob.DockerJob(dns_server_spec)
        server_jobs[dns_server_shortname] = dns_server_job
        # Get the IP address of the docker container running the DNS server.
        # The DNS server is running on port 53 of that IP address. Note we will
        # point the DNS resolvers of grpc clients under test to our controlled
        # DNS server by effectively modifying the /etc/resolv.conf "nameserver"
        # lists of their docker containers.
        dns_server_ip = dns_server_job.ip_address()
        wait_until_dns_server_is_up(dns_server_ip)
        # Run clients
        jobs = []
        for lang_name in languages:
            # Skip languages that are known to not currently
            # work for this test.
            if not args.no_skips and lang_name in scenario_config.get(
                    'skip_langs', []):
                jobset.message(
                    'IDLE', 'Skipping scenario: %s for language: %s\n' %
                    (scenario_config['name'], lang_name))
                continue
            lang = _LANGUAGES[lang_name]
            test_job = lb_client_interop_jobspec(
                lang,
                dns_server_ip,
                docker_image=docker_images.get(lang.safename),
                transport_security=scenario_config['transport_sec'])
            jobs.append(test_job)
        jobset.message(
            'IDLE',
            'Jobs to run: \n%s\n' % '\n'.join(str(job) for job in jobs))
        num_failures, resultset = jobset.run(jobs,
                                             newline_on_success=True,
                                             maxjobs=args.jobs)
        report_utils.render_junit_xml_report(resultset, 'sponge_log.xml')
        if num_failures:
            suppress_server_logs = False
            jobset.message('FAILED',
                           'Scenario: %s. Some tests failed' %
                           scenario_config['name'],
                           do_newline=True)
        else:
            jobset.message('SUCCESS',
                           'Scenario: %s. All tests passed' %
                           scenario_config['name'],
                           do_newline=True)
        return num_failures
    finally:
        # Check if servers are still running.
        for server, job in server_jobs.items():
            if not job.is_running():
                print('Server "%s" has exited prematurely.' % server)
        suppress_failure = suppress_server_logs and not args.verbose
        dockerjob.finish_jobs([j for j in six.itervalues(server_jobs)],
                              suppress_failure=suppress_failure)
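
run_one_scenario above also shows the server-lifecycle idiom used throughout these scripts: start each server as a dockerjob.DockerJob, run the client jobs, and always reap the containers in a finally block. A condensed sketch follows, with the same caveats: run_with_servers and the shortname attribute on the specs are illustrative, and whether finish_jobs needs a suppress_failure argument depends on the script version.

import python_utils.dockerjob as dockerjob  # assumed import path; adjust if needed
import python_utils.jobset as jobset


def run_with_servers(server_specs, client_jobs, max_jobs=4):
    server_jobs = {}
    try:
        # Start one Docker container per server jobspec.
        for spec in server_specs:
            server_jobs[spec.shortname] = dockerjob.DockerJob(spec)
        num_failures, _ = jobset.run(client_jobs,
                                     newline_on_success=True,
                                     maxjobs=max_jobs)
        return num_failures
    finally:
        # Flag servers that died before the clients finished, then clean up.
        for name, job in server_jobs.items():
            if not job.is_running():
                print('Server "%s" has exited prematurely.' % name)
        dockerjob.finish_jobs(list(server_jobs.values()))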
Example #8
    print('')

    if args.dry_run:
        print('--dry_run was used, exiting')
        sys.exit(1)

    jobset.message('START', 'Running test matrix.', do_newline=True)
    num_failures, resultset = jobset.run(jobs,
                                         newline_on_success=True,
                                         travis=True,
                                         maxjobs=args.jobs)
    # Merge skipped tests into results to show skipped tests on report.xml
    if skipped_jobs:
        ignored_num_skipped_failures, skipped_results = jobset.run(
            skipped_jobs, skip_jobs=True)
        resultset.update(skipped_results)
    report_utils.render_junit_xml_report(resultset,
                                         _report_filename(_MATRIX_REPORT_NAME),
                                         suite_name=_MATRIX_REPORT_NAME,
                                         multi_target=True)

    if num_failures == 0:
        jobset.message('SUCCESS',
                       'All run_tests.py instances finished successfully.',
                       do_newline=True)
    else:
        jobset.message('FAILED',
                       'Some run_tests.py instances have failed.',
                       do_newline=True)
        sys.exit(1)
Example #9
  if args.verbose:
    print('Jobs to run: \n%s\n' % '\n'.join(str(job) for job in jobs))

  num_failures, resultset = jobset.run(jobs, newline_on_success=True,
                                       maxjobs=args.jobs,
                                       skip_jobs=args.manual_run)
  if num_failures:
    jobset.message('FAILED', 'Some tests failed', do_newline=True)
  else:
    jobset.message('SUCCESS', 'All tests passed', do_newline=True)

  write_cmdlog_maybe(server_manual_cmd_log, 'interop_server_cmds.sh')
  write_cmdlog_maybe(client_manual_cmd_log, 'interop_client_cmds.sh')

  report_utils.render_junit_xml_report(resultset, 'report.xml')

  for name, job in resultset.items():
    if "http2" in name:
      job[0].http2results = aggregate_http2_results(job[0].message)

  http2_server_test_cases = (
      _HTTP2_SERVER_TEST_CASES if args.http2_server_interop else [])

  report_utils.render_interop_html_report(
      set([str(l) for l in languages]), servers, _TEST_CASES, _AUTH_TEST_CASES,
      _HTTP2_TEST_CASES, http2_server_test_cases, resultset, num_failures,
      args.cloud_to_prod_auth or args.cloud_to_prod, args.prod_servers,
      args.http2_interop)
except Exception as e:
  print('exception occurred:')
Example #10
    prebuild_jobs += target.pre_build_jobspecs()
if prebuild_jobs:
    num_failures, _ = jobset.run(prebuild_jobs,
                                 newline_on_success=True,
                                 maxjobs=args.jobs)
    if num_failures != 0:
        jobset.message('FAILED', 'Pre-build phase failed.', do_newline=True)
        sys.exit(1)

build_jobs = []
for target in targets:
    build_jobs.append(target.build_jobspec())
if not build_jobs:
    print('Nothing to build.')
    sys.exit(1)

jobset.message('START', 'Building targets.', do_newline=True)
num_failures, resultset = jobset.run(build_jobs,
                                     newline_on_success=True,
                                     maxjobs=args.jobs)
report_utils.render_junit_xml_report(resultset,
                                     'report_taskrunner_sponge_log.xml',
                                     suite_name='tasks')
if num_failures == 0:
    jobset.message('SUCCESS',
                   'All targets built successfully.',
                   do_newline=True)
else:
    jobset.message('FAILED', 'Failed to build targets.', do_newline=True)
    sys.exit(1)
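
The build-targets snippet above implies a small interface for each target: it only has to provide pre_build_jobspecs() and build_jobspec(). The class below makes that inferred contract explicit; it is illustrative only (the real targets live elsewhere in the gRPC build tooling), and it assumes jobset.JobSpec takes a command line plus a shortname, as in the examples.

import python_utils.jobset as jobset  # assumed import path; adjust if needed


class ExampleTarget(object):
    """Illustrative target exposing the interface the build loop above expects."""

    def __init__(self, name, build_cmd):
        self.name = name
        self._build_cmd = build_cmd

    def pre_build_jobspecs(self):
        # No pre-build work for this illustrative target.
        return []

    def build_jobspec(self):
        # jobset.JobSpec(cmdline, shortname=...) is assumed from the examples.
        return jobset.JobSpec(self._build_cmd, shortname='build_%s' % self.name)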
Example #11
    if args.dry_run:
      print('  %s: "%s"' % (job.shortname, ' '.join(job.cmdline)))
    else:
      print('  %s' % job.shortname)
  print

  if args.dry_run:
    print('--dry_run was used, exiting')
    sys.exit(1)

  jobset.message('START', 'Running test matrix.', do_newline=True)
  num_failures, resultset = jobset.run(jobs,
                                       newline_on_success=True,
                                       travis=True,
                                       maxjobs=args.jobs)
  # Merge skipped tests into results to show skipped tests on report.xml
  if skipped_jobs:
    ignored_num_skipped_failures, skipped_results = jobset.run(
        skipped_jobs, skip_jobs=True)
    resultset.update(skipped_results)
  report_utils.render_junit_xml_report(resultset, _report_filename('aggregate_tests'),
                                       suite_name='aggregate_tests')

  if num_failures == 0:
    jobset.message('SUCCESS', 'All run_tests.py instances finished successfully.',
                   do_newline=True)
  else:
    jobset.message('FAILED', 'Some run_tests.py instances have failed.',
                   do_newline=True)
    sys.exit(1)
Example #12
            elif test_case == 'secondary_locality_gets_requests_on_primary_failure':
                test_secondary_locality_gets_requests_on_primary_failure(
                    gcp, backend_service, instance_group,
                    secondary_zone_instance_group)
            else:
                logger.error('Unknown test case: %s', test_case)
                sys.exit(1)
            result.state = 'PASSED'
            result.returncode = 0
        except Exception as e:
            result.state = 'FAILED'
            result.message = str(e)
        finally:
            if client_process:
                client_process.terminate()
            # Workaround for Python 3, as report_utils will invoke decode() on
            # result.message, which has a default value of ''.
            result.message = result.message.encode('UTF-8')
            test_results[test_case] = [result]
    if not os.path.exists(_TEST_LOG_BASE_DIR):
        os.makedirs(_TEST_LOG_BASE_DIR)
    report_utils.render_junit_xml_report(test_results,
                                         os.path.join(_TEST_LOG_BASE_DIR,
                                                      _SPONGE_XML_NAME),
                                         suite_name='xds_tests',
                                         multi_target=True)
finally:
    if not args.keep_gcp_resources:
        logger.info('Cleaning up GCP resources. This may take some time.')
        clean_up(gcp)
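
The xds snippet above assembles its JUnit input by hand: one result object per test case, wrapped in a single-element list, then rendered with multi_target=True. The sketch below shows that assembly step in isolation; it assumes jobset.JobResult provides the state/returncode/message fields used above, and the report_test_cases helper and its arguments are illustrative.

import os

import python_utils.jobset as jobset  # assumed import paths; adjust if needed
import python_utils.report_utils as report_utils


def report_test_cases(outcomes, log_dir, xml_name, suite_name='xds_tests'):
    # outcomes maps test_case -> (passed, message).
    test_results = {}
    for test_case, (passed, message) in outcomes.items():
        result = jobset.JobResult()
        result.state = 'PASSED' if passed else 'FAILED'
        result.returncode = 0 if passed else 1
        # report_utils calls decode() on the message, so store it as bytes.
        result.message = message.encode('UTF-8')
        test_results[test_case] = [result]
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    report_utils.render_junit_xml_report(test_results,
                                         os.path.join(log_dir, xml_name),
                                         suite_name=suite_name,
                                         multi_target=True)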
Example #13
    if args.dry_run:
      print('  %s: "%s"' % (job.shortname, ' '.join(job.cmdline)))
    else:
      print('  %s' % job.shortname)
  print

  if args.dry_run:
    print('--dry_run was used, exiting')
    sys.exit(1)

  jobset.message('START', 'Running test matrix.', do_newline=True)
  num_failures, resultset = jobset.run(jobs,
                                       newline_on_success=True,
                                       travis=True,
                                       maxjobs=args.jobs)
  # Merge skipped tests into results to show skipped tests on report.xml
  if skipped_jobs:
    skipped_results = jobset.run(skipped_jobs,
                                 skip_jobs=True)
    resultset.update(skipped_results)
  report_utils.render_junit_xml_report(resultset, 'report.xml',
                                       suite_name='aggregate_tests')

  if num_failures == 0:
    jobset.message('SUCCESS', 'All run_tests.py instances finished successfully.',
                   do_newline=True)
  else:
    jobset.message('FAILED', 'Some run_tests.py instances have failed.',
                   do_newline=True)
    sys.exit(1)
Example #14
    prebuild_jobs += target.pre_build_jobspecs()
if prebuild_jobs:
    num_failures, _ = jobset.run(prebuild_jobs,
                                 newline_on_success=True,
                                 maxjobs=args.jobs)
    if num_failures != 0:
        jobset.message('FAILED', 'Pre-build phase failed.', do_newline=True)
        sys.exit(1)

build_jobs = []
for target in targets:
    build_jobs.append(target.build_jobspec())
if not build_jobs:
    print('Nothing to build.')
    sys.exit(1)

jobset.message('START', 'Building targets.', do_newline=True)
num_failures, resultset = jobset.run(build_jobs,
                                     newline_on_success=True,
                                     maxjobs=args.jobs)
report_utils.render_junit_xml_report(resultset,
                                     args.xml_report,
                                     suite_name='tasks')
if num_failures == 0:
    jobset.message('SUCCESS',
                   'All targets built successfully.',
                   do_newline=True)
else:
    jobset.message('FAILED', 'Failed to build targets.', do_newline=True)
    sys.exit(1)
Example #15
                                         maxjobs=args.jobs,
                                         skip_jobs=args.manual_run)
    if args.bq_result_table and resultset:
        upload_interop_results_to_bq(resultset, args.bq_result_table, args)
    if num_failures:
        jobset.message('FAILED', 'Some tests failed', do_newline=True)
    else:
        jobset.message('SUCCESS', 'All tests passed', do_newline=True)

    write_cmdlog_maybe(server_manual_cmd_log, 'interop_server_cmds.sh')
    write_cmdlog_maybe(client_manual_cmd_log, 'interop_client_cmds.sh')

    xml_report_name = _XML_REPORT
    if args.internal_ci:
        xml_report_name = _INTERNAL_CL_XML_REPORT
    report_utils.render_junit_xml_report(resultset, xml_report_name)

    for name, job in resultset.items():
        if "http2" in name:
            job[0].http2results = aggregate_http2_results(job[0].message)

    http2_server_test_cases = (_HTTP2_SERVER_TEST_CASES
                               if args.http2_server_interop else [])

    report_utils.render_interop_html_report(
        set([str(l) for l in languages]), servers, _TEST_CASES,
        _AUTH_TEST_CASES, _HTTP2_TEST_CASES, http2_server_test_cases,
        resultset, num_failures, args.cloud_to_prod_auth or args.cloud_to_prod,
        args.prod_servers, args.http2_interop)

    if num_failures:
Example #16
# Execute pre-build phase
prebuild_jobs = []
for target in targets:
  prebuild_jobs += target.pre_build_jobspecs()
if prebuild_jobs:
  num_failures, _ = jobset.run(
    prebuild_jobs, newline_on_success=True, maxjobs=args.jobs)
  if num_failures != 0:
    jobset.message('FAILED', 'Pre-build phase failed.', do_newline=True)
    sys.exit(1)

build_jobs = []
for target in targets:
  build_jobs.append(target.build_jobspec())
if not build_jobs:
  print('Nothing to build.')
  sys.exit(1)

jobset.message('START', 'Building targets.', do_newline=True)
num_failures, resultset = jobset.run(
    build_jobs, newline_on_success=True, maxjobs=args.jobs)
report_utils.render_junit_xml_report(resultset, 'report_taskrunner_sponge_log.xml',
                                     suite_name='tasks')
if num_failures == 0:
  jobset.message('SUCCESS', 'All targets built successfully.',
                 do_newline=True)
else:
  jobset.message('FAILED', 'Failed to build targets.',
                 do_newline=True)
  sys.exit(1)

Example #17

def main():
    argp = argparse.ArgumentParser(description='Run performance tests.')
    argp.add_argument(
        '-l',
        '--language',
        choices=['all'] + sorted(scenario_config.LANGUAGES.keys()),
        nargs='+',
        required=True,
        help='Languages to benchmark.')
    argp.add_argument(
        '--remote_driver_host',
        default=None,
        help=
        'Run QPS driver on given host. By default, QPS driver is run locally.')
    argp.add_argument(
        '--remote_worker_host',
        nargs='+',
        default=[],
        help='Worker hosts where to start QPS workers.')
    argp.add_argument(
        '--dry_run',
        default=False,
        action='store_const',
        const=True,
        help='Just list scenarios to be run, but don\'t run them.')
    argp.add_argument(
        '-r',
        '--regex',
        default='.*',
        type=str,
        help='Regex to select scenarios to run.')
    argp.add_argument(
        '--bq_result_table',
        default=None,
        type=str,
        help='Bigquery "dataset.table" to upload results to.')
    argp.add_argument(
        '--category',
        choices=['smoketest', 'all', 'scalable', 'sweep'],
        default='all',
        help='Select a category of tests to run.')
    argp.add_argument(
        '--netperf',
        default=False,
        action='store_const',
        const=True,
        help='Run netperf benchmark as one of the scenarios.')
    argp.add_argument(
        '--server_cpu_load',
        default=0,
        type=int,
        help='Select a targeted server cpu load to run. 0 means ignore this flag'
    )
    argp.add_argument(
        '-x',
        '--xml_report',
        default='report.xml',
        type=str,
        help='Name of XML report file to generate.')
    argp.add_argument(
        '--perf_args',
        help=('Example usage: "--perf_args=record -F 99 -g". '
              'Wrap QPS workers in a perf command '
              'with the arguments to perf specified here. '
              '".svg" flame graph profiles will be '
              'created for each QPS worker on each scenario. '
              'Files will be output to the "<repo_root>/<args.flame_graph_reports>" '
              'directory. Output files from running the worker '
              'under perf are saved in the repo root where it is run. '
              'Note that the perf "-g" flag is necessary for '
              'flame graph generation to work (assuming the binary '
              'being profiled uses frame pointers; otherwise check out '
              'the "--call-graph dwarf" option, which uses libunwind). '
              'Also note that the entire "--perf_args=<arg(s)>" must '
              'be wrapped in quotes as in the example usage. '
              'If "--perf_args" is unspecified, "perf" will '
              'not be used at all. '
              'See http://www.brendangregg.com/perf.html '
              'for more general perf examples.'))
    argp.add_argument(
        '--skip_generate_flamegraphs',
        default=False,
        action='store_const',
        const=True,
        help=('Turn flame graph generation off. '
              'May be useful if "perf_args" arguments do not make sense for '
              'generating flamegraphs (e.g., "--perf_args=stat ...")'))
    argp.add_argument(
        '-f',
        '--flame_graph_reports',
        default='perf_reports',
        type=str,
        help=
        'Name of directory to output flame graph profiles to, if any are created.'
    )
    argp.add_argument(
        '-u',
        '--remote_host_username',
        default='',
        type=str,
        help='Use a username that isn\'t "Jenkins" to SSH into remote workers.')

    args = argp.parse_args()

    global _REMOTE_HOST_USERNAME
    if args.remote_host_username:
        _REMOTE_HOST_USERNAME = args.remote_host_username

    languages = set(
        scenario_config.LANGUAGES[l]
        for l in itertools.chain.from_iterable(
            six.iterkeys(scenario_config.LANGUAGES) if x == 'all' else [x]
            for x in args.language))

    # Put together set of remote hosts where to run and build
    remote_hosts = set()
    if args.remote_worker_host:
        for host in args.remote_worker_host:
            remote_hosts.add(host)
    if args.remote_driver_host:
        remote_hosts.add(args.remote_driver_host)

    if not args.dry_run:
        if remote_hosts:
            archive_repo(languages=[str(l) for l in languages])
            prepare_remote_hosts(remote_hosts, prepare_local=True)
        else:
            prepare_remote_hosts([], prepare_local=True)

    build_local = False
    if not args.remote_driver_host:
        build_local = True
    if not args.dry_run:
        build_on_remote_hosts(
            remote_hosts,
            languages=[str(l) for l in languages],
            build_local=build_local)

    perf_cmd = None
    if args.perf_args:
        print('Running workers under perf profiler')
        # Expect /usr/bin/perf to be installed here, as is usual
        perf_cmd = ['/usr/bin/perf']
        perf_cmd.extend(re.split(r'\s+', args.perf_args))

    qpsworker_jobs = create_qpsworkers(
        languages, args.remote_worker_host, perf_cmd=perf_cmd)

    # get list of worker addresses for each language.
    workers_by_lang = dict([(str(language), []) for language in languages])
    for job in qpsworker_jobs:
        workers_by_lang[str(job.language)].append(job)

    scenarios = create_scenarios(
        languages,
        workers_by_lang=workers_by_lang,
        remote_host=args.remote_driver_host,
        regex=args.regex,
        category=args.category,
        bq_result_table=args.bq_result_table,
        netperf=args.netperf,
        netperf_hosts=args.remote_worker_host,
        server_cpu_load=args.server_cpu_load)

    if not scenarios:
        raise Exception('No scenarios to run')

    total_scenario_failures = 0
    qps_workers_killed = 0
    merged_resultset = {}
    perf_report_failures = 0

    for scenario in scenarios:
        if args.dry_run:
            print(scenario.name)
        else:
            scenario_failures = 0
            try:
                for worker in scenario.workers:
                    worker.start()
                jobs = [scenario.jobspec]
                if scenario.workers:
                    # TODO(jtattermusch): ideally the "quit" job won't show up
                    # in the report
                    jobs.append(
                        create_quit_jobspec(
                            scenario.workers,
                            remote_host=args.remote_driver_host))
                scenario_failures, resultset = jobset.run(
                    jobs, newline_on_success=True, maxjobs=1)
                total_scenario_failures += scenario_failures
                merged_resultset = dict(
                    itertools.chain(
                        six.iteritems(merged_resultset),
                        six.iteritems(resultset)))
            finally:
                # Consider qps workers that need to be killed as failures
                qps_workers_killed += finish_qps_workers(
                    scenario.workers, qpsworker_jobs)

            if perf_cmd and scenario_failures == 0 and not args.skip_generate_flamegraphs:
                workers_and_base_names = {}
                for worker in scenario.workers:
                    if not worker.perf_file_base_name:
                        raise Exception(
                            'using perf but perf report filename is unspecified'
                        )
                    workers_and_base_names[
                        worker.host_and_port] = worker.perf_file_base_name
                perf_report_failures += run_collect_perf_profile_jobs(
                    workers_and_base_names, scenario.name,
                    args.flame_graph_reports)

    # Still write the index.html even if some scenarios failed.
    # 'profile_output_files' will only have names for scenarios that passed
    if perf_cmd and not args.skip_generate_flamegraphs:
        # write the index file to the output dir, with all profiles from all scenarios/workers
        report_utils.render_perf_profiling_results(
            '%s/index.html' % args.flame_graph_reports, profile_output_files)

    report_utils.render_junit_xml_report(
        merged_resultset,
        args.xml_report,
        suite_name='benchmarks',
        multi_target=True)

    if total_scenario_failures > 0 or qps_workers_killed > 0:
        print('%s scenarios failed and %s qps worker jobs killed' %
              (total_scenario_failures, qps_workers_killed))
        sys.exit(1)

    if perf_report_failures > 0:
        print('%s perf profile collection jobs failed' % perf_report_failures)
        sys.exit(1)
Example #18
            print('  %s' % job.shortname)
    print

    if args.dry_run:
        print('--dry_run was used, exiting')
        sys.exit(1)

    jobset.message('START', 'Running test matrix.', do_newline=True)
    num_failures, resultset = jobset.run(jobs,
                                         newline_on_success=True,
                                         travis=True,
                                         maxjobs=args.jobs)
    # Merge skipped tests into results to show skipped tests on report.xml
    if skipped_jobs:
        ignored_num_skipped_failures, skipped_results = jobset.run(
            skipped_jobs, skip_jobs=True)
        resultset.update(skipped_results)
    report_utils.render_junit_xml_report(resultset,
                                         _report_filename('aggregate_tests'),
                                         suite_name='aggregate_tests')

    if num_failures == 0:
        jobset.message('SUCCESS',
                       'All run_tests.py instances finished successfully.',
                       do_newline=True)
    else:
        jobset.message('FAILED',
                       'Some run_tests.py instances have failed.',
                       do_newline=True)
        sys.exit(1)
Example #19
    if args.dry_run:
      print('  %s: "%s"' % (job.shortname, ' '.join(job.cmdline)))
    else:
      print('  %s' % job.shortname)
  print

  if args.dry_run:
    print('--dry_run was used, exiting')
    sys.exit(1)

  jobset.message('START', 'Running test matrix.', do_newline=True)
  num_failures, resultset = jobset.run(jobs,
                                       newline_on_success=True,
                                       travis=True,
                                       maxjobs=args.jobs)
  # Merge skipped tests into results to show skipped tests on report.xml
  if skipped_jobs:
    ignored_num_skipped_failures, skipped_results = jobset.run(
        skipped_jobs, skip_jobs=True)
    resultset.update(skipped_results)
  report_utils.render_junit_xml_report(resultset, 'report_%s' % _REPORT_SUFFIX,
                                       suite_name='aggregate_tests')

  if num_failures == 0:
    jobset.message('SUCCESS', 'All run_tests.py instances finished successfully.',
                   do_newline=True)
  else:
    jobset.message('FAILED', 'Some run_tests.py instances have failed.',
                   do_newline=True)
    sys.exit(1)
Example #20
            print('  %s' % job.shortname)
    print

    if args.dry_run:
        print('--dry_run was used, exiting')
        sys.exit(1)

    jobset.message('START', 'Running test matrix.', do_newline=True)
    num_failures, resultset = jobset.run(jobs,
                                         newline_on_success=True,
                                         travis=True,
                                         maxjobs=args.jobs)
    # Merge skipped tests into results to show skipped tests on report.xml
    if skipped_jobs:
        ignored_num_skipped_failures, skipped_results = jobset.run(
            skipped_jobs, skip_jobs=True)
        resultset.update(skipped_results)
    report_utils.render_junit_xml_report(resultset,
                                         'report_%s' % _REPORT_SUFFIX,
                                         suite_name='aggregate_tests')

    if num_failures == 0:
        jobset.message('SUCCESS',
                       'All run_tests.py instances finished successfully.',
                       do_newline=True)
    else:
        jobset.message('FAILED',
                       'Some run_tests.py instances have failed.',
                       do_newline=True)
        sys.exit(1)
def run_one_scenario(scenario_config):
    jobset.message('START', 'Run scenario: %s' % scenario_config['name'])
    server_jobs = {}
    server_addresses = {}
    suppress_server_logs = True
    try:
        backend_addrs = []
        fallback_ips = []
        grpclb_ips = []
        shortname_prefix = scenario_config['name']
        # Start backends
        for i in xrange(len(scenario_config['backend_configs'])):
            backend_config = scenario_config['backend_configs'][i]
            backend_shortname = shortname(shortname_prefix, 'backend_server', i)
            backend_spec = backend_server_jobspec(
                backend_config['transport_sec'], backend_shortname)
            backend_job = dockerjob.DockerJob(backend_spec)
            server_jobs[backend_shortname] = backend_job
            backend_addrs.append('%s:%d' % (backend_job.ip_address(),
                                            _BACKEND_SERVER_PORT))
        # Start fallbacks
        for i in xrange(len(scenario_config['fallback_configs'])):
            fallback_config = scenario_config['fallback_configs'][i]
            fallback_shortname = shortname(shortname_prefix, 'fallback_server',
                                           i)
            fallback_spec = fallback_server_jobspec(
                fallback_config['transport_sec'], fallback_shortname)
            fallback_job = dockerjob.DockerJob(fallback_spec)
            server_jobs[fallback_shortname] = fallback_job
            fallback_ips.append(fallback_job.ip_address())
        # Start balancers
        for i in xrange(len(scenario_config['balancer_configs'])):
            balancer_config = scenario_config['balancer_configs'][i]
            grpclb_shortname = shortname(shortname_prefix, 'grpclb_server', i)
            grpclb_spec = grpclb_jobspec(balancer_config['transport_sec'],
                                         balancer_config['short_stream'],
                                         backend_addrs, grpclb_shortname)
            grpclb_job = dockerjob.DockerJob(grpclb_spec)
            server_jobs[grpclb_shortname] = grpclb_job
            grpclb_ips.append(grpclb_job.ip_address())
        # Start DNS server
        dns_server_shortname = shortname(shortname_prefix, 'dns_server', 0)
        dns_server_spec = dns_server_in_docker_jobspec(
            grpclb_ips, fallback_ips, dns_server_shortname,
            scenario_config['cause_no_error_no_data_for_balancer_a_record'])
        dns_server_job = dockerjob.DockerJob(dns_server_spec)
        server_jobs[dns_server_shortname] = dns_server_job
        # Get the IP address of the docker container running the DNS server.
        # The DNS server is running on port 53 of that IP address. Note we will
        # point the DNS resolvers of grpc clients under test to our controlled
        # DNS server by effectively modifying the /etc/resolv.conf "nameserver"
        # lists of their docker containers.
        dns_server_ip = dns_server_job.ip_address()
        wait_until_dns_server_is_up(dns_server_ip)
        # Run clients
        jobs = []
        for lang_name in languages:
            # Skip languages that are known to not currently
            # work for this test.
            if not args.no_skips and lang_name in scenario_config.get(
                    'skip_langs', []):
                jobset.message('IDLE',
                               'Skipping scenario: %s for language: %s\n' %
                               (scenario_config['name'], lang_name))
                continue
            lang = _LANGUAGES[lang_name]
            test_job = lb_client_interop_jobspec(
                lang,
                dns_server_ip,
                docker_image=docker_images.get(lang.safename),
                transport_security=scenario_config['transport_sec'])
            jobs.append(test_job)
        jobset.message('IDLE', 'Jobs to run: \n%s\n' % '\n'.join(
            str(job) for job in jobs))
        num_failures, resultset = jobset.run(
            jobs, newline_on_success=True, maxjobs=args.jobs)
        report_utils.render_junit_xml_report(resultset, 'sponge_log.xml')
        if num_failures:
            suppress_server_logs = False
            jobset.message(
                'FAILED',
                'Scenario: %s. Some tests failed' % scenario_config['name'],
                do_newline=True)
        else:
            jobset.message(
                'SUCCESS',
                'Scenario: %s. All tests passed' % scenario_config['name'],
                do_newline=True)
        return num_failures
    finally:
        # Check if servers are still running.
        for server, job in server_jobs.items():
            if not job.is_running():
                print('Server "%s" has exited prematurely.' % server)
        suppress_failure = suppress_server_logs and not args.verbose
        dockerjob.finish_jobs(
            [j for j in six.itervalues(server_jobs)],
            suppress_failure=suppress_failure)