def SetUpPKB():
  """Set globals and environment variables for PKB.

  After SetUpPKB() returns, it should be possible to call PKB
  functions, like benchmark_spec.Prepare() or benchmark_spec.Run().

  SetUpPKB() also modifies the local file system by creating a temp
  directory and storing new SSH keys.
  """
  try:
    _InitializeRunUri()
  except errors.Error as e:
    logging.error(e)
    sys.exit(1)

  # Initialize logging.
  vm_util.GenTempDir()
  if FLAGS.use_pkb_logging:
    log_util.ConfigureLogging(
        stderr_log_level=log_util.LOG_LEVELS[FLAGS.log_level],
        log_path=vm_util.PrependTempDir(LOG_FILE_NAME),
        run_uri=FLAGS.run_uri,
        file_log_level=log_util.LOG_LEVELS[FLAGS.file_log_level])
  logging.info('PerfKitBenchmarker version: %s', version.VERSION)

  # Translate deprecated flags and log all provided flag values.
  disk.WarnAndTranslateDiskFlags()
  _LogCommandLineFlags()

  # Register skip pending runs functionality.
  RegisterSkipPendingRunsCheck(_SkipPendingRunsFile)

  # Check environment.
  if not FLAGS.ignore_package_requirements:
    requirements.CheckBasicRequirements()

  for executable in REQUIRED_EXECUTABLES:
    if not vm_util.ExecutableOnPath(executable):
      raise errors.Setup.MissingExecutableError(
          'Could not find required executable "%s"' % executable)

  # Check mutually exclusive flags.
  if FLAGS.run_stage_iterations > 1 and FLAGS.run_stage_time > 0:
    raise errors.Setup.InvalidFlagConfigurationError(
        'Flags run_stage_iterations and run_stage_time are mutually exclusive.')

  vm_util.SSHKeyGen()

  if FLAGS.static_vm_file:
    with open(FLAGS.static_vm_file) as fp:
      static_virtual_machine.StaticVirtualMachine.ReadStaticVirtualMachineFile(
          fp)

  events.initialization_complete.send(parsed_flags=FLAGS)

  benchmark_lookup.SetBenchmarkModuleFunction(benchmark_sets.BenchmarkModule)
  package_lookup.SetPackageModuleFunction(benchmark_sets.PackageModule)
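# Illustrative sketch (not part of PKB): vm_util.ExecutableOnPath performs a
# PATH lookup roughly like the standard-library-only check below. All names
# here are hypothetical and for illustration only.
import os


def _example_executable_on_path(name):
  """Returns True if an executable named `name` is found on the PATH."""
  return any(
      os.access(os.path.join(directory, name), os.X_OK)
      for directory in os.environ.get('PATH', '').split(os.pathsep))


# e.g. _example_executable_on_path('java') -> True on a machine with a JDK.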
def SubmitJob(self, jarfile, classname,
              job_poll_interval=None,
              job_arguments=None,
              job_stdout_file=None,
              job_type=None):
  """See base class."""
  if job_type == self.BEAM_JOB_TYPE:
    full_cmd, base_dir = beam_benchmark_helper.BuildBeamCommand(
        self.spec, classname, job_arguments)
    stdout, _, retcode = vm_util.IssueCommand(
        full_cmd,
        cwd=base_dir,
        timeout=FLAGS.beam_it_timeout,
        raise_on_failure=False)
    assert retcode == 0, 'Integration Test Failed.'
    return

  worker_machine_type = self.spec.worker_group.vm_spec.machine_type
  num_workers = self.spec.worker_count
  max_num_workers = self.spec.worker_count
  if (self.spec.worker_group.disk_spec and
      self.spec.worker_group.disk_spec.disk_size):
    disk_size_gb = self.spec.worker_group.disk_spec.disk_size
  elif self.spec.worker_group.vm_spec.boot_disk_size:
    disk_size_gb = self.spec.worker_group.vm_spec.boot_disk_size
  else:
    disk_size_gb = None

  cmd = []

  # Needed to verify the java executable is on the path.
  dataflow_executable = 'java'
  if not vm_util.ExecutableOnPath(dataflow_executable):
    raise errors.Setup.MissingExecutableError(
        'Could not find required executable "%s"' % dataflow_executable)
  cmd.append(dataflow_executable)

  cmd.append('-cp')
  cmd.append(jarfile)

  cmd.append(classname)
  cmd += job_arguments

  cmd.append('--workerMachineType={}'.format(worker_machine_type))
  cmd.append('--numWorkers={}'.format(num_workers))
  cmd.append('--maxNumWorkers={}'.format(max_num_workers))

  if disk_size_gb:
    cmd.append('--diskSizeGb={}'.format(disk_size_gb))
  cmd.append('--defaultWorkerLogLevel={}'.format(FLAGS.dpb_log_level))

  stdout, _, _ = vm_util.IssueCommand(cmd)
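# Illustrative sketch (all values hypothetical): the command list assembled by
# SubmitJob above, for an n1-standard-4 worker group of 4 VMs with 500 GB
# disks and INFO logging, would resemble:
_example_dataflow_cmd = [
    'java', '-cp', 'dataflow-benchmark.jar',  # hypothetical jarfile
    'com.example.MyPipeline',                 # hypothetical classname
    '--workerMachineType=n1-standard-4',
    '--numWorkers=4',
    '--maxNumWorkers=4',
    '--diskSizeGb=500',
    '--defaultWorkerLogLevel=INFO',
]
# vm_util.IssueCommand(_example_dataflow_cmd) would then run it as a
# subprocess.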
def _BuildPythonCommand(benchmark_spec, classname, job_arguments):
  """Constructs the Gradle command for a Python benchmark.

  Python integration tests are invoked from the Gradle task
  `integrationTest`. The Python Gradle command is constructed differently
  from the Java one. The following system properties can be used on the
  command line:

    -Dtests: fully qualified class/module name of the test to run.
      e.g. apache_beam.examples.wordcount_it_test:WordCountIT
    -Dattr: a set of tests that are annotated by this attribute tag.
    -DpipelineOptions: a set of pipeline options needed to run the Beam job.

  Args:
    benchmark_spec: The PKB spec for the benchmark to run.
    classname: The fully qualified class/module name of the test to run.
    job_arguments: The additional job arguments provided for the run.

  Returns:
    cmd: Array holding the execution command.
  """
  cmd = []

  gradle_executable = _GetGradleCommand()
  if not vm_util.ExecutableOnPath(gradle_executable):
    raise errors.Setup.MissingExecutableError(
        'Could not find required executable "%s"' % gradle_executable)

  cmd.append(gradle_executable)
  cmd.append('integrationTest')
  cmd.append('-Dtests={}'.format(classname))
  AddModuleArgument(cmd, FLAGS.beam_it_module)
  AddPythonAttributes(cmd, FLAGS.beam_python_attr)

  beam_args = job_arguments if job_arguments else []
  if benchmark_spec.service_type == dpb_service.DATAFLOW:
    beam_args.append('"--runner={}"'.format(FLAGS.beam_runner))

    sdk_location = FLAGS.beam_python_sdk_location
    if not sdk_location:
      tar_list = _FindFiles(_GetBeamPythonDir(), DEFAULT_PYTHON_TAR_PATTERN)
      if not tar_list:
        raise RuntimeError('No Python SDK tar file is available.')
      sdk_location = tar_list[0]
    beam_args.append('"--sdk_location={}"'.format(sdk_location))
  cmd.append('-DpipelineOptions={}'.format(' '.join(beam_args)))

  cmd.append('--info')
  cmd.append('--scan')

  return cmd
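# Illustrative sketch (all values hypothetical; flags added by the helper
# functions AddModuleArgument and AddPythonAttributes are omitted): for a
# Dataflow run of WordCountIT, the Gradle command built above would resemble:
_example_gradle_python_cmd = [
    './gradlew',  # hypothetical; the real path comes from _GetGradleCommand()
    'integrationTest',
    '-Dtests=apache_beam.examples.wordcount_it_test:WordCountIT',
    '-DpipelineOptions="--runner=TestDataflowRunner" '
    '"--sdk_location=/tmp/beam/sdks/python/apache-beam-2.2.0.tar.gz"',
    '--info',
    '--scan',
]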
def _BuildPythonCommand(benchmark_spec, modulename, job_arguments):
  """Constructs the Gradle command for a Python benchmark.

  Python integration tests are invoked from the Gradle task
  `beam-sdks-python:integrationTest`. The Python Gradle command is
  constructed differently from the Java one. Tests can be run with the
  following project properties:

    -Pattr: a nose flag that filters tests by attributes
    -Ptests: a nose flag that filters tests by name
    -PpipelineOptions: a set of pipeline options needed to run the Beam job

  Args:
    benchmark_spec: The PKB spec for the benchmark to run.
    modulename: The name of the python module to run.
    job_arguments: The additional job arguments provided for the run.

  Returns:
    cmd: Array containing the built command.
  """
  cmd = []

  gradle_executable = _GetGradleCommand()
  if not vm_util.ExecutableOnPath(gradle_executable):
    raise errors.Setup.MissingExecutableError(
        'Could not find required executable "%s"' % gradle_executable)

  cmd.append(gradle_executable)
  cmd.append('beam-sdks-python:integrationTest')
  cmd.append('-Ptests={}'.format(modulename))
  cmd.append('-Pattr={}'.format(FLAGS.beam_python_attr))

  beam_args = job_arguments if job_arguments else []
  if benchmark_spec.service_type == dpb_service.DATAFLOW:
    beam_args.append('--runner={}'.format(FLAGS.beam_runner))

    sdk_location = FLAGS.beam_python_sdk_location
    if not sdk_location:
      tar_list = _FindFiles(_GetBeamPythonDir(), DEFAULT_PYTHON_TAR_PATTERN)
      if not tar_list:
        raise RuntimeError('No Python SDK tar file is available.')
      sdk_location = tar_list[0]
    beam_args.append('--sdk_location={}'.format(sdk_location))
  cmd.append('-PpipelineOptions={}'.format(' '.join(beam_args)))

  cmd.append('--info')
  cmd.append('--scan')

  return cmd
def _BuildMavenCommand(benchmark_spec, classname, job_arguments):
  """Constructs a maven command for the benchmark.

  Args:
    benchmark_spec: The PKB spec for the benchmark to run.
    classname: The classname of the class to run.
    job_arguments: The additional job arguments provided for the run.

  Returns:
    cmd: Array containing the built command.
  """
  cmd = []

  maven_executable = FLAGS.maven_binary
  if not vm_util.ExecutableOnPath(maven_executable):
    raise errors.Setup.MissingExecutableError(
        'Could not find required executable "%s"' % maven_executable)
  cmd.append(maven_executable)

  cmd.append('-e')
  cmd.append('verify')
  cmd.append('-Dit.test={}'.format(classname))
  cmd.append('-DskipITs=false')

  if FLAGS.beam_it_module:
    cmd.append('-pl')
    cmd.append(FLAGS.beam_it_module)

  if FLAGS.beam_it_profile:
    cmd.append('-P{}'.format(FLAGS.beam_it_profile))

  beam_args = job_arguments if job_arguments else []

  # Don't add any args when the user overrides beam_runner_profile, since it
  # is expected that they know what they are doing, and we can't know what
  # args to pass because they differ by runner.
  if (benchmark_spec.service_type == dpb_service.DATAFLOW and
      not FLAGS.beam_runner_profile):
    beam_args.append('"--defaultWorkerLogLevel={}"'.format(
        FLAGS.dpb_log_level))

  AddRunnerProfileMvnArgument(benchmark_spec.service_type, cmd,
                              FLAGS.beam_runner_profile)
  AddRunnerOptionMvnArgument(benchmark_spec.service_type, beam_args,
                             FLAGS.beam_runner_option)
  AddExtraMvnProperties(cmd, FLAGS.beam_extra_mvn_properties)

  cmd.append('-DintegrationTestPipelineOptions='
             '[{}]'.format(','.join(beam_args)))

  return cmd
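# Illustrative sketch (all values hypothetical; flags added by the
# AddRunner*MvnArgument and AddExtraMvnProperties helpers are omitted):
# a Dataflow invocation built above would resemble:
_example_mvn_cmd = [
    'mvn', '-e', 'verify',
    '-Dit.test=org.apache.beam.examples.WordCountIT',
    '-DskipITs=false',
    '-pl', 'examples/java',  # hypothetical beam_it_module
    '-Pdataflow-runner',     # hypothetical beam_it_profile
    '-DintegrationTestPipelineOptions=["--defaultWorkerLogLevel=INFO"]',
]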
def SetUpPKB():
  """Set globals and environment variables for PKB.

  After SetUpPKB() returns, it should be possible to call PKB
  functions, like benchmark_spec.Prepare() or benchmark_spec.Run().

  SetUpPKB() also modifies the local file system by creating a temp
  directory and storing new SSH keys.
  """
  if not FLAGS.ignore_package_requirements:
    requirements.CheckBasicRequirements()

  for executable in REQUIRED_EXECUTABLES:
    if not vm_util.ExecutableOnPath(executable):
      raise errors.Setup.MissingExecutableError(
          'Could not find required executable "%s"' % executable)

  if FLAGS.run_uri is None:
    if stages.PROVISION in FLAGS.run_stage:
      FLAGS.run_uri = str(uuid.uuid4())[-8:]
    else:
      # Attempt to get the last modified run directory.
      run_uri = vm_util.GetLastRunUri()
      if run_uri:
        FLAGS.run_uri = run_uri
        logging.warning(
            'No run_uri specified. Attempting to run the following stages '
            'with --run_uri=%s: %s', FLAGS.run_uri, ', '.join(FLAGS.run_stage))
      else:
        raise errors.Setup.NoRunURIError(
            'No run_uri specified. Could not run the following stages: %s' %
            ', '.join(FLAGS.run_stage))
  elif not FLAGS.run_uri.isalnum() or len(FLAGS.run_uri) > MAX_RUN_URI_LENGTH:
    raise errors.Setup.BadRunURIError(
        'run_uri must be alphanumeric and at most %d characters in length.' %
        MAX_RUN_URI_LENGTH)

  vm_util.GenTempDir()
  log_util.ConfigureLogging(
      stderr_log_level=log_util.LOG_LEVELS[FLAGS.log_level],
      log_path=vm_util.PrependTempDir(LOG_FILE_NAME),
      run_uri=FLAGS.run_uri,
      file_log_level=log_util.LOG_LEVELS[FLAGS.file_log_level])
  logging.info('PerfKitBenchmarker version: %s', version.VERSION)
  vm_util.SSHKeyGen()
  events.initialization_complete.send(parsed_flags=FLAGS)
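# Self-contained sketch of the run_uri rules enforced above: a generated URI
# is the last 8 hex characters of a UUID4, and a user-supplied URI must be
# alphanumeric and no longer than MAX_RUN_URI_LENGTH (assumed to be 8 here,
# matching the generated length; the real constant lives in the module).
import uuid

_EXAMPLE_MAX_RUN_URI_LENGTH = 8  # assumed value


def _example_new_run_uri():
  return str(uuid.uuid4())[-8:]


def _example_run_uri_is_valid(run_uri):
  return run_uri.isalnum() and len(run_uri) <= _EXAMPLE_MAX_RUN_URI_LENGTH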
def SetUpPKB():
  """Set globals and environment variables for PKB.

  After SetUpPKB() returns, it should be possible to call PKB
  functions, like benchmark_spec.Prepare() or benchmark_spec.Run().

  SetUpPKB() also modifies the local file system by creating a temp
  directory and storing new SSH keys.
  """
  try:
    _InitializeRunUri()
  except errors.Error as e:
    logging.error(e)
    sys.exit(1)

  # Initialize logging.
  vm_util.GenTempDir()
  log_util.ConfigureLogging(
      stderr_log_level=log_util.LOG_LEVELS[FLAGS.log_level],
      log_path=vm_util.PrependTempDir(LOG_FILE_NAME),
      run_uri=FLAGS.run_uri,
      file_log_level=log_util.LOG_LEVELS[FLAGS.file_log_level])
  logging.info('PerfKitBenchmarker version: %s', version.VERSION)

  # Translate deprecated flags and log all provided flag values.
  disk.WarnAndTranslateDiskFlags()
  _LogCommandLineFlags()

  # Check environment.
  if not FLAGS.ignore_package_requirements:
    requirements.CheckBasicRequirements()

  if FLAGS.os_type == os_types.WINDOWS and not vm_util.RunningOnWindows():
    logging.error('In order to run benchmarks on Windows VMs, you must be '
                  'running on Windows.')
    sys.exit(1)

  for executable in REQUIRED_EXECUTABLES:
    if not vm_util.ExecutableOnPath(executable):
      raise errors.Setup.MissingExecutableError(
          'Could not find required executable "%s"' % executable)

  vm_util.SSHKeyGen()

  if FLAGS.static_vm_file:
    with open(FLAGS.static_vm_file) as fp:
      static_virtual_machine.StaticVirtualMachine.ReadStaticVirtualMachineFile(
          fp)

  events.initialization_complete.send(parsed_flags=FLAGS)
def BuildMavenCommand(benchmark_spec, classname, job_arguments):
  """Constructs a maven command for the benchmark.

  Args:
    benchmark_spec: The PKB spec for the benchmark to run.
    classname: The classname of the class to run.
    job_arguments: The additional job arguments provided for the run.

  Returns:
    cmd: Array containing the built command.
    beam_dir: The directory in which to run the command.
  """
  if benchmark_spec.service_type not in SUPPORTED_RUNNERS:
    raise NotImplementedError('Unsupported runner: %s' %
                              benchmark_spec.service_type)

  cmd = []

  maven_executable = FLAGS.maven_binary
  if not vm_util.ExecutableOnPath(maven_executable):
    raise errors.Setup.MissingExecutableError(
        'Could not find required executable "%s"' % maven_executable)
  cmd.append(maven_executable)

  cmd.append('-e')
  cmd.append('verify')
  cmd.append('-Dit.test={}'.format(classname))
  cmd.append('-DskipITs=false')

  if FLAGS.beam_it_module:
    cmd.append('-pl')
    cmd.append(FLAGS.beam_it_module)

  if FLAGS.beam_it_profile:
    cmd.append('-P{}'.format(FLAGS.beam_it_profile))

  beam_args = job_arguments if job_arguments else []

  if benchmark_spec.service_type == dpb_service.DATAFLOW:
    cmd.append('-P{}'.format('dataflow-runner'))
    beam_args.append('"--runner=org.apache.beam.runners.'
                     'dataflow.testing.TestDataflowRunner"')
    beam_args.append('"--defaultWorkerLogLevel={}"'.format(
        FLAGS.dpb_log_level))

  cmd.append('-DintegrationTestPipelineOptions='
             '[{}]'.format(','.join(beam_args)))

  # TODO: This is temporary; find a better way.
  beam_dir = FLAGS.beam_location if FLAGS.beam_location else os.path.join(
      vm_util.GetTempDir(), 'beam')

  return cmd, beam_dir
def SubmitJob(self, jarfile, classname,
              job_poll_interval=None,
              job_arguments=None,
              job_stdout_file=None,
              job_type=None):
  """See base class."""
  worker_machine_type = self.spec.worker_group.vm_spec.machine_type
  num_workers = self.spec.worker_count
  max_num_workers = self.spec.worker_count
  if (self.spec.worker_group.disk_spec and
      self.spec.worker_group.disk_spec.disk_size):
    disk_size_gb = self.spec.worker_group.disk_spec.disk_size
  elif self.spec.worker_group.vm_spec.boot_disk_size:
    disk_size_gb = self.spec.worker_group.vm_spec.boot_disk_size
  else:
    disk_size_gb = None

  cmd = []

  # Needed to verify the java executable is on the path.
  dataflow_executable = 'java'
  if not vm_util.ExecutableOnPath(dataflow_executable):
    raise errors.Setup.MissingExecutableError(
        'Could not find required executable "%s"' % dataflow_executable)
  cmd.append(dataflow_executable)

  cmd.append('-cp')
  cmd.append(jarfile)

  cmd.append(classname)
  cmd += job_arguments

  cmd.append('--workerMachineType={}'.format(worker_machine_type))
  cmd.append('--numWorkers={}'.format(num_workers))
  cmd.append('--maxNumWorkers={}'.format(max_num_workers))

  if disk_size_gb:
    cmd.append('--diskSizeGb={}'.format(disk_size_gb))
  cmd.append('--defaultWorkerLogLevel={}'.format(FLAGS.dpb_log_level))

  stdout, _, _ = vm_util.IssueCommand(cmd)
def _BuildGradleCommand(classname, job_arguments):
  """Constructs a Gradle command for the benchmark.

  Args:
    classname: The classname of the class to run.
    job_arguments: The additional job arguments provided for the run.

  Returns:
    cmd: Array containing the built command.
  """
  cmd = []

  gradle_executable = _GetGradleCommand()
  if not vm_util.ExecutableOnPath(gradle_executable):
    raise errors.Setup.MissingExecutableError(
        'Could not find required executable "%s"' % gradle_executable)

  cmd.append(gradle_executable)
  cmd.append('integrationTest')
  cmd.append('--tests={}'.format(classname))

  beam_args = job_arguments if job_arguments else []

  AddModuleArgument(cmd, FLAGS.beam_it_module)
  AddRunnerArgument(cmd, FLAGS.beam_runner)
  AddRunnerPipelineOption(beam_args, FLAGS.beam_runner,
                          FLAGS.beam_runner_option)
  AddFilesystemArgument(cmd, FLAGS.beam_filesystem)
  AddExtraProperties(cmd, FLAGS.beam_extra_properties)

  cmd.append('-DintegrationTestPipelineOptions='
             '[{}]'.format(','.join(beam_args)))

  cmd.append('--stacktrace')
  cmd.append('--info')
  cmd.append('--scan')

  return cmd
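# Illustrative sketch (all values hypothetical; flags added by the
# Add*Argument helpers are omitted): unlike the Python variant, the Java
# build filters with Gradle's native --tests flag and passes pipeline
# options as a bracketed, comma-joined list:
_example_gradle_java_cmd = [
    './gradlew',  # hypothetical; resolved by _GetGradleCommand()
    'integrationTest',
    '--tests=org.apache.beam.examples.WordCountIT',
    '-DintegrationTestPipelineOptions=["--tempRoot=gs://my-bucket/tmp"]',
    '--stacktrace', '--info', '--scan',
]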
def _BuildPythonCommand(benchmark_spec, modulename, job_arguments):
  """Constructs a Python command for the benchmark.

  Args:
    benchmark_spec: The PKB spec for the benchmark to run.
    modulename: The name of the python module to run.
    job_arguments: The additional job arguments provided for the run.

  Returns:
    cmd: Array containing the built command.
  """
  cmd = []

  python_executable = FLAGS.python_binary
  if not vm_util.ExecutableOnPath(python_executable):
    raise errors.Setup.MissingExecutableError(
        'Could not find required executable "%s"' % python_executable)
  cmd.append(python_executable)

  cmd.append('setup.py')
  cmd.append('nosetests')
  cmd.append('--tests={}'.format(modulename))
  cmd.append('--attr={}'.format(FLAGS.beam_python_attr))

  beam_args = job_arguments if job_arguments else []
  if benchmark_spec.service_type == dpb_service.DATAFLOW:
    python_binary = _FindFiles(os.path.join(_GetBeamPythonDir(), 'target'),
                               'apache-beam*.tar.gz')
    if not python_binary:
      raise RuntimeError('No Python SDK tar file was found.')
    beam_args.append('--runner=TestDataflowRunner')
    beam_args.append('--sdk_location={}'.format(python_binary[0]))
  cmd.append('--test-pipeline-options={}'.format(' '.join(beam_args)))

  return cmd
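# Illustrative sketch (all values hypothetical): for a Dataflow run, the
# nosetests command built above would resemble:
_example_nosetests_cmd = [
    'python',  # hypothetical; the real binary comes from FLAGS.python_binary
    'setup.py', 'nosetests',
    '--tests=apache_beam.examples.wordcount_it_test',
    '--attr=IT',
    '--test-pipeline-options=--runner=TestDataflowRunner '
    '--sdk_location=target/apache-beam-2.2.0.tar.gz',
]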
def RunBenchmarks(publish=True):
  """Runs all benchmarks in PerfKitBenchmarker.

  Args:
    publish: A boolean indicating whether results should be published.

  Returns:
    Exit status for the process.
  """
  if FLAGS.version:
    print(version.VERSION)
    return 0

  for executable in REQUIRED_EXECUTABLES:
    if not vm_util.ExecutableOnPath(executable):
      logging.error('Could not find required executable "%s".', executable)
      return 1

  if FLAGS.run_uri is None:
    if FLAGS.run_stage not in [STAGE_ALL, STAGE_PREPARE]:
      # Attempt to get the last modified run directory.
      run_uri = vm_util.GetLastRunUri()
      if run_uri:
        FLAGS.run_uri = run_uri
        logging.warning(
            'No run_uri specified. Attempting to run "%s" with --run_uri=%s.',
            FLAGS.run_stage, FLAGS.run_uri)
      else:
        logging.error('No run_uri specified. Could not run "%s".',
                      FLAGS.run_stage)
        return 1
    else:
      FLAGS.run_uri = str(uuid.uuid4())[-8:]
  elif not FLAGS.run_uri.isalnum() or len(FLAGS.run_uri) > MAX_RUN_URI_LENGTH:
    logging.error('run_uri must be alphanumeric and at most %d characters '
                  'in length.', MAX_RUN_URI_LENGTH)
    return 1

  vm_util.GenTempDir()
  log_util.ConfigureLogging(
      stderr_log_level=log_util.LOG_LEVELS[FLAGS.log_level],
      log_path=vm_util.PrependTempDir(LOG_FILE_NAME),
      run_uri=FLAGS.run_uri)
  _LogCommandLineFlags()

  if FLAGS.os_type == benchmark_spec.WINDOWS and not vm_util.RunningOnWindows():
    logging.error('In order to run benchmarks on Windows VMs, you must be '
                  'running on Windows.')
    return 1

  vm_util.SSHKeyGen()
  collector = SampleCollector()
  events.initialization_complete.send(parsed_flags=FLAGS)

  if FLAGS.static_vm_file:
    with open(FLAGS.static_vm_file) as fp:
      static_virtual_machine.StaticVirtualMachine.ReadStaticVirtualMachineFile(
          fp)

  if FLAGS.benchmark_config_pair:
    # Convert benchmark_config_pair into a {benchmark_name: file_name}
    # dictionary.
    tmp_dict = {}
    for config_pair in FLAGS.benchmark_config_pair:
      pair = config_pair.split(':')
      tmp_dict[pair[0]] = pair[1]
    FLAGS.benchmark_config_pair = tmp_dict

  try:
    benchmark_list = benchmark_sets.GetBenchmarksFromFlags()
    total_benchmarks = len(benchmark_list)
    if FLAGS.parallelism > 1:
      sequence_range = range(total_benchmarks, 0, -1)
      args = [((benchmark, collector, sequence_counter, total_benchmarks), {})
              for benchmark, sequence_counter in zip(benchmark_list,
                                                     sequence_range)]
      vm_util.RunThreaded(RunBenchmark, args,
                          max_concurrent_threads=FLAGS.parallelism)
    else:
      sequence_range = range(1, total_benchmarks + 1)
      for benchmark, sequence_counter in zip(benchmark_list, sequence_range):
        RunBenchmark(benchmark, collector, sequence_counter, total_benchmarks)
  finally:
    if collector.samples:
      collector.PublishSamples()

    logging.info('Complete logs can be found at: %s',
                 vm_util.PrependTempDir(LOG_FILE_NAME))

  if FLAGS.run_stage not in [STAGE_ALL, STAGE_CLEANUP]:
    logging.info('To run again with this setup, please use --run_uri=%s',
                 FLAGS.run_uri)
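# Self-contained sketch of the benchmark_config_pair conversion above: each
# 'name:file' string becomes one dictionary entry (values are hypothetical).
_example_pairs = ['iperf:iperf_config.yml', 'fio:fio_config.yml']
_example_config_by_benchmark = {
    p.split(':')[0]: p.split(':')[1] for p in _example_pairs}
# -> {'iperf': 'iperf_config.yml', 'fio': 'fio_config.yml'}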