Example #1
def SetUpPKB():
    """Set globals and environment variables for PKB.

  After SetUpPKB() returns, it should be possible to call PKB
  functions, like benchmark_spec.Prepare() or benchmark_spec.Run().

  SetUpPKB() also modifies the local file system by creating a temp
  directory and storing new SSH keys.
  """
    try:
        _InitializeRunUri()
    except errors.Error as e:
        logging.error(e)
        sys.exit(1)

    # Initialize logging.
    vm_util.GenTempDir()
    if FLAGS.use_pkb_logging:
        log_util.ConfigureLogging(
            stderr_log_level=log_util.LOG_LEVELS[FLAGS.log_level],
            log_path=vm_util.PrependTempDir(LOG_FILE_NAME),
            run_uri=FLAGS.run_uri,
            file_log_level=log_util.LOG_LEVELS[FLAGS.file_log_level])
    logging.info('PerfKitBenchmarker version: %s', version.VERSION)

    # Translate deprecated flags and log all provided flag values.
    disk.WarnAndTranslateDiskFlags()
    _LogCommandLineFlags()

    # Register skip pending runs functionality.
    RegisterSkipPendingRunsCheck(_SkipPendingRunsFile)

    # Check environment.
    if not FLAGS.ignore_package_requirements:
        requirements.CheckBasicRequirements()

    for executable in REQUIRED_EXECUTABLES:
        if not vm_util.ExecutableOnPath(executable):
            raise errors.Setup.MissingExecutableError(
                'Could not find required executable "%s"' % executable)

    # Check mutually exclusive flags
    if FLAGS.run_stage_iterations > 1 and FLAGS.run_stage_time > 0:
        raise errors.Setup.InvalidFlagConfigurationError(
            'Flags run_stage_iterations and run_stage_time are mutually exclusive'
        )

    vm_util.SSHKeyGen()

    if FLAGS.static_vm_file:
        with open(FLAGS.static_vm_file) as fp:
            static_virtual_machine.StaticVirtualMachine.ReadStaticVirtualMachineFile(
                fp)

    events.initialization_complete.send(parsed_flags=FLAGS)

    benchmark_lookup.SetBenchmarkModuleFunction(benchmark_sets.BenchmarkModule)
    package_lookup.SetPackageModuleFunction(benchmark_sets.PackageModule)
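The REQUIRED_EXECUTABLES loop above only asks whether each binary resolves on PATH before any benchmark work starts. A minimal standalone sketch of that check, assuming vm_util.ExecutableOnPath behaves like the standard library's shutil.which, and using an illustrative executable list:

import shutil

# Hypothetical stand-in for vm_util.ExecutableOnPath: resolve a binary on PATH.
def executable_on_path(executable):
    return shutil.which(executable) is not None

# Mirrors the REQUIRED_EXECUTABLES loop in SetUpPKB(); this list is illustrative.
REQUIRED_EXECUTABLES = ('ssh', 'ssh-keygen', 'scp', 'openssl')
for executable in REQUIRED_EXECUTABLES:
    if not executable_on_path(executable):
        raise RuntimeError('Could not find required executable "%s"' % executable)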
Example #2
    def SubmitJob(self,
                  jarfile,
                  classname,
                  job_poll_interval=None,
                  job_arguments=None,
                  job_stdout_file=None,
                  job_type=None):
        """See base class."""

        if job_type == self.BEAM_JOB_TYPE:
            full_cmd, base_dir = beam_benchmark_helper.BuildBeamCommand(
                self.spec, classname, job_arguments)
            stdout, _, retcode = vm_util.IssueCommand(
                full_cmd,
                cwd=base_dir,
                timeout=FLAGS.beam_it_timeout,
                raise_on_failure=False)
            assert retcode == 0, "Integration Test Failed."
            return

        worker_machine_type = self.spec.worker_group.vm_spec.machine_type
        num_workers = self.spec.worker_count
        max_num_workers = self.spec.worker_count
        if self.spec.worker_group.disk_spec and \
                self.spec.worker_group.disk_spec.disk_size:
            disk_size_gb = self.spec.worker_group.disk_spec.disk_size
        elif self.spec.worker_group.vm_spec.boot_disk_size:
            disk_size_gb = self.spec.worker_group.vm_spec.boot_disk_size
        else:
            disk_size_gb = None

        cmd = []

        # Needed to verify java executable is on the path
        dataflow_executable = 'java'
        if not vm_util.ExecutableOnPath(dataflow_executable):
            raise errors.Setup.MissingExecutableError(
                'Could not find required executable "%s"' %
                dataflow_executable)
        cmd.append(dataflow_executable)

        cmd.append('-cp')
        cmd.append(jarfile)

        cmd.append(classname)
        cmd += job_arguments

        cmd.append('--workerMachineType={}'.format(worker_machine_type))
        cmd.append('--numWorkers={}'.format(num_workers))
        cmd.append('--maxNumWorkers={}'.format(max_num_workers))

        if disk_size_gb:
            cmd.append('--diskSizeGb={}'.format(disk_size_gb))
        cmd.append('--defaultWorkerLogLevel={}'.format(FLAGS.dpb_log_level))
        stdout, _, _ = vm_util.IssueCommand(cmd)
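For the non-Beam path, SubmitJob() above assembles a plain java -cp invocation followed by Dataflow pipeline options. A self-contained sketch of that command shape, with the jar path, class name, and worker settings as made-up placeholders:

# Placeholder values; in SubmitJob() these come from self.spec and FLAGS.
jarfile = 'dataflow-benchmark.jar'          # hypothetical jar path
classname = 'org.example.WordCount'         # hypothetical main class
worker_machine_type = 'n1-standard-4'
num_workers = max_num_workers = 2
disk_size_gb = 500

cmd = ['java', '-cp', jarfile, classname]
cmd.append('--workerMachineType={}'.format(worker_machine_type))
cmd.append('--numWorkers={}'.format(num_workers))
cmd.append('--maxNumWorkers={}'.format(max_num_workers))
if disk_size_gb:
    cmd.append('--diskSizeGb={}'.format(disk_size_gb))
cmd.append('--defaultWorkerLogLevel=INFO')   # FLAGS.dpb_log_level stand-in
print(' '.join(cmd))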
Example #3
def _BuildPythonCommand(benchmark_spec, classname, job_arguments):
    """Constructs Gradle command for Python benchmark.

  Python integration tests can be invoked from the Gradle task
  `integrationTest`. The Python Gradle command is constructed
  differently from the Java one. We can use the following system
  properties on the command line:

    -Dtests: fully qualified class/module name of the test to run.
      e.g. apache_beam.examples.wordcount_it_test:WordCountIT
    -Dattr: a set of tests that are annotated by this attribute tag.
    -DpipelineOptions: a set of pipeline options needed to run Beam job

  Args:
    benchmark_spec: The PKB spec for the benchmark to run.
    classname: The fully qualified class/module name of the test to run.
    job_arguments: The additional job arguments provided for the run.

  Returns:
    cmd: Array holding the execution command.
  """

    cmd = []

    gradle_executable = _GetGradleCommand()

    if not vm_util.ExecutableOnPath(gradle_executable):
        raise errors.Setup.MissingExecutableError(
            'Could not find required executable "%s"' % gradle_executable)

    cmd.append(gradle_executable)
    cmd.append('integrationTest')
    cmd.append('-Dtests={}'.format(classname))
    AddModuleArgument(cmd, FLAGS.beam_it_module)
    AddPythonAttributes(cmd, FLAGS.beam_python_attr)

    beam_args = job_arguments if job_arguments else []
    if benchmark_spec.service_type == dpb_service.DATAFLOW:
        beam_args.append('"--runner={}"'.format(FLAGS.beam_runner))

        sdk_location = FLAGS.beam_python_sdk_location
        if not sdk_location:
            tar_list = _FindFiles(_GetBeamPythonDir(),
                                  DEFAULT_PYTHON_TAR_PATTERN)
            if not tar_list:
                raise RuntimeError('No python sdk tar file is available.')
            else:
                sdk_location = tar_list[0]
        beam_args.append('"--sdk_location={}"'.format(sdk_location))
    cmd.append('-DpipelineOptions={}'.format(' '.join(beam_args)))

    cmd.append('--info')
    cmd.append('--scan')

    return cmd
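The -DpipelineOptions system property above carries a single space-joined string of pre-quoted Beam arguments. A small sketch of how that value ends up looking, with placeholder options and a hypothetical Gradle wrapper path standing in for _GetGradleCommand():

# Placeholder job arguments, each pre-quoted as in the builder above.
job_arguments = ['"--project=my-project"', '"--tempLocation=gs://my-bucket/tmp"']
beam_args = list(job_arguments)
beam_args.append('"--runner={}"'.format('TestDataflowRunner'))             # FLAGS.beam_runner stand-in
beam_args.append('"--sdk_location={}"'.format('dist/apache-beam.tar.gz'))  # hypothetical sdk tarball

cmd = ['./gradlew', 'integrationTest',
       '-Dtests=apache_beam.examples.wordcount_it_test:WordCountIT']
cmd.append('-DpipelineOptions={}'.format(' '.join(beam_args)))
print(cmd[-1])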
Example #4
def _BuildPythonCommand(benchmark_spec, modulename, job_arguments):
    """Constructs a Gradle command for the Python benchmark.

  Python integration tests can be invoked from the Gradle task
  `beam-sdks-python:integrationTest`. The Python Gradle command is
  constructed differently from the Java one. In order to run tests,
  we can use the following project properties:

    -Pattr: a nose flag that filters tests by attributes
    -Ptests: a nose flag that filters tests by name
    -PpipelineOptions: a set of pipeline options needed to run Beam job

  Args:
    benchmark_spec: The PKB spec for the benchmark to run.
    modulename: The name of the python module to run.
    job_arguments: The additional job arguments provided for the run.

  Returns:
    cmd: Array containing the built command.
  """

    cmd = []

    gradle_executable = _GetGradleCommand()

    if not vm_util.ExecutableOnPath(gradle_executable):
        raise errors.Setup.MissingExecutableError(
            'Could not find required executable "%s"' % gradle_executable)

    cmd.append(gradle_executable)
    cmd.append('beam-sdks-python:integrationTest')
    cmd.append('-Ptests={}'.format(modulename))
    cmd.append('-Pattr={}'.format(FLAGS.beam_python_attr))

    beam_args = job_arguments if job_arguments else []

    if benchmark_spec.service_type == dpb_service.DATAFLOW:
        beam_args.append('--runner={}'.format(FLAGS.beam_runner))

        sdk_location = FLAGS.beam_python_sdk_location
        if not sdk_location:
            tar_list = _FindFiles(_GetBeamPythonDir(),
                                  DEFAULT_PYTHON_TAR_PATTERN)
            if not tar_list:
                raise RuntimeError('No python sdk tar file is available.')
            else:
                sdk_location = tar_list[0]
        beam_args.append('--sdk_location={}'.format(sdk_location))

    cmd.append('-PpipelineOptions={}'.format(' '.join(beam_args)))
    cmd.append('--info')
    cmd.append('--scan')

    return cmd
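This variant drives the module-qualified `beam-sdks-python:integrationTest` task and passes the nose filters as Gradle project properties (-P) instead of system properties (-D), leaving the pipeline options unquoted. The resulting invocation, sketched with placeholder values, looks roughly like:

# Rough shape of the command this builder returns (all values are placeholders).
cmd = ['./gradlew', 'beam-sdks-python:integrationTest',
       '-Ptests=apache_beam.examples.wordcount_it_test:WordCountIT',
       '-Pattr=IT',
       '-PpipelineOptions=--runner=TestDataflowRunner '
       '--sdk_location=dist/apache-beam.tar.gz',
       '--info', '--scan']
print(cmd)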
Example #5
def _BuildMavenCommand(benchmark_spec, classname, job_arguments):
    """ Constructs a maven command for the benchmark.

  Args:
    benchmark_spec: The PKB spec for the benchmark to run.
    classname: The classname of the class to run.
    job_arguments: The additional job arguments provided for the run.

  Returns:
    cmd: Array containing the built command.
  """
    cmd = []
    maven_executable = FLAGS.maven_binary

    if not vm_util.ExecutableOnPath(maven_executable):
        raise errors.Setup.MissingExecutableError(
            'Could not find required executable "%s"' % maven_executable)
    cmd.append(maven_executable)

    cmd.append('-e')
    cmd.append('verify')
    cmd.append('-Dit.test={}'.format(classname))
    cmd.append('-DskipITs=false')

    if FLAGS.beam_it_module:
        cmd.append('-pl')
        cmd.append(FLAGS.beam_it_module)

    if FLAGS.beam_it_profile:
        cmd.append('-P{}'.format(FLAGS.beam_it_profile))

    beam_args = job_arguments if job_arguments else []

    # Don't add any args when the user overrides beam_runner_profile since it is
    # expected that they know what they are doing and we can't know what args
    # to pass since it differs by runner.
    if (benchmark_spec.service_type == dpb_service.DATAFLOW
            and (FLAGS.beam_runner_profile is not None
                 and len(FLAGS.beam_runner_profile) > 0)):
        beam_args.append('"--defaultWorkerLogLevel={}"'.format(
            FLAGS.dpb_log_level))

    AddRunnerProfileMvnArgument(benchmark_spec.service_type, cmd,
                                FLAGS.beam_runner_profile)
    AddRunnerOptionMvnArgument(benchmark_spec.service_type, beam_args,
                               FLAGS.beam_runner_option)
    AddExtraMvnProperties(cmd, FLAGS.beam_extra_mvn_properties)

    cmd.append("-DintegrationTestPipelineOptions="
               "[{}]".format(','.join(beam_args)))

    return cmd
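Unlike the Gradle builders, the Maven builder packs every pipeline option into one bracketed, comma-joined -DintegrationTestPipelineOptions property. A standalone sketch of that formatting, with placeholder arguments and 'mvn' standing in for FLAGS.maven_binary:

# Placeholder Beam arguments, each pre-quoted as in _BuildMavenCommand() above.
beam_args = ['"--project=my-project"',
             '"--tempLocation=gs://my-bucket/tmp"',
             '"--defaultWorkerLogLevel=INFO"']

cmd = ['mvn', '-e', 'verify', '-Dit.test=org.example.WordCountIT', '-DskipITs=false']
cmd.append('-DintegrationTestPipelineOptions=[{}]'.format(','.join(beam_args)))
print(cmd[-1])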
Example #6
def SetUpPKB():
    """Set globals and environment variables for PKB.

  After SetUpPKB() returns, it should be possible to call PKB
  functions, like benchmark_spec.Prepare() or benchmark_spec.Run().

  SetUpPKB() also modifies the local file system by creating a temp
  directory and storing new SSH keys.
  """
    if not FLAGS.ignore_package_requirements:
        requirements.CheckBasicRequirements()

    for executable in REQUIRED_EXECUTABLES:
        if not vm_util.ExecutableOnPath(executable):
            raise errors.Setup.MissingExecutableError(
                'Could not find required executable "%s"' % executable)

    if FLAGS.run_uri is None:
        if stages.PROVISION in FLAGS.run_stage:
            FLAGS.run_uri = str(uuid.uuid4())[-8:]
        else:
            # Attempt to get the last modified run directory.
            run_uri = vm_util.GetLastRunUri()
            if run_uri:
                FLAGS.run_uri = run_uri
                logging.warning(
                    'No run_uri specified. Attempting to run the following stages with '
                    '--run_uri=%s: %s', FLAGS.run_uri,
                    ', '.join(FLAGS.run_stage))
            else:
                raise errors.Setup.NoRunURIError(
                    'No run_uri specified. Could not run the following stages: %s'
                    % ', '.join(FLAGS.run_stage))
    elif not FLAGS.run_uri.isalnum() or len(
            FLAGS.run_uri) > MAX_RUN_URI_LENGTH:
        raise errors.Setup.BadRunURIError(
            'run_uri must be alphanumeric and less '
            'than or equal to 8 characters in '
            'length.')

    vm_util.GenTempDir()
    log_util.ConfigureLogging(
        stderr_log_level=log_util.LOG_LEVELS[FLAGS.log_level],
        log_path=vm_util.PrependTempDir(LOG_FILE_NAME),
        run_uri=FLAGS.run_uri,
        file_log_level=log_util.LOG_LEVELS[FLAGS.file_log_level])
    logging.info('PerfKitBenchmarker version: %s', version.VERSION)

    vm_util.SSHKeyGen()

    events.initialization_complete.send(parsed_flags=FLAGS)
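The run_uri handling above either reuses the most recently modified run directory or mints a new identifier from a UUID, then enforces the alphanumeric/length constraint. A self-contained sketch of that generation and validation, assuming MAX_RUN_URI_LENGTH is 8 as the error text in this example suggests:

import uuid

MAX_RUN_URI_LENGTH = 8  # assumption based on the error message above

def new_run_uri():
    # Last 8 hex characters of a random UUID, as in SetUpPKB().
    return str(uuid.uuid4())[-8:]

def validate_run_uri(run_uri):
    if not run_uri.isalnum() or len(run_uri) > MAX_RUN_URI_LENGTH:
        raise ValueError('run_uri must be alphanumeric and at most '
                         '%d characters long.' % MAX_RUN_URI_LENGTH)

run_uri = new_run_uri()
validate_run_uri(run_uri)
print(run_uri)  # e.g. 'a1b2c3d4'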
Example #7
def SetUpPKB():
    """Set globals and environment variables for PKB.

  After SetUpPKB() returns, it should be possible to call PKB
  functions, like benchmark_spec.Prepare() or benchmark_spec.Run().

  SetUpPKB() also modifies the local file system by creating a temp
  directory and storing new SSH keys.
  """
    try:
        _InitializeRunUri()
    except errors.Error as e:
        logging.error(e)
        sys.exit(1)

    # Initialize logging.
    vm_util.GenTempDir()
    log_util.ConfigureLogging(
        stderr_log_level=log_util.LOG_LEVELS[FLAGS.log_level],
        log_path=vm_util.PrependTempDir(LOG_FILE_NAME),
        run_uri=FLAGS.run_uri,
        file_log_level=log_util.LOG_LEVELS[FLAGS.file_log_level])
    logging.info('PerfKitBenchmarker version: %s', version.VERSION)

    # Translate deprecated flags and log all provided flag values.
    disk.WarnAndTranslateDiskFlags()
    _LogCommandLineFlags()

    # Check environment.
    if not FLAGS.ignore_package_requirements:
        requirements.CheckBasicRequirements()

    if FLAGS.os_type == os_types.WINDOWS and not vm_util.RunningOnWindows():
        logging.error('In order to run benchmarks on Windows VMs, you must be '
                      'running on Windows.')
        sys.exit(1)

    for executable in REQUIRED_EXECUTABLES:
        if not vm_util.ExecutableOnPath(executable):
            raise errors.Setup.MissingExecutableError(
                'Could not find required executable "%s"' % executable)

    vm_util.SSHKeyGen()

    if FLAGS.static_vm_file:
        with open(FLAGS.static_vm_file) as fp:
            static_virtual_machine.StaticVirtualMachine.ReadStaticVirtualMachineFile(
                fp)

    events.initialization_complete.send(parsed_flags=FLAGS)
Example #8
def BuildMavenCommand(benchmark_spec, classname, job_arguments):
  """ Constructs a maven command for the benchmark.

  Args:
    benchmark_spec: The PKB spec for the benchmark to run.
    classname: The classname of the class to run.
    job_arguments: The additional job arguments provided for the run.

  Returns:
    cmd: Array containing the built command.
    beam_dir: The directory in which to run the command.
  """
  if benchmark_spec.service_type not in SUPPORTED_RUNNERS:
    raise NotImplementedError('Unsupported Runner')

  cmd = []
  maven_executable = FLAGS.maven_binary

  if not vm_util.ExecutableOnPath(maven_executable):
    raise errors.Setup.MissingExecutableError(
        'Could not find required executable "%s"' % maven_executable)
  cmd.append(maven_executable)

  cmd.append('-e')
  cmd.append('verify')
  cmd.append('-Dit.test={}'.format(classname))
  cmd.append('-DskipITs=false')

  if FLAGS.beam_it_module:
    cmd.append('-pl')
    cmd.append(FLAGS.beam_it_module)

  if FLAGS.beam_it_profile:
    cmd.append('-P{}'.format(FLAGS.beam_it_profile))

  beam_args = job_arguments if job_arguments else []

  if benchmark_spec.service_type == dpb_service.DATAFLOW:
    cmd.append('-P{}'.format('dataflow-runner'))
    beam_args.append('"--runner=org.apache.beam.runners.'
                     'dataflow.testing.TestDataflowRunner"')
    beam_args.append('"--defaultWorkerLogLevel={}"'.format(FLAGS.dpb_log_level))

  cmd.append("-DintegrationTestPipelineOptions="
             "[{}]".format(','.join(beam_args)))

  # TODO: This is temporary, find a better way.
  beam_dir = FLAGS.beam_location if FLAGS.beam_location else os.path.join(
      vm_util.GetTempDir(), 'beam')
  return cmd, beam_dir
Example #9
    def SubmitJob(self,
                  jarfile,
                  classname,
                  job_poll_interval=None,
                  job_arguments=None,
                  job_stdout_file=None,
                  job_type=None):
        """See base class."""
        worker_machine_type = self.spec.worker_group.vm_spec.machine_type
        num_workers = self.spec.worker_count
        max_num_workers = self.spec.worker_count
        if self.spec.worker_group.disk_spec and \
                self.spec.worker_group.disk_spec.disk_size:
            disk_size_gb = self.spec.worker_group.disk_spec.disk_size
        elif self.spec.worker_group.vm_spec.boot_disk_size:
            disk_size_gb = self.spec.worker_group.vm_spec.boot_disk_size
        else:
            disk_size_gb = None

        cmd = []

        # Needed to verify java executable is on the path
        dataflow_executable = 'java'
        if not vm_util.ExecutableOnPath(dataflow_executable):
            raise errors.Setup.MissingExecutableError(
                'Could not find required executable "%s"' %
                dataflow_executable)
        cmd.append(dataflow_executable)

        cmd.append('-cp')
        cmd.append(jarfile)

        cmd.append(classname)
        cmd += job_arguments

        cmd.append('--workerMachineType={}'.format(worker_machine_type))
        cmd.append('--numWorkers={}'.format(num_workers))
        cmd.append('--maxNumWorkers={}'.format(max_num_workers))

        if disk_size_gb:
            cmd.append('--diskSizeGb={}'.format(disk_size_gb))
        cmd.append('--defaultWorkerLogLevel={}'.format(FLAGS.dpb_log_level))
        stdout, _, _ = vm_util.IssueCommand(cmd)
Example #10
def _BuildGradleCommand(classname, job_arguments):
    """ Constructs a Gradle command for the benchmark.

  Args:
    classname: The classname of the class to run.
    job_arguments: The additional job arguments provided for the run.

  Returns:
    cmd: Array containing the built command.
  """
    cmd = []

    gradle_executable = _GetGradleCommand()

    if not vm_util.ExecutableOnPath(gradle_executable):
        raise errors.Setup.MissingExecutableError(
            'Could not find required executable "%s"' % gradle_executable)

    cmd.append(gradle_executable)
    cmd.append('integrationTest')
    cmd.append('--tests={}'.format(classname))

    beam_args = job_arguments if job_arguments else []

    AddModuleArgument(cmd, FLAGS.beam_it_module)
    AddRunnerArgument(cmd, FLAGS.beam_runner)
    AddRunnerPipelineOption(beam_args, FLAGS.beam_runner,
                            FLAGS.beam_runner_option)
    AddFilesystemArgument(cmd, FLAGS.beam_filesystem)
    AddExtraProperties(cmd, FLAGS.beam_extra_properties)

    cmd.append('-DintegrationTestPipelineOptions='
               '[{}]'.format(','.join(beam_args)))

    cmd.append('--stacktrace')
    cmd.append('--info')
    cmd.append('--scan')

    return cmd
Example #11
def _BuildPythonCommand(benchmark_spec, modulename, job_arguments):
    """ Constructs a Python command for the benchmark.

  Args:
    benchmark_spec: The PKB spec for the benchmark to run.
    modulename: The name of the python module to run.
    job_arguments: The additional job arguments provided for the run.

  Returns:
    cmd: Array containing the built command.
  """

    cmd = []

    python_executable = FLAGS.python_binary
    if not vm_util.ExecutableOnPath(python_executable):
        raise errors.Setup.MissingExecutableError(
            'Could not find required executable "%s"' % python_executable)
    cmd.append(python_executable)

    cmd.append('setup.py')
    cmd.append('nosetests')
    cmd.append('--tests={}'.format(modulename))
    cmd.append('--attr={}'.format(FLAGS.beam_python_attr))

    beam_args = job_arguments if job_arguments else []

    if benchmark_spec.service_type == dpb_service.DATAFLOW:
        python_binary = _FindFiles(os.path.join(_GetBeamPythonDir(), 'target'),
                                   'apache-beam*.tar.gz')
        if len(python_binary) == 0:
            raise RuntimeError('No python binary is found')

        beam_args.append('--runner=TestDataflowRunner')
        beam_args.append('--sdk_location={}'.format(python_binary[0]))

    cmd.append('--test-pipeline-options={}'.format(' '.join(beam_args)))

    return cmd
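Here the pipeline options reach nose through a single --test-pipeline-options flag whose value is the space-joined (and, in this variant, unquoted) argument list. A rough sketch of the assembled command with placeholder values:

# Placeholder values; the real builder reads FLAGS and the benchmark spec.
beam_args = ['--runner=TestDataflowRunner',
             '--sdk_location=target/apache-beam-2.0.0.tar.gz']  # hypothetical tarball path

cmd = ['python', 'setup.py', 'nosetests',
       '--tests=apache_beam.examples.wordcount_it_test:WordCountIT',
       '--attr=IT',
       '--test-pipeline-options={}'.format(' '.join(beam_args))]
print(' '.join(cmd))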
Example #12
def RunBenchmarks(publish=True):
    """Runs all benchmarks in PerfKitBenchmarker.

  Args:
    publish: A boolean indicating whether results should be published.

  Returns:
    Exit status for the process.
  """
    if FLAGS.version:
        print(version.VERSION)
        return

    for executable in REQUIRED_EXECUTABLES:
        if not vm_util.ExecutableOnPath(executable):
            logging.error('Could not find required executable "%s".' %
                          executable)
            return 1

    if FLAGS.run_uri is None:
        if FLAGS.run_stage not in [STAGE_ALL, STAGE_PREPARE]:
            # Attempt to get the last modified run directory.
            run_uri = vm_util.GetLastRunUri()
            if run_uri:
                FLAGS.run_uri = run_uri
                logging.warning(
                    'No run_uri specified. Attempting to run "%s" with --run_uri=%s.',
                    FLAGS.run_stage, FLAGS.run_uri)
            else:
                logging.error('No run_uri specified. Could not run "%s".',
                              FLAGS.run_stage)
                return 1
        else:
            FLAGS.run_uri = str(uuid.uuid4())[-8:]
    elif not FLAGS.run_uri.isalnum() or len(
            FLAGS.run_uri) > MAX_RUN_URI_LENGTH:
        logging.error('run_uri must be alphanumeric and less than or equal '
                      'to 10 characters in length.')
        return 1

    vm_util.GenTempDir()
    log_util.ConfigureLogging(
        stderr_log_level=log_util.LOG_LEVELS[FLAGS.log_level],
        log_path=vm_util.PrependTempDir(LOG_FILE_NAME),
        run_uri=FLAGS.run_uri)
    _LogCommandLineFlags()

    if (FLAGS.os_type == benchmark_spec.WINDOWS
            and not vm_util.RunningOnWindows()):
        logging.error('In order to run benchmarks on Windows VMs, you must be '
                      'running on Windows.')
        return 1

    vm_util.SSHKeyGen()
    collector = SampleCollector()
    events.initialization_complete.send(parsed_flags=FLAGS)

    if FLAGS.static_vm_file:
        with open(FLAGS.static_vm_file) as fp:
            static_virtual_machine.StaticVirtualMachine.ReadStaticVirtualMachineFile(
                fp)

    if FLAGS.benchmark_config_pair:
        # Convert benchmark_config_pair into a {benchmark_name: file_name}
        # dictionary.
        tmp_dict = {}
        for config_pair in FLAGS.benchmark_config_pair:
            pair = config_pair.split(':')
            tmp_dict[pair[0]] = pair[1]
        FLAGS.benchmark_config_pair = tmp_dict

    try:
        benchmark_list = benchmark_sets.GetBenchmarksFromFlags()
        total_benchmarks = len(benchmark_list)
        if FLAGS.parallelism > 1:
            sequence_range = range(total_benchmarks, 0, -1)
            args = [((benchmark, collector, sequence_counter,
                      total_benchmarks), {})
                    for benchmark, sequence_counter in zip(
                        benchmark_list, sequence_range)]
            vm_util.RunThreaded(RunBenchmark,
                                args,
                                max_concurrent_threads=FLAGS.parallelism)
        else:
            sequence_range = range(1, total_benchmarks + 1)
            for benchmark, sequence_counter in zip(benchmark_list,
                                                   sequence_range):
                RunBenchmark(benchmark, collector, sequence_counter,
                             total_benchmarks)
    finally:
        if collector.samples:
            collector.PublishSamples()

        logging.info('Complete logs can be found at: %s',
                     vm_util.PrependTempDir(LOG_FILE_NAME))

    if FLAGS.run_stage not in [STAGE_ALL, STAGE_CLEANUP]:
        logging.info('To run again with this setup, please use --run_uri=%s',
                     FLAGS.run_uri)