Example #1
def Install(vm):
    """Installs NVIDIA CUDA Deep Neural Network library."""
    src_path = data.ResourcePath(FLAGS.cudnn)
    dest_path = os.path.join('/tmp', FLAGS.cudnn)
    vm.RemoteCopy(src_path, dest_path)
    vm.RemoteCommand('tar -zxf %s' % dest_path, should_log=True)
    vm.RemoteCommand('sudo cp cuda/lib64/* %s/lib64/' %
                     CUDA_TOOLKIT_INSTALL_DIR)
    vm.RemoteCommand('sudo cp cuda/include/cudnn.h %s/include/' %
                     CUDA_TOOLKIT_INSTALL_DIR)
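In PerfKitBenchmarker, a package module's Install(vm) function like the one above is normally reached through the VM's package mechanism rather than called directly. A minimal hedged sketch of that calling convention follows; the Prepare function and the 'cudnn' package name are assumptions for illustration, not part of the example above.

def Prepare(benchmark_spec):
  """Hedged sketch: installs the cuDNN package on every VM in the benchmark."""
  # vm.Install('cudnn') is assumed to dispatch to the package module's
  # Install(vm), mirroring how 'pip' and other packages are installed in
  # later examples on this page.
  for vm in benchmark_spec.vms:
    vm.Install('cudnn')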
Example #2
def _PrepareWithIsoFile(vm, speccpu_vm_state):
  """Prepares the VM to run using the iso file.

  Copies the iso to the VM, mounts it, and extracts the contents. Copies the
  config file to the VM. Runs the SPEC install.sh script on the VM.

  Args:
    vm: BaseVirtualMachine. Recipient of the iso file.
    speccpu_vm_state: SpecInstallConfigurations. Modified by this function to
        reflect any changes to the VM that may need to be cleaned up.
  """
  scratch_dir = vm.GetScratchDir()

  # Make cpu2006 or cpu2017 directory on the VM.
  vm.RemoteCommand('mkdir {0}'.format(speccpu_vm_state.spec_dir))

  # Copy the iso to the VM.
  local_iso_file_path = data.ResourcePath(speccpu_vm_state.base_iso_file_path)
  vm.PushFile(local_iso_file_path, scratch_dir)

  # Extract files from the iso to the cpu2006 or cpu2017 directory.
  vm.RemoteCommand('mkdir {0}'.format(speccpu_vm_state.mount_dir))
  vm.RemoteCommand('sudo mount -t iso9660 -o loop {0} {1}'.format(
      speccpu_vm_state.iso_file_path, speccpu_vm_state.mount_dir))
  vm.RemoteCommand('cp -r {0}/* {1}'.format(speccpu_vm_state.mount_dir,
                                            speccpu_vm_state.spec_dir))

  # The cpu2017 iso does not come with a config directory or clang.xml.
  if speccpu_vm_state.clang_flag_file_path:
    vm.RemoteCommand('mkdir -p {0}'.format(
        os.path.dirname(speccpu_vm_state.clang_flag_file_path)))
    vm.PushFile(data.ResourcePath(speccpu_vm_state.base_clang_flag_file_path),
                speccpu_vm_state.clang_flag_file_path)

  vm.RemoteCommand('chmod -R 777 {0}'.format(speccpu_vm_state.spec_dir))

  # Copy the cfg to the VM.
  local_cfg_file_path = data.ResourcePath(speccpu_vm_state.runspec_config)
  vm.PushFile(local_cfg_file_path, speccpu_vm_state.cfg_file_path)

  # Run SPEC CPU2006 or 2017 installation.
  install_script_path = posixpath.join(speccpu_vm_state.spec_dir, 'install.sh')
  vm.RobustRemoteCommand('yes | {0}'.format(install_script_path))
def CheckPrerequisites(benchmark_config):
  """Verifies that the required resources are present.

  Args:
   benchmark_config: Unused
  Raises:
    perfkitbenchmarker.data.ResourceNotFound: On missing resource.
  """
  del benchmark_config  # unused
  data.ResourcePath(DATA_FILE)
def CheckPrerequisites(_):
    """Verifies that the required resources are present.

    Raises:
      perfkitbenchmarker.data.ResourceNotFound: On missing resource.
    """
    data.ResourcePath(_BOOT_TEMPLATE)
    data.ResourcePath(_LISTENER_SERVER)
    data.ResourcePath(_CLEAN_UP_TEMPLATE)
    if (FLAGS.cloud == 'Azure' and FLAGS.vms_contact_launcher
            and not _IsLinux()):
        raise errors.Benchmarks.PrepareException(
            'Booting Windows VMs on Azure with a start-up script is not supported. '
            'See https://github.com/Azure/azure-powershell/issues/9600.')
    if FLAGS.vms_contact_launcher and FLAGS.use_public_ip:
        raise errors.Benchmarks.PrepareException(
            'After VMs contact launcher server, launcher will check connectivity '
            'of the VMs using the client address of the curl request. This option '
            'is only applicable when launcher makes the initial contact.')
Example #5
    def Prepare(self, benchmark_name: str) -> None:
        """Prepares the client vm to execute query.

        Installs the bq tool dependencies and authenticates using a service
        account.

        Args:
          benchmark_name: String name of the benchmark, to allow extraction and
            usage of benchmark specific artifacts (certificates, etc.) during
            client vm preparation.
        """
        self.client_vm.Install('pip')
        self.client_vm.RemoteCommand('sudo pip install absl-py')
        self.client_vm.Install('google_cloud_sdk')

        # Push the service account file to the working directory on client vm
        key_file_name = FLAGS.gcp_service_account_key_file.split('/')[-1]
        if '/' in FLAGS.gcp_service_account_key_file:
            self.client_vm.PushFile(FLAGS.gcp_service_account_key_file)
        else:
            self.client_vm.InstallPreprovisionedBenchmarkData(
                benchmark_name, [FLAGS.gcp_service_account_key_file], '')

        # Authenticate using the service account file
        vm_gcloud_path = google_cloud_sdk.GCLOUD_PATH
        activate_cmd = (
            '{} auth activate-service-account {} --key-file={}'.format(
                vm_gcloud_path, FLAGS.gcp_service_account, key_file_name))
        self.client_vm.RemoteCommand(activate_cmd)

        # Push the framework to execute a sql query and gather performance details
        service_specific_dir = os.path.join('edw', Bigquery.SERVICE_TYPE)
        self.client_vm.PushFile(
            data.ResourcePath(
                os.path.join(service_specific_dir, 'script_runner.sh')))
        runner_permission_update_cmd = 'chmod 755 {}'.format(
            'script_runner.sh')
        self.client_vm.RemoteCommand(runner_permission_update_cmd)
        self.client_vm.PushFile(
            data.ResourcePath(os.path.join('edw', 'script_driver.py')))
        self.client_vm.PushFile(
            data.ResourcePath(
                os.path.join(service_specific_dir,
                             'provider_specific_script_driver.py')))
Example #6
def _GetWorkloadFileList():
  """Returns the list of workload files to run.

  Returns:
    In order of preference:
      * The argument to --ycsb_workload_files.
      * Bundled YCSB workloads A and B.
  """
  return [data.ResourcePath(workload)
          for workload in FLAGS.ycsb_workload_files]
def PrepareDataFile(vm):
    """Generate data file on vm to destination directory.

  Args:
    vm: The VM needs data file.
  """
    file_path = data.ResourcePath(DATA_FILE)
    vm.PushFile(file_path, '%s/' % vm.GetScratchDir(0))
    vm.RemoteCommand('cd %s/; bash cloud-storage-workload.sh' %
                     vm.GetScratchDir(0))
def _Install(launcher_vm, booter_template_vm):
  """Installs benchmark scripts and packages on the launcher vm."""
  launcher_vm.InstallCli()
  # Render boot script on launcher server VM(s)
  context = _BuildContext(launcher_vm, booter_template_vm)
  launcher_vm.RenderTemplate(data.ResourcePath(_BOOT_TEMPLATE), _BOOT_PATH,
                             context)
  launcher_vm.RenderTemplate(data.ResourcePath(_STATUS_TEMPLATE), _STATUS_PATH,
                             context)

  # Install and start the listener server on launcher VM(s).
  launcher_vm.InstallPackages('netcat')
  launcher_vm.PushDataFile(_LISTENER_SERVER, _REMOTE_DIR)
  client_port = _SSH_PORT if _IsLinux() else _RDP_PORT
  launcher_vm.RemoteCommand('touch log')
  launcher_vm.RemoteCommand(_GetServerStartCommand(client_port, launcher_vm))
  # Render clean up script on launcher server VM(s).
  launcher_vm.RenderTemplate(data.ResourcePath(_CLEAN_UP_TEMPLATE),
                             _CLEAN_UP_SCRIPT_PATH, context)
Example #9
def _CopyLib(vm):
    # If the cudnn flag was passed on the command line,
    # use that value for the cudnn path. Otherwise, choose
    # an intelligent default given the cuda toolkit version
    # specified.
    cudnn_path = FLAGS.cudnn
    src_path = data.ResourcePath(cudnn_path)
    dest_path = posixpath.join('/tmp', cudnn_path)
    vm.RemoteCopy(src_path, dest_path)
    return dest_path
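A hedged sketch of how the path returned by _CopyLib might then be consumed, mirroring the unpack-and-copy steps of Example #1; the helper name is hypothetical and CUDA_TOOLKIT_INSTALL_DIR is assumed to be available as in that example.

def _InstallFromArchive(vm):  # hypothetical helper name
  """Hedged sketch: unpacks the copied cuDNN archive as in Example #1."""
  dest_path = _CopyLib(vm)
  vm.RemoteCommand('tar -zxf %s' % dest_path, should_log=True)
  vm.RemoteCommand('sudo cp cuda/lib64/* %s/lib64/' % CUDA_TOOLKIT_INSTALL_DIR)
  vm.RemoteCommand('sudo cp cuda/include/cudnn.h %s/include/' %
                   CUDA_TOOLKIT_INSTALL_DIR)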
Example #10
    def PushDataFile(self, data_file):
        """Upload a file in perfkitbenchmarker.data directory to the VM.

    Args:
      data_file: The filename of the file to upload.
    Raises:
      perfkitbenchmarker.data.ResourceNotFound: if 'data_file' does not exist.
    """
        file_path = data.ResourcePath(data_file)
        self.PushFile(file_path)
Example #11
def CreateRenderedManifestFile(filename, config):
    """Returns a file containing a rendered Jinja manifest (.j2) template."""
    manifest_filename = data.ResourcePath(filename)
    environment = jinja2.Environment(undefined=jinja2.StrictUndefined)
    with open(manifest_filename) as manifest_file:
        manifest_template = environment.from_string(manifest_file.read())
    rendered_yaml = tempfile.NamedTemporaryFile(mode='w')
    rendered_yaml.write(manifest_template.render(config))
    rendered_yaml.flush()
    return rendered_yaml
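A hedged usage sketch for CreateRenderedManifestFile; the template name and context keys are hypothetical. The returned NamedTemporaryFile should stay referenced until the manifest has been consumed, because its backing file is removed when the object is closed or garbage collected.

rendered = CreateRenderedManifestFile(
    'container/nginx_deployment.yaml.j2',     # hypothetical .j2 data resource
    {'image': 'nginx:1.25', 'replicas': 3})   # hypothetical template variables
# rendered.name is the local path of the rendered manifest.
print(rendered.name)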
def _LocalDataPath(local_file):
    """Return local data path for given file.

  Args:
    local_file: name of local file to create full data path for

  Returns:
    path of local_file, in the data directory
  """
    return data.ResourcePath(local_file)
def Prepare(benchmark_spec):
  """Installs and sets up dataset on the Spark clusters.

  Copies scripts and all the queries to cloud.
  Creates external Hive tables for data (unless BigQuery is being used).

  Args:
    benchmark_spec: The benchmark specification
  """
  dpb_service_instance = benchmark_spec.dpb_service
  run_uri = benchmark_spec.uuid.split('-')[0]
  dpb_service_instance.CreateBucket(run_uri)

  temp_run_dir = temp_dir.GetRunDirPath()
  spark_sql_perf_dir = os.path.join(temp_run_dir, 'spark_sql_perf_dir')
  vm_util.IssueCommand(['git', 'clone', SPARK_SQL_PERF_GIT, spark_sql_perf_dir])
  vm_util.IssueCommand(['git', 'checkout', SPARK_SQL_PERF_GIT_COMMIT],
                       cwd=spark_sql_perf_dir)
  query_dir = os.path.join(spark_sql_perf_dir, 'src', 'main', 'resources',
                           FLAGS.dpb_sparksql_query)

  storage_service = object_storage_service.GetObjectStorageClass(FLAGS.cloud)()
  dst_url = '{prefix}{uri}'.format(
      prefix=dpb_service_instance.PERSISTENT_FS_PREFIX, uri=run_uri)
  for dir_name, _, files in os.walk(query_dir):
    for filename in files:
      match = re.match(r'q?([0-9]+)a?.sql', filename)
      if match:
        query_id = match.group(1)
        # If an order is specified, only upload those queries.
        if not FLAGS.dpb_sparksql_order or query_id in FLAGS.dpb_sparksql_order:
          query = '{}.sql'.format(query_id)
          src_url = os.path.join(dir_name, filename)
          storage_service.Copy(src_url, os.path.join(dst_url, query))
  for script in [SPARK_TABLE_SCRIPT, SPARK_SQL_RUNNER_SCRIPT]:
    src_url = data.ResourcePath(script)
    storage_service.Copy(src_url, dst_url)
  benchmark_spec.base_dir = dst_url

  # Create external Hive tables if not reading the data from BigQuery
  if FLAGS.dpb_sparksql_data:
    stdout = storage_service.List(FLAGS.dpb_sparksql_data)

    for table_dir in stdout.split('\n'):
      # The directory name is the table name.
      if not table_dir:
        continue
      table = re.split(' |/', table_dir.rstrip('/')).pop()
      stats = dpb_service_instance.SubmitJob(
          pyspark_file=os.path.join(dst_url, SPARK_TABLE_SCRIPT),
          job_type=BaseDpbService.PYSPARK_JOB_TYPE,
          job_arguments=[FLAGS.dpb_sparksql_data, table])
      logging.info(stats)
      if not stats['success']:
        logging.warning('Creating table %s from %s failed', table, table_dir)
def Prepare(benchmark_spec):
    """Installs and sets up dataset on the Spark clusters.

    Copies scripts and all the queries to cloud.
    Creates external Hive tables for data (unless BigQuery is being used).

    Args:
      benchmark_spec: The benchmark specification
    """
    dpb_service_instance = benchmark_spec.dpb_service
    # buckets must start with a letter
    bucket = 'pkb-' + benchmark_spec.uuid.split('-')[0]
    storage_service = dpb_service_instance.storage_service
    storage_service.MakeBucket(bucket)
    benchmark_spec.base_dir = dpb_service_instance.PERSISTENT_FS_PREFIX + bucket

    temp_run_dir = temp_dir.GetRunDirPath()
    spark_sql_perf_dir = os.path.join(temp_run_dir, 'spark_sql_perf_dir')
    vm_util.IssueCommand(
        ['git', 'clone', SPARK_SQL_PERF_GIT, spark_sql_perf_dir])
    vm_util.IssueCommand(['git', 'checkout', SPARK_SQL_PERF_GIT_COMMIT],
                         cwd=spark_sql_perf_dir)
    query_dir = os.path.join(spark_sql_perf_dir, 'src', 'main', 'resources',
                             FLAGS.dpb_sparksql_query)
    for dir_name, _, files in os.walk(query_dir):
        for filename in files:
            match = re.match(r'q?([0-9]+)a?.sql', filename)
            if match:
                query_id = match.group(1)
                # If an order is specified, only upload those queries.
                if not FLAGS.dpb_sparksql_order or query_id in FLAGS.dpb_sparksql_order:
                    query = '{}.sql'.format(query_id)
                    src_url = os.path.join(dir_name, filename)
                    storage_service.CopyToBucket(src_url, bucket, query)
    for script in [SPARK_TABLE_SCRIPT, SPARK_SQL_RUNNER_SCRIPT]:
        src_url = data.ResourcePath(script)
        storage_service.CopyToBucket(src_url, bucket, script)

    # Create external Hive tables if not reading the data from BigQuery
    if FLAGS.dpb_sparksql_data:
        stdout = storage_service.List(FLAGS.dpb_sparksql_data)

        table_subdirs = [
            re.split(' |/', line.rstrip('/')).pop()
            for line in stdout.split('\n') if line
        ]
        stats = dpb_service_instance.SubmitJob(
            pyspark_file=os.path.join(benchmark_spec.base_dir,
                                      SPARK_TABLE_SCRIPT),
            job_type=BaseDpbService.PYSPARK_JOB_TYPE,
            job_arguments=[FLAGS.dpb_sparksql_data + ','.join(table_subdirs)])
        logging.info(stats)
        if not stats['success']:
            raise errors.Benchmarks.PrepareException(
                'Creating tables from {}/* failed'.format(
                    FLAGS.dpb_sparksql_data))
Example #15
def Prepare(benchmark_spec):
    """Installs and sets up dataset on the Spark clusters.

    Copies scripts and all the queries to cloud.
    Creates external Hive tables for data (unless BigQuery is being used).

    Args:
      benchmark_spec: The benchmark specification
    """
    dpb_service_instance = benchmark_spec.dpb_service
    # buckets must start with a letter
    bucket = 'pkb-' + benchmark_spec.uuid.split('-')[0]
    storage_service = dpb_service_instance.storage_service
    storage_service.MakeBucket(bucket)
    benchmark_spec.bucket = bucket
    benchmark_spec.base_dir = dpb_service_instance.PERSISTENT_FS_PREFIX + bucket
    benchmark_spec.staged_queries = _LoadAndStageQueries(
        storage_service, benchmark_spec.base_dir)

    for script in [SPARK_TABLE_SCRIPT, SPARK_SQL_RUNNER_SCRIPT]:
        src_url = data.ResourcePath(script)
        storage_service.CopyToBucket(src_url, bucket, script)

    benchmark_spec.table_subdirs = []
    if FLAGS.dpb_sparksql_data:
        table_dir = FLAGS.dpb_sparksql_data.rstrip('/') + '/'
        stdout = storage_service.List(table_dir)
        for line in stdout.split('\n'):
            # GCS will sometimes list the directory itself.
            if line and line != table_dir:
                benchmark_spec.table_subdirs.append(
                    re.split(' |/', line.rstrip('/')).pop())

        benchmark_spec.data_dir = FLAGS.dpb_sparksql_data
        if FLAGS.dpb_sparksql_copy_to_hdfs:
            benchmark_spec.data_dir = 'hdfs:/tmp/spark_sql/'
            dpb_service_instance.DistributedCopy(table_dir,
                                                 benchmark_spec.data_dir)

    # Create external Hive tables
    if FLAGS.dpb_sparksql_create_hive_tables:
        try:
            result = dpb_service_instance.SubmitJob(
                pyspark_file=os.path.join(benchmark_spec.base_dir,
                                          SPARK_TABLE_SCRIPT),
                job_type=BaseDpbService.PYSPARK_JOB_TYPE,
                job_arguments=[
                    benchmark_spec.data_dir,
                    ','.join(benchmark_spec.table_subdirs)
                ])
            logging.info(result)
        except dpb_service.JobSubmissionError as e:
            raise errors.Benchmarks.PrepareException(
                'Creating tables from {}/* failed'.format(
                    benchmark_spec.data_dir)) from e
Example #16
def _PrepareWithIsoFile(vm, speccpu_vm_state):
    """Prepares the VM to run using the iso file.

    Copies the iso to the VM, mounts it, and extracts the contents. Copies the
    config file to the VM. Runs the SPEC install.sh script on the VM.

    Args:
      vm: BaseVirtualMachine. Recipient of the iso file.
      speccpu_vm_state: _SpecCpu2006SpecificState. Modified by this function to
        reflect any changes to the VM that may need to be cleaned up.
    """
    scratch_dir = vm.GetScratchDir()

    # Make cpu2006 directory on the VM.
    vm.RemoteCommand('mkdir {0}'.format(speccpu_vm_state.spec_dir))

    # Copy the iso to the VM.
    local_iso_file_path = data.ResourcePath(_SPECCPU2006_ISO)
    speccpu_vm_state.iso_file_path = posixpath.join(scratch_dir,
                                                    _SPECCPU2006_ISO)
    vm.PushFile(local_iso_file_path, scratch_dir)

    # Extract files from the iso to the cpu2006 directory.
    speccpu_vm_state.mount_dir = posixpath.join(scratch_dir, _MOUNT_DIR)
    vm.RemoteCommand('mkdir {0}'.format(speccpu_vm_state.mount_dir))
    vm.RemoteCommand('sudo mount -t iso9660 -o loop {0} {1}'.format(
        speccpu_vm_state.iso_file_path, speccpu_vm_state.mount_dir))
    vm.RemoteCommand('cp -r {0}/* {1}'.format(speccpu_vm_state.mount_dir,
                                              speccpu_vm_state.spec_dir))
    vm.RemoteCommand('chmod -R 777 {0}'.format(speccpu_vm_state.spec_dir))

    # Copy the cfg to the VM.
    local_cfg_file_path = data.ResourcePath(FLAGS.runspec_config)
    cfg_file_name = os.path.basename(local_cfg_file_path)
    speccpu_vm_state.cfg_file_path = posixpath.join(speccpu_vm_state.spec_dir,
                                                    'config', cfg_file_name)
    vm.PushFile(local_cfg_file_path, speccpu_vm_state.cfg_file_path)

    # Run SPEC CPU2006 installation.
    install_script_path = posixpath.join(speccpu_vm_state.spec_dir,
                                         'install.sh')
    vm.RobustRemoteCommand('yes | {0}'.format(install_script_path))
def CreateHpccinf(vm: linux_vm.BaseLinuxVirtualMachine,
                  benchmark_spec: bm_spec.BenchmarkSpec) -> HpccDimensions:
    """Creates the HPCC input file."""
    dimensions = _CalculateHpccDimensions(len(benchmark_spec.vms),
                                          vm.NumCpusForBenchmark(),
                                          vm.total_free_memory_kb)
    vm.RemoteCommand(f'rm -f {HPCCINF_FILE}')
    vm.RenderTemplate(data.ResourcePath(LOCAL_HPCCINF_FILE),
                      remote_path=HPCCINF_FILE,
                      context=dataclasses.asdict(dimensions))
    return dimensions
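A hedged illustration of the dataclasses.asdict pattern used above to build the template context; the field names below are made up and are not the real HpccDimensions fields.

import dataclasses


@dataclasses.dataclass
class _ExampleDimensions:  # hypothetical stand-in for HpccDimensions
  problem_size: int
  block_size: int


# asdict() turns the dataclass into the plain dict that RenderTemplate expects
# as its template context.
context = dataclasses.asdict(
    _ExampleDimensions(problem_size=1000, block_size=192))
assert context == {'problem_size': 1000, 'block_size': 192}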
def CheckPrerequisites():
    """Verifies that the required resources are present.

    Raises:
      perfkitbenchmarker.data.ResourceNotFound: On missing resource.
    """
    if FLAGS['num_vms'].present and FLAGS.num_vms < 3:
        raise ValueError('cassandra_ycsb requires at least 3 Cassandra VMs.')
    cassandra.CheckPrerequisites()
    ycsb.CheckPrerequisites()
    data.ResourcePath(CREATE_TABLE_SCRIPT)
Example #19
def _RenderConfig(vm,
                  leader,
                  workers,
                  memory_fraction=SPARK_MEMORY_FRACTION,
                  configure_s3=False):
    """Load Spark Condfiguration on VM."""
    # Use first worker to get worker configuration
    worker = workers[0]
    worker_cores = worker.NumCpusForBenchmark()
    worker_memory_mb = int((worker.total_memory_kb / 1024) * memory_fraction)
    driver_memory_mb = int((leader.total_memory_kb / 1024) * memory_fraction)

    spark_conf = GetConfiguration(driver_memory_mb=driver_memory_mb,
                                  worker_memory_mb=worker_memory_mb,
                                  worker_cores=worker_cores,
                                  num_workers=len(workers),
                                  configure_s3=configure_s3)

    if vm.scratch_disks:
        # TODO(pclay): support multiple scratch disks. A current suboptimal
        # workaround is RAID0 local_ssds with --num_striped_disks.
        scratch_dir = posixpath.join(vm.GetScratchDir(), 'spark')
    else:
        scratch_dir = posixpath.join('/tmp/pkb/local_scratch', 'spark')

    optional_tools = None
    if configure_s3:
        optional_tools = 'hadoop-aws'

    context = {
        'spark_conf': spark_conf,
        'leader_ip': leader.internal_ip,
        'worker_ips': [vm.internal_ip for vm in workers],
        'scratch_dir': scratch_dir,
        'worker_vcpus': worker_cores,
        'spark_private_key': SPARK_PRIVATE_KEY,
        'worker_memory': spark_conf[SPARK_WORKER_MEMORY],
        'hadoop_cmd': hadoop.HADOOP_CMD,
        'python_cmd': 'python3',
        'optional_tools': optional_tools
    }

    for file_name in DATA_FILES:
        file_path = data.ResourcePath(file_name)
        if file_name == 'spark/workers.j2':
            # Spark calls its worker list slaves.
            file_name = 'spark/slaves.j2'
        remote_path = posixpath.join(SPARK_CONF_DIR,
                                     os.path.basename(file_name))
        if file_name.endswith('.j2'):
            vm.RenderTemplate(file_path,
                              os.path.splitext(remote_path)[0], context)
        else:
            vm.RemoteCopy(file_path, remote_path)
Example #20
  def _InstallPostgresServer(self):
    if self.spec.engine_version == POSTGRES_13_VERSION:
      self.server_vm.Install('postgres13')
    else:
      raise UnsupportedError('Only postgres version 13 is currently supported')

    vm = self.server_vm
    version = self.spec.engine_version
    postgres_conf_path = POSTGRES_CONFIG_PATH.format(version)
    postgres_conf_file = postgres_conf_path + POSTGRES_CONFIG
    postgres_hba_conf_file = postgres_conf_path + POSTGRES_HBA_CONFIG
    vm.PushFile(data.ResourcePath(
        posixpath.join(POSTGRES_RESOURCE_PATH, POSTGRES_HBA_CONFIG)))
    vm.RemoteCommand('sudo -u postgres psql postgres -c '
                     '"ALTER USER postgres PASSWORD \'%s\';"'
                     % self.spec.database_password)
    vm.RemoteCommand('sudo -u postgres psql postgres -c '
                     '"CREATE ROLE %s LOGIN SUPERUSER PASSWORD \'%s\';"' %
                     (self.spec.database_username,
                      self.spec.database_password))

    # Change the directory to scratch
    vm.RemoteCommand(
        'sudo sed -i.bak '
        '"s:\'/var/lib/postgresql/{0}/main\':\'{1}/postgresql/{0}/main\':" '
        '/etc/postgresql/{0}/main/postgresql.conf'.format(
            version, self.server_vm.GetScratchDir()))

    # Accept remote connection
    vm.RemoteCommand(
        'sudo sed -i.bak '
        r'"s:\#listen_addresses ='
        ' \'localhost\':listen_addresses = \'*\':" '
        '{}'.format(postgres_conf_file))

    # Set the size of the shared buffer
    vm.RemoteCommand(
        'sudo sed -i.bak "s:shared_buffers = 128MB:shared_buffers = {}GB:" '
        '{}'.format(self.postgres_shared_buffer_size, postgres_conf_file))
    # Update data path to new location
    vm.RemoteCommand('sudo rsync -av /var/lib/postgresql /scratch')

    # Use cat to move files because mv will override file permissions
    self.server_vm.RemoteCommand(
        "sudo bash -c "
        "'cat pg_hba.conf > "
        "{}'".format(postgres_hba_conf_file))

    self.server_vm.RemoteCommand(
        'sudo cat {}'.format(postgres_conf_file))
    self.server_vm.RemoteCommand(
        'sudo cat {}'.format(postgres_hba_conf_file))
    vm.RemoteCommand('sudo systemctl restart postgresql')
def _PrepareSpec(vm):
    """Prepares a SPEC client by copying SPEC to the VM."""
    mount_dir = 'spec_mnt'
    vm.RemoteCommand('mkdir %s' % mount_dir)
    vm.RemoteCommand('mkdir %s' % _SPEC_DIR)
    vm.InstallPreprovisionedBenchmarkData('specsfs2014', [_SPEC_SFS_2014_ISO],
                                          '~/')
    vm.PushFile(data.ResourcePath(_SPEC_SFS_2014_LICENSE), _SPEC_DIR)
    vm.RemoteCommand('sudo mount -t iso9660 -o loop %s %s' %
                     (_SPEC_SFS_2014_ISO, mount_dir))
    vm.RemoteCommand('cp -r %s/* %s' % (mount_dir, _SPEC_DIR))
    vm.RemoteCommand('sudo umount {0} && sudo rm -rf {0}'.format(mount_dir))
Example #22
def _GetWorkloadFileList():
  """Returns the list of workload files to run.

  Returns:
    In order of preference:
      * The argument to --ycsb_workload_files.
      * Bundled YCSB workloads A and B.
  """
  if FLAGS.ycsb_workload_files:
    return FLAGS.ycsb_workload_files
  return [data.ResourcePath(os.path.join('ycsb', workload))
          for workload in ('workloada', 'workloadb')]
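A hedged sketch of how the resolved workload paths might be staged on a VM, using the single-argument PushFile form seen in the PushDataFile examples on this page; the helper name is an assumption.

def _PushWorkloads(vm):  # hypothetical helper name
  """Hedged sketch: copies each resolved workload file to the VM's home dir."""
  for workload_path in _GetWorkloadFileList():
    vm.PushFile(workload_path)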
Example #23
    def PushDataFile(self, data_file, remote_path=''):
        """Upload a file in perfkitbenchmarker.data directory to the VM.

    Args:
      data_file: The filename of the file to upload.
      remote_path: The destination for 'data_file' on the VM. If not specified,
        the file will be placed in the user's home directory.
    Raises:
      perfkitbenchmarker.data.ResourceNotFound: if 'data_file' does not exist.
    """
        file_path = data.ResourcePath(data_file)
        self.PushFile(file_path, remote_path)
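A hedged usage sketch contrasting the two PushDataFile call forms; the data file name is borrowed from another example on this page and the remote directory is an assumption.

def _StageScript(vm):  # hypothetical helper name
  # With no destination, the file lands in the remote user's home directory.
  vm.PushDataFile('cloud-storage-workload.sh')
  # With an explicit destination, it is placed under the given remote path.
  vm.PushDataFile('cloud-storage-workload.sh', '/tmp/run/')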
def PrepareBinaries(vms: List[linux_vm.BaseLinuxVirtualMachine]) -> None:
    """Prepare binaries on all vms."""
    if hpcc.USE_INTEL_COMPILED_HPL.value:
        intelmpi.NfsExportIntelDirectory(vms)
        vm_util.RunThreaded(lambda vm: vm.Install('numactl'), vms)
        return
    headnode_vm = vms[0]
    if FLAGS.hpcc_binary:
        headnode_vm.PushFile(data.ResourcePath(FLAGS.hpcc_binary), './hpcc')
    else:
        headnode_vm.RemoteCommand(f'cp {hpcc.HPCC_DIR}/hpcc hpcc')
    vm_util.RunThreaded(lambda vm: _PrepareBinaries(headnode_vm, vm), vms[1:])
Example #25
def ConfigureAndStart(master, workers, start_yarn=True):
  """Configure hadoop on a cluster.

  Args:
    master: VM. Master VM - will be the HDFS NameNode, YARN ResourceManager.
    workers: List of VMs. Each VM will run an HDFS DataNode, YARN node.
    start_yarn: bool. Start YARN and JobHistory server? Set to False if HDFS is
        the only service required. Default: True.
  """
  vms = [master] + workers
  fn = functools.partial(_RenderConfig, master_ip=master.internal_ip,
                         worker_ips=[worker.internal_ip for worker in workers])
  vm_util.RunThreaded(fn, vms)

  master.RemoteCommand(
      "rm -f {0} && ssh-keygen -q -t rsa -N '' -f {0}".format(
          HADOOP_PRIVATE_KEY))

  public_key = master.RemoteCommand('cat {0}.pub'.format(HADOOP_PRIVATE_KEY))[0]

  def AddKey(vm):
    vm.RemoteCommand('echo "{0}" >> ~/.ssh/authorized_keys'.format(public_key))
  vm_util.RunThreaded(AddKey, vms)

  context = {'hadoop_dir': HADOOP_DIR,
             'vm_ips': [vm.internal_ip for vm in vms],
             'start_yarn': start_yarn}

  # HDFS setup and formatting, YARN startup
  script_path = posixpath.join(HADOOP_DIR, 'start-hadoop.sh')
  master.RenderTemplate(data.ResourcePath(START_HADOOP_SCRIPT),
                        script_path, context=context)
  master.RemoteCommand('bash {0}'.format(script_path), should_log=True)

  logging.info('Sleeping 10s for Hadoop nodes to join.')
  time.sleep(10)

  logging.info('Checking HDFS status.')
  hdfs_online_count = _GetHDFSOnlineNodeCount(master)
  if hdfs_online_count != len(workers):
    raise ValueError('Not all nodes running HDFS: {0} < {1}'.format(
        hdfs_online_count, len(workers)))
  else:
    logging.info('HDFS running on all %d workers', len(workers))

  if start_yarn:
    logging.info('Checking YARN status.')
    yarn_online_count = _GetYARNOnlineNodeCount(master)
    if yarn_online_count != len(workers):
      raise ValueError('Not all nodes running YARN: {0} < {1}'.format(
          yarn_online_count, len(workers)))
    else:
      logging.info('YARN running on all %d workers', len(workers))
Example #26
  def _PostCreate(self):
    """Acquire cluster authentication."""
    cmd = util.GcloudCommand(
        self, 'container', 'clusters', 'get-credentials', self.name)
    env = self._GetRequiredGkeEnv()
    env['KUBECONFIG'] = FLAGS.kubeconfig
    cmd.IssueRetryable(env=env)

    if self.gpu_count:
      kubernetes_helper.CreateFromFile(NVIDIA_DRIVER_SETUP_DAEMON_SET_SCRIPT)
      kubernetes_helper.CreateFromFile(
          data.ResourcePath(NVIDIA_UNRESTRICTED_PERMISSIONS_DAEMON_SET))
Example #27
def _PrepareBucket(benchmark_spec):
    """Prepares the GCS bucket for object storage test.

    First creates a bucket in the specified bucket region. Then populates the
    test bucket with contents of https://github.com/cloudharmony/web-probe as
    well as larger generated test files. The new bucket is saved to later be
    set as the test endpoint for the object storage test.

    Args:
      benchmark_spec: the benchmark specification. Contains all data that is
        required to run the benchmark.
    """
    # set up Google Cloud Service
    service = gcs.GoogleCloudStorageService()
    location = cloud_harmony_util.GetRegionFromZone(ENDPOINT_ZONE.value)
    service.PrepareService(location)
    # create bucket in specified bucket region
    bucket = f'ch-{location}-{FLAGS.run_uri}'
    bucket_uri = f'gs://{bucket}'
    service.MakeBucket(bucket)
    # set default permissions to allow cloudharmony test file access
    perm_cmd = ['gsutil', 'defacl', 'set', 'public-read', bucket_uri]
    vm_util.IssueCommand(perm_cmd)
    # set bucket lifecycle to ensure bucket deletion after 30 days
    lifecycle_config_file = data.ResourcePath(
        'cloudharmony_network_gcp_lifecycle.json')
    lc_cmd = ['gsutil', 'lifecycle', 'set', lifecycle_config_file, bucket_uri]
    vm_util.IssueCommand(lc_cmd)
    # prepare preprovisioned test data
    tmp_dir = vm_util.GetTempDir()
    tmp_probe_file = posixpath.join(tmp_dir,
                                    cloud_harmony_network.WEB_PROBE_TAR)
    wget_install_cmd = ['sudo', 'apt-get', 'install', 'wget']
    vm_util.IssueCommand(wget_install_cmd)
    wget_cmd = ['wget', '-O', tmp_probe_file, cloud_harmony_network.WEB_PROBE]
    vm_util.IssueCommand(wget_cmd)
    tar_cmd = ['tar', 'zxf', tmp_probe_file, '-C', tmp_dir]
    vm_util.IssueCommand(tar_cmd)
    remote_probe_dir = posixpath.join(tmp_dir, 'probe')
    dd_cmd = [
        'dd', 'if=/dev/urandom', f'of={remote_probe_dir}/test10gb.bin',
        'bs=10240', 'count=1048576'
    ]
    vm_util.IssueCommand(dd_cmd)
    # copy preprovisioned test data to test bucket
    src_path = posixpath.join(remote_probe_dir, '*')
    dst_url = f'{bucket_uri}/probe'
    cp_cmd = ['gsutil', 'cp', '-r', src_path, dst_url]
    vm_util.IssueCommand(cp_cmd, raise_on_timeout=False)
    # save the service and the bucket name
    benchmark_spec.service = service
    benchmark_spec.bucket = bucket
Example #28
    def Prepare(self, benchmark_name: str) -> None:
        """Prepares the client vm to execute query.

        Installs the sql server tool dependencies.

        Args:
          benchmark_name: String name of the benchmark, to allow extraction and
            usage of benchmark specific artifacts (certificates, etc.) during
            client vm preparation.
        """
        self.client_vm.Install('pip')
        self.client_vm.RemoteCommand('sudo pip install absl-py')
        self.client_vm.Install('mssql_tools')
        self.whitelist_ip = self.client_vm.ip_address

        cmd = [
            azure.AZURE_PATH, 'sql', 'server', 'firewall-rule', 'create',
            '--name', self.whitelist_ip, '--resource-group',
            self.resource_group, '--server', self.server_name,
            '--end-ip-address', self.whitelist_ip, '--start-ip-address',
            self.whitelist_ip
        ]
        vm_util.IssueCommand(cmd)

        # Push the framework to execute a sql query and gather performance details
        service_specific_dir = os.path.join('edw',
                                            Azuresqldatawarehouse.SERVICE_TYPE)
        self.client_vm.PushFile(
            data.ResourcePath(
                os.path.join(service_specific_dir, 'script_runner.sh')))
        runner_permission_update_cmd = 'chmod 755 {}'.format(
            'script_runner.sh')
        self.client_vm.RemoteCommand(runner_permission_update_cmd)
        self.client_vm.PushFile(
            data.ResourcePath(os.path.join('edw', 'script_driver.py')))
        self.client_vm.PushFile(
            data.ResourcePath(
                os.path.join(service_specific_dir,
                             'provider_specific_script_driver.py')))
def PrepareBinaries(vms):
    """Prepare binaries on all vms."""
    master_vm = vms[0]
    if FLAGS.hpcc_binary:
        master_vm.PushFile(data.ResourcePath(FLAGS.hpcc_binary), './hpcc')
    else:
        master_vm.RemoteCommand('cp %s/hpcc hpcc' % hpcc.HPCC_DIR)

    for vm in vms[1:]:
        vm.Install('fortran')
        master_vm.MoveFile(vm, 'hpcc', 'hpcc')
        master_vm.MoveFile(vm, '/usr/bin/orted', 'orted')
        vm.RemoteCommand('sudo mv orted /usr/bin/orted')
def PrepareVM(vm, service):
    vm.Install('pip')
    vm.RemoteCommand('sudo pip install python-gflags==2.0')
    vm.RemoteCommand('sudo pip install pyyaml')

    vm.Install('openssl')

    # Prepare data on the VM: create a run directory under the temporary
    # directory and open up its permissions.
    vm.RemoteCommand('sudo mkdir -p /tmp/run/')
    vm.RemoteCommand('sudo chmod 777 /tmp/run/')

    vm.RemoteCommand('sudo mkdir -p /tmp/run/temp/')
    vm.RemoteCommand('sudo chmod 777 /tmp/run/temp/')

    file_path = data.ResourcePath(DATA_FILE)
    vm.PushFile(file_path, '/tmp/run/')

    for file_name in API_TEST_SCRIPT_FILES + service.APIScriptFiles():
        path = data.ResourcePath(os.path.join(API_TEST_SCRIPTS_DIR, file_name))
        logging.info('Uploading %s to %s', path, vm)
        vm.PushFile(path, '/tmp/run/')