Example 1
 def List(self, buckets):
     """See base class."""
     stdout, _, _ = vm_util.IssueCommand(
         ['aws', 's3', 'ls', buckets, '--region', self.region])
     return stdout
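The raw listing returned here is consumed line by line by callers; Example 9 below does exactly that when it discovers table subdirectories. A minimal sketch of that pattern, assuming only the List interface shown above (the storage_service variable and bucket path are illustrative):

import re

# Illustrative only: `storage_service` stands for an instance of the class
# defining List; the prefix value is a made-up example.
table_dir = 's3://example-bucket/tables/'
subdirs = []
for line in storage_service.List(table_dir).split('\n'):
  # Some services list the directory itself; skip it, as Example 9 does.
  if line and line != table_dir:
    subdirs.append(re.split(' |/', line.rstrip('/')).pop())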
Example 2
 def testNoTimeout(self):
     _, _, retcode = vm_util.IssueCommand(['sleep', '2s'], timeout=None)
     self.assertEqual(retcode, 0)
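Together with Example 25 further down, these tests pin down the contract the other snippets rely on: IssueCommand returns a (stdout, stderr, retcode) tuple, timeout=None disables the time limit, and a command killed at the timeout reports retcode -9. Below is a minimal sketch built on that contract; it assumes IssueCommand accepts both the timeout and raise_on_failure keywords seen elsewhere in these examples, and the helper name is illustrative:

from perfkitbenchmarker import vm_util

def RunWithDeadline(cmd, timeout_secs):
  """Sketch: returns True if cmd exits 0 before the deadline, else False."""
  # timeout_secs=None waits indefinitely, as in testNoTimeout above.
  # raise_on_failure=False mirrors Examples 21 and 29, where a non-zero exit
  # is tolerated instead of raising.
  _, _, retcode = vm_util.IssueCommand(
      cmd, timeout=timeout_secs, raise_on_failure=False)
  # A command killed at the timeout reports retcode -9 (see testTimeoutReached
  # in Example 25).
  return retcode == 0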
Example 3
 def _AddTag(self, key, value):
     cmd = self.cmd_prefix + [
         'emr', 'add-tags', '--resource-id', self.cluster_id, '--tag',
         '{}={}'.format(key, value)
     ]
     vm_util.IssueCommand(cmd)
Example 4
 def Copy(self, src_url, dst_url):
   """See base class."""
   vm_util.IssueCommand(['gsutil', 'cp', src_url, dst_url])
Example 5
 def List(self, buckets):
   """See base class."""
   stdout, _, _ = vm_util.IssueCommand(['gsutil', 'ls', buckets])
   return stdout
Example 6
 def _Delete(self):
     vm_util.IssueCommand(
         [azure.AZURE_PATH, 'group', 'delete',
          '--quiet',
          self.name])
Example 7
    def SubmitJob(self,
                  jarfile=None,
                  classname=None,
                  pyspark_file=None,
                  query_file=None,
                  job_poll_interval=5,
                  job_stdout_file=None,
                  job_arguments=None,
                  job_files=None,
                  job_jars=None,
                  job_type=None,
                  properties=None):
        """See base class."""
        assert job_type

        # Create job definition
        job_name = f'{self.cluster_id}-{self._job_counter}'
        self._job_counter += 1
        glue_command = {}
        glue_default_args = {}
        if job_type == self.PYSPARK_JOB_TYPE:
            glue_command = {
                'Name': 'glueetl',
                'ScriptLocation': self._glue_script_wrapper_url,
            }
            all_properties = self.GetJobProperties()
            if properties:
                all_properties.update(properties)
            glue_default_args = {
                '--extra-py-files': pyspark_file,
                **all_properties
            }
        else:
            raise ValueError(f'Unsupported job type {job_type} for AWS Glue.')
        vm_util.IssueCommand(self.cmd_prefix + [
            'glue',
            'create-job',
            '--name',
            job_name,
            '--role',
            self.role,
            '--command',
            json.dumps(glue_command),
            '--default-arguments',
            json.dumps(glue_default_args),
            '--glue-version',
            self.dpb_version,
            '--number-of-workers',
            str(self.spec.worker_count),
            '--worker-type',
            self.spec.worker_group.vm_spec.machine_type,
        ])

        # Run job definition
        stdout, _, _ = vm_util.IssueCommand(self.cmd_prefix + [
            'glue', 'start-job-run', '--job-name', job_name, '--arguments',
            json.dumps({
                '--pkb_main': _ModuleFromPyFilename(pyspark_file),
                '--pkb_args': json.dumps(job_arguments)
            })
        ])
        job_run_id = json.loads(stdout)['JobRunId']

        return self._WaitForJob((job_name, job_run_id), GLUE_TIMEOUT,
                                job_poll_interval)
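For orientation, here is a hedged sketch of how a benchmark might drive this Glue-backed SubmitJob; the glue_service variable and the S3 path are illustrative assumptions, and Example 9 below shows an actual call site in the Spark SQL benchmark:

# Illustrative only: `glue_service` stands for an instance of this class and
# the script URL is a made-up example.
result = glue_service.SubmitJob(
    pyspark_file='s3://example-bucket/spark_sql_runner.py',
    job_type=glue_service.PYSPARK_JOB_TYPE,
    # Forwarded to the wrapper script through the --pkb_args JSON blob above.
    job_arguments=['--some-flag', 'value'])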
Example 8
 def EmptyBucket(self, bucket):
     vm_util.IssueCommand([
         'aws', 's3', 'rm',
         's3://%s' % bucket, '--region', self.region, '--recursive'
     ])
Example 9
def Prepare(benchmark_spec):
    """Installs and sets up dataset on the Spark clusters.

  Copies scripts and all the queries to cloud.
  Creates external Hive tables for data (unless BigQuery is being used).

  Args:
    benchmark_spec: The benchmark specification
  """
    dpb_service_instance = benchmark_spec.dpb_service
    # buckets must start with a letter
    bucket = 'pkb-' + benchmark_spec.uuid.split('-')[0]
    storage_service = dpb_service_instance.storage_service
    storage_service.MakeBucket(bucket)
    benchmark_spec.base_dir = dpb_service_instance.PERSISTENT_FS_PREFIX + bucket

    temp_run_dir = temp_dir.GetRunDirPath()
    spark_sql_perf_dir = os.path.join(temp_run_dir, 'spark_sql_perf_dir')
    vm_util.IssueCommand(
        ['git', 'clone', SPARK_SQL_PERF_GIT, spark_sql_perf_dir])
    vm_util.IssueCommand(['git', 'checkout', SPARK_SQL_PERF_GIT_COMMIT],
                         cwd=spark_sql_perf_dir)
    benchmark_spec.queries = []
    query_dir = os.path.join(spark_sql_perf_dir, 'src', 'main', 'resources',
                             FLAGS.dpb_sparksql_query)
    for dir_name, _, files in os.walk(query_dir):
        for filename in files:
            match = re.match(r'q?([0-9]+)a?.sql', filename)
            if match:
                query_id = match.group(1)
                # If an order is specified, only upload those queries
                if not FLAGS.dpb_sparksql_order or query_id in FLAGS.dpb_sparksql_order:
                    benchmark_spec.queries.append(query_id)
                    query = '{}.sql'.format(query_id)
                    src_url = os.path.join(dir_name, filename)
                    storage_service.CopyToBucket(src_url, bucket, query)
    if not benchmark_spec.queries:
        raise errors.Benchmarks.PrepareException('No queries were staged')

    for script in [SPARK_TABLE_SCRIPT, SPARK_SQL_RUNNER_SCRIPT]:
        src_url = data.ResourcePath(script)
        storage_service.CopyToBucket(src_url, bucket, script)

    benchmark_spec.table_subdirs = []
    if FLAGS.dpb_sparksql_data:
        table_dir = FLAGS.dpb_sparksql_data.rstrip('/') + '/'
        stdout = storage_service.List(table_dir)
        for line in stdout.split('\n'):
            # GCS will sometimes list the directory itself.
            if line and line != table_dir:
                benchmark_spec.table_subdirs.append(
                    re.split(' |/', line.rstrip('/')).pop())

    # Create external Hive tables
    if FLAGS.dpb_sparksql_create_hive_tables:
        try:
            result = dpb_service_instance.SubmitJob(
                pyspark_file=os.path.join(benchmark_spec.base_dir,
                                          SPARK_TABLE_SCRIPT),
                job_type=BaseDpbService.PYSPARK_JOB_TYPE,
                job_arguments=[
                    FLAGS.dpb_sparksql_data,
                    ','.join(benchmark_spec.table_subdirs)
                ])
            logging.info(result)
        except dpb_service.JobSubmissionError as e:
            raise errors.Benchmarks.PrepareException(
                'Creating tables from {}/* failed'.format(
                    FLAGS.dpb_sparksql_data)) from e
Example 10
 def MakeBucket(self, bucket_name):
     vm_util.IssueCommand([
         'aws', 's3', 'mb',
         's3://%s' % bucket_name,
         '--region=%s' % self.region
     ])
Example 11
 def DeleteBucket(self, bucket):
     vm_util.IssueCommand([
         'aws', 's3', 'rb',
         's3://%s' % bucket, '--region', self.region, '--force'
     ])  # --force deletes even if bucket contains objects.
Example 12
 def _Delete(self):
     cmd = self.cmd_prefix + [
         'ec2', 'delete-security-group', '--group-id=' + self.group_id
     ]
     vm_util.IssueCommand(cmd)
Example 13
 def CreateBucket(self, source_bucket):
     mb_cmd = self.cmd_prefix + ['s3', 'mb', source_bucket]
     stdout, _, _ = vm_util.IssueCommand(mb_cmd)
Example 14
    def _Create(self):
        """Creates the cluster."""
        name = 'pkb_' + FLAGS.run_uri

        # Set up ebs details if disk_spec is present in the config
        ebs_configuration = None
        if self.spec.worker_group.disk_spec:
            # Make sure nothing we are ignoring is included in the disk spec
            assert self.spec.worker_group.disk_spec.device_path is None
            assert self.spec.worker_group.disk_spec.disk_number is None
            assert self.spec.worker_group.disk_spec.mount_point is None
            assert self.spec.worker_group.disk_spec.iops is None
            ebs_configuration = {
                'EbsBlockDeviceConfigs': [{
                    'VolumeSpecification': {
                        'SizeInGB': self.spec.worker_group.disk_spec.disk_size,
                        'VolumeType':
                        self.spec.worker_group.disk_spec.disk_type
                    },
                    'VolumesPerInstance':
                    self.spec.worker_group.disk_spec.num_striped_disks
                }]
            }

        # Create the specification for the master and the worker nodes
        instance_groups = []
        core_instances = {
            'InstanceCount': self.spec.worker_count,
            'InstanceGroupType': 'CORE',
            'InstanceType': self.spec.worker_group.vm_spec.machine_type
        }
        if ebs_configuration:
            core_instances.update({'EbsConfiguration': ebs_configuration})

        master_instance = {
            'InstanceCount': 1,
            'InstanceGroupType': 'MASTER',
            'InstanceType': self.spec.worker_group.vm_spec.machine_type
        }
        if ebs_configuration:
            master_instance.update({'EbsConfiguration': ebs_configuration})

        instance_groups.append(core_instances)
        instance_groups.append(master_instance)

        # Create the log bucket to hold the job's log output
        logs_bucket = FLAGS.aws_emr_loguri or self._CreateLogBucket()

        cmd = self.cmd_prefix + [
            'emr', 'create-cluster', '--name', name, '--release-label',
            self.emr_release_label, '--use-default-roles', '--instance-groups',
            json.dumps(instance_groups), '--application', 'Name=Spark',
            'Name=Hadoop', '--log-uri', logs_bucket
        ]

        if self.network:
            cmd += ['--ec2-attributes', 'SubnetId=' + self.network.subnet.id]

        stdout, _, _ = vm_util.IssueCommand(cmd)
        result = json.loads(stdout)
        self.cluster_id = result['ClusterId']
        logging.info('Cluster created with id %s', self.cluster_id)
        for tag_key, tag_value in util.MakeDefaultTags().items():
            self._AddTag(tag_key, tag_value)
Example 15
 def _Create(self):
     vm_util.IssueCommand(
         [azure.AZURE_PATH, 'network', 'nsg', 'create',
          '--location', self.location,
          self.name] + self.resource_group.args)
Example 16
    def Prepare(self, vm):
        """Prepares the DB and everything for the AWS-RDS provider.

    Args:
      vm: The VM to be used as the test client.

    """
        logging.info('Preparing MySQL Service benchmarks for RDS.')

        # TODO: Refactor the RDS DB instance creation and deletion logic out
        # to a new class called RDSDBInstance that inherits from
        # perfkitbenchmarker.resource.BaseResource, and do the same for GCP.

        # The first step is to create another subnet in the same VPC as the VM
        # but in a different zone. RDS requires two subnets in two different
        # zones to create a DB instance, EVEN IF you do not specify multi-AZ in
        # your DB creation request.

        # Get a list of zones and pick one that's different from the zone the VM is in.
        new_subnet_zone = None
        get_zones_cmd = util.AWS_PREFIX + [
            'ec2', 'describe-availability-zones'
        ]
        stdout, _, _ = vm_util.IssueCommand(get_zones_cmd)
        response = json.loads(stdout)
        all_zones = response['AvailabilityZones']
        for zone in all_zones:
            if zone['ZoneName'] != vm.zone:
                new_subnet_zone = zone['ZoneName']
                break

        if new_subnet_zone is None:
            raise DBStatusQueryError(
                'Cannot find a zone to create the required '
                'second subnet for the DB instance.')

        # Now create a new subnet in the zone that's different from where the VM is
        logging.info('Creating a second subnet in zone %s', new_subnet_zone)
        new_subnet = aws_network.AwsSubnet(new_subnet_zone, vm.network.vpc.id,
                                           '10.0.1.0/24')
        new_subnet.Create()
        logging.info('Successfully created a new subnet, subnet id is: %s',
                     new_subnet.id)
        # Remember this so we can cleanup properly.
        vm.extra_subnet_for_db = new_subnet

        # Now we can create a new DB subnet group that has two subnets in it.
        db_subnet_group_name = 'pkb%s' % FLAGS.run_uri
        create_db_subnet_group_cmd = util.AWS_PREFIX + [
            'rds', 'create-db-subnet-group', '--db-subnet-group-name',
            db_subnet_group_name, '--db-subnet-group-description',
            'pkb_subnet_group_for_db', '--subnet-ids', vm.network.subnet.id,
            new_subnet.id
        ]
        stdout, stderr, _ = vm_util.IssueCommand(create_db_subnet_group_cmd)
        logging.info(
            'Created a DB subnet group, stdout is:\n%s\nstderr is:\n%s',
            stdout, stderr)
        vm.db_subnet_group_name = db_subnet_group_name

        # Open up TCP port 3306 in the VPC's security group; we need that to
        # connect to the DB.
        open_port_cmd = util.AWS_PREFIX + [
            'ec2', 'authorize-security-group-ingress', '--group-id',
            vm.group_id, '--source-group', vm.group_id, '--protocol', 'tcp',
            '--port', MYSQL_PORT
        ]
        stdout, stderr, _ = vm_util.IssueCommand(open_port_cmd)
        logging.info('Granted DB port ingress, stdout is:\n%s\nstderr is:\n%s',
                     stdout, stderr)

        # Finally, it's time to create the DB instance!
        vm.db_instance_id = 'pkb-DB-%s' % FLAGS.run_uri
        db_class = \
            RDS_CORE_TO_DB_CLASS_MAP['%s' % FLAGS.mysql_svc_db_instance_cores]
        vm.db_instance_master_user = MYSQL_ROOT_USER
        vm.db_instance_master_password = _GenerateRandomPassword()

        create_db_cmd = util.AWS_PREFIX + [
            'rds', 'create-db-instance', '--db-instance-identifier',
            vm.db_instance_id, '--db-instance-class', db_class, '--engine',
            RDS_DB_ENGINE, '--engine-version', RDS_DB_ENGINE_VERSION,
            '--storage-type', RDS_DB_STORAGE_TYPE_GP2, '--allocated-storage',
            RDS_DB_STORAGE_GP2_SIZE, '--vpc-security-group-ids', vm.group_id,
            '--master-username', vm.db_instance_master_user,
            '--master-user-password', vm.db_instance_master_password,
            '--availability-zone', vm.zone, '--db-subnet-group-name',
            vm.db_subnet_group_name
        ]

        status_query_cmd = util.AWS_PREFIX + [
            'rds', 'describe-db-instances', '--db-instance-id',
            vm.db_instance_id
        ]

        stdout, stderr, _ = vm_util.IssueCommand(create_db_cmd)
        logging.info(
            'Request to create the DB has been issued, stdout:\n%s\n'
            'stderr:%s\n', stdout, stderr)
        response = json.loads(stdout)

        db_creation_status = _RDSParseDBInstanceStatus(response)

        for status_query_count in xrange(1, DB_STATUS_QUERY_LIMIT + 1):
            if db_creation_status == 'available':
                break

            if db_creation_status not in RDS_DB_CREATION_PENDING_STATUS:
                raise DBStatusQueryError(
                    'Invalid status in DB creation response. '
                    ' stdout is\n%s, stderr is\n%s' % (stdout, stderr))

            logging.info(
                'Querying db creation status, current state is %s, query '
                'count is %d', db_creation_status, status_query_count)
            time.sleep(DB_STATUS_QUERY_INTERVAL)

            stdout, stderr, _ = vm_util.IssueCommand(status_query_cmd)
            response = json.loads(stdout)
            db_creation_status = _RDSParseDBInstanceStatus(response)
        else:
            raise DBStatusQueryError(
                'DB creation timed-out, we have '
                'waited at least %s * %s seconds.' %
                (DB_STATUS_QUERY_INTERVAL, DB_STATUS_QUERY_LIMIT))

        # The DB has been created. Now get the endpoint address.
        # On RDS, you always connect with a DNS name; if you do that from an
        # EC2 VM, that DNS name resolves to an internal IP address of the DB.
        if 'DBInstance' in response:
            vm.db_instance_address = response['DBInstance']['Endpoint'][
                'Address']
        else:
            if 'DBInstances' in response:
                vm.db_instance_address = \
                    response['DBInstances'][0]['Endpoint']['Address']

        logging.info('Successfully created an RDS DB instance. Address is %s',
                     vm.db_instance_address)
        logging.info('Complete output is:\n %s', response)
Example 17
    def _Exists(self):
        _, _, retcode = vm_util.IssueCommand(
            [azure.AZURE_PATH, 'resource', 'list', self.name],
            suppress_warning=True)

        return retcode == 0
Example 18
    def Cleanup(self, vm):
        """Clean up RDS instances, cleanup the extra subnet created for the
       creation of the RDS instance.

    Args:
      vm: The VM that was used as the test client, which also stores states
          for clean-up.
    """

        # Now, we can delete the DB instance. vm.db_instance_id is the id to call.
        # We need to keep querying the status of the deletion here before we let
        # this go. RDS DB deletion takes some time to finish. And we have to
        # wait until this DB is deleted before we proceed because this DB holds
        # references to various other resources: subnet groups, subnets, vpc, etc.
        delete_db_cmd = util.AWS_PREFIX + [
            'rds', 'delete-db-instance', '--db-instance-identifier',
            vm.db_instance_id, '--skip-final-snapshot'
        ]

        logging.info('Deleting db instance %s...', vm.db_instance_id)

        # Note: the status of this deletion command is validated in the loop
        # below. Both stdout and stderr are checked.
        stdout, stderr, _ = vm_util.IssueCommand(delete_db_cmd)
        logging.info(
            'Request to delete the DB has been issued, stdout:\n%s\n'
            'stderr:%s\n', stdout, stderr)

        status_query_cmd = util.AWS_PREFIX + [
            'rds', 'describe-db-instances', '--db-instance-id',
            vm.db_instance_id
        ]

        db_status = None
        for status_query_count in xrange(1, DB_STATUS_QUERY_LIMIT + 1):
            try:
                response = json.loads(stdout)
            except ValueError:
                # stdout cannot be parsed as JSON; it might simply be empty
                # because deletion has completed.
                break

            db_status = _RDSParseDBInstanceStatus(response)
            if db_status == 'deleting':
                logging.info(
                    'DB is still in the deleting state, status_query_count '
                    'is %d', status_query_count)
                # Wait for a few seconds and query status
                time.sleep(DB_STATUS_QUERY_INTERVAL)
                stdout, stderr, _ = vm_util.IssueCommand(status_query_cmd)
            else:
                logging.info(
                    'DB deletion status is no longer in deleting, it is %s',
                    db_status)
                break
        else:
            logging.warn(
                'DB is still in deleting state after long wait, bail.')

        db_instance_deletion_failed = False
        if db_status == 'deleted' or re.findall('DBInstanceNotFound', stderr):
            # Sometimes we get a 'deleted' status from the DB status query
            # command, but more often the query fails with a 'not found' error.
            # Both are positive confirmation that the DB has been deleted.
            logging.info('DB has been successfully deleted, got confirmation.')
        else:
            # We did not get positive confirmation that the DB was deleted even
            # after a long wait, so we have to bail. Log an error message and
            # raise an exception at the end of this function so this run shows
            # as failed and lets the user examine the logs.
            db_instance_deletion_failed = True
            logging.error(
                'RDS DB instance %s failed to be deleted, we did not get '
                'final confirmation from stderr, which is:\n %s',
                vm.db_instance_id, stderr)

        if hasattr(vm, 'db_subnet_group_name'):
            delete_db_subnet_group_cmd = util.AWS_PREFIX + [
                'rds', 'delete-db-subnet-group', '--db-subnet-group-name',
                vm.db_subnet_group_name
            ]
            stdout, stderr, _ = vm_util.IssueCommand(
                delete_db_subnet_group_cmd)
            logging.info(
                'Deleted the db subnet group. stdout is:\n%s, stderr: \n%s',
                stdout, stderr)

        if hasattr(vm, 'extra_subnet_for_db'):
            vm.extra_subnet_for_db.Delete()

        if db_instance_deletion_failed:
            raise DBStatusQueryError(
                'Failed to get confirmation of DB instance '
                'deletion! Check the log for details!')
Example 19
    def RemoteHostCommand(self,
                          command,
                          should_log=False,
                          retries=SSH_RETRIES,
                          ignore_failure=False,
                          login_shell=False,
                          suppress_warning=False,
                          timeout=None):
        """Runs a command on the VM.

    This is guaranteed to run on the host VM, whereas RemoteCommand might run
    within i.e. a container in the host VM.

    Args:
      command: A valid bash command.
      should_log: A boolean indicating whether the command result should be
          logged at the info level. Even if it is false, the results will
          still be logged at the debug level.
      retries: The maximum number of times RemoteCommand should retry SSHing
          when it receives a 255 return code.
      ignore_failure: Ignore any failure if set to true.
      login_shell: Run command in a login shell.
      suppress_warning: Suppress the result logging from IssueCommand when the
          return code is non-zero.
      timeout: The timeout in seconds passed through to IssueCommand, or None
          for no time limit.

    Returns:
      A tuple of stdout and stderr from running the command.

    Raises:
      RemoteCommandError: If there was a problem establishing the connection.
    """
        if vm_util.RunningOnWindows():
            # Multi-line commands passed to ssh won't work on Windows unless the
            # newlines are escaped.
            command = command.replace('\n', '\\n')

        user_host = '%s@%s' % (self.user_name, self.ip_address)
        ssh_cmd = ['ssh', '-A', '-p', str(self.ssh_port), user_host]
        ssh_cmd.extend(vm_util.GetSshOptions(self.ssh_private_key))
        try:
            if login_shell:
                ssh_cmd.extend(['-t', '-t', 'bash -l -c "%s"' % command])
                self._pseudo_tty_lock.acquire()
            else:
                ssh_cmd.append(command)

            for _ in range(retries):
                stdout, stderr, retcode = vm_util.IssueCommand(
                    ssh_cmd,
                    force_info_log=should_log,
                    suppress_warning=suppress_warning,
                    timeout=timeout)
                if retcode != 255:  # Retry on 255 because this indicates an SSH failure
                    break
        finally:
            if login_shell:
                self._pseudo_tty_lock.release()

        if retcode:
            full_cmd = ' '.join(ssh_cmd)
            error_text = ('Got non-zero return code (%s) executing %s\n'
                          'Full command: %s\nSTDOUT: %sSTDERR: %s' %
                          (retcode, command, full_cmd, stdout, stderr))
            if not ignore_failure:
                raise errors.VirtualMachine.RemoteCommandError(error_text)

        return stdout, stderr
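A short usage sketch for RemoteHostCommand; the vm variable and commands are illustrative, and only keyword arguments from the signature above are used:

# Illustrative only: `vm` stands for an instance of the class defining
# RemoteHostCommand.
stdout, _ = vm.RemoteHostCommand('uname -a', should_log=True)
# Tolerate a non-zero exit instead of raising RemoteCommandError.
stdout, stderr = vm.RemoteHostCommand('ls /nonexistent', ignore_failure=True)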
Example 20
    def Prepare(self, vm):
        """Prepares the DB and everything for the provider GCP (Cloud SQL).

    Args:
      vm: The VM to be used as the test client

    """
        # TODO: Refactor the GCP Cloud SQL instance creation and deletion logic out
        # to a new class called GCPCloudSQLInstance that inherits from
        # perfkitbenchmarker.resource.BaseResource.

        logging.info(
            'Preparing MySQL Service benchmarks for Google Cloud SQL.')

        vm.db_instance_name = 'pkb%s' % FLAGS.run_uri
        db_tier = 'db-n1-standard-%s' % FLAGS.mysql_svc_db_instance_cores
        # Currently, we create DB instance in the same zone as the test VM.
        db_instance_zone = vm.zone
        # Currently GCP REQUIRES you to connect to the DB instance via external IP
        # (i.e., using external IPs of the DB instance AND the VM instance).
        authorized_network = '%s/32' % vm.ip_address
        create_db_cmd = [
            FLAGS.gcloud_path, 'sql', 'instances', 'create',
            vm.db_instance_name, '--quiet', '--format=json', '--async',
            '--activation-policy=ALWAYS', '--assign-ip',
            '--authorized-networks=%s' % authorized_network,
            '--backup-start-time=%s' % DEFAULT_BACKUP_START_TIME,
            '--enable-bin-log',
            '--tier=%s' % db_tier,
            '--gce-zone=%s' % db_instance_zone,
            '--database-version=%s' % GCP_MY_SQL_VERSION,
            '--pricing-plan=%s' % GCP_PRICING_PLAN
        ]

        stdout, _, _ = vm_util.IssueCommand(create_db_cmd)
        response = json.loads(stdout)
        if response['operation'] is None or response[
                'operationType'] != 'CREATE':
            raise DBStatusQueryError('Invalid operation or unrecognized '
                                     'operationType in DB creation response. '
                                     ' stdout is %s' % stdout)

        status_query_cmd = [
            FLAGS.gcloud_path, 'sql', 'instances', 'describe',
            vm.db_instance_name, '--format', 'json'
        ]

        stdout, _, _ = vm_util.IssueCommand(status_query_cmd)
        response = json.loads(stdout)

        query_count = 1
        while True:
            state = response['state']
            if state is None:
                raise ValueError(
                    'Cannot parse response from status query command. '
                    'The state is missing. stdout is %s' % stdout)

            if state == 'RUNNABLE':
                break
            else:
                if query_count > DB_STATUS_QUERY_LIMIT:
                    raise DBStatusQueryError(
                        'DB creation timed-out, we have '
                        'waited at least %s * %s seconds.' %
                        (DB_STATUS_QUERY_INTERVAL, DB_STATUS_QUERY_LIMIT))

                logging.info(
                    'Querying db creation status, current state is %s, query '
                    'count is %d', state, query_count)
                time.sleep(DB_STATUS_QUERY_INTERVAL)

                stdout, _, _ = vm_util.IssueCommand(status_query_cmd)
                response = json.loads(stdout)
                query_count += 1

        logging.info(
            'Successfully created the DB instance. Complete response is '
            '%s', response)

        vm.db_instance_address = response['ipAddresses'][0]['ipAddress']
        logging.info('DB IP address is: %s', vm.db_instance_address)

        # Set the root password to a common one that can be referred to in common
        # code across providers.
        vm.db_instance_master_user = MYSQL_ROOT_USER
        vm.db_instance_master_password = _GenerateRandomPassword()
        set_password_cmd = [
            FLAGS.gcloud_path, 'sql', 'instances', 'set-root-password',
            vm.db_instance_name, '--password', vm.db_instance_master_password
        ]
        stdout, stderr, _ = vm_util.IssueCommand(set_password_cmd)
        logging.info('Set root password completed. Stdout:\n%s\nStderr:\n%s',
                     stdout, stderr)
Example 21
 def EmptyBucket(self, bucket):
   # Ignore failures here and retry in DeleteBucket.  See more comments there.
   vm_util.IssueCommand(
       ['gsutil', '-m', 'rm', '-r',
        'gs://%s/*' % bucket], raise_on_failure=False)
Example 22
    def _Create(self):
        """Creates the AWS RDS instance.

    Raises:
      Exception: if unknown how to create self.spec.engine.

    """
        if self.spec.engine in _RDS_ENGINES:

            instance_identifier = self.instance_id
            self.all_instance_ids.append(instance_identifier)
            cmd = util.AWS_PREFIX + [
                'rds', 'create-db-instance',
                '--db-instance-identifier=%s' % instance_identifier,
                '--engine=%s' % self.spec.engine,
                '--master-username=%s' % self.spec.database_username,
                '--master-user-password=%s' % self.spec.database_password,
                '--allocated-storage=%s' % self.spec.disk_spec.disk_size,
                '--storage-type=%s' % self.spec.disk_spec.disk_type,
                '--db-instance-class=%s' % self.spec.vm_spec.machine_type,
                '--no-auto-minor-version-upgrade',
                '--region=%s' % self.region,
                '--engine-version=%s' % self.spec.engine_version,
                '--db-subnet-group-name=%s' % self.db_subnet_group_name,
                '--vpc-security-group-ids=%s' % self.security_group_id,
                '--availability-zone=%s' % self.spec.vm_spec.zone, '--tags'
            ] + util.MakeFormattedDefaultTags()

            if self.spec.disk_spec.disk_type == aws_disk.IO1:
                cmd.append('--iops=%s' % self.spec.disk_spec.iops)
            # TODO(ferneyhough): add backup_enabled and backup_window

            vm_util.IssueCommand(cmd)

        elif self.spec.engine in _AURORA_ENGINES:
            zones_needed_for_high_availability = len(self.zones) > 1
            if zones_needed_for_high_availability != self.spec.high_availability:
                raise Exception(
                    'When managed_db_high_availability is true, multiple '
                    'zones must be specified.  When '
                    'managed_db_high_availability is false, one zone '
                    'should be specified.   '
                    'managed_db_high_availability: {0}  '
                    'zone count: {1} '.format(
                        zones_needed_for_high_availability, len(self.zones)))

            cluster_identifier = 'pkb-db-cluster-' + FLAGS.run_uri
            # Create the cluster.
            cmd = util.AWS_PREFIX + [
                'rds', 'create-db-cluster',
                '--db-cluster-identifier=%s' % cluster_identifier,
                '--engine=%s' % self.spec.engine,
                '--engine-version=%s' % self.spec.engine_version,
                '--master-username=%s' % self.spec.database_username,
                '--master-user-password=%s' % self.spec.database_password,
                '--region=%s' % self.region,
                '--db-subnet-group-name=%s' % self.db_subnet_group_name,
                '--vpc-security-group-ids=%s' % self.security_group_id,
                '--availability-zones=%s' % self.spec.zones[0], '--tags'
            ] + util.MakeFormattedDefaultTags()

            self.cluster_id = cluster_identifier
            vm_util.IssueCommand(cmd)

            for zone in self.zones:

                # The first instance is assumed to be the writer, so use the
                # instance_id for it.
                if zone == self.zones[0]:
                    instance_identifier = self.instance_id
                else:
                    instance_identifier = self.instance_id + '-' + zone

                self.all_instance_ids.append(instance_identifier)

                cmd = util.AWS_PREFIX + [
                    'rds', 'create-db-instance',
                    '--db-instance-identifier=%s' % instance_identifier,
                    '--db-cluster-identifier=%s' % cluster_identifier,
                    '--engine=%s' % self.spec.engine,
                    '--engine-version=%s' % self.spec.engine_version,
                    '--no-auto-minor-version-upgrade',
                    '--db-instance-class=%s' % self.spec.vm_spec.machine_type,
                    '--region=%s' % self.region,
                    '--availability-zone=%s' % zone, '--tags'
                ] + util.MakeFormattedDefaultTags()
                vm_util.IssueCommand(cmd)

        else:
            raise Exception(
                'Unknown how to create AWS database engine {0}'.format(
                    self.spec.engine))
Example 23
 def CopyToBucket(self, src_path, bucket, object_path):
   """See base class."""
   dst_url = self.MakeRemoteCliDownloadUrl(bucket, object_path)
   vm_util.IssueCommand(['gsutil', 'cp', src_path, dst_url])
Example 24
def GetDefaultUser():
    """Get the default project."""
    cmd = [FLAGS.gcloud_path, 'config', 'list', '--format=json']
    stdout, _, _ = vm_util.IssueCommand(cmd)
    result = json.loads(stdout)
    return result['core']['account']
Example 25
 def testTimeoutReached(self):
     _, _, retcode = vm_util.IssueCommand(['sleep', '2s'], timeout=1)
     self.assertEqual(retcode, -9)
Example 26
    def _Create(self):
        """Creates the AWS RDS instance.

    Raises:
      Exception: if unknown how to create self.spec.engine.

    """
        if (self.spec.engine == managed_relational_db.MYSQL
                or self.spec.engine == managed_relational_db.POSTGRES):

            instance_identifier = self.instance_id
            self.all_instance_ids.append(instance_identifier)
            cmd = util.AWS_PREFIX + [
                'rds',
                'create-db-instance',
                '--db-instance-identifier=%s' % instance_identifier,
                '--engine=%s' % self.spec.engine,
                '--master-username=%s' % self.spec.database_username,
                '--master-user-password=%s' % self.spec.database_password,
                '--allocated-storage=%s' % self.spec.disk_spec.disk_size,
                '--storage-type=%s' % self.spec.disk_spec.disk_type,
                '--db-instance-class=%s' % self.spec.vm_spec.machine_type,
                '--no-auto-minor-version-upgrade',
                '--region=%s' % self.region,
                '--engine-version=%s' % self.spec.engine_version,
                '--db-subnet-group-name=%s' % self.db_subnet_group_name,
                '--vpc-security-group-ids=%s' % self.security_group_id,
            ]

            if self.spec.disk_spec.disk_type == aws_disk.IO1:
                cmd.append('--iops=%s' % self.spec.disk_spec.iops)

            if self.spec.high_availability:
                cmd.append('--multi-az')
            else:
                cmd.append('--availability-zone=%s' % self.spec.vm_spec.zone)

            # TODO(ferneyhough): add backup_enabled and backup_window

            vm_util.IssueCommand(cmd)

        elif self.spec.engine == managed_relational_db.AURORA_POSTGRES:

            cluster_identifier = 'pkb-db-cluster-' + FLAGS.run_uri
            # Create the cluster.
            cmd = util.AWS_PREFIX + [
                'rds', 'create-db-cluster',
                '--db-cluster-identifier=%s' % cluster_identifier,
                '--engine=aurora-postgresql',
                '--master-username=%s' % self.spec.database_username,
                '--master-user-password=%s' % self.spec.database_password,
                '--region=%s' % self.region,
                '--db-subnet-group-name=%s' % self.db_subnet_group_name,
                '--vpc-security-group-ids=%s' % self.security_group_id,
                '--availability-zones=%s' % self.spec.zones[0]
            ]
            self.cluster_id = cluster_identifier
            vm_util.IssueCommand(cmd)

            for zone in self.zones:

                # The first instance is assumed to be the writer, so use the
                # instance_id for it.
                if zone == self.zones[0]:
                    instance_identifier = self.instance_id
                else:
                    instance_identifier = self.instance_id + '-' + zone

                self.all_instance_ids.append(instance_identifier)

                cmd = util.AWS_PREFIX + [
                    'rds', 'create-db-instance',
                    '--db-instance-identifier=%s' % instance_identifier,
                    '--db-cluster-identifier=%s' % cluster_identifier,
                    '--engine=aurora-postgresql',
                    '--no-auto-minor-version-upgrade',
                    '--db-instance-class=%s' % self.spec.machine_type,
                    '--region=%s' % self.region,
                    '--availability-zone=%s' % zone
                ]
                vm_util.IssueCommand(cmd)

        else:
            raise Exception(
                'Unknown how to create AWS database engine {0}'.format(
                    self.spec.engine))
Example 27
    def _Create(self):
        """Creates the cluster."""
        name = 'pkb_' + FLAGS.run_uri

        # Set up ebs details if disk_spec is present in the config
        ebs_configuration = None
        if self.spec.worker_group.disk_spec:
            # Make sure nothing we are ignoring is included in the disk spec
            assert self.spec.worker_group.disk_spec.device_path is None
            assert self.spec.worker_group.disk_spec.disk_number is None
            assert self.spec.worker_group.disk_spec.iops is None
            self.dpb_hdfs_type = disk_to_hdfs_map[
                self.spec.worker_group.disk_spec.disk_type]
            if self.spec.worker_group.disk_spec.disk_type != disk.LOCAL:
                ebs_configuration = {
                    'EbsBlockDeviceConfigs': [{
                        'VolumeSpecification': {
                            'SizeInGB':
                            self.spec.worker_group.disk_spec.disk_size,
                            'VolumeType':
                            self.spec.worker_group.disk_spec.disk_type
                        },
                        'VolumesPerInstance':
                        self.spec.worker_group.disk_count
                    }]
                }

        # Create the specification for the master and the worker nodes
        instance_groups = []
        core_instances = {
            'InstanceCount': self.spec.worker_count,
            'InstanceGroupType': 'CORE',
            'InstanceType': self.spec.worker_group.vm_spec.machine_type
        }
        if ebs_configuration:
            core_instances.update({'EbsConfiguration': ebs_configuration})

        master_instance = {
            'InstanceCount': 1,
            'InstanceGroupType': 'MASTER',
            'InstanceType': self.spec.worker_group.vm_spec.machine_type
        }
        if ebs_configuration:
            master_instance.update({'EbsConfiguration': ebs_configuration})

        instance_groups.append(core_instances)
        instance_groups.append(master_instance)

        # Spark SQL needs to access Hive
        cmd = self.cmd_prefix + [
            'emr', 'create-cluster', '--name', name, '--release-label',
            self.dpb_version, '--use-default-roles', '--instance-groups',
            json.dumps(instance_groups), '--application', 'Name=Spark',
            'Name=Hadoop', 'Name=Hive', '--log-uri', self.base_dir
        ]

        ec2_attributes = [
            'KeyName=' +
            aws_virtual_machine.AwsKeyFileManager.GetKeyNameForRun(),
            'SubnetId=' + self.network.subnet.id,
            # Place all VMs in default security group for simplicity and speed of
            # provisioning
            'EmrManagedMasterSecurityGroup=' + self.security_group_id,
            'EmrManagedSlaveSecurityGroup=' + self.security_group_id,
        ]
        cmd += ['--ec2-attributes', ','.join(ec2_attributes)]

        if FLAGS.dpb_cluster_properties:
            cmd += ['--configurations', _GetClusterConfiguration()]

        stdout, _, _ = vm_util.IssueCommand(cmd)
        result = json.loads(stdout)
        self.cluster_id = result['ClusterId']
        logging.info('Cluster created with id %s', self.cluster_id)
        for tag_key, tag_value in util.MakeDefaultTags().items():
            self._AddTag(tag_key, tag_value)
Example 28
 def _Create(self):
     vm_util.IssueCommand(
         [azure.AZURE_PATH, 'network', 'vnet', 'subnet', 'create',
          '--vnet-name', self.vnet.name,
          '--address-prefix', self.vnet.address_space,
          self.name] + self.resource_group.args)
Example 29
 def _Delete(self):
     if self.cluster_id:
         delete_cmd = self.cmd_prefix + [
             'emr', 'terminate-clusters', '--cluster-ids', self.cluster_id
         ]
         vm_util.IssueCommand(delete_cmd, raise_on_failure=False)
Example 30
 def CopyToBucket(self, src_path, bucket, object_path):
     """See base class."""
     dst_url = self.MakeRemoteCliDownloadUrl(bucket, object_path)
     vm_util.IssueCommand(
         ['aws', 's3', 'cp', src_path, dst_url, '--region', self.region])