def aws_start(self):
    """AWS start

    Start a VM in AWS.
    """
    instance_ids = [self.dataset_obj['aws_instance_id']]

    # Dry-run first: surfaces permission problems without side effects.
    # AWS signals a permitted dry-run via a DryRunOperation error.
    try:
        self.ec2c.start_instances(InstanceIds=instance_ids, DryRun=True)
    except ClientError as e:
        if 'DryRunOperation' not in str(e):
            raise

    try:
        response = self.ec2c.start_instances(
            InstanceIds=instance_ids, DryRun=False)
        log.debug(response)
        current_state = (
            response['StartingInstances'][0]['CurrentState']['Code'])
        if current_state and current_state == AWS_RETURN_CODES['running']:
            # Nothing to wait for - the instance was already up.
            log.info('{} is already running.'.format(
                self.dataset_obj['hostname']))
            return
    except ClientError as e:
        raise VMError(e)

    # The instance is booting; block until SSH answers.
    reachable = wait_until(
        str(self.dataset_obj['intern_ip']),
        waitmsg='Waiting for SSH to respond',
    )
    if not reachable:
        raise VMError('The server is not reachable with SSH')
def aws_disk_set(self, size: int, timeout_disk_resize: int = 60) -> None:
    """AWS disk set

    Resize a disk in AWS.

    :param: size: New disk_size
    :param: timeout_disk_resize: Timeout for disk resizing within VM

    :raises: VMError: Generic exception for VM errors of all kinds
    """
    ec2 = boto3.resource('ec2')
    instance = ec2.Instance(self.dataset_obj['aws_instance_id'])

    # Take the first attached volume - presumably the root volume.
    volume_id = None
    for vol in instance.volumes.all():
        volume_id = vol.id
        break
    if volume_id is None:
        # Previously volume_id was left unbound here, producing an
        # UnboundLocalError instead of a meaningful failure.
        raise VMError('No EBS volume found for {}'.format(
            self.dataset_obj['hostname']))

    ec2 = boto3.client('ec2')
    ec2.modify_volume(VolumeId=volume_id, Size=int(size))

    partition = self.run('findmnt -nro SOURCE /')
    disk = self.run('lsblk -nro PKNAME {}'.format(partition))
    new_disk_size = self.run('lsblk -bdnro size /dev/{}'.format(disk))
    new_disk_size_gib = int(new_disk_size) / 1024 / 1024 / 1024

    # Poll once per second until the guest kernel sees the grown block
    # device or the timeout budget is exhausted.
    while timeout_disk_resize and size != new_disk_size_gib:
        timeout_disk_resize -= 1
        time.sleep(1)
        new_disk_size = self.run('lsblk -bdnro size /dev/{}'.format(disk))
        new_disk_size_gib = int(new_disk_size) / 1024 / 1024 / 1024

    # Judge success on the observed size, not the counter: a resize
    # that lands exactly on the last poll (counter == 0) is a success,
    # not a timeout.  The old `timeout_disk_resize == 0` check raised
    # a spurious VMError in that case.
    if size != new_disk_size_gib:
        raise VMError('Timeout for disk resize reached')

    with settings(
        host_string=self.dataset_obj['hostname'],
        warn_only=True,
    ):
        # Grow partition 1 first, then the filesystem on top of it.
        disk_resize = self.run('growpart /dev/{} 1'.format(disk))
        if disk_resize.succeeded:
            fs_resize = self.run('resize2fs {}'.format(partition))
            if fs_resize.succeeded:
                log.info('successfully resized disk of {} to {}GB'.format(
                    self.dataset_obj['hostname'], size))
                return

    raise VMError('disk resize for {} failed'.format(
        self.dataset_obj['hostname']))
def run_puppet(self, clear_cert=False, debug=False):
    """Runs Puppet in chroot on the hypervisor.

    :param: clear_cert: Clean the existing Puppet certificate first
    :param: debug: Pass --debug to the Puppet agent

    :raises: VMError: If the initial Puppet run fails
    """
    if clear_cert:
        clean_cert(self.dataset_obj)

    # Autostart blocking is only relevant for KVM-based datacenters.
    if self.dataset_obj['datacenter_type'] == 'kvm.dct':
        self.block_autostart()

    # Puppet exits 2 for "changes applied" with --detailed-exitcodes,
    # so treat exit code 2 as success via the trailing shell test.
    puppet_command = (
        '( /opt/puppetlabs/puppet/bin/puppet agent '
        '--detailed-exitcodes '
        '--fqdn={} --server={} --ca_server={} '
        '--no-report --waitforcert=60 --onetime --no-daemonize '
        '--skip_tags=chroot_unsafe --verbose{} ) ;'
        '[ $? -eq 2 ]'.format(
            self.fqdn,
            self.dataset_obj['puppet_master'],
            self.dataset_obj['puppet_ca'],
            ' --debug' if debug else '',
        ))

    try:
        self.run(puppet_command)
    except RemoteCommandError as e:
        raise VMError('Initial puppetrun failed') from e
    finally:
        # Always re-enable autostart, even when the Puppet run failed.
        # Previously a failed run returned before this call, leaving
        # autostart permanently blocked.
        self.unblock_autostart()
def run_puppet(self, clear_cert=False, debug=False):
    """Runs Puppet in chroot on the hypervisor."""
    if clear_cert:
        # Remove any stale certificate on the Puppet CA so the agent
        # can request a fresh one on its first run.
        with settings(
            host_string=self.dataset_obj['puppet_ca'],
            user='******',
            warn_only=True,
        ):
            run(
                '/usr/bin/puppet cert clean {}'.format(self.fqdn),
                shell=False,
            )

    self.block_autostart()

    debug_flag = ' --debug' if debug else ''
    # --detailed-exitcodes makes Puppet exit 2 when changes were
    # applied; the trailing shell test maps that to success.
    puppet_command = (
        '( /opt/puppetlabs/puppet/bin/puppet agent '
        '--detailed-exitcodes '
        '--fqdn={} --server={} --ca_server={} '
        '--no-report --waitforcert=60 --onetime --no-daemonize '
        '--skip_tags=chroot_unsafe --verbose{} ) ;'
        '[ $? -eq 2 ]'.format(
            self.fqdn,
            self.dataset_obj['puppet_master'],
            self.dataset_obj['puppet_ca'],
            debug_flag,
        ))

    try:
        self.run(puppet_command)
    except RemoteCommandError as e:
        raise VMError('Initial puppetrun failed') from e

    self.unblock_autostart()
def start(self, force_stop_failed=True, transaction=None):
    """Start the VM on its hypervisor and wait until SSH responds.

    :param: force_stop_failed: Force-stop the VM if it never becomes
                               reachable over SSH
    :param: transaction: Optional transaction to register a rollback on

    :raises: VMError: If the VM does not come online or is unreachable
    """
    self.hypervisor.start_vm(self)
    if not self.wait_for_running(running=True):
        raise VMError('VM did not come online in time')

    reachable = wait_until(
        str(self.dataset_obj['intern_ip']),
        waitmsg='Waiting for SSH to respond',
    )
    if not reachable:
        # If there is a network or booting error VM must be destroyed
        # if starting has failed.
        if force_stop_failed:
            self.hypervisor.stop_vm_force(self)
        raise VMError('The server is not reachable with SSH')

    if transaction:
        transaction.on_rollback('stop VM', self.shutdown)
def memory_free(self):
    """Return the free memory reported by the guest in MiB.

    Prefers MemAvailable from /proc/meminfo and falls back to MemFree.

    :raises: VMError: If neither entry can be parsed
    """
    meminfo = self.meminfo()
    # MemAvailable might not be present on old systems
    for key in ('MemAvailable', 'MemFree'):
        if key in meminfo:
            kib_free = parse_size(meminfo[key], 'K')
            break
    else:
        raise VMError('/proc/meminfo contains no parsable entries')
    return round(float(kib_free) / 1024, 2)
def aws_delete(self):
    """AWS delete

    Delete a VM in AWS.
    """
    instance_ids = [self.dataset_obj['aws_instance_id']]
    try:
        # Raw API response is only interesting for debugging.
        log.debug(self.ec2c.terminate_instances(InstanceIds=instance_ids))
    except ClientError as e:
        raise VMError(e)
def aws_delete(self):
    """AWS delete

    Delete a VM in AWS.

    :raises: VMError: If AWS rejects the terminate call
    """
    ec2 = boto3.client('ec2')
    try:
        response = ec2.terminate_instances(
            InstanceIds=[self.dataset_obj['aws_instance_id']])
        # Raw API responses are debugging detail, not operator-facing
        # information; the sibling implementation logs them at debug
        # level, so match it here for consistency.
        log.debug(response)
    except ClientError as e:
        raise VMError(e)
def aws_vpc(self) -> Vpc:
    """Return the VPC this VM belongs to, caching the lookup.

    :raises: VMError: If no VPC matches the configured aws_vpc_id
    """
    if self.__vpc:
        return self.__vpc

    vpc_filter = [{
        'Name': 'vpc-id',
        'Values': [self.dataset_obj['aws_vpc_id']],
    }]
    # The filter matches at most one VPC; remember whichever we find.
    for vpc in self.ec2r.vpcs.filter(Filters=vpc_filter):
        self.__vpc = vpc

    if self.__vpc is None:
        raise VMError("Can't find VPC for this VM!")
    return self.__vpc
def aws_shutdown(self, timeout: int = 120) -> None:
    """AWS shutdown

    Shutdown a VM in AWS.

    :param: timeout: Timeout value for VM shutdown
    """
    ec2 = self.aws_session.client('ec2')
    instance_ids = [self.dataset_obj['aws_instance_id']]

    # Dry-run first: surfaces permission problems without side effects.
    try:
        ec2.stop_instances(InstanceIds=instance_ids, DryRun=True)
    except ClientError as e:
        if 'DryRunOperation' not in str(e):
            raise

    try:
        response = ec2.stop_instances(InstanceIds=instance_ids,
                                      DryRun=False)
        current_state = response['StoppingInstances'][0]['CurrentState'][
            'Code']
        log.debug(response)
        if current_state and current_state == AWS_RETURN_CODES['stopped']:
            log.info('{} is already stopped.'.format(
                self.dataset_obj['hostname']))
    except ClientError as e:
        raise VMError(e)

    # Poll once per second until AWS reports the instance stopped or
    # the timeout budget runs out (best effort - no error on timeout).
    for _ in range(timeout):
        if AWS_RETURN_CODES[
                'stopped'] == self.aws_describe_instance_status(
                    self.dataset_obj['aws_instance_id']):
            log.info('"{}" is stopped.'.format(
                self.dataset_obj['hostname']))
            break
        log.info('Waiting for VM "{}" to shutdown'.format(
            self.dataset_obj['hostname']))
        time.sleep(1)
def aws_disk_set(self, size: int, timeout_disk_resize: int = 60) -> None:
    """AWS disk set

    Resize a disk in AWS.

    :param: size: New disk_size
    :param: timeout_disk_resize: Timeout for disk resizing within VM

    :raises: VMError: Generic exception for VM errors of all kinds
    :raises: NotImplementedError: When trying to shrink the disk
    """
    if size < self.dataset_obj['disk_size_gib']:
        raise NotImplementedError('Cannot shrink the disk.')

    instance = self.ec2r.Instance(self.dataset_obj['aws_instance_id'])

    # Take the first attached volume - presumably the root volume.
    volume_id = None
    for vol in instance.volumes.all():
        volume_id = vol.id
        break
    if volume_id is None:
        # Previously volume_id was left unbound here, producing an
        # UnboundLocalError instead of a meaningful failure.
        raise VMError('No EBS volume found for {}'.format(
            self.dataset_obj['hostname']))

    try:
        volume_state = self.ec2c.describe_volumes_modifications(
            VolumeIds=[volume_id])['VolumesModifications'][0]
        if volume_state['ModificationState'] == 'optimizing':
            raise VMError('disk resize already in progress '
                          'for {} (state: {})'.format(
                              self.dataset_obj['hostname'],
                              volume_state['ModificationState']))
    except ClientError:
        # No modification history exists before the first resize.
        log.debug('First disk resize of {} ({}) - '
                  'no modification state available in AWS'.format(
                      self.dataset_obj['hostname'], volume_id))

    self.ec2c.modify_volume(VolumeId=volume_id, Size=int(size))

    partition = self.run('findmnt -nro SOURCE /')
    disk = self.run('lsblk -nro PKNAME {}'.format(partition))
    new_disk_size = self.run('lsblk -bdnro size /dev/{}'.format(disk))
    new_disk_size_gib = int(new_disk_size) / 1024 / 1024 / 1024

    # Poll once per second until the guest kernel sees the grown block
    # device or the timeout budget is exhausted.
    while timeout_disk_resize and size != new_disk_size_gib:
        timeout_disk_resize -= 1
        time.sleep(1)
        new_disk_size = self.run('lsblk -bdnro size /dev/{}'.format(disk))
        new_disk_size_gib = int(new_disk_size) / 1024 / 1024 / 1024

    # Judge success on the observed size, not the counter: a resize
    # that lands exactly on the last poll (counter == 0) is a success,
    # not a timeout.  The old `timeout_disk_resize == 0` check raised
    # a spurious VMError in that case.
    if size != new_disk_size_gib:
        raise VMError('Timeout for disk resize reached')

    with settings(
        host_string=self.dataset_obj['hostname'],
        warn_only=True,
    ):
        # Grow partition 1 first, then the filesystem on top of it.
        disk_resize = self.run('growpart /dev/{} 1'.format(disk))
        if disk_resize.succeeded:
            fs_resize = self.run('resize2fs {}'.format(partition))
            if fs_resize.succeeded:
                log.info('successfully resized disk of {} to {}GB'.format(
                    self.dataset_obj['hostname'], size))
                return

    raise VMError('disk resize for {} failed'.format(
        self.dataset_obj['hostname']))
def aws_build(self,
              run_puppet: bool = True,
              debug_puppet: bool = False,
              postboot: Optional[str] = None,
              timeout_vm_setup: int = 300,
              timeout_cloud_init: int = 1200) -> None:
    """AWS build

    Build a VM in AWS.

    :param: run_puppet: Run puppet (incl. cert clean) after VM creation
    :param: debug_puppet: Run puppet in debug mode
    :param: postboot: cloudinit configuration put as userdata
    :param: timeout_vm_setup: Timeout value for the VM creation
    :param: timeout_cloud_init: Timeout value for the cloudinit
                                provisioning

    :raises: VMError: Generic exception for VM errors of all kinds
    """
    # Prefer dynamically determined instance types; fall back to the
    # statically configured one when no overview is available.
    vm_types_overview = self.aws_get_instances_overview()
    if vm_types_overview:
        vm_types = self.aws_get_fitting_vm_types(vm_types_overview)
    else:
        vm_types = [self.dataset_obj['aws_instance_type']]

    root_device = list(
        self.ec2r.images.filter(
            ImageIds=[self.dataset_obj['aws_image_id']
                      ]))[0].root_device_name
    disk_size_gib = self.dataset_obj['disk_size_gib']

    # Try each candidate instance type until one has capacity.
    for vm_type in vm_types:
        try:
            response = self.ec2c.run_instances(
                BlockDeviceMappings=[{
                    'DeviceName': root_device,
                    'Ebs': {
                        # AWS images need at least 8 GiB root volumes.
                        'VolumeSize':
                        (disk_size_gib if disk_size_gib > 8 else 8),
                        'VolumeType': 'gp2'
                    }
                }],
                ImageId=self.dataset_obj['aws_image_id'],
                InstanceType=vm_type,
                KeyName=self.dataset_obj['aws_key_name'],
                SecurityGroupIds=[self.consolidated_sg.id],
                SubnetId=self.dataset_obj['aws_subnet_id'],
                Placement={
                    'AvailabilityZone':
                    str(self.dataset_obj['aws_placement'])
                },
                PrivateIpAddress=str(self.dataset_obj['intern_ip']),
                Ipv6Addresses=[{
                    'Ipv6Address': str(self.dataset_obj['primary_ip6'])
                }],
                UserData='' if postboot is None else postboot,
                TagSpecifications=[
                    {
                        'ResourceType': 'instance',
                        'Tags': [
                            {
                                'Key': 'Name',
                                'Value': self.dataset_obj['hostname'],
                            },
                        ]
                    },
                ],
                DryRun=False,
                MinCount=1,
                MaxCount=1,
            )
            log.debug(response)
            self.dataset_obj['aws_instance_type'] = vm_type
            break
        except ClientError as e:
            raise VMError(e)
        except CapacityNotAvailableError:
            continue
    else:
        # Every candidate type was rejected for capacity.  Without
        # this guard, `response` would be unbound below and the build
        # would die with a NameError instead of a clear error.
        raise VMError('No AWS capacity available for any fitting '
                      'instance type: {}'.format(', '.join(vm_types)))

    if run_puppet:
        self.run_puppet(clear_cert=True, debug=debug_puppet)

    self.dataset_obj['aws_instance_id'] = response['Instances'][0][
        'InstanceId']
    log.info('waiting for {} to be started'.format(
        self.dataset_obj['hostname']))

    vm_setup = tqdm.tqdm(total=timeout_vm_setup,
                         desc='vm_setup',
                         position=0)
    cloud_init = tqdm.tqdm(total=timeout_cloud_init,
                           desc='cloud_init',
                           position=1)

    # Wait for AWS to declare the VM running
    while (timeout_vm_setup
           and AWS_RETURN_CODES['running'] !=
           self.aws_describe_instance_status(
               self.dataset_obj['aws_instance_id'])):
        vm_setup.update(1)
        timeout_vm_setup -= 1
        time.sleep(1)
    vm_setup.update(timeout_vm_setup)
    # TODO: Handle overrun timeout

    # Try to provision the VM with cloudinit
    for retry in range(timeout_cloud_init):
        cloud_init.update(1)
        # Only try to connect every 20s to avoid paramiko exceptions
        if retry % 20 != 0:
            time.sleep(1)
            continue
        with settings(
            hide('aborts'),
            host_string=self.dataset_obj['hostname'],
            warn_only=True,
            abort_on_prompts=True,
        ):
            try:
                # cloud-init touches this marker file once it is done.
                if run('find /var/lib/cloud/instance/boot-finished',
                       quiet=True).succeeded:
                    cloud_init.update(timeout_cloud_init - retry - 1)
                    break
            except (SystemExit, NetworkError):
                time.sleep(1)
    # TODO: Handle overrun timeout

    self.create_ssh_keys()
    log.info('"{}" is successfully built in AWS.'.format(self.fqdn))
def aws_build(self,
              run_puppet: bool = True,
              debug_puppet: bool = False,
              postboot: Optional[str] = None,
              timeout_vm_setup: int = 300,
              timeout_cloud_init: int = 600) -> None:
    """AWS build

    Build a VM in AWS.

    :param: run_puppet: Run puppet (incl. cert clean) after VM creation
    :param: debug_puppet: Run puppet in debug mode
    :param: postboot: cloudinit configuration put as userdata
    :param: timeout_vm_setup: Timeout value for the VM creation
    :param: timeout_cloud_init: Timeout value for the cloudinit
                                provisioning

    :raises: VMError: Generic exception for VM errors of all kinds
    """
    ec2 = boto3.client('ec2')
    try:
        # Launch exactly one instance from the configured image with
        # the serveradmin-provided network placement and key pair.
        response = ec2.run_instances(
            ImageId=self.dataset_obj['aws_image_id'],
            InstanceType=self.dataset_obj['aws_instance_type'],
            KeyName=self.dataset_obj['aws_key_name'],
            SecurityGroupIds=list(
                self.dataset_obj['aws_security_group_ids']),
            SubnetId=self.dataset_obj['aws_subnet_id'],
            Placement={
                'AvailabilityZone': self.dataset_obj['aws_placement']
            },
            PrivateIpAddress=str(self.dataset_obj['intern_ip']),
            UserData='' if postboot is None else postboot,
            TagSpecifications=[
                {
                    'ResourceType': 'instance',
                    'Tags': [
                        {
                            'Key': 'Name',
                            'Value': self.dataset_obj['hostname'],
                        },
                    ]
                },
            ],
            DryRun=False,
            MinCount=1,
            MaxCount=1)
        # NOTE(review): the full API response is logged at info level
        # here, while other AWS helpers in this file use debug.
        log.info(response)
    except ClientError as e:
        raise VMError(e)

    if run_puppet:
        # Presumably this only prepares the Puppet certificate before
        # first boot; the instance is not reachable yet - TODO confirm.
        self.run_puppet(clear_cert=True, debug=debug_puppet)

    self.dataset_obj['aws_instance_id'] = response['Instances'][0][
        'InstanceId']
    log.info('waiting for {} to be started'.format(
        self.dataset_obj['hostname']))

    # Two progress bars: one for the AWS state transition, one for the
    # cloud-init provisioning that follows it.
    vm_setup = tqdm.tqdm(total=timeout_vm_setup,
                         desc='vm_setup',
                         position=0)
    cloud_init = tqdm.tqdm(total=timeout_cloud_init,
                           desc='cloud_init',
                           position=1)

    # Wait for AWS to declare the VM running
    while (timeout_vm_setup
           and AWS_RETURN_CODES['running'] !=
           self.aws_describe_instance_status(
               self.dataset_obj['aws_instance_id'])):
        vm_setup.update(1)
        timeout_vm_setup -= 1
        time.sleep(1)
    # Fill the remaining progress in one step once the VM is running.
    vm_setup.update(timeout_vm_setup)
    # TODO: Handle overrun timeout

    # Try to provision the VM with cloudinit
    for retry in range(timeout_cloud_init):
        cloud_init.update(1)
        # Only try to connect every 10s to avoid paramiko exceptions
        if retry % 10 != 0:
            time.sleep(1)
            continue
        with settings(
            hide('aborts'),
            host_string=self.dataset_obj['hostname'],
            warn_only=True,
            abort_on_prompts=True,
        ):
            try:
                # cloud-init creates this marker file when it is done.
                if run('find /var/lib/cloud/instance/boot-finished',
                       quiet=True).succeeded:
                    cloud_init.update(timeout_cloud_init - retry - 1)
                    break
            except (SystemExit, NetworkError):
                # SSH not up yet (fabric aborts raise SystemExit with
                # abort_on_prompts); back off and retry.
                time.sleep(1)
    # TODO: Handle overrun timeout

    self.create_ssh_keys()
    log.info('"{}" is successfully built in AWS.'.format(self.fqdn))