def _init_policy(self):
    """
    Initialize IAM policy.

    This policy allow instance to:
    - Load FPGA bitstream.
    - Access to S3 buckets objects for read and write.

    Returns:
        str: 'policy'

    Raises:
        apyfal.exceptions.HostConfigurationException:
            Policy not found after creation.
    """
    # Create a policy; an already existing one is reused silently
    with _exception_handler(filter_error_codes='EntityAlreadyExists'):
        self._iam_client.create_policy(
            PolicyName=self._policy,
            PolicyDocument=_json_dumps(self.POLICY_DOCUMENT))
        _get_logger().debug(
            _utl.gen_msg('created_named', 'policy', self._policy))

    # Find the policy ARN. "list_policies" is paginated (MaxItems caps a
    # single page), so follow "Marker" until the policy is found or the
    # listing is exhausted.
    with _exception_handler():
        marker = None
        while True:
            kwargs = dict(Scope='Local', OnlyAttached=False, MaxItems=100)
            if marker:
                kwargs['Marker'] = marker
            response = self._iam_client.list_policies(**kwargs)

            for policy_item in response['Policies']:
                if policy_item['PolicyName'] == self._policy:
                    self._policy_arn = policy_item['Arn']

                    # 'policy' returns str is used set future object ID
                    return 'policy'

            if not response.get('IsTruncated'):
                break
            marker = response.get('Marker')

    raise _exc.HostConfigurationException(gen_msg=('created_failed_named',
                                                   'IAM policy', self._policy))
def _init_block_device_mappings(self):
    """
    Initialize block device mapping
    """
    if not self._delete_volumes_on_termination:
        return

    # Retrieve block devices attached to the specified image
    with _exception_handler():
        mappings = self._ec2_resource.Image(
            self._image_id).block_device_mappings

    # Adjust EBS settings on every mapped volume
    for mapping in mappings:
        ebs = mapping.get('Ebs')
        if ebs is None:
            continue

        # AWS don't allow both SnapshotId and Encrypted values
        if 'SnapshotId' in ebs and 'Encrypted' in ebs:
            del ebs['Encrypted']

        # Force termination if not forced
        if not ebs.get('DeleteOnTermination', False):
            ebs['DeleteOnTermination'] = True
            _get_logger().debug(
                'Enable "DeleteOnTermination" on "%s" volume.',
                mapping['DeviceName'])

    # Pass parameter if at least a disk parameter changed
    self._block_devices = mappings
def _init_key_pair(self):
    """
    Initializes key pair.
    """
    # AWS treats key pair names as case insensitive while Boto compares
    # them case sensitively: fetch the full list and compare lowercased.
    with _exception_handler():
        existing = self._ec2_client.describe_key_pairs()

    target = self._key_pair.lower()
    for pair in existing['KeyPairs']:
        csp_name = pair['KeyName']
        if csp_name.lower() == target:
            # Reuse the CSP-side spelling of the name
            self._key_pair = csp_name
            return

    # No match on the CSP: create a new key pair
    with _exception_handler():
        created = self._ec2_resource.create_key_pair(
            KeyName=self._key_pair)

    _utl.create_key_pair_file(self._key_pair, created.key_material)
    _get_logger().debug(
        _utl.gen_msg("created_named", "key pair", self._key_pair))
def _attach_instance_profile_role(self): """ Attach IAM role to IAM instance profile. """ # Attach role to instance profile with _exception_handler(filter_error_codes='LimitExceeded'): with _utl.Timeout(self.TIMEOUT, sleep=self._TIMEOUT_SLEEP) as timeout: while True: try: self._iam_client.add_role_to_instance_profile( InstanceProfileName=self._instance_profile_name, RoleName=self._role) break # Some time, instance_profile is not ready immediately except _ClientError as exception: if (exception.response['Error']['Code'] == 'NoSuchEntityException'): if timeout.reached(): raise _exc.HostRuntimeException(gen_msg=( 'timeout', 'IAM instance_profile and role attachment' )) continue raise _get_logger().debug( _utl.gen_msg('attached_to', 'IAM role', self._role, 'IAM instance profile', self._instance_profile_name))
def _call(command, check_file=None, **exc_args):
    """
    Call command in subprocess.

    Args:
        command (list or tuple of str): Command to call.
        check_file (str): Returns file content in exception if exists.
        exc_args: Extra arguments for exception to raise if error.

    Raises:
        apyfal.exceptions.ClientRuntimeException: Error while calling command.
    """
    # Build the printable form once: joining a str with ' ' would garble it
    # character by character, so only join sequences.
    command_str = command if isinstance(command, str) else ' '.join(command)

    # Lazy "%s" argument: formatting is skipped if DEBUG is disabled
    _get_logger().debug("Running shell command: '%s'", command_str)

    try:
        process = _Popen(command, stdout=_PIPE, stderr=_PIPE,
                         universal_newlines=True)
        outputs = list(process.communicate())
        # Non-zero return code is treated as an error
        in_error = process.returncode
    except OSError as exception:
        # Command not found or not executable
        in_error = True
        outputs = [str(exception)]

    if in_error:
        # Append diagnostic file content to the error message if available
        if check_file and _exists(check_file):
            with open(check_file, 'rt') as file:
                outputs.append(file.read())

        raise _exc.ClientRuntimeException(exc='\n'.join(
            [command_str] + [output for output in outputs if output]),
            **exc_args)
def start(self, src=None, info_dict=None, host_env=None, reload=None,
          reset=None, **parameters):
    """
    Configures accelerator.

    Args:
        src (path-like object or file-like object):
            Depending on the accelerator, a configuration data need to be
            loaded before a process can be run.
            Path-like object can be path, URL or cloud object URL.
        parameters (str, path-like object or dict):
            Accelerator configuration specific parameters.
            Can also be a full configuration parameters dictionary
            (Or JSON equivalent as str literal or apyfal.storage URL to
            file). Parameters dictionary override default configuration
            values, individuals specific parameters overrides parameters
            dictionary values. Take a look to accelerator documentation
            for more information on possible parameters.
            Path-like object can be path, URL or cloud object URL.
        reload (bool): Force reload of FPGA bitstream.
        reset (bool): Force reset of FPGA logic.
        info_dict (dict or None): If a dict passed, this dict is updated
            with extra information from current operation.
        host_env (dict): Overrides Accelerator "env".
    """
    self._stopped = False

    _get_logger().info("Configuring accelerator...")

    # Merge caller parameters over the default configuration parameters
    start_parameters = self._get_parameters(
        parameters, self._configuration_parameters)
    start_parameters['env'].update(host_env or {})

    # FPGA reload/reset flags are forwarded only when explicitly requested
    if reload is not None:
        start_parameters['app']['reload'] = reload
    if reset is not None:
        start_parameters['app']['reset'] = reset

    # Open source data (if any) and run the configuration step
    with self._data_file(src, start_parameters, ('src', 'datafile'),
                         mode='rb') as src:
        response = self._start(src, start_parameters)

    # Check response status
    self._raise_for_status(response, "Failed to configure accelerator: ")

    _get_logger().info("Accelerator ready")

    # Report extra operation information back to the caller
    if info_dict is not None and response:
        _utl.recursive_update(info_dict, response)
def _attach_role_policy(self):
    """
    Attach IAM policy to IAM role.
    """
    # An already attached policy is silently ignored
    with _exception_handler(filter_error_codes='EntityAlreadyExists'):
        self._iam_client.attach_role_policy(
            RoleName=self._role, PolicyArn=self._policy_arn)

        _get_logger().debug(
            _utl.gen_msg('attached_to', 'IAM policy', self._policy_arn,
                         'IAM role', self._role))
def _attach_role_policy(self):
    """
    Attach RAM policy to RAM role.
    """
    # "_request" returns a falsy value when the attachment already exists
    attached = self._request(
        'AttachPolicyToRole', domain='ram', PolicyType='Custom',
        PolicyName=self._policy, RoleName=self._role,
        error_code_ignore='EntityAlreadyExists')

    if attached:
        _get_logger().debug(_utl.gen_msg(
            'attached_to', 'RAM policy', self._policy,
            'RAM role', self._role))
def _init_instance_profile(self):
    """
    Initialize IAM instance profile.

    This instance_profile allow to perform actions defined by role.
    """
    # Create instance profile; reuse it silently if it already exists
    with _exception_handler(filter_error_codes='EntityAlreadyExists'):
        self._iam_client.create_instance_profile(
            InstanceProfileName=self._instance_profile_name)

        _get_logger().debug(
            _utl.gen_msg('created_named', 'IAM instance profile',
                         self._instance_profile_name))
def _init_security_group(self):
    """
    Initialize security group.

    Reuses an existing group with the same name (case insensitive),
    else creates it, then authorizes the host public IP on the
    allowed ports.
    """
    # Checks if security group exists
    security_group_id = None
    security_group_name = self._security_group.lower()
    with _exception_handler(gen_msg=('no_find', "security groups")):
        for security_group in self._neutron_client.list_security_groups(
                )['security_groups']:
            if security_group['name'].lower() == security_group_name:
                self._security_group = security_group['name']
                security_group_id = security_group['id']
                # Stop on first match: without the break a later
                # duplicate name would silently override this one
                break

    # Create security group if not exists
    if security_group_id is None:
        with _exception_handler(
                gen_msg=('created_failed', "security groups")):
            security_group_id = self._neutron_client.create_security_group({
                'security_group': {
                    'name': self._security_group,
                    'description': _utl.gen_msg('accelize_generated'),
                }})['security_group']['id']

        _get_logger().debug(
            _utl.gen_msg('created_named', 'security group',
                         self._security_group))

    # Verify rules associated to security group for host IP address
    public_ip = _utl.get_host_public_ip()

    # Create rule on SSH and HTTP; 409 means the rule already exists
    for port in self.ALLOW_PORTS:
        with _exception_handler(filter_error_codes=(409,)):
            self._neutron_client.create_security_group_rule({
                'security_group_rule': {
                    'direction': 'ingress',
                    'port_range_min': str(port),
                    'port_range_max': str(port),
                    'remote_ip_prefix': public_ip,
                    'protocol': 'tcp',
                    'security_group_id': security_group_id}})

    _get_logger().debug(
        _utl.gen_msg('authorized_ip', public_ip, self._security_group))
def _init_role(self):
    """
    Initialize IAM role.

    This role allow to perform actions defined by policy.
    """
    # Create role; an already existing one is reused silently
    with _exception_handler(filter_error_codes='EntityAlreadyExists'):
        self._iam_resource.create_role(
            RoleName=self._role,
            AssumeRolePolicyDocument=_json_dumps(
                self.ASSUME_ROLE_POLICY_DOCUMENT),
            Description=_utl.gen_msg('accelize_generated'))

        # Log the role name, not the boto3 resource object repr,
        # consistent with the other "created_named" messages
        _get_logger().debug(
            _utl.gen_msg('created_named', 'IAM role', self._role))
def _init_role(self):
    """
    Initialize RAM role.

    This role allow to perform actions defined by policy.
    """
    # "_request" returns a falsy value when the role already exists
    created = self._request(
        'CreateRole', domain='ram', RoleName=self._role,
        AssumeRolePolicyDocument=_json.dumps(
            self.ASSUME_ROLE_POLICY_DOCUMENT),
        Description=_utl.gen_msg('accelize_generated'),
        error_code_ignore='EntityAlreadyExists')

    if created:
        _get_logger().debug(_utl.gen_msg(
            'created_named', 'RAM role', self._role))
def _init_policy(self):
    """
    Initialize RAM policy.

    This policy allow instance to:
    - Load FPGA bitstream.
    - Access to OSS buckets objects for read and write.
    """
    # "_request" returns a falsy value when the policy already exists
    created = self._request(
        'CreatePolicy', domain='ram', PolicyName=self._policy,
        PolicyDocument=_json.dumps(self.POLICY_DOCUMENT),
        Description=_utl.gen_msg('accelize_generated'),
        error_code_ignore='EntityAlreadyExists')

    if created:
        _get_logger().debug(_utl.gen_msg(
            'created_named', 'RAM policy', self._policy))
def _wait_instance_boot(self):
    """
    Waits until instance has booted and webservice is OK

    Raises:
        apyfal.exceptions.HostRuntimeException: Timeout while booting.
    """
    # Nothing to wait for when no port is allowed or service already up
    # (Avoid to show message if already booted or not)
    if not self.ALLOW_PORTS or _utl.check_port(self.host_ip, 80):
        return

    _get_logger().info("Waiting instance boot...")
    _sleep(self._TIMEOUT_SLEEP)

    booted = _utl.check_port(self.host_ip, 80, timeout=self.TIMEOUT,
                             sleep=self._TIMEOUT_SLEEP)
    if not booted:
        raise _exc.HostRuntimeException(gen_msg=('timeout', "boot"))
def _init_security_group(self):
    """
    Initialize security group.
    """
    response = self._request('DescribeSecurityGroups')

    # Look for an existing group, matching the name case insensitively
    lower_name = self._security_group.lower()
    for group in response['SecurityGroups']['SecurityGroup']:
        group_name = group['SecurityGroupName']
        if group_name.lower() == lower_name:
            # Update security group name
            self._security_group = group_name
            self._security_group_id = group['SecurityGroupId']
            break

    # Creates security group if not exists
    if not self._security_group_id:
        response = self._request(
            'CreateSecurityGroup',
            SecurityGroupName=self._security_group,
            Description=_utl.gen_msg('accelize_generated'))
        self._security_group_id = response['SecurityGroupId']

        _get_logger().debug(_utl.gen_msg(
            'created_named', 'security group', self._security_group_id))

    # Adds host IP to security group if not already done
    public_ip = _utl.get_host_public_ip()
    for port in self.ALLOW_PORTS:
        self._request(
            'AuthorizeSecurityGroup',
            SecurityGroupId=self._security_group_id,
            Priority=1, IpProtocol='tcp',
            PortRange='%s/%s' % (port, port),
            SourceCidrIp=public_ip)

    _get_logger().debug(
        _utl.gen_msg('authorized_ip', public_ip, self._security_group))
def _update_configuration_files(config_env, update_config,
                                update_credentials):
    """
    Updates configuration files values.

    Args:
        config_env (dict): environment.
        update_config (bool): Update configuration file.
        update_credentials (bool): Update credentials file.
    """
    # Credentials
    if update_credentials:
        credentials = {key: config_env[key]
                       for key in ('client_id', 'client_secret')}
        with open(_cfg.METERING_CREDENTIALS, 'wt') as credential_file:
            _json.dump(credentials, credential_file)

    # Configuration
    if not update_config:
        return

    # Fix 1.0.0 Backward compatibility
    if 'fpgaimage' not in config_env:
        try:
            config_env['fpgaimage'] = config_env['AGFI']
        except KeyError:
            pass

    # update configuration
    config_content = '\n'.join(
        '%s=%s' % (key, config_env[key]) for key in config_env
        if key not in ('client_id', 'client_secret'))

    with open(_cfg.METERING_CLIENT_CONFIG, 'wt') as config_file:
        config_file.write(config_content)

    _get_logger().debug("Setting configuration:\n%s" %
                        config_content.replace('\n', ' \n'))
def _init_key_pair(self): """ Initializes key pair. Returns: bool: True if reuses existing key """ # Get key pair from CSP is exists key_pair_name = self._key_pair.lower() with _exception_handler(gen_msg=('no_find', "key pair")): for key_pair in self._nova_client.keypairs.list(): if key_pair.name.lower() == key_pair_name: self._key_pair = key_pair.name return # Create key pair if not exists with _exception_handler(gen_msg=('created_failed', "key pair")): key_pair = self._nova_client.keypairs.create_keypair( name=self._key_pair) _utl.create_key_pair_file(self._key_pair, key_pair.private_key) _get_logger().debug( _utl.gen_msg("created_named", "key pair", self._key_pair))
def _wait_instance_ready(self):
    """
    Waits until instance is ready.

    Raises:
        apyfal.exceptions.HostRuntimeException:
            Instance in error state or provisioning timeout.
    """
    warned = False

    # Waiting for the instance provisioning
    with _utl.Timeout(self.TIMEOUT, sleep=self._TIMEOUT_SLEEP) as timeout:
        while True:
            # Get instance status
            status = self._status()

            if status == self.STATUS_RUNNING:
                return

            if status == self.STATUS_ERROR:
                raise _exc.HostRuntimeException(
                    gen_msg=('unable_to_status', "provision", status))

            if timeout.reached():
                raise _exc.HostRuntimeException(
                    gen_msg=('timeout_status', "provisioning", status))

            if not warned:
                # Avoid to show message if already booted
                warned = True
                _get_logger().info("Waiting instance provisioning...")
def _log_profiling_info(info_dict):
    """
    Shows profiling and specific information in logger.

    Args:
        info_dict (dict): info_dict from AcceleratorClient.process
    """
    # Handle profiling info
    try:
        profiling = info_dict['app']['profiling']
    except (KeyError, TypeError):
        return None

    logger = _get_logger()
    logger.info("Profiling information from result:")

    # Extract optional metrics as floats; a missing or non-numeric value
    # is simply skipped (this helper must never break the caller)
    values = dict()
    for key in ('wall-clock-time', 'fpga-elapsed-time',
                'total-bytes-written', 'total-bytes-read'):
        try:
            values[key] = float(profiling[key])
        except (KeyError, TypeError, ValueError):
            pass

    total_bytes = (values.get('total-bytes-written', 0.0) +
                   values.get('total-bytes-read', 0.0))
    global_time = values.get('wall-clock-time', 0.0)
    fpga_time = values.get('fpga-elapsed-time', 0.0)

    # Use lazy "%" arguments consistently so formatting is skipped
    # when the INFO level is disabled
    if global_time > 0.0:
        logger.info('- Wall clock time: %.3fs', global_time)
    if fpga_time > 0.0:
        logger.info('- FPGA elapsed time: %.3fs', fpga_time)

    if total_bytes > 0.0 and global_time > 0.0:
        logger.info("- Server processing bandwidths: %.1f MB/s",
                    total_bytes / global_time / 1024.0 / 1024.0)
    if total_bytes > 0.0 and fpga_time > 0.0:
        logger.info("- FPGA processing bandwidths: %.1f MB/s",
                    total_bytes / fpga_time / 1024.0 / 1024.0)
def stop(self, stop_mode=None): """ Stop instance accordingly with the current stop_mode. See "stop_mode" property for more information. Args: stop_mode (str or int): If not None, override current "stop_mode" value. """ # No instance to stop (Avoid double call with __exit__ + __del__) if self._instance_id is None: return # Define stop mode if stop_mode is None: stop_mode = self._stop_mode # Keep instance alive if stop_mode == 'keep': if not self._warn_keep_once: self._warn_keep_once = True _get_logger().info("Instance '%s' is still running" % self.instance_id) return # Checks if instance to stop try: # Force instance update self._instance = self._get_instance() # Checks status self._status() except _exc.HostRuntimeException: return # Terminates and delete instance completely if stop_mode == 'term': self._terminate_instance() _get_logger().info("Instance '%s' has been terminated", self._instance_id) # Pauses instance and keep it alive else: self._pause_instance() _get_logger().info("Instance '%s' has been stopped", self._instance_id) # Detaches from instance self._instance_id = None self._instance = None
def process(self, src=None, dst=None, info_dict=None, **parameters):
    """
    Processes with accelerator.

    Args:
        src (path-like object or file-like object):
            Source data to process.
            Path-like object can be path, URL or cloud object URL.
        dst (path-like object or file-like object):
            Processed data destination.
            Path-like object can be path, URL or cloud object URL.
        parameters (path-like object, str or dict):
            Accelerator process specific parameters.
            Can also be a full process parameters dictionary
            (Or JSON equivalent as str literal). Parameters dictionary
            override default configuration values, individuals specific
            parameters overrides parameters dictionary values. Take a
            look to accelerator documentation for more information on
            possible parameters.
            Path-like object can be path, URL or cloud object URL.
        info_dict (dict or None): If a dict passed, this dict is updated
            with extra information from current operation.

    Returns:
        Result from process operation, depending used accelerator.
    """
    # Local import: "logging" may not be imported at module level
    from logging import INFO

    # Named level constant instead of the magic number 20
    _enable_logger = _get_logger().isEnabledFor(INFO)

    # An info_dict is needed to collect profiling data for logging
    if _enable_logger and info_dict is None:
        info_dict = dict()

    # Process file with accelerator
    process_result = self._client.process(
        src=src, dst=dst, info_dict=info_dict, **parameters)

    if _enable_logger:
        self._log_profiling_info(info_dict)

    return process_result
def _init_security_group(self):
    """
    Initialize security group.

    Reuses an existing group with the same name (case insensitive),
    else creates it, then authorizes the host public IP on the
    allowed ports.
    """
    # Get list of security groups
    # Checks if Key pairs exists, like for key pairs
    # needs case insensitive names check
    with _exception_handler():
        security_groups = self._ec2_client.describe_security_groups()

    name_lower = self._security_group.lower()
    group_exists = False
    security_group_id = ''
    for security_group in security_groups['SecurityGroups']:
        group_name = security_group['GroupName']
        if group_name.lower() == name_lower:
            # Update name
            self._security_group = group_name

            # Get group ID
            security_group_id = security_group['GroupId']

            # Mark as existing
            group_exists = True
            break

    # Try to create security group if not exist
    if not group_exists:
        # Get VPC. "or [{}]" also covers the case where "Vpcs" is
        # present but empty: ".get('Vpcs', [{}])[0]" would raise
        # IndexError on an empty list since the default only applies
        # when the key is missing.
        with _exception_handler():
            vpc_id = (self._ec2_client.describe_vpcs().get('Vpcs') or
                      [{}])[0].get('VpcId', '')

        with _exception_handler():
            response = self._ec2_client.create_security_group(
                GroupName=self._security_group,
                Description=_utl.gen_msg('accelize_generated'),
                VpcId=vpc_id)

        # Get group ID
        security_group_id = response['GroupId']

        _get_logger().debug(
            _utl.gen_msg('created_named', 'security group',
                         security_group_id))

    # Add host IP to security group if not already done
    public_ip = _utl.get_host_public_ip()

    ip_permissions = []
    for port in self.ALLOW_PORTS:
        ip_permissions.append({
            'IpProtocol': 'tcp', 'FromPort': port, 'ToPort': port,
            'IpRanges': [{'CidrIp': public_ip}]})

    # Already authorized rules are silently ignored
    with _exception_handler(
            filter_error_codes='InvalidPermission.Duplicate'):
        self._ec2_client.authorize_security_group_ingress(
            GroupId=security_group_id,
            IpPermissions=ip_permissions)

    _get_logger().debug(
        _utl.gen_msg('authorized_ip', public_ip, self._security_group))
def start(self, accelerator=None, accel_parameters=None, stop_mode=None, image_id=None, instance_type=None): """ Start instance if not already started. Create instance if necessary. Needs "accel_client" or "accel_parameters". Args: accelerator (str): Name of the accelerator. accel_parameters (dict): Can override parameters from accelerator client. image_id (str): Force the use of specified image ID. instance_type (str): Force the use of specified instance type. stop_mode (str or int): See "stop_mode" property for more information. """ # Updates stop mode self.stop_mode = stop_mode # Starts instance only if not already started if self._host_ip is None: # Get parameters from accelerator self._set_accelerator_requirements( accelerator=accelerator, accel_parameters=accel_parameters, image_id=image_id, instance_type=instance_type) # Checks CSP credential self._check_credential() # Creates and starts instance if not exists if self.instance_id is None: _get_logger().info("Configuring host on %s instance...", self._host_type) with self._stop_silently_on_exception(): self._create_instance() with self._stop_silently_on_exception(): self._instance, self._instance_id = \ self._start_new_instance() _get_logger().debug( _utl.gen_msg('created_named', 'instance', self._instance_id)) # If exists, starts it directly else: self._start_existing_instance(self._status()) # Waiting for instance provisioning with self._stop_silently_on_exception(): self._wait_instance_ready() # Update instance URL self._host_ip = self.host_ip self._url = _utl.format_url(self._host_ip, force_secure=bool(self._ssl_cert_crt)) # Waiting for the instance to boot self._wait_instance_boot() _get_logger().info("Host ready") # If Host IP exists exists, checks if reachable elif self.ALLOW_PORTS and not _utl.check_port(self.host_ip, 80): raise _exc.HostRuntimeException(gen_msg=('unable_reach_port', self.host_ip, 80))