def __init__(self, standalone_config):
    """
    Build the handler from the standalone configuration.

    Copies the general settings out of ``standalone_config``, imports
    ``lithops.standalone.backends.<backend>`` dynamically, instantiates its
    ``StandaloneBackend`` with the config section named after the backend,
    and finally asks that backend for the SSH credentials and IP address
    used to create the SSH client.

    :param standalone_config: mapping holding at least the 'backend' and
        'runtime' keys plus a section named after the selected backend.
    :raises Exception: any error raised while importing or constructing
        the backend is logged and re-raised unchanged.
    """
    cfg = standalone_config
    self.config = cfg
    self.backend_name = cfg['backend']
    self.runtime = cfg['runtime']
    self.is_lithops_worker = is_lithops_worker()

    # Optional tuning knobs; only start_timeout has a default.
    self.start_timeout = cfg.get('start_timeout', 300)
    self.auto_dismantle = cfg.get('auto_dismantle')
    self.hard_dismantle_timeout = cfg.get('hard_dismantle_timeout')
    self.soft_dismantle_timeout = cfg.get('soft_dismantle_timeout')

    try:
        backend_module = importlib.import_module(
            'lithops.standalone.backends.{}'.format(self.backend_name))
        backend_cls = getattr(backend_module, 'StandaloneBackend')
        self.backend = backend_cls(cfg[self.backend_name])
    except Exception as err:
        logger.error("There was an error trying to create the "
                     "{} standalone backend".format(self.backend_name))
        raise err

    self.log_monitors = {}

    self.ssh_credentials = self.backend.get_ssh_credentials()
    self.ip_address = self.backend.get_ip_address()

    # Imported here, after the backend exists, exactly where the
    # original performed the import.
    from lithops.util.ssh_client import SSHClient
    self.ssh_client = SSHClient(self.ssh_credentials)

    logger.debug("Standalone handler created successfully")
def get_ssh_client(self):
    """
    Return the cached SSH client, creating it lazily on first use.

    A new ``SSHClient`` is built from ``self.ip_address`` and
    ``self.ssh_credentials`` only when an IP address is known and no client
    is cached yet. Without an IP address the cached value (possibly
    ``None``) is returned untouched.
    """
    must_create = self.ip_address and not self.ssh_client
    if must_create:
        self.ssh_client = SSHClient(self.ip_address, self.ssh_credentials)
    return self.ssh_client
def get_ssh_client(self):
    """
    Return an SSH client bound to the address this instance is reached on.

    The target is ``self.public_ip`` when ``self.public`` is truthy and
    ``self.private_ip`` otherwise. The cached client is reused only while
    its ``ip_address`` still equals that target; otherwise a new
    ``SSHClient`` replaces it.
    """
    target_ip = self.public_ip if self.public else self.private_ip
    current = self.ssh_client
    if not current or current.ip_address != target_ip:
        self.ssh_client = SSHClient(target_ip, self.ssh_credentials)
    return self.ssh_client
class VMInstance:
    """
    Handle for an already existing VM that is reached over SSH.

    The lifecycle methods (create/start/stop/delete) are no-ops in this
    class; only the SSH client management does real work.
    """

    def __init__(self, config):
        """
        :param config: mapping with the mandatory 'ip_address' key and the
            optional 'ssh_user' (default 'root'), 'ssh_password' and
            'ssh_key_filename' keys.
        :raises KeyError: if 'ip_address' is missing from ``config``.
        """
        # Keep the configuration on the instance: every lookup below reads
        # self.config, which would otherwise not exist.
        self.config = config
        self.ip_address = self.config['ip_address']

        self.ssh_client = None
        self.ssh_credentials = {
            'username': self.config.get('ssh_user', 'root'),
            'password': self.config.get('ssh_password', None),
            'key_filename': self.config.get('ssh_key_filename', None)
        }
        logger.debug('{} created'.format(self))

    def __str__(self):
        return 'VM instance {}'.format(self.ip_address)

    def get_ssh_client(self):
        """
        Return the cached SSH client, creating it on first use.

        A client is only created when an IP address is set; otherwise the
        cached value (possibly None) is returned as is.
        """
        if self.ip_address and not self.ssh_client:
            self.ssh_client = SSHClient(self.ip_address, self.ssh_credentials)
        return self.ssh_client

    def del_ssh_client(self):
        """
        Close and forget the cached SSH client, if there is one.
        """
        if self.ssh_client:
            self.ssh_client.close()
            self.ssh_client = None

    def create(self):
        """No-op in this class."""
        pass

    def start(self):
        """No-op in this class."""
        pass

    def stop(self):
        """No-op in this class."""
        pass

    def delete(self):
        """No-op in this class."""
        pass
def get_ssh_client(self):
    """
    Return the cached SSH client, validating the key pair first if needed.

    For a public instance that has an instance id and has not been
    validated yet, the local private key (``ssh_credentials['key_filename']``)
    is compared against every public key listed in the instance
    initialization data. The instance is marked as validated on the first
    match.

    Afterwards a client is created lazily against ``public_ip`` (preferred)
    or ``private_ip`` when at least one of them is set.

    :return: the cached ``SSHClient``, or ``None`` when no IP is known and
        no client was cached.
    :raises LithopsValidationError: if the private key file does not exist
        or none of the instance public keys matches it.
    """
    if not self.validated and self.public and self.instance_id:
        # Validate that the private ssh key in ssh_credentials pairs with
        # one of the public keys registered on the instance.
        key_filename = os.path.abspath(
            os.path.expanduser(self.ssh_credentials['key_filename']))

        if not os.path.exists(key_filename):
            raise LithopsValidationError(
                f"Private key file {key_filename} doesn't exist")

        initialization_data = self.ibm_vpc_client.get_instance_initialization(
            self.instance_id).get_result()

        private_res = paramiko.RSAKey(filename=key_filename).get_base64()

        # Names of the keys that did not match, reported in the error.
        names = []
        for k in initialization_data['keys']:
            # The stored key is split on spaces and the second field is
            # compared with the base64 form of the local key.
            public_res = self.ibm_vpc_client.get_key(
                k['id']).get_result()['public_key'].split(' ')[1]
            if public_res == private_res:
                self.validated = True
                break
            names.append(k['name'])

        if not self.validated:
            raise LithopsValidationError(
                f"No public key from keys: {names} on master {self} not a pair for private ssh key {key_filename}"
            )

    if (self.private_ip or self.public_ip) and not self.ssh_client:
        self.ssh_client = SSHClient(self.public_ip or self.private_ip,
                                    self.ssh_credentials)

    return self.ssh_client
def run_job_on_worker(worker_info, call_ids_range, job_payload):
    """
    Install all the Lithops dependencies into the worker and run the job.

    Steps, in order: wait for the worker over SSH, upload the Lithops zip
    package, launch the setup script asynchronously, wait for the proxy,
    then POST the job payload (restricted to ``call_ids_range``) to the
    worker ``/run`` endpoint.

    :param worker_info: ``(instance_name, ip_address, instance_id)`` tuple.
    :param call_ids_range: call ids assigned to this worker; each one is
        converted with ``int()`` to index the byte ranges and joined with
        ``', '`` for logging.
    :param job_payload: job description dict. It is modified in place: the
        'call_ids' and 'data_byte_ranges' entries are overwritten.
    """
    instance_name, ip_address, instance_id = worker_info
    logger.info('Going to setup {}, IP address {}'.format(
        instance_name, ip_address))

    ssh_client = SSHClient(ip_address, STANDALONE_SSH_CREDNTIALS)
    try:
        wait_instance_ready(ssh_client)

        # upload zip lithops package
        logger.info(
            'Uploading lithops files to VM instance {}'.format(ip_address))
        ssh_client.upload_local_file('/opt/lithops/lithops_standalone.zip',
                                     '/tmp/lithops_standalone.zip')
        logger.info(
            'Executing lithops installation process on VM instance {}'.format(
                ip_address))

        vm_data = {
            'instance_name': instance_name,
            'ip_address': ip_address,
            'instance_id': instance_id
        }
        script = get_worker_setup_script(STANDALONE_CONFIG, vm_data)
        ssh_client.run_remote_command(script, run_async=True)
    finally:
        # Always release the connection, also when the readiness wait,
        # the upload or the script launch fails.
        ssh_client.close()

    # Wait until the proxy is ready
    wait_proxy_ready(ip_address)

    # NOTE(review): job_payload is mutated in place; if callers share one
    # payload dict between workers the byte ranges would be filtered
    # twice - confirm callers pass a private copy.
    dbr = job_payload['data_byte_ranges']
    job_payload['call_ids'] = call_ids_range
    job_payload['data_byte_ranges'] = [
        dbr[int(call_id)] for call_id in call_ids_range
    ]

    url = "http://{}:{}/run".format(ip_address, STANDALONE_SERVICE_PORT)
    r = requests.post(url, data=json.dumps(job_payload))
    response = r.json()

    if 'activationId' in response:
        logger.info('Calls {} invoked. Activation ID: {}'.format(
            ', '.join(call_ids_range), response['activationId']))
    else:
        logger.error('calls {} failed invocation: {}'.format(
            ', '.join(call_ids_range), response['error']))
class IBMVPCInstance:
    """
    Wrapper around a single IBM VPC virtual server instance.

    Holds the instance configuration, the VPC SDK client and a lazily
    created SSH client, and exposes create/start/stop/delete operations.
    """

    def __init__(self, name, ibm_vpc_config, ibm_vpc_client=None, public=False):
        """
        Initialize a IBMVPCInstance instance.

        :param name: instance name; stored lower-cased.
        :param ibm_vpc_config: backend configuration mapping.
        :param ibm_vpc_client: existing VPC SDK client; one is created from
            the configuration when omitted.
        :param public: whether this instance gets the floating IP attached
            on creation (see ``create``).
        """
        self.name = name.lower()
        self.config = ibm_vpc_config

        self.delete_on_dismantle = self.config['delete_on_dismantle']
        self.profile_name = self.config['profile_name']

        # _create_vpc_client() reads self.iam_api_key, which must exist
        # before the client is built.
        # NOTE(review): presumably the key is stored in the config under
        # 'iam_api_key' - confirm against the backend configuration.
        self.iam_api_key = self.config.get('iam_api_key')

        self.ibm_vpc_client = ibm_vpc_client or self._create_vpc_client()
        self.public = public

        self.ssh_client = None
        self.instance_id = None
        self.instance_data = None
        self.ip_address = None
        self.public_ip = None

        self.ssh_credentials = {
            'username': self.config['ssh_user'],
            # SSH_PASSWD is only used as the fallback for non-public VMs.
            'password': self.config.get('ssh_password',
                                        None if public else SSH_PASSWD),
            'key_filename': self.config.get('ssh_key_filename', None)
        }

    def __str__(self):
        return 'VM instance {} ({})'.format(self.name,
                                            self.public_ip or self.ip_address)

    def _create_vpc_client(self):
        """
        Creates an IBM VPC python-sdk instance
        """
        authenticator = IAMAuthenticator(self.iam_api_key)
        ibm_vpc_client = VpcV1('2021-01-19', authenticator=authenticator)
        ibm_vpc_client.set_service_url(self.config['endpoint'] + '/v1')

        return ibm_vpc_client

    def get_ssh_client(self):
        """
        Return the cached SSH client, creating it on first use.

        The client targets the public IP when set, else the private one.
        Nothing is created while neither address is known.
        """
        if (self.ip_address or self.public_ip) and not self.ssh_client:
            self.ssh_client = SSHClient(self.public_ip or self.ip_address,
                                        self.ssh_credentials)
        return self.ssh_client

    def del_ssh_client(self):
        """
        Deletes the ssh client
        """
        if self.ssh_client:
            try:
                self.ssh_client.close()
            except Exception:
                # Best effort: a failing close must not block the cleanup.
                pass
            self.ssh_client = None

    def _create_instance(self):
        """
        Creates a new VM instance.

        :return: the created instance data, or the data of the existing
            instance when the API reports that the name already exists.
        :raises ApiException: for every other API failure.
        """
        logger.debug("Creating new VM instance {}".format(self.name))

        security_group_identity_model = {'id': self.config['security_group_id']}
        subnet_identity_model = {'id': self.config['subnet_id']}
        primary_network_interface = {
            'name': 'eth0',
            'subnet': subnet_identity_model,
            'security_groups': [security_group_identity_model]
        }

        boot_volume_profile = {
            'capacity': 100,
            'name': '{}-boot'.format(self.name),
            'profile': {'name': self.config['volume_tier_name']}}

        boot_volume_attachment = {
            'delete_volume_on_instance_delete': True,
            'volume': boot_volume_profile
        }

        key_identity_model = {'id': self.config['key_id']}

        instance_prototype = {}
        instance_prototype['name'] = self.name
        instance_prototype['keys'] = [key_identity_model]
        instance_prototype['profile'] = {'name': self.profile_name}
        instance_prototype['resource_group'] = {'id': self.config['resource_group_id']}
        instance_prototype['vpc'] = {'id': self.config['vpc_id']}
        instance_prototype['image'] = {'id': self.config['image_id']}
        instance_prototype['zone'] = {'name': self.config['zone_name']}
        instance_prototype['boot_volume_attachment'] = boot_volume_attachment
        instance_prototype['primary_network_interface'] = primary_network_interface

        if not self.public:
            instance_prototype['user_data'] = CLOUD_CONFIG

        try:
            resp = self.ibm_vpc_client.create_instance(instance_prototype)
        except ApiException as e:
            if e.code == 400 and 'already exists' in e.message:
                return self.get_instance_data()
            elif e.code == 400 and 'over quota' in e.message:
                logger.debug("Create VM instance {} failed due to quota limit"
                             .format(self.name))
            else:
                logger.debug("Create VM instance {} failed with status code {}"
                             .format(self.name, str(e.code)))
            raise e

        logger.debug("VM instance {} created successfully ".format(self.name))

        return resp.result

    def _attach_floating_ip(self, instance):
        """
        Attach the configured floating IP to the given instance unless the
        check below considers it already attached.

        :param instance: instance data as returned by the VPC API.
        """
        fip = self.config['floating_ip']
        fip_id = self.config['floating_ip_id']

        # logger.debug('Attaching floating IP {} to VM instance {}'.format(fip, instance['id']))

        # we need to check if floating ip is not attached already. if not, attach it to instance
        instance_primary_ni = instance['primary_network_interface']

        if instance_primary_ni['primary_ipv4_address'] and instance_primary_ni['id'] == fip_id:
            # floating ip already attached. do nothing
            logger.debug('Floating IP {} already attached to eth0'.format(fip))
        else:
            self.ibm_vpc_client.add_instance_network_interface_floating_ip(
                instance['id'], instance['network_interfaces'][0]['id'], fip_id)

    def get_instance_data(self):
        """
        Look the instance up by name and cache its data.

        :return: the first instance returned for this name, or None when
            the listing is empty.
        """
        instances_data = self.ibm_vpc_client.list_instances(name=self.name).get_result()
        if len(instances_data['instances']) > 0:
            self.instance_data = instances_data['instances'][0]
            return self.instance_data
        return None

    def get_instance_id(self):
        """
        Returns the instance ID, or None when the instance does not exist.
        """
        instance_data = self.get_instance_data()
        if instance_data:
            self.instance_id = instance_data['id']
            return self.instance_id
        logger.debug('VM instance {} does not exists'.format(self.name))
        return None

    def _get_ip_address(self):
        """
        Requests the primary network IP address.

        Polls the instance until the API reports an address. Returns None
        immediately when no instance id is set.
        """
        # Local import so this method does not depend on a module-level
        # import of time.
        import time

        ip_address = None
        if self.instance_id:
            while not ip_address:
                instance_data = self.ibm_vpc_client.get_instance(self.instance_id).get_result()
                ip_address = instance_data['primary_network_interface']['primary_ipv4_address']
                if not ip_address:
                    # Pause between polls instead of hammering the API.
                    time.sleep(1)
        return ip_address

    def create(self, check_if_exists=False, start=True):
        """
        Creates a new VM instance.

        :param check_if_exists: look the instance up by name first and
            reuse it when found.
        :param start: start the instance when it already existed.
        :return: the instance id.
        """
        instance = None
        vsi_exists = True if self.instance_id else False

        if check_if_exists and not vsi_exists:
            logger.debug('Checking if VM instance {} already exists'.format(self.name))
            instances_data = self.get_instance_data()
            if instances_data:
                logger.debug('VM instance {} already exists'.format(self.name))
                vsi_exists = True
                self.instance_id = instances_data['id']

        if not vsi_exists:
            instance = self._create_instance()
            self.instance_id = instance['id']
            self.ip_address = self._get_ip_address()

        if self.public and instance:
            self._attach_floating_ip(instance)

        if start:
            # In IBM VPC, VM instances are automatically started on create
            if vsi_exists:
                self.start()

        return self.instance_id

    def start(self):
        """
        Request the 'start' action; a 404 from the API is ignored.
        """
        logger.debug("Starting VM instance {}".format(self.name))

        try:
            self.ibm_vpc_client.create_instance_action(self.instance_id, 'start')
        except ApiException as e:
            if e.code != 404:
                raise e

        logger.debug("VM instance {} started successfully".format(self.name))

    def _delete_instance(self):
        """
        Deletes the VM instance and resets the cached id, IP and SSH client.
        A 404 from the API is ignored.
        """
        logger.debug("Deleting VM instance {}".format(self.name))

        try:
            self.ibm_vpc_client.delete_instance(self.instance_id)
        except ApiException as e:
            if e.code != 404:
                raise e

        self.instance_id = None
        self.ip_address = None
        self.del_ssh_client()

    def _stop_instance(self):
        """
        Requests the 'stop' action; a 404 from the API is ignored.
        """
        logger.debug("Stopping VM instance {}".format(self.name))
        try:
            self.ibm_vpc_client.create_instance_action(self.instance_id, 'stop')
        except ApiException as e:
            if e.code != 404:
                raise e

    def stop(self):
        """
        Delete the instance when delete_on_dismantle is set, else stop it.
        """
        if self.delete_on_dismantle:
            self._delete_instance()
        else:
            self._stop_instance()

    def delete(self):
        """
        Deletes the VM instance
        """
        self._delete_instance()
class StandaloneHandler:
    """
    A StandaloneHandler object is used by invokers and other components to
    access underlying standalone backend without exposing the implementation
    details.
    """

    def __init__(self, standalone_config):
        """
        Load the configured backend and create the SSH client.

        :param standalone_config: mapping with at least the 'backend' and
            'runtime' keys plus a section named after the backend.
        :raises Exception: any error raised while importing or building
            the backend is logged and re-raised.
        """
        self.config = standalone_config
        self.backend_name = self.config['backend']
        self.runtime = self.config['runtime']
        self.is_lithops_worker = is_lithops_worker()

        self.start_timeout = self.config.get('start_timeout', 300)

        self.auto_dismantle = self.config.get('auto_dismantle')
        self.hard_dismantle_timeout = self.config.get('hard_dismantle_timeout')
        self.soft_dismantle_timeout = self.config.get('soft_dismantle_timeout')

        try:
            module_location = 'lithops.standalone.backends.{}'.format(
                self.backend_name)
            sb_module = importlib.import_module(module_location)
            StandaloneBackend = getattr(sb_module, 'StandaloneBackend')
            self.backend = StandaloneBackend(self.config[self.backend_name])
        except Exception as e:
            logger.error("There was an error trying to create the "
                         "{} standalone backend".format(self.backend_name))
            raise e

        self.log_monitors = {}

        self.ssh_credentials = self.backend.get_ssh_credentials()
        self.ip_address = self.backend.get_ip_address()

        from lithops.util.ssh_client import SSHClient
        self.ssh_client = SSHClient(self.ssh_credentials)

        logger.debug("Standalone handler created successfully")

    def _is_backend_ready(self):
        """
        Checks if the VM instance is ready to receive ssh connections.

        :return: True when running ``id`` over SSH does not raise.
        """
        try:
            self.ssh_client.run_remote_command(self.ip_address, 'id', timeout=2)
        except Exception:
            return False
        return True

    def _wait_backend_ready(self):
        """
        Waits until the VM instance is ready to receive ssh connections.

        :return: True as soon as the instance answers.
        :raises Exception: after ``start_timeout`` seconds; the backend is
            dismantled first.
        """
        logger.debug('Waiting VM instance to become ready')

        start = time.time()
        while (time.time() - start < self.start_timeout):
            if self._is_backend_ready():
                return True
            time.sleep(1)

        self.dismantle()
        raise Exception('VM readiness probe expired. Check your VM')

    def _start_backend(self):
        """
        Start the VM through the backend when it does not answer over SSH.
        """
        if not self._is_backend_ready():
            # The VM instance is stopped
            init_time = time.time()
            self.backend.start()
            self._wait_backend_ready()
            total_start_time = round(time.time() - init_time, 2)
            logger.info(
                'VM instance ready in {} seconds'.format(total_start_time))

    def _is_proxy_ready(self):
        """
        Checks if the proxy is ready to receive http connections.

        Inside a Lithops worker the proxy is pinged directly over HTTP;
        otherwise the ping is issued with curl through SSH.

        :return: True when the ping succeeds, False in every other case
            (including any exception).
        """
        try:
            if self.is_lithops_worker:
                url = "http://{}:{}/ping".format('127.0.0.1', PROXY_SERVICE_PORT)
                r = requests.get(url, timeout=1, verify=True)
                return r.status_code == 200

            # NOTE(review): the port is hard-coded here while the worker
            # branch uses PROXY_SERVICE_PORT - confirm they always match.
            cmd = 'curl -X GET http://127.0.0.1:8080/ping'
            out = self.ssh_client.run_remote_command(self.ip_address, cmd, timeout=2)
            data = json.loads(out)
            # Explicit boolean: an unexpected answer means "not ready".
            return data['response'] == 'pong'
        except Exception:
            return False

    def _wait_proxy_ready(self):
        """
        Waits until the proxy is ready to receive http connections.

        :return: True as soon as the proxy answers.
        :raises Exception: after ``start_timeout`` seconds; the backend is
            dismantled first.
        """
        logger.info('Waiting Lithops proxy to become ready')

        start = time.time()
        while (time.time() - start < self.start_timeout):
            if self._is_proxy_ready():
                return True
            time.sleep(1)

        self.dismantle()
        raise Exception('Proxy readiness probe expired. Check your VM')

    def _start_log_monitor(self, executor_id, job_id):
        """
        Starts a daemon thread that copies the remote job log into a local
        per-job file and appends it to FN_LOG_FILE. Nothing is started when
        running inside a Lithops worker.
        """
        job_key = create_job_key(executor_id, job_id)

        def log_monitor():
            os.makedirs(LOGS_DIR, exist_ok=True)
            log_file = os.path.join(LOGS_DIR, job_key + '.log')

            # Context managers make sure both files are closed when the
            # monitor ends, whatever the reason.
            with open(log_file, 'wb') as fdout_0, \
                    open(FN_LOG_FILE, 'ab') as fdout_1:
                ssh_client = self.ssh_client.create_client(self.ip_address)
                cmd = 'tail -n +1 -F /tmp/lithops/logs/{}.log'.format(job_key)
                stdin, stdout, stderr = ssh_client.exec_command(cmd)
                channel = stdout.channel
                stdin.close()
                channel.shutdown_write()

                data = None
                while not channel.closed:
                    try:
                        readq, _, _ = select.select([channel], [], [], 10)
                        if readq and readq[0].recv_ready():
                            data = channel.recv(len(readq[0].in_buffer))
                            fdout_0.write(data)
                            fdout_0.flush()
                            fdout_1.write(data)
                            fdout_1.flush()
                        else:
                            # Once some output was seen, the job's .done
                            # marker file signals the end of the log.
                            if data:
                                cmd = 'ls /tmp/lithops/jobs/{}.done'.format(
                                    job_key)
                                _, out, _ = ssh_client.exec_command(cmd)
                                if out.read().decode().strip():
                                    break
                            time.sleep(0.5)
                    except Exception:
                        # Best effort: keep polling on transient errors.
                        pass

        if not self.is_lithops_worker:
            Thread(target=log_monitor, daemon=True).start()
            logger.debug('ExecutorID {} | JobID {} - Remote log monitor '
                         'started'.format(executor_id, job_id))

    def run_job(self, job_payload):
        """
        Run the job description against the selected environment.

        :param job_payload: job description; must contain 'executor_id'
            and 'job_id'.
        :return: the 'activationId' field of the proxy response.
        """
        executor_id = job_payload['executor_id']
        job_id = job_payload['job_id']
        job_key = create_job_key(executor_id, job_id)
        log_file = os.path.join(LOGS_DIR, job_key + '.log')

        if not self._is_proxy_ready():
            # The VM instance is stopped
            init_time = time.time()
            self.backend.start()
            self._wait_proxy_ready()
            total_start_time = round(time.time() - init_time, 2)
            logger.info(
                'VM instance ready in {} seconds'.format(total_start_time))

        self._start_log_monitor(executor_id, job_id)

        logger.info('ExecutorID {} | JobID {} - Running job'.format(
            executor_id, job_id))
        logger.info("View execution logs at {}".format(log_file))

        if self.is_lithops_worker:
            url = "http://{}:{}/run".format('127.0.0.1', PROXY_SERVICE_PORT)
            r = requests.post(url, data=json.dumps(job_payload), verify=True)
            response = r.json()
        else:
            # The JSON payload is shell-quoted before being embedded.
            cmd = ('curl -X POST http://127.0.0.1:8080/run -d {} '
                   '-H \'Content-Type: application/json\''.format(
                       shlex.quote(json.dumps(job_payload))))
            out = self.ssh_client.run_remote_command(self.ip_address, cmd)
            response = json.loads(out)

        return response['activationId']

    def create_runtime(self, runtime):
        """
        Installs the proxy and extracts the runtime metadata and
        preinstalled modules.

        :param runtime: runtime name sent to the proxy.
        :return: the decoded JSON answer of the ``/preinstalls`` endpoint.
        """
        self._start_backend()
        self._setup_proxy()
        self._wait_proxy_ready()

        logger.debug('Extracting runtime metadata information')
        payload = {'runtime': runtime}

        if self.is_lithops_worker:
            url = "http://{}:{}/preinstalls".format('127.0.0.1',
                                                    PROXY_SERVICE_PORT)
            r = requests.get(url, data=json.dumps(payload), verify=True)
            runtime_meta = r.json()
        else:
            cmd = ('curl http://127.0.0.1:8080/preinstalls -d {} '
                   '-H \'Content-Type: application/json\' -X GET'.format(
                       shlex.quote(json.dumps(payload))))
            out = self.ssh_client.run_remote_command(self.ip_address, cmd)
            runtime_meta = json.loads(out)

        return runtime_meta

    def get_runtime_key(self, runtime_name):
        """
        Wrapper method that returns a formated string that represents the
        runtime key. Each backend has its own runtime key format. Used to
        store modules preinstalls into the storage
        """
        return self.backend.get_runtime_key(runtime_name)

    def dismantle(self):
        """
        Stop VM instance
        """
        self.backend.stop()

    def init(self):
        """
        Start the VM instance and initialize runtime
        """
        self._start_backend()

        # Not sure if mandatory, but sleep several seconds to let proxy server start
        time.sleep(2)

        # if proxy not started, install it
        if not self._is_proxy_ready():
            self._setup_proxy()

        self._wait_proxy_ready()

    def clean(self):
        """No-op."""
        pass

    def clear(self):
        """No-op."""
        pass

    def _setup_proxy(self):
        """
        Upload the proxy service file, config and handler zip to the VM and
        launch the installation commands in the background.
        """
        logger.debug('Installing Lithops proxy in the VM instance')
        logger.debug(
            'Be patient, installation process can take up to 3 minutes '
            'if this is the first time you use the VM instance')

        service_file = '/etc/systemd/system/{}'.format(PROXY_SERVICE_NAME)
        self.ssh_client.upload_data_to_file(self.ip_address,
                                            PROXY_SERVICE_FILE, service_file)

        cmd = 'rm -R {}; mkdir -p {}; '.format(REMOTE_INSTALL_DIR,
                                               REMOTE_INSTALL_DIR)
        cmd += 'systemctl daemon-reload; systemctl stop {}; '.format(
            PROXY_SERVICE_NAME)
        self.ssh_client.run_remote_command(self.ip_address, cmd)

        config_file = os.path.join(REMOTE_INSTALL_DIR, 'config')
        self.ssh_client.upload_data_to_file(self.ip_address,
                                            json.dumps(self.config),
                                            config_file)

        src_proxy = os.path.join(os.path.dirname(__file__), 'proxy.py')
        create_handler_zip(FH_ZIP_LOCATION, src_proxy)
        self.ssh_client.upload_local_file(self.ip_address, FH_ZIP_LOCATION,
                                          '/tmp/lithops_standalone.zip')
        os.remove(FH_ZIP_LOCATION)

        # Install dependencies
        cmd = 'mkdir -p /tmp/lithops; '
        cmd += 'apt-get update >> /tmp/lithops/proxy.log; '
        cmd += 'apt-get install unzip python3-pip -y >> /tmp/lithops/proxy.log; '
        cmd += 'pip3 install flask gevent pika==0.13.1 >> /tmp/lithops/proxy.log; '
        cmd += 'unzip -o /tmp/lithops_standalone.zip -d {} > /dev/null 2>&1; '.format(
            REMOTE_INSTALL_DIR)
        cmd += 'rm /tmp/lithops_standalone.zip; '
        cmd += 'chmod 644 {}; '.format(service_file)
        # Start proxy service
        cmd += 'systemctl daemon-reload; '
        cmd += 'systemctl stop {}; '.format(PROXY_SERVICE_NAME)
        cmd += 'systemctl enable {}; '.format(PROXY_SERVICE_NAME)
        cmd += 'systemctl start {}; '.format(PROXY_SERVICE_NAME)
        self.ssh_client.run_remote_command(self.ip_address, cmd,
                                           background=True)
class IBMVPCInstance:
    """
    Wrapper around a single IBM VPC virtual server instance.

    Holds the instance configuration, the VPC SDK client and a lazily
    created SSH client, and exposes readiness probes plus
    create/start/stop/delete operations.
    """

    def __init__(self, name, ibm_vpc_config, ibm_vpc_client=None, public=False):
        """
        Initialize a IBMVPCInstance instance.

        :param name: instance name; stored lower-cased.
        :param ibm_vpc_config: backend configuration mapping.
        :param ibm_vpc_client: existing VPC SDK client; one is created from
            the configuration when omitted.
        :param public: whether this instance gets the floating IP attached
            on creation and has its SSH key pair validated.
        """
        self.name = name.lower()
        self.config = ibm_vpc_config

        self.delete_on_dismantle = self.config['delete_on_dismantle']
        self.profile_name = self.config['profile_name']

        # _create_vpc_client() reads self.iam_api_key, which must exist
        # before the client is built.
        # NOTE(review): presumably the key is stored in the config under
        # 'iam_api_key' - confirm against the backend configuration.
        self.iam_api_key = self.config.get('iam_api_key')

        self.ibm_vpc_client = ibm_vpc_client or self._create_vpc_client()
        self.public = public

        self.ssh_client = None
        self.instance_id = None
        self.instance_data = None
        self.private_ip = None
        self.public_ip = None
        self.home_dir = '/root'

        self.ssh_credentials = {
            'username': self.config['ssh_username'],
            'password': self.config['ssh_password'],
            'key_filename': self.config.get('ssh_key_filename', '~/.ssh/id_rsa')
        }
        # Set once the local private key was matched against the instance.
        self.validated = False

    def __str__(self):
        return f'VM instance {self.name} ({self.public_ip or self.private_ip})'

    def _create_vpc_client(self):
        """
        Creates an IBM VPC python-sdk instance
        """
        authenticator = IAMAuthenticator(self.iam_api_key)
        ibm_vpc_client = VpcV1(VPC_API_VERSION, authenticator=authenticator)
        ibm_vpc_client.set_service_url(self.config['endpoint'] + '/v1')

        # decorate instance public methods with except/retry logic.
        # The freshly built client is decorated: self.ibm_vpc_client is not
        # assigned yet while this method runs from __init__.
        decorate_instance(ibm_vpc_client, vpc_retry_on_except)

        return ibm_vpc_client

    def get_ssh_client(self):
        """
        Return the cached SSH client, validating the key pair first if
        needed.

        For a public instance with an instance id that was not validated
        yet, the local private key is compared with every public key listed
        in the instance initialization data. Afterwards a client is created
        lazily against the public IP (preferred) or the private one.

        :raises LithopsValidationError: if the private key file does not
            exist or no instance public key matches it.
        """
        if not self.validated and self.public and self.instance_id:
            # validate that private ssh key in ssh_credentials is a pair of public key on instance
            key_filename = os.path.abspath(
                os.path.expanduser(self.ssh_credentials['key_filename']))

            if not os.path.exists(key_filename):
                raise LithopsValidationError(
                    f"Private key file {key_filename} doesn't exist")

            initialization_data = self.ibm_vpc_client.get_instance_initialization(
                self.instance_id).get_result()

            private_res = paramiko.RSAKey(filename=key_filename).get_base64()

            # Names of the keys that did not match, reported in the error.
            names = []
            for k in initialization_data['keys']:
                public_res = self.ibm_vpc_client.get_key(
                    k['id']).get_result()['public_key'].split(' ')[1]
                if public_res == private_res:
                    self.validated = True
                    break
                names.append(k['name'])

            if not self.validated:
                raise LithopsValidationError(
                    f"No public key from keys: {names} on master {self} not a pair for private ssh key {key_filename}"
                )

        if (self.private_ip or self.public_ip) and not self.ssh_client:
            self.ssh_client = SSHClient(self.public_ip or self.private_ip,
                                        self.ssh_credentials)

        return self.ssh_client

    def del_ssh_client(self):
        """
        Deletes the ssh client
        """
        if self.ssh_client:
            try:
                self.ssh_client.close()
            except Exception:
                # Best effort: a failing close must not block the cleanup.
                pass
            self.ssh_client = None

    def is_ready(self, verbose=False):
        """
        Checks if the VM instance is ready to receive ssh connections.

        :param verbose: log the failure reason at debug level.
        :return: True when ``id`` runs over SSH; False on any error other
            than a validation error (the SSH client is dropped then).
        :raises LithopsValidationError: propagated from the key validation.
        """
        login_type = 'password' if 'password' in self.ssh_credentials and \
            not self.public else 'publickey'
        try:
            self.get_ssh_client().run_remote_command('id')
        except LithopsValidationError as e:
            raise e
        except Exception as e:
            if verbose:
                logger.debug(
                    f'SSH to {self.private_ip} failed ({login_type}): {e}')
            self.del_ssh_client()
            return False
        return True

    def wait_ready(self, verbose=False):
        """
        Waits until the VM instance is ready to receive ssh connections.

        :return: True once the instance answers.
        :raises TimeoutError: after INSTANCE_START_TIMEOUT seconds.
        """
        logger.debug(f'Waiting {self} to become ready')

        start = time.time()
        while (time.time() - start < INSTANCE_START_TIMEOUT):
            if self.is_ready(verbose=verbose):
                start_time = round(time.time() - start, 2)
                logger.debug(f'{self} ready in {start_time} seconds')
                return True
            time.sleep(5)

        raise TimeoutError(f'Readiness probe expired on {self}')

    def _create_instance(self, user_data):
        """
        Creates a new VM instance.

        :param user_data: optional user data attached to the prototype.
        :return: the created instance data, or the data of the existing
            instance when the API reports that the name already exists.
        :raises ApiException: for every other API failure.
        """
        logger.debug("Creating new VM instance {}".format(self.name))

        security_group_identity_model = {
            'id': self.config['security_group_id']
        }
        subnet_identity_model = {'id': self.config['subnet_id']}
        primary_network_interface = {
            'name': 'eth0',
            'subnet': subnet_identity_model,
            'security_groups': [security_group_identity_model]
        }

        boot_volume_data = {
            'capacity': self.config['boot_volume_capacity'],
            # Short random suffix appended to the boot volume name.
            'name': '{}-{}-boot'.format(self.name, str(uuid.uuid4())[:4]),
            'profile': {
                'name': self.config['boot_volume_profile']
            }
        }

        boot_volume_attachment = {
            'delete_volume_on_instance_delete': True,
            'volume': boot_volume_data
        }

        key_identity_model = {'id': self.config['key_id']}

        instance_prototype = {}
        instance_prototype['name'] = self.name
        instance_prototype['keys'] = [key_identity_model]
        instance_prototype['profile'] = {'name': self.profile_name}
        instance_prototype['resource_group'] = {
            'id': self.config['resource_group_id']
        }
        instance_prototype['vpc'] = {'id': self.config['vpc_id']}
        instance_prototype['image'] = {'id': self.config['image_id']}
        instance_prototype['zone'] = {'name': self.config['zone_name']}
        instance_prototype['boot_volume_attachment'] = boot_volume_attachment
        instance_prototype[
            'primary_network_interface'] = primary_network_interface

        if user_data:
            instance_prototype['user_data'] = user_data

        try:
            resp = self.ibm_vpc_client.create_instance(instance_prototype)
        except ApiException as e:
            if e.code == 400 and 'already exists' in e.message:
                return self.get_instance_data()
            elif e.code == 400 and 'over quota' in e.message:
                logger.debug(
                    "Create VM instance {} failed due to quota limit".format(
                        self.name))
            else:
                logger.debug(
                    "Create VM instance {} failed with status code {}: {}".
                    format(self.name, str(e.code), e.message))
            raise e

        logger.debug("VM instance {} created successfully ".format(self.name))

        return resp.result

    def _attach_floating_ip(self, instance):
        """
        Attach the configured floating IP to the given instance unless the
        check below considers it already attached.

        :param instance: instance data as returned by the VPC API.
        """
        fip = self.config['floating_ip']
        fip_id = self.config['floating_ip_id']

        # logger.debug('Attaching floating IP {} to VM instance {}'.format(fip, instance['id']))

        # we need to check if floating ip is not attached already. if not, attach it to instance
        instance_primary_ni = instance['primary_network_interface']

        if instance_primary_ni['primary_ipv4_address'] and instance_primary_ni[
                'id'] == fip_id:
            # floating ip already attached. do nothing
            logger.debug('Floating IP {} already attached to eth0'.format(fip))
        else:
            self.ibm_vpc_client.add_instance_network_interface_floating_ip(
                instance['id'], instance['network_interfaces'][0]['id'],
                fip_id)

    def get_instance_data(self):
        """
        Look the instance up by name and cache its data.

        :return: the first instance returned for this name, or None when
            the listing is empty.
        """
        instances_data = self.ibm_vpc_client.list_instances(
            name=self.name).get_result()
        if len(instances_data['instances']) > 0:
            self.instance_data = instances_data['instances'][0]
            return self.instance_data
        return None

    def get_instance_id(self):
        """
        Returns the instance ID, or None when the instance does not exist.
        """
        instance_data = self.get_instance_data()
        if instance_data:
            self.instance_id = instance_data['id']
            return self.instance_id
        logger.debug('VM instance {} does not exists'.format(self.name))
        return None

    def get_private_ip(self):
        """
        Requests the private IP address.

        Polls the instance data once per second while the cached address
        is empty or still '0.0.0.0'.
        """
        while not self.private_ip or self.private_ip == '0.0.0.0':
            time.sleep(1)
            instance_data = self.get_instance_data()
            self.private_ip = instance_data['primary_network_interface'][
                'primary_ipv4_address']

        return self.private_ip

    def get_public_ip(self):
        """
        Return the public IP address of a public instance, else None.
        """
        if self.public and self.public_ip:
            return self.public_ip

        return None

    def create(self, check_if_exists=False, user_data=None):
        """
        Creates a new VM instance, or starts it when it already exists.

        :param check_if_exists: look the instance up by name first and
            reuse it when found.
        :param user_data: optional user data for a newly created instance.
        :return: the instance id.
        """
        instance = None
        vsi_exists = True if self.instance_id else False

        if check_if_exists and not vsi_exists:
            logger.debug('Checking if VM instance {} already exists'.format(
                self.name))
            instances_data = self.get_instance_data()
            if instances_data:
                logger.debug('VM instance {} already exists'.format(self.name))
                vsi_exists = True
                self.instance_id = instances_data['id']

        if not vsi_exists:
            instance = self._create_instance(user_data=user_data)
            self.instance_id = instance['id']
            self.private_ip = self.get_private_ip()
        else:
            self.start()

        if self.public and instance:
            self._attach_floating_ip(instance)

        return self.instance_id

    def start(self):
        """
        Request the 'start' action; a 404 from the API is ignored.
        """
        logger.debug("Starting VM instance {}".format(self.name))

        try:
            self.ibm_vpc_client.create_instance_action(self.instance_id,
                                                       'start')
        except ApiException as e:
            if e.code != 404:
                raise e

        logger.debug("VM instance {} started successfully".format(self.name))

    def _delete_instance(self):
        """
        Deletes the VM instance and resets the cached id, IP and SSH client.
        A 404 from the API is ignored.
        """
        logger.debug("Deleting VM instance {}".format(self.name))

        try:
            self.ibm_vpc_client.delete_instance(self.instance_id)
        except ApiException as e:
            if e.code != 404:
                raise e

        self.instance_id = None
        self.private_ip = None
        self.del_ssh_client()

    def _stop_instance(self):
        """
        Requests the 'stop' action; a 404 from the API is ignored.
        """
        logger.debug("Stopping VM instance {}".format(self.name))
        try:
            self.ibm_vpc_client.create_instance_action(self.instance_id,
                                                       'stop')
        except ApiException as e:
            if e.code != 404:
                raise e

    def stop(self):
        """
        Delete the instance when delete_on_dismantle is set, else stop it.
        """
        if self.delete_on_dismantle:
            self._delete_instance()
        else:
            self._stop_instance()

    def delete(self):
        """
        Deletes the VM instance
        """
        self._delete_instance()

    def validate_capabilities(self):
        """
        Validate hardware/os requirements specified in backend config.

        When 'singlesocket' is enabled in the config, the socket id of
        every CPU is listed with ``lscpu`` over SSH and exactly one
        distinct id is required.

        :raises LithopsValidationError: if more (or fewer) than one
            distinct socket id is reported.
        """
        if self.config.get('singlesocket'):
            cmd = "lscpu -p=socket|grep -v '#'"
            res = self.get_ssh_client().run_remote_command(cmd)
            # One socket id per output line; compare whole lines so that
            # ids with several digits count as a single socket.
            # assumes run_remote_command returns the output as str - TODO confirm
            sockets = {
                line.strip() for line in res.splitlines() if line.strip()
            }
            if len(sockets) != 1:
                raise LithopsValidationError(
                    f'Not using single CPU socket as specified, using {len(sockets)} sockets instead'
                )
class VMInstance:
    """
    Handle for an already existing VM that is reached over SSH.

    The same configured address is used as both public and private IP. The
    lifecycle methods (create/start/stop/delete) are no-ops in this class.
    """

    def __init__(self, config):
        """
        :param config: mapping with the mandatory 'ip_address' key and the
            optional 'ssh_user' (default 'root'), 'ssh_password' and
            'ssh_key_filename' (default '~/.ssh/id_rsa') keys.
        :raises KeyError: if 'ip_address' is missing from ``config``.
        """
        # Keep the configuration on the instance: every lookup below reads
        # self.config, which would otherwise not exist.
        self.config = config
        self.public_ip = self.private_ip = self.config['ip_address']

        self.ssh_client = None
        self.ssh_credentials = {
            'username': self.config.get('ssh_user', 'root'),
            'password': self.config.get('ssh_password', None),
            'key_filename': self.config.get('ssh_key_filename', '~/.ssh/id_rsa')
        }
        logger.debug('{} created'.format(self))

    def __str__(self):
        # The address lives in public_ip/private_ip; this class has no
        # ip_address attribute.
        return 'VM instance {}'.format(self.public_ip)

    def get_ssh_client(self):
        """
        Return the cached SSH client, creating it on first use.

        A client is only created when the public IP is set; otherwise the
        cached value (possibly None) is returned as is.
        """
        if self.public_ip and not self.ssh_client:
            self.ssh_client = SSHClient(self.public_ip, self.ssh_credentials)
        return self.ssh_client

    def del_ssh_client(self):
        """
        Deletes the ssh client
        """
        if self.ssh_client:
            try:
                self.ssh_client.close()
            except Exception:
                # Best effort: a failing close must not block the cleanup.
                pass
            self.ssh_client = None

    def is_ready(self, verbose=False):
        """
        Checks if the VM is ready to receive ssh connections.

        :param verbose: log the failure reason at debug level.
        :return: True when ``id`` runs over SSH; False on any error other
            than a validation error (the SSH client is dropped then).
        :raises LithopsValidationError: re-raised unchanged.
        """
        try:
            self.get_ssh_client().run_remote_command('id')
        except LithopsValidationError as e:
            raise e
        except Exception as e:
            if verbose:
                logger.debug(f'ssh to {self.private_ip} failed: {e}')
            self.del_ssh_client()
            return False
        return True

    def wait_ready(self, verbose=False):
        """
        Waits until the VM is ready to receive ssh connections.

        :return: True once the VM answers.
        :raises TimeoutError: after INSTANCE_START_TIMEOUT seconds.
        """
        logger.debug(f'Waiting {self} to become ready')

        start = time.time()
        while (time.time() - start < INSTANCE_START_TIMEOUT):
            if self.is_ready(verbose=verbose):
                start_time = round(time.time() - start, 2)
                logger.debug(f'{self} ready in {start_time} seconds')
                return True
            time.sleep(5)

        raise TimeoutError(f'Readiness probe expired on {self}')

    def get_public_ip(self):
        """
        Return the configured public IP address.
        """
        return self.public_ip

    def create(self, **kwargs):
        """No-op in this class."""
        pass

    def start(self):
        """No-op in this class."""
        pass

    def stop(self):
        """No-op in this class."""
        pass

    def delete(self):
        """No-op in this class."""
        pass
class EC2Instance:
    """
    Wrapper around a single AWS EC2 virtual machine.

    It keeps the instance identifiers and addresses, creates, starts, stops
    and terminates the instance through a boto3 EC2 client, and provides an
    SSH client against the public IP (``public=True``) or the private IP.
    """

    def __init__(self, name, ec2_config, ec2_client=None, public=False):
        """
        Initialize a EC2Instance instance
        VMs can have master role, this means they will have a public IP address

        :param name: instance name; stored lower-cased and used as the
            value of the ``Name`` tag
        :param ec2_config: backend configuration dict
        :param ec2_client: existing boto3 EC2 client; one is created from
            the configuration when omitted
        :param public: whether the instance is addressed through its public IP
        """
        self.name = name.lower()
        self.config = ec2_config

        self.delete_on_dismantle = self.config['delete_on_dismantle']
        self.instance_type = self.config['worker_instance_type']
        self.region = self.config['region_name']
        self.spot_instance = self.config['request_spot_instances']

        self.ec2_client = ec2_client or self._create_ec2_client()
        self.public = public

        self.ssh_client = None
        self.instance_id = None
        self.instance_data = None
        self.private_ip = None
        # '0.0.0.0' is the "not known yet" placeholder for the public IP
        self.public_ip = '0.0.0.0'
        self.fast_io = self.config.get('fast_io', False)
        self.home_dir = '/home/ubuntu'

        self.ssh_credentials = {
            'username': self.config['ssh_username'],
            'password': self.config['ssh_password'],
            'key_filename': self.config.get('ssh_key_filename', '~/.ssh/id_rsa')
        }

    def __str__(self):
        ip = self.public_ip if self.public else self.private_ip

        if ip is None or ip == '0.0.0.0':
            return f'VM instance {self.name}'
        else:
            return f'VM instance {self.name} ({ip})'

    def _create_ec2_client(self):
        """
        Creates an EC2 boto3 client from the credentials, region and
        user agent found in the backend configuration
        """
        client_config = botocore.client.Config(
            user_agent_extra=self.config['user_agent']
        )

        # The configuration is stored in self.config; there is no
        # self.ec2_config attribute on this class.
        ec2_client = boto3.client(
            'ec2',
            aws_access_key_id=self.config['access_key_id'],
            aws_secret_access_key=self.config['secret_access_key'],
            config=client_config,
            region_name=self.region
        )

        return ec2_client

    def get_ssh_client(self):
        """
        Returns an SSH client against the public IP for public instances,
        or against the private IP otherwise. The cached client is replaced
        when the target address has changed.
        """
        ip_address = self.public_ip if self.public else self.private_ip

        if not self.ssh_client or self.ssh_client.ip_address != ip_address:
            self.ssh_client = SSHClient(ip_address, self.ssh_credentials)

        return self.ssh_client

    def del_ssh_client(self):
        """
        Closes and forgets the current SSH client, if any
        """
        if self.ssh_client:
            try:
                self.ssh_client.close()
            except Exception:
                # Best effort: a failure while closing must not prevent
                # the client from being discarded.
                pass
            self.ssh_client = None

    def is_ready(self, verbose=False):
        """
        Checks if the VM instance is ready to receive ssh connections

        :param verbose: when True, the failure reason is logged at debug level
        :return: True if the remote command ``id`` could be executed,
            False otherwise
        :raises LithopsValidationError: propagated unchanged
        """
        login_type = 'password' if 'password' in self.ssh_credentials and \
            not self.public else 'publickey'
        try:
            self.get_ssh_client().run_remote_command('id')
        except LithopsValidationError:
            raise
        except Exception as e:
            if verbose:
                # Report the address that get_ssh_client() actually targets
                ip_address = self.public_ip if self.public else self.private_ip
                logger.debug(f'SSH to {ip_address} failed ({login_type}): {e}')
            # Drop the client so the next probe starts from a fresh connection
            self.del_ssh_client()
            return False
        return True

    def wait_ready(self, verbose=False):
        """
        Waits until the VM instance is ready to receive ssh connections

        Probes ``is_ready`` every 5 seconds for up to
        INSTANCE_START_TIMEOUT seconds.

        :return: True as soon as a probe succeeds
        :raises TimeoutError: if no probe succeeds within the timeout
        """
        logger.debug(f'Waiting {self} to become ready')

        start = time.time()
        while time.time() - start < INSTANCE_START_TIMEOUT:
            if self.is_ready(verbose=verbose):
                start_time = round(time.time() - start, 2)
                logger.debug(f'{self} ready in {start_time} seconds')
                return True
            time.sleep(5)

        raise TimeoutError(f'Readiness probe expired on {self}')

    def _create_instance(self, user_data=None):
        """
        Creates a new VM instance

        A spot request is used when spot instances are enabled and the
        instance is not public; otherwise the instance is launched with
        ``run_instances``.

        :param user_data: optional user-data script for the instance
        :return: the instance description dict returned by EC2
        :raises Exception: if a spot request reaches the 'failed' state
        """
        if self.fast_io:
            block_device_mappings = [
                {
                    'DeviceName': '/dev/xvda',
                    'Ebs': {
                        'VolumeSize': 100,
                        'DeleteOnTermination': True,
                        'VolumeType': 'gp2',
                        # 'Iops' : 10000,
                    },
                },
            ]
        else:
            block_device_mappings = None

        launch_specification = {
            "ImageId": self.config['target_ami'],
            "InstanceType": self.instance_type,
            "SecurityGroupIds": [self.config['security_group_id']],
            "EbsOptimized": False,
            "IamInstanceProfile": {'Name': self.config['iam_role']},
            "Monitoring": {'Enabled': False}
        }

        if block_device_mappings is not None:
            launch_specification['BlockDeviceMappings'] = block_device_mappings
        if 'key_name' in self.config:
            launch_specification['KeyName'] = self.config['key_name']

        if self.spot_instance and not self.public:

            logger.debug("Creating new VM instance {} (Spot)".format(self.name))

            if user_data:
                # Allow master VM to access workers trough ssh password
                launch_specification['UserData'] = b64s(user_data)

            spot_requests = self.ec2_client.request_spot_instances(
                SpotPrice=str(self.config['spot_price']),
                InstanceCount=1,
                LaunchSpecification=launch_specification)['SpotInstanceRequests']

            request_ids = [r['SpotInstanceRequestId'] for r in spot_requests]
            pending_request_ids = request_ids

            # Poll every 3 seconds until no request remains in the 'open' state
            while pending_request_ids:
                time.sleep(3)
                spot_requests = self.ec2_client.describe_spot_instance_requests(
                    SpotInstanceRequestIds=request_ids)['SpotInstanceRequests']

                failed_requests = [r for r in spot_requests if r['State'] == 'failed']
                if failed_requests:
                    failure_reasons = {r['Status']['Code'] for r in failed_requests}
                    logger.debug(failure_reasons)
                    raise Exception(
                        "The spot request failed for the following reason{s}: {reasons}"
                        .format(
                            s='' if len(failure_reasons) == 1 else 's',
                            reasons=', '.join(failure_reasons)))

                pending_request_ids = [
                    r['SpotInstanceRequestId'] for r in spot_requests
                    if r['State'] == 'open']

            # The Name tag is applied after the request is fulfilled
            self.ec2_client.create_tags(
                Resources=[r['InstanceId'] for r in spot_requests],
                Tags=[{'Key': 'Name', 'Value': self.name}]
            )

            filters = [{'Name': 'instance-id', 'Values': [r['InstanceId'] for r in spot_requests]}]
            resp = self.ec2_client.describe_instances(Filters=filters)['Reservations'][0]

        else:
            logger.debug("Creating new VM instance {}".format(self.name))

            launch_specification['MinCount'] = 1
            launch_specification['MaxCount'] = 1
            launch_specification["TagSpecifications"] = [{"ResourceType": "instance", "Tags": [{'Key': 'Name', 'Value': self.name}]}]
            launch_specification["InstanceInitiatedShutdownBehavior"] = 'terminate' if self.delete_on_dismantle else 'stop'

            if user_data:
                launch_specification['UserData'] = user_data

            # if not self.public:
            #  launch_specification['NetworkInterfaces'] = [{'AssociatePublicIpAddress': False, 'DeviceIndex': 0}]

            resp = self.ec2_client.run_instances(**launch_specification)

        logger.debug("VM instance {} created successfully ".format(self.name))

        return resp['Instances'][0]

    def get_instance_data(self):
        """
        Returns the instance information

        The instance is looked up by ID when the ID is known, otherwise by
        its ``Name`` tag. The result is also cached in ``self.instance_data``.

        :return: the instance description dict, or None if nothing was found
        """
        if self.instance_id:
            resp = self.ec2_client.describe_instances(InstanceIds=[self.instance_id])
            reservations = resp['Reservations']
            # Guard against an empty reservation list instead of raising IndexError
            instances = reservations[0]['Instances'] if reservations else []
            if len(instances) > 0:
                self.instance_data = instances[0]
                return self.instance_data
        else:
            filters = [{'Name': 'tag:Name', 'Values': [self.name]}]
            resp = self.ec2_client.describe_instances(Filters=filters)
            if len(resp['Reservations']) > 0:
                self.instance_data = resp['Reservations'][0]['Instances'][0]
                return self.instance_data

        return None

    def get_instance_id(self):
        """
        Returns the instance ID, looking it up through the instance data
        when it is not known yet. Returns None if the instance is not found.
        """
        if self.instance_id:
            return self.instance_id

        instance_data = self.get_instance_data()
        if instance_data:
            self.instance_id = instance_data['InstanceId']
            return self.instance_id
        logger.debug('VM instance {} does not exists'.format(self.name))
        return None

    def get_private_ip(self):
        """
        Requests the private IP address, polling the instance data once per
        second until it contains ``PrivateIpAddress``
        """
        while not self.private_ip:
            instance_data = self.get_instance_data()
            if instance_data and 'PrivateIpAddress' in instance_data:
                self.private_ip = instance_data['PrivateIpAddress']
            else:
                time.sleep(1)
        return self.private_ip

    def get_public_ip(self):
        """
        Requests the public IP address

        For public instances the instance data is polled once per second
        until it contains ``PublicIpAddress``. For non-public instances the
        current value of ``self.public_ip`` is returned without polling.
        """
        while self.public and (not self.public_ip or self.public_ip == '0.0.0.0'):
            instance_data = self.get_instance_data()
            if instance_data and 'PublicIpAddress' in instance_data:
                self.public_ip = instance_data['PublicIpAddress']
            else:
                time.sleep(1)
        return self.public_ip

    def create(self, check_if_exists=False, user_data=None):
        """
        Creates a new VM instance, or starts it if it already exists

        :param check_if_exists: when True and no instance ID is known, look
            for an existing instance before creating one
        :param user_data: optional user-data script for a new instance
        :return: the instance ID
        """
        vsi_exists = bool(self.instance_id)

        if check_if_exists and not vsi_exists:
            logger.debug('Checking if VM instance {} already exists'.format(self.name))
            instance_data = self.get_instance_data()
            if instance_data:
                logger.debug('VM instance {} already exists'.format(self.name))
                vsi_exists = True
                self.instance_id = instance_data['InstanceId']
                self.private_ip = instance_data['PrivateIpAddress']

        if not vsi_exists:
            instance_data = self._create_instance(user_data=user_data)
            self.instance_id = instance_data['InstanceId']
            self.private_ip = instance_data['PrivateIpAddress']
            self.public_ip = self.get_public_ip()
        else:
            self.start()

        return self.instance_id

    def start(self):
        """
        Starts the VM instance and refreshes its public IP

        While EC2 answers ``IncorrectInstanceState`` the call is retried
        every 20 seconds; any other client error is re-raised.
        """
        # Iterative retry: behaves like the former self-recursive retry but
        # cannot exhaust the interpreter's recursion limit.
        while True:
            logger.info("Starting VM instance {}".format(self.name))
            try:
                self.ec2_client.start_instances(InstanceIds=[self.instance_id])
                self.public_ip = self.get_public_ip()
                break
            except botocore.exceptions.ClientError as e:
                if e.response['Error']['Code'] != 'IncorrectInstanceState':
                    raise
                time.sleep(20)

        logger.debug("VM instance {} started successfully".format(self.name))

    def _delete_instance(self):
        """
        Terminates the VM instance and clears the cached ID, addresses and
        SSH client
        """
        logger.debug("Deleting VM instance {}".format(self.name))

        self.ec2_client.terminate_instances(InstanceIds=[self.instance_id])

        self.instance_id = None
        self.private_ip = None
        self.public_ip = None
        self.del_ssh_client()

    def _stop_instance(self):
        """
        Stops the VM instance
        """
        logger.debug("Stopping VM instance {}".format(self.name))
        self.ec2_client.stop_instances(InstanceIds=[self.instance_id])

    def stop(self):
        """
        Terminates the instance when ``delete_on_dismantle`` is set,
        otherwise stops it
        """
        if self.delete_on_dismantle:
            self._delete_instance()
        else:
            self._stop_instance()

    def delete(self):
        """
        Deletes the VM instance
        """
        self._delete_instance()

    def validate_capabilities(self):
        """
        Validate hardware/os requirements specified in backend config
        (nothing is validated for this backend)
        """
        pass