def go(self, engine):
    """
    Starts the prepare process

    :param engine: access to global parameters and functions
    :type engine: :class:`plumbery.PlumberyEngine`
    """

    super(PreparePolisher, self).go(engine)

    self.report = []

    # credentials shared across the whole fittings plan
    self.user = engine.get_shared_user()
    self.secret = engine.get_shared_secret()
    self.key_files = engine.get_shared_key_files()

    if 'key' not in self.settings:
        return

    key = self.settings['key']
    expanded = os.path.expanduser(key)
    if not os.path.isfile(expanded):
        plogging.error("Error: missing file {}".format(key))
        return

    plogging.debug("- using shared key {}".format(key))

    # the explicit key takes precedence over any shared keys
    if self.key_files is None:
        self.key_files = [key]
    else:
        self.key_files.insert(0, key)
def build(self, settings):
    """
    Plans and applies a terraform configuration

    :param settings: the fittings plan for this blueprint; honoured keys
        are ``tf_path`` (directory holding the terraform files, defaults
        to the directory of the fittings file) and ``parameters`` (a dict
        written out as terraform variables)
    :type settings: ``dict``

    Runs ``terraform plan`` with ``-detailed-exitcode``; exit code 2
    means "changes pending", in which case the saved plan is applied.
    Temporary ``.tfvars`` and ``.tfstate`` files are removed afterwards.
    """

    tf_path = settings.get('tf_path', None)
    if tf_path is None:
        # default back to the directory of the fittings file.
        tf_path = self.working_directory

    parameters = settings.get('parameters', {})

    vars_file = os.path.join(tf_path, '.tfvars')
    state_file = os.path.join(tf_path, '.tfstate')

    with open(vars_file, 'w') as tf_vars:
        for (key, value) in parameters.items():
            tf_vars.write('%s = "%s"\n' % (key, value))

    ret, o, err = self._run_tf(
        'plan', tf_path,
        var_file=vars_file,
        input=False,
        detailed_exitcode=True,
        out=state_file)
    plogging.debug("STDOUT from terraform plan %s", o)

    # BUG FIX: original tested "err != '' or None", which reduces to
    # "err != ''" and is always true for the bytes value b'' returned
    # by Popen -- empty stderr was logged as an error on every run
    if err:
        plogging.error(err)

    # exit code 2 from -detailed-exitcode: plan succeeded, diff pending
    if ret == 2:
        _, o, err = self._run_tf('apply', state_file)
        plogging.debug("STDOUT from terraform apply %s", o)
        if err:
            plogging.error(err)

    # clean up the transient files created above
    if os.path.isfile(state_file):
        os.remove(state_file)
    if os.path.isfile(vars_file):
        os.remove(vars_file)
def run(self, node, client):
    """
    Reboots the node

    :param node: the node to be rebooted
    :param client: unused here, kept for the Deployment interface

    :return: the node that was rebooted

    See also :class:`Deployment.run`
    """

    repeats = 0
    while True:
        try:
            self.region.reboot_node(node)

        except Exception as feedback:
            # the API is busy -- wait and retry forever
            if 'RESOURCE_BUSY' in str(feedback):
                time.sleep(10)
                continue

            # VMware tools may need some time to settle -- retry up to 5x
            if 'VMWARE_TOOLS_INVALID_STATUS' in str(feedback):
                if repeats < 5:
                    time.sleep(10)
                    repeats += 1
                    continue

            plogging.error("- unable to reboot node")
            plogging.error(str(feedback))

        # BUG FIX: the original used "finally: return node", which ran on
        # every path and overrode each "continue" above, so no retry ever
        # happened; returning here preserves the retry logic
        return node
def test_direct(self):
    """Exercises every log level and the error-tracking flag."""

    plogging.setLevel(logging.DEBUG)
    self.assertEqual(plogging.getEffectiveLevel(), logging.DEBUG)

    # emit one message per level, from debug up to critical
    for level in ('debug', 'info', 'warning', 'error', 'critical'):
        getattr(plogging, level)("hello world -- {}".format(level))

    # the error and critical messages above should have been recorded
    self.assertEqual(plogging.foundErrors(), True)

    plogging.reset()
    self.assertEqual(plogging.foundErrors(), False)
def _stop_monitoring(self, node, settings):
    """
    Disables monitoring of one node

    :param node: the target node
    :type node: :class:`libcloud.compute.base.Node`

    :param settings: the fittings plan for this node; a node marked
        ``running: always`` that is up is left untouched
    :type settings: ``dict``
    """

    if node is None:
        return

    # never touch a node that must stay always on
    if ('running' in settings
            and settings['running'] == 'always'
            and node.state == NodeState.RUNNING):
        return

    # safe mode: report what would happen, change nothing
    if self.engine.safeMode:
        return

    plogging.info("Stopping monitoring of node '{}'".format(node.name))

    while True:
        try:
            self.facility.region.ex_disable_monitoring(node)
            plogging.info("- in progress")

        except Exception as feedback:
            # already disabled, or not available here -- nothing to do
            if 'NO_CHANGE' in str(feedback):
                pass
            elif 'OPERATION_NOT_SUPPORTED' in str(feedback):
                pass
            # transient congestion -- wait and retry
            elif 'RESOURCE_BUSY' in str(feedback):
                time.sleep(10)
                continue
            elif 'RESOURCE_NOT_FOUND' in str(feedback):
                plogging.info("- not found")
            elif 'RESOURCE_LOCKED' in str(feedback):
                plogging.info("- not now - locked")
            else:
                plogging.info("- unable to stop monitoring")
                plogging.error(str(feedback))

        break
def _detach_node(self, node, settings):
    """
    Detach a node from multiple networks

    :param node: the target node
    :type node: :class:`libcloud.compute.base.Node`

    :param settings: the fittings plan for this node; a node marked
        ``running: always`` that is up is left untouched
    :type settings: ``dict``

    :return: ``True`` on success or nothing-to-do, ``False`` on failure

    This function removes all secondary network interfaces to a node, and
    any potential translation to the public Internet.
    """

    if node is None:
        return True

    # never touch a node that must stay always on
    if ('running' in settings
            and settings['running'] == 'always'
            and node.state == NodeState.RUNNING):
        return True

    for interface in self._list_secondary_interfaces(node):

        plogging.info("Detaching node '{}' from network '{}'".format(
            node.name, interface['network']))

        while True:
            try:
                self.region.ex_destroy_nic(interface['id'])
                plogging.info("- in progress")

            except Exception as feedback:
                # transient congestion -- wait and retry
                if 'RESOURCE_BUSY' in str(feedback):
                    time.sleep(10)
                    continue

                elif 'RESOURCE_LOCKED' in str(feedback):
                    plogging.info("- not now - locked")

                elif 'NO_CHANGE' in str(feedback):
                    plogging.info("- already there")

                else:
                    plogging.info("- unable to detach node")
                    plogging.error(str(feedback))
                    return False

            break

    return True
def _enrich_node(self, node, region=None):
    """
    Adds attributes to a node

    :param node: the node to enrich in place
    :type node: :class:`libcloud.compute.base.Node`

    :param region: the region to query; defaults to ``self.region``

    This function is a hack, aiming to complement the nice job
    done by Libcloud:
    - add public IPv4 if one exists
    - add disk size, ids, etc.
    """

    if region is None:
        region = self.region

    # hack because the driver does not report public ipv4 accurately
    if len(node.public_ips) < 1:
        domain = region.ex_get_network_domain(
            node.extra['networkDomainId'])
        # BUG FIX: the original called self.region here even when an
        # explicit region was passed, mixing two different endpoints
        for rule in region.ex_list_nat_rules(domain):
            if rule.internal_ip == node.private_ips[0]:
                node.public_ips.append(rule.external_ip)
                break

    # hack to retrieve disk information
    node.extra['disks'] = []

    try:
        element = region.connection.request_with_orgId_api_2(
            'server/server/%s' % node.id).object

        for disk in findall(element, 'disk', TYPES_URN):
            scsiId = int(disk.get('scsiId'))
            speed = disk.get('speed')
            id = disk.get('id')
            sizeGb = int(disk.get('sizeGb'))

            node.extra['disks'].append({
                'scsiId': scsiId,
                'speed': speed,
                'id': id,
                'size': sizeGb
                })

    except Exception as feedback:
        # the server may have vanished in the meantime
        if 'RESOURCE_NOT_FOUND' in str(feedback):
            pass

        else:
            plogging.info("Error: unable to retrieve storage information")
            plogging.error(str(feedback))
def _run_tf(self, command, state_directory, **kwargs):
    """
    Invokes the terraform binary with one sub-command

    :param command: the terraform sub-command, e.g. ``plan`` or ``apply``
    :type command: ``str``

    :param state_directory: final positional argument passed to terraform
    :type state_directory: ``str``

    :param kwargs: additional CLI options; underscores in keys become
        dashes, so ``var_file='x'`` turns into ``-var-file=x``

    :return: tuple of (exit code, stdout, stderr) as text

    :raises RuntimeError: when the terraform binary cannot be located
    """

    if self.tf_path is None:
        plogging.error("Could not locate terraform binary. "
                       "Please check TERRAFORM_PATH ENV var.")
        raise RuntimeError("Missing terraform binary")

    params = [self.tf_path, command]
    for (key, value) in kwargs.items():
        # map python-friendly snake_case keywords to terraform CLI flags
        params.append("-%s=%s" % (key.replace('_', '-'), value))
    params.append(state_directory)

    plogging.debug(params)

    # BUG FIX: decode stdout/stderr to text; on Python 3 communicate()
    # otherwise returns bytes, so callers comparing stderr with '' always
    # saw a "non-empty" error even on success
    process = subprocess.Popen(params,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               universal_newlines=True)
    stdout, stderr = process.communicate()
    return (process.returncode, stdout, stderr)
def start_node(self, node):
    """
    Starts one node

    :param node: the target node, or its name
    :type node: :class:`Node` or ``str``
    """

    # accept either a node object or a plain name
    if isinstance(node, str):
        name = node
        node = self.get_node(name)
    else:
        name = node.name

    plogging.info("Starting node '{}'".format(name))

    if node is None:
        plogging.info("- not found")
        return

    if self.plumbery.safeMode:
        plogging.info("- skipped - safe mode")
        return

    retry = True
    while retry:
        retry = False
        try:
            self.region.ex_start_node(node)
            plogging.info("- in progress")

        except Exception as feedback:
            message = str(feedback)

            # transient congestion -- wait and try again
            if 'RESOURCE_BUSY' in message:
                time.sleep(10)
                retry = True

            elif 'SERVER_STARTED' in message:
                plogging.info("- skipped - node is up and running")

            else:
                plogging.info("- unable to start node")
                plogging.error(message)
def _start_monitoring(self, node, monitoring='ESSENTIALS'):
    """
    Enables monitoring of one node

    :param node: the target node
    :type node: :class:`libcloud.compute.base.Node`

    :param monitoring: either 'ESSENTIALS' or 'ADVANCED'
    :type monitoring: ``str``

    :return: ``True`` when monitoring was enabled, ``False`` otherwise
    """

    value = monitoring.upper()
    plogging.info("Starting {} monitoring of node '{}'".format(
        value.lower(), node.name))

    while True:
        try:
            self.facility.region.ex_enable_monitoring(node,
                                                      service_plan=value)
            plogging.info("- in progress")
            return True

        except Exception as feedback:
            # transient errors -- wait and retry
            if 'RESOURCE_BUSY' in str(feedback):
                time.sleep(10)
                continue

            elif 'RETRYABLE_SYSTEM_ERROR' in str(feedback):
                time.sleep(10)
                continue

            elif 'NO_CHANGE' in str(feedback):
                plogging.info("- already there")

            elif 'RESOURCE_LOCKED' in str(feedback):
                plogging.info("- unable to start monitoring "
                              "- node has been locked")

            else:
                plogging.info("- unable to start monitoring")
                plogging.error(str(feedback))

            # non-retryable outcome -- leave the loop and report failure
            break

    return False
def test_direct(self):
    """Checks log levels and error tracking with a quiet handler attached."""

    class QuietHandler(logging.NullHandler):
        # accept every record, format it, then drop it silently
        level = logging.DEBUG

        def emit(self, record):
            log_entry = self.format(record)

    plogging.addHandler(QuietHandler())
    plogging.setLevel(logging.DEBUG)
    self.assertEqual(plogging.getEffectiveLevel(), logging.DEBUG)

    # emit one message per level, from debug up to critical
    for level in ('debug', 'info', 'warning', 'error', 'critical'):
        getattr(plogging, level)("hello world -- {}".format(level))

    # the error and critical messages above should have been recorded
    self.assertEqual(plogging.foundErrors(), True)

    plogging.reset()
    self.assertEqual(plogging.foundErrors(), False)
def change_node_disk_speed(self, node, id, speed):
    """
    Changes the storage speed of an existing virtual disk

    :param node: the node to be polished
    :type node: :class:`libcloud.compute.base.Node`

    :param id: the disk unique identifier, as reported by the API
    :type id: ``str``

    :param speed: storage type, either 'standard', 'highperformance'
        or 'economy'
    :type speed: ``str``
    """

    if self.engine.safeMode:
        plogging.info("- skipped - safe mode")
        return

    retry = True
    while retry:
        retry = False
        try:
            self.facility.region.ex_change_storage_speed(
                node=node,
                disk_id=id,
                speed=speed)
            plogging.info("- in progress")

        except Exception as feedback:
            message = str(feedback)

            # transient congestion -- pause and try again
            if ('RESOURCE_BUSY' in message
                    or 'Please try again later' in message):
                time.sleep(10)
                retry = True

            else:
                plogging.info("- unable to change disk to '{}'"
                              .format(speed))
                plogging.error(message)
def change_node_disk_size(self, node, id, size):
    """
    Resizes an existing virtual disk

    :param node: the node to be polished
    :type node: :class:`libcloud.compute.base.Node`

    :param id: the disk unique identifier, as reported by the API
    :type id: ``str``

    :param size: the disk size, expressed in Giga bytes
    :type size: ``int``
    """

    if self.engine.safeMode:
        plogging.info("- skipped - safe mode")
        return

    retry = True
    while retry:
        retry = False
        try:
            self.facility.region.ex_change_storage_size(
                node=node,
                disk_id=id,
                size=size)
            plogging.info("- in progress")

        except Exception as feedback:
            message = str(feedback)

            # transient congestion -- pause and try again
            if ('RESOURCE_BUSY' in message
                    or 'Please try again later' in message):
                time.sleep(10)
                retry = True

            else:
                plogging.info("- unable to change disk size to {}GB"
                              .format(size))
                plogging.error(message)
def change_node_disk_size(self, node, id, size):
    """
    Changes an existing virtual disk

    :param node: the node to be polished
    :type node: :class:`libcloud.compute.base.Node`

    :param id: the disk unique identifier, as reported by the API
    :type id: ``str``

    :param size: the disk size, expressed in Giga bytes
    :type size: ``int``
    """

    # safe mode: report what would happen, change nothing
    if self.engine.safeMode:
        plogging.info("- skipped - safe mode")
        return

    while True:
        try:
            self.facility.region.ex_change_storage_size(node=node,
                                                        disk_id=id,
                                                        size=size)
            plogging.info("- in progress")

        except Exception as feedback:
            # transient congestion on the API side -- retry after a pause
            if 'RESOURCE_BUSY' in str(feedback):
                time.sleep(10)
                continue

            if 'Please try again later' in str(feedback):
                time.sleep(10)
                continue

            plogging.info(
                "- unable to change disk size to {}GB".format(size))
            plogging.error(str(feedback))

        break
def change_node_disk_speed(self, node, id, speed):
    """
    Changes an existing virtual disk

    :param node: the node to be polished
    :type node: :class:`libcloud.compute.base.Node`

    :param id: the disk unique identifier, as reported by the API
    :type id: ``str``

    :param speed: storage type, either 'standard', 'highperformance'
        or 'economy'
    :type speed: ``str``
    """

    # safe mode: report what would happen, change nothing
    if self.engine.safeMode:
        plogging.info("- skipped - safe mode")
        return

    while True:
        try:
            self.facility.region.ex_change_storage_speed(node=node,
                                                         disk_id=id,
                                                         speed=speed)
            plogging.info("- in progress")

        except Exception as feedback:
            # transient congestion on the API side -- retry after a pause
            if 'RESOURCE_BUSY' in str(feedback):
                time.sleep(10)
                continue

            if 'Please try again later' in str(feedback):
                time.sleep(10)
                continue

            plogging.info("- unable to change disk to '{}'".format(speed))
            plogging.error(str(feedback))

        break
def shine_container(self, container):
    """
    Configures a container

    :param container: the container to be polished
    :type container: :class:`plumbery.PlumberyInfrastructure`

    Waits until every node of the blueprint has finished deploying, then
    builds firewall rules and the load balancer for the container.
    """

    plogging.info("Configuring blueprint '{}'".format(
        container.blueprint['target']))

    if container.network is None:
        plogging.error("- aborted - no network here")
        return

    self.container = container

    plogging.info("- waiting for nodes to be deployed")

    names = self.nodes.list_nodes(container.blueprint)
    for name in sorted(names):
        # poll each node until its pending action completes
        while True:
            node = self.nodes.get_node(name)
            if node is None:
                plogging.error(
                    "- aborted - missing node '{}'".format(name))
                return

            # no pending action means the node is fully deployed
            if node.extra['status'].action is None:
                plogging.debug("- {} is ready".format(node.name))
                break

            if (node is not None
                    and node.extra['status'].failure_reason is not None):
                plogging.error("- aborted - failed deployment "
                               "of node '{}'".format(name))
                return

            time.sleep(20)

    plogging.info("- nodes have been deployed")

    container._build_firewall_rules()
    container._build_balancer()
def shine_container(self, container):
    """
    Configures a container

    :param container: the container to be polished
    :type container: :class:`plumbery.PlumberyInfrastructure`

    Waits until every node of the blueprint has finished deploying, then
    builds firewall rules and the load balancer for the container.
    """

    plogging.info("Configuring blueprint '{}'".format(
        container.blueprint['target']))

    if container.network is None:
        plogging.error("- aborted - no network here")
        return

    self.container = container

    plogging.info("- waiting for nodes to be deployed")

    names = self.nodes.list_nodes(container.blueprint)
    for name in sorted(names):
        # poll each node until its pending action completes
        while True:
            node = self.nodes.get_node(name)
            if node is None:
                plogging.error("- aborted - missing node '{}'".format(name))
                return

            # no pending action means the node is fully deployed
            if node.extra['status'].action is None:
                plogging.debug("- {} is ready".format(node.name))
                break

            if (node is not None
                    and node.extra['status'].failure_reason is not None):
                plogging.error("- aborted - failed deployment "
                               "of node '{}'".format(name))
                return

            time.sleep(20)

    plogging.info("- nodes have been deployed")

    container._build_firewall_rules()
    container._build_balancer()
def set_node_disk(self, node, id, size, speed='standard'):
    """
    Sets a virtual disk

    :param node: the node to be polished
    :type node: :class:`libcloud.compute.base.Node`

    :param id: the disk id, starting at 0 and growing
    :type id: ``int``

    :param size: the disk size, expressed in Giga bytes
    :type size: ``int``

    :param speed: storage type, either 'standard', 'highperformance'
        or 'economy'
    :type speed: ``str``

    Resizes or re-speeds the disk when it already exists; otherwise a
    new disk is added to the node. Shrinking is refused.
    """

    if size < 1:
        plogging.info("- minimum disk size is 1 GB")
        return

    if size > 1000:
        plogging.info("- disk size cannot exceed 1000 GB")
        return

    if speed not in ('standard', 'highperformance', 'economy'):
        plogging.info("- disk speed should be either 'standard' "
                      "or 'highperformance' or 'economy'")
        return

    # look for an existing disk with the same SCSI id
    if 'disks' in node.extra:
        for disk in node.extra['disks']:
            if disk['scsiId'] == id:
                changed = False

                # shrinking is refused because it may corrupt the node
                if disk['size'] > size:
                    plogging.info("- disk shrinking could break the node")
                    plogging.info("- skipped - disk {} will not be reduced"
                                  .format(id))

                if disk['size'] < size:
                    plogging.info("- expanding disk {} to {} GB"
                                  .format(id, size))
                    self.change_node_disk_size(node, disk['id'], size)
                    changed = True

                if disk['speed'].lower() != speed.lower():
                    plogging.info("- changing disk {} to '{}'"
                                  .format(id, speed))
                    self.change_node_disk_speed(node, disk['id'], speed)
                    changed = True

                if not changed:
                    plogging.debug("- no change in disk {}".format(id))

                return

    # no matching disk -- create a brand new one
    plogging.info("- adding {} GB '{}' disk".format(
        size, speed))

#        if self.engine.safeMode:
#            plogging.info("- skipped - safe mode")
#            return

    while True:
        try:
            self.facility.region.ex_add_storage_to_node(
                node=node,
                amount=size,
                speed=speed.upper())
            plogging.info("- in progress")

        except Exception as feedback:
            # transient congestion on the API side -- retry after a pause
            if 'RESOURCE_BUSY' in str(feedback):
                time.sleep(10)
                continue

            if 'Please try again later' in str(feedback):
                time.sleep(10)
                continue

            plogging.info("- unable to add disk {} GB '{}'"
                          .format(size, speed))
            plogging.error(str(feedback))

        break
# do the job, and surface failures with an explicit exit code
try:
    engine.do(args.action, args.blueprints, args.facilities)
    plogging.info(engine.document_elapsed())

except Exception as feedback:
    # in debug mode, let the traceback propagate for diagnosis
    if plogging.getEffectiveLevel() == logging.DEBUG:
        plogging.error("Unable to do '{}'".format(args.action))
        raise
    else:
        plogging.error("Unable to do '{}', run with -d for debug".format(
            args.action))
        plogging.error("{}: {}".format(feedback.__class__.__name__,
                                       str(feedback)))
        sys.exit(1)


if __name__ == "__main__":

    try:
        main()

        # if some errors have been logged, make it explicit to the caller
        if plogging.foundErrors():
            plogging.error("Hit some error, you should check the logs")
            sys.exit(1)

    except KeyboardInterrupt:
        plogging.error("Aborted by user")
        sys.exit(1)
# do the job, and surface failures with an explicit exit code
try:
    engine.do(args.action, args.blueprints, args.facilities)
    plogging.info(engine.document_elapsed())

except Exception as feedback:
    # in debug mode, let the traceback propagate for diagnosis
    if plogging.getEffectiveLevel() == logging.DEBUG:
        plogging.error("Unable to do '{}'".format(args.action))
        raise
    else:
        plogging.error("Unable to do '{}', run with -d for debug".format(
            args.action))
        plogging.error("{}: {}".format(
            feedback.__class__.__name__, str(feedback)))
        sys.exit(1)


if __name__ == "__main__":

    try:
        main()

        # if some errors have been logged, make it explicit to the caller
        if plogging.foundErrors():
            plogging.error("Hit some error, you should check the logs")
            sys.exit(1)

    except KeyboardInterrupt:
        plogging.error("Aborted by user")
        sys.exit(1)
def attach_node(self, node, networks):
    """
    Glues a node to multiple networks

    :param node: the target node
    :type node: :class:`libcloud.compute.base.Node`

    :param networks: a list of networks to connect, and ``internet``
    :type networks: list of ``str``

    :return: ``True`` when at least one attachment was performed

    This function adds network interfaces to a node, or adds address
    translation to the public Internet.

    Example in the fittings plan::

      - web:
          domain:
            ipv4: 6
          ethernet:
            name: gigafox.data
          nodes:
            - web[10..12]:
                glue:
                  - gigafox.control
                  - internet 80 443

    In this example, another network interface is added to each node for
    connection to the Ethernet network ``gigafox.control``.

    Also, public IPv4 addresses are mapped on private addresses, so that
    each node web10, web11 and web12 is reachable from the internet.
    Public IPv4 addresses are taken from pool declared at the domain level,
    with the attribute ``ipv4``. In the example above, 6 addresses are
    assigned to the network domain, of which 3 are given to web nodes.

    If one or multiple numbers are mentioned after the keyword `internet`,
    they are used to configure the firewall appropriately.
    """

    hasChanged = False

    if node is None:
        return hasChanged

    for line in networks:

        # a line is a network name, optionally followed by parameters
        tokens = line.strip(' ').split(' ')
        token = tokens.pop(0)

        # 'internet <ports...>' configures NAT plus firewall instead
        if token.lower() == 'internet':
            self.attach_node_to_internet(node, tokens)
            continue

        # the primary network of the blueprint is already attached
        if token == self.container.blueprint['ethernet']['name']:
            continue

        if token.lower() == 'primary':
            continue

        plogging.info("Glueing node '{}' to network '{}'".format(
            node.name, token))
        vlan = self.container.get_ethernet(token.split('::'))
        if vlan is None:
            plogging.info("- network '{}' is unknown".format(token))
            continue

        kwargs = {}
        if len(tokens) > 0:
            # a trailing token selects a fixed private address; partial
            # addresses are completed from the vlan subnet prefix
            numbers = tokens.pop(0).strip('.').split('.')
            subnet = vlan.private_ipv4_range_address.split('.')
            while len(numbers) < 4:
                numbers.insert(0, subnet[3 - len(numbers)])
            private_ipv4 = '.'.join(numbers)
            plogging.debug("- using address '{}'".format(private_ipv4))
            kwargs['private_ipv4'] = private_ipv4

        # safe mode: report what would happen, change nothing
        if self.engine.safeMode:
            plogging.info("- skipped - safe mode")
            continue

        if 'private_ipv4' not in kwargs:
            kwargs['vlan'] = vlan

        while True:
            try:
                self.region.ex_attach_node_to_vlan(node, **kwargs)
                plogging.info("- in progress")
                hasChanged = True

            except Exception as feedback:
                # transient congestion -- wait and retry
                if 'RESOURCE_BUSY' in str(feedback):
                    time.sleep(10)
                    continue

                elif 'RESOURCE_LOCKED' in str(feedback):
                    plogging.info("- not now - locked")

                elif 'INVALID_INPUT_DATA' in str(feedback):
                    plogging.info("- already there")

                else:
                    plogging.info("- unable to glue node")
                    plogging.error(str(feedback))

            break

    return hasChanged
def move_to(self, facility):
    """
    Checks if we can beachhead at this facility

    :param facility: access to local parameters and functions
    :type facility: :class:`plumbery.PlumberyFacility`

    This function lists all addresses of the computer that is running
    plumbery. If there is at least one routable IPv6 address, then
    it assumes that communication with nodes is possible. If no suitable
    IPv6 address can be found, then plumbery falls back to IPv4.
    Beachheading is granted only if the address of the computer running
    plumbery matches the fitting parameter ``beachhead``.
    """

    self.facility = facility
    self.region = facility.region
    self.nodes = PlumberyNodes(facility)

    self.beachheading = False

    try:
        self.addresses = []

        # collect every address of this computer, per interface
        for interface in netifaces.interfaces():
            addresses = netifaces.ifaddresses(interface)

            if netifaces.AF_INET in addresses.keys():
                for address in addresses[netifaces.AF_INET]:

                    # strip local loop
                    if address['addr'].startswith('127.0.0.1'):
                        continue

                    self.addresses.append(address['addr'])

            if netifaces.AF_INET6 in addresses.keys():
                for address in addresses[netifaces.AF_INET6]:

                    # strip local loop
                    if address['addr'].startswith('::1'):
                        continue

                    # strip local link addresses
                    if address['addr'].startswith('fe80::'):
                        continue

                    # we have a routable ipv6, so let's go
                    self.beachheading = True

    except Exception as feedback:
        plogging.error(str(feedback))

    # also honour an explicit 'beachhead' entry in the fittings plan
    for item in self.facility.get_setting('prepare', []):
        if not isinstance(item, dict):
            continue
        if 'beachhead' not in item.keys():
            continue
        if item['beachhead'] in self.addresses:
            self.beachheading = True
            break

    if self.beachheading:
        plogging.debug("- beachheading at '{}'".format(
            self.facility.get_setting('locationId')))
    else:
        plogging.debug("- not beachheading at '{}'".format(
            self.facility.get_setting('locationId')))
def main(args=None, engine=None): """ Runs plumbery from the command line :param args: arguments to be considered for this invocation :type args: a list of ``str`` :param engine: an instance of the plumbery engine :type engine: :class:`plumbery.PlumberEngine` Example:: $ python -m plumbery fittings.yaml build web In this example, plumbery loads fittings plan from ``fittings.yaml``, then it builds the blueprint named ``web``. If no blueprint is mentioned, then plumbery looks at all blueprint definitions in the fittings plan. In other terms, the following command builds the entire fittings plan, eventually across multiple facilities:: $ python -m plumbery fittings.yaml build Of course, plumbery can be invoked through the entire life cycle of your fittings:: $ python -m plumbery fittings.yaml build $ python -m plumbery fittings.yaml start $ python -m plumbery fittings.yaml polish ... nodes are up and running ... $ python -m plumbery fittings.yaml stop ... nodes have been stopped ... $ python -m plumbery fittings.yaml wipe ... nodes have been destroyed, but the infrastructure remains ... $ python -m plumbery fittings.yaml destroy ... every virtual resources has been removed ... To focus at a single location, put the character '@' followed by the id. For example, to build fittings only at 'NA12' you would type:: $ python -m plumbery fittings.yaml build @NA12 To focus on one blueprint just mention its name on the command line. For example, if fittings plan has a blueprint for nodes running Docker, then you may use following statements to bootstrap each node:: $ python -m plumbery fittings.yaml build docker $ python -m plumbery fittings.yaml start docker $ python -m plumbery fittings.yaml prepare docker ... Docker is up and running at multiple nodes ... 
If you create a new polisher and put it in the directory ``plumbery\polishers``, then it will become automatically available:: $ python -m plumbery fittings.yaml my_special_stuff To get some help, you can type:: $ python -m plumbery -h """ # part 1 - understand what the user wants if args is None: args = sys.argv[1:] try: args = parse_args(args) except Exception as feedback: plogging.error("Incorrect arguments. " "Maybe the following can help: python -m plumbery -h") if plogging.getEffectiveLevel() == logging.DEBUG: raise else: plogging.error("{}: {}".format( feedback.__class__.__name__, str(feedback))) sys.exit(2) # part 2 - get a valid and configured engine if engine is None: try: engine = PlumberyEngine(args.fittings, args.parameters) if args.safe: engine.safeMode = True except Exception as feedback: if plogging.getEffectiveLevel() == logging.DEBUG: plogging.error("Cannot read fittings plan from '{}'".format( args.fittings)) raise else: plogging.error("Cannot read fittings plan from '{}'" ", run with -d for debug".format( args.fittings)) plogging.error("{}: {}".format( feedback.__class__.__name__, str(feedback))) sys.exit(2) # part 3 - do the job try: engine.do(args.action, args.blueprints, args.facilities) plogging.info(engine.document_elapsed()) except Exception as feedback: if plogging.getEffectiveLevel() == logging.DEBUG: plogging.error("Unable to do '{}'".format(args.action)) raise else: plogging.error("Unable to do '{}', run with -d for debug".format( args.action)) plogging.error("{}: {}".format( feedback.__class__.__name__, str(feedback))) sys.exit(1)
def stop_node(self, node, settings={}):
    """
    Stops one node

    :param node: the target node, or its name
    :type node: :class:`Node` or ``str``

    :param settings: additional attributes for this node
    :type settings: ``dict``

    Attempts a graceful shutdown first; if VMware tools are not in a
    valid state, the node is powered off the hard way.
    """

    # accept either a node object or a plain name
    if isinstance(node, str):
        name = node
        node = self.get_node(name)
    else:
        name = node.name

    plogging.info("Stopping node '{}'".format(name))

    if node is None:
        plogging.info("- not found")
        return

    # never stop a node that must stay always on
    if ('running' in settings
            and settings['running'] == 'always'
            and node.state == NodeState.RUNNING):
        plogging.info("- skipped - node has to stay always on")
        return

    # safe mode: report what would happen, change nothing
    if self.plumbery.safeMode:
        plogging.info("- skipped - safe mode")
        return

    # one extra chance is given before falling back to a hard power-off
    retry = True
    while True:
        try:
            self.region.ex_shutdown_graceful(node)
            plogging.info("- in progress")

        except Exception as feedback:
            # transient congestion -- wait and retry
            if 'RESOURCE_BUSY' in str(feedback):
                time.sleep(10)
                continue

            elif 'UNEXPECTED_ERROR' in str(feedback):
                time.sleep(10)
                continue

            elif 'VMWARE_TOOLS_INVALID_STATUS' in str(feedback):
                # prevent transient errors
                if retry:
                    retry = False
                    time.sleep(30)
                    continue

                plogging.info("- unable to shutdown gracefully "
                              "- invalid VMware tools")
                plogging.info("- powering the node off")

                # fall back to a hard power-off
                try:
                    self.region.ex_power_off(node)
                    plogging.info("- in progress")

                except Exception as feedback:
                    if 'SERVER_STOPPED' in str(feedback):
                        plogging.info("- already stopped")
                    else:
                        plogging.info("- unable to stop node")
                        plogging.error(str(feedback))

            elif 'SERVER_STOPPED' in str(feedback):
                plogging.info("- already stopped")

            else:
                plogging.info("- unable to stop node")
                plogging.error(str(feedback))

        break
def shine_node(self, node, settings, container):
    """
    prepares a node

    :param node: the node to be polished
    :type node: :class:`libcloud.compute.base.Node`

    :param settings: the fittings plan for this node
    :type settings: ``dict``

    :param container: the container of this node
    :type container: :class:`plumbery.PlumberyInfrastructure`

    Waits for the node to finish starting, then runs the configured
    prepare steps and records the outcome in ``self.report``.
    """

    plogging.info("Preparing node '{}'".format(settings['name']))
    if node is None:
        plogging.error("- not found")
        return

    # give a starting node up to 300 seconds to settle
    timeout = 300
    tick = 6
    while node.extra['status'].action == 'START_SERVER':
        time.sleep(tick)
        node = self.nodes.get_node(node.name)
        timeout -= tick
        if timeout < 0:
            break

    if node.state != NodeState.RUNNING:
        plogging.error("- skipped - node is not running")
        return

    self.upgrade_vmware_tools(node)

    prepares = self._get_prepares(node, settings, container)
    if len(prepares) < 1:
        plogging.info('- nothing to do')
        self.report.append(
            {node.name: {
                'status': 'skipped - nothing to do'
                }})
        return

    if len(node.public_ips) > 0:
        plogging.info("- node is reachable at '{}'".format(
            node.public_ips[0]))

    # without a public address we need a local beachhead to reach the node
    elif not self.beachheading:
        plogging.error('- node is unreachable')
        self.report.append({node.name: {'status': 'unreachable'}})
        return

    descriptions = []
    for item in prepares:
        descriptions.append(item['description'])

    if self._apply_prepares(node, prepares):
        self.report.append(
            {node.name: {
                'status': 'completed',
                'prepares': descriptions
                }})
    else:
        self.report.append(
            {node.name: {
                'status': 'failed',
                'prepares': descriptions
                }})
def shine_node(self, node, settings, container):
    """
    prepares a node

    :param node: the node to be polished
    :type node: :class:`libcloud.compute.base.Node`

    :param settings: the fittings plan for this node
    :type settings: ``dict``

    :param container: the container of this node
    :type container: :class:`plumbery.PlumberyInfrastructure`

    Waits for the node to finish starting, exposes it to the internet
    temporarily if needed, runs the configured prepare steps, records
    the outcome in ``self.report``, and removes any transient exposure.
    """

    self.container = container

    plogging.info("Preparing node '{}'".format(settings['name']))
    if node is None:
        plogging.error("- not found")
        return

    # give a starting node up to 300 seconds to settle
    timeout = 300
    tick = 6
    while node.extra['status'].action == 'START_SERVER':
        time.sleep(tick)
        node = self.nodes.get_node(node.name)
        timeout -= tick
        if timeout < 0:
            break

    if node.state != NodeState.RUNNING:
        plogging.error("- skipped - node is not running")
        return

    self.upgrade_vmware_tools(node)

    prepares = self._get_prepares(node, settings, container)
    if len(prepares) < 1:
        plogging.info('- nothing to do')
        self.report.append({node.name: {
            'status': 'skipped - nothing to do'
            }})
        return

    # BUG FIX: node.transient was left unset on the beachheading path,
    # which crashed the final "if node.transient" check below
    node.transient = False

    if len(node.public_ips) > 0:
        plogging.info("- node is reachable at '{}'".format(
            node.public_ips[0]))

    # expose the node to the internet only for the time of the preparation
    elif container.with_transient_exposure():
        external_ip = self.attach_node_to_internet(node, ports=['22'])
        if external_ip is None:
            plogging.error('- no IP has been assigned')
            self.report.append({node.name: {
                'status': 'unreachable'
                }})
            return
        node.public_ips = [external_ip]
        node.transient = True

    # without a public address we need a local beachhead to reach the node
    elif not self.beachheading:
        plogging.error('- node is unreachable')
        self.report.append({node.name: {
            'status': 'unreachable'
            }})
        return

    descriptions = [item['description'] for item in prepares]

    if self._apply_prepares(node, prepares):
        self.report.append({node.name: {
            'status': 'completed',
            'prepares': descriptions
            }})
    else:
        self.report.append({node.name: {
            'status': 'failed',
            'prepares': descriptions
            }})

    # tear down the temporary internet exposure, if any
    if node.transient:
        self.container._detach_node_from_internet(node)
def attach_node(self, node, networks):
    """
    Glues a node to multiple networks

    :param node: the target node
    :type node: :class:`libcloud.compute.base.Node`

    :param networks: a list of networks to connect, and ``internet``
    :type networks: list of ``str``

    :return: ``True`` when at least one attachment was performed

    This function adds network interfaces to a node, or adds address
    translation to the public Internet.

    Example in the fittings plan::

      - web:
          domain:
            ipv4: 6
          ethernet:
            name: gigafox.data
          nodes:
            - web[10..12]:
                glue:
                  - gigafox.control
                  - internet 80 443

    In this example, another network interface is added to each node for
    connection to the Ethernet network ``gigafox.control``.

    Also, public IPv4 addresses are mapped on private addresses, so that
    each node web10, web11 and web12 is reachable from the internet.
    Public IPv4 addresses are taken from pool declared at the domain level,
    with the attribute ``ipv4``. In the example above, 6 addresses are
    assigned to the network domain, of which 3 are given to web nodes.

    If one or multiple numbers are mentioned after the keyword `internet`,
    they are used to configure the firewall appropriately.
    """

    hasChanged = False

    if node is None:
        return hasChanged

    for line in networks:

        # a line is a network name, optionally followed by parameters
        tokens = line.strip(' ').split(' ')
        token = tokens.pop(0)

        # 'internet <ports...>' configures NAT plus firewall instead
        if token.lower() == 'internet':
            self.attach_node_to_internet(node, tokens)
            continue

        # the primary network of the blueprint is already attached
        if token == self.container.blueprint['ethernet']['name']:
            continue

        if token.lower() == 'primary':
            continue

        plogging.info("Glueing node '{}' to network '{}'"
                      .format(node.name, token))
        vlan = self.container.get_ethernet(token.split('::'))
        if vlan is None:
            plogging.info("- network '{}' is unknown".format(token))
            continue

        kwargs = {}
        if len(tokens) > 0:
            # a trailing token selects a fixed private address; partial
            # addresses are completed from the vlan subnet prefix
            numbers = tokens.pop(0).strip('.').split('.')
            subnet = vlan.private_ipv4_range_address.split('.')
            while len(numbers) < 4:
                numbers.insert(0, subnet[3-len(numbers)])
            private_ipv4 = '.'.join(numbers)
            plogging.debug("- using address '{}'".format(private_ipv4))
            kwargs['private_ipv4'] = private_ipv4

        # safe mode: report what would happen, change nothing
        if self.engine.safeMode:
            plogging.info("- skipped - safe mode")
            continue

        if 'private_ipv4' not in kwargs:
            kwargs['vlan'] = vlan

        while True:
            try:
                self.region.ex_attach_node_to_vlan(node, **kwargs)
                plogging.info("- in progress")
                hasChanged = True

            except Exception as feedback:
                # transient congestion -- wait and retry
                if 'RESOURCE_BUSY' in str(feedback):
                    time.sleep(10)
                    continue

                elif 'RESOURCE_LOCKED' in str(feedback):
                    plogging.info("- not now - locked")

                elif 'INVALID_INPUT_DATA' in str(feedback):
                    plogging.info("- already there")

                else:
                    plogging.info("- unable to glue node")
                    plogging.error(str(feedback))

            break

    return hasChanged
def set_node_compute(self, node, cpu, memory):
    """
    Sets compute capability

    :param node: the node to be polished
    :type node: :class:`libcloud.compute.base.Node`

    :param cpu: the cpu specification
    :type cpu: ``DimensionDataServerCpuSpecification``

    :param memory: the memory size, expressed in Giga bytes
    :type memory: ``int``

    """
    changed = False

    # compare the desired cpu settings with the node's current ones
    if cpu is not None and 'cpu' in node.extra:

        if int(cpu.cpu_count) != int(node.extra['cpu'].cpu_count):
            plogging.info("- changing to {} cpu".format(
                cpu.cpu_count))
            changed = True

        if (int(cpu.cores_per_socket)
                != int(node.extra['cpu'].cores_per_socket)):
            plogging.info("- changing to {} core(s) per socket".format(
                cpu.cores_per_socket))
            changed = True

        if cpu.performance != node.extra['cpu'].performance:
            plogging.info("- changing to '{}' cpu performance".format(
                cpu.performance.lower()))
            changed = True

    # 'memoryMb' is reported in MB; target memory is expressed in GB
    if memory is not None and 'memoryMb' in node.extra:

        if memory != int(node.extra['memoryMb']/1024):
            plogging.info("- changing to {} GB memory".format(
                memory))
            changed = True

    if not changed:
        plogging.debug("- no change in compute")
        return

    if self.engine.safeMode:
        plogging.info("- skipped - safe mode")
        return

    # NOTE(review): if only memory changed while cpu is None, the cpu.*
    # accesses below would raise — confirm callers always pass cpu here
    # retry while the resource is busy; any other error ends the loop
    while True:
        try:
            self.region.ex_reconfigure_node(
                node=node,
                memory_gb=memory,
                cpu_count=cpu.cpu_count,
                cores_per_socket=cpu.cores_per_socket,
                cpu_performance=cpu.performance)

            plogging.info("- in progress")

        except Exception as feedback:

            if 'RESOURCE_BUSY' in str(feedback):
                time.sleep(10)
                continue

            if 'Please try again later' in str(feedback):
                time.sleep(10)
                continue

            plogging.info("- unable to reconfigure node")
            plogging.error(str(feedback))

        break
def build_blueprint(self, blueprint, container):
    """
    Create missing nodes

    :param blueprint: the blueprint to build
    :type blueprint: ``dict``

    :param container: the container where nodes will be built
    :type container: :class:`plumbery.PlumberyInfrastructure`

    """

    plogging.debug("Building nodes of blueprint '{}'".format(
        blueprint['target']))

    self.facility.power_on()

    # normalise the blueprint so the loop below always sees a list
    if ('nodes' not in blueprint
            or not isinstance(blueprint['nodes'], list)):
        plogging.debug("No nodes have been defined in '{}'".format(
            blueprint['target']))
        blueprint['nodes'] = []

    for item in blueprint['nodes']:

        # each item is either a bare label or a {label: settings} mapping
        if type(item) is dict:
            label = list(item.keys())[0]
            settings = list(item.values())[0]

        else:
            label = item
            settings = {}

        # expand e.g. 'web[10..12]' into individual node names
        for label in self.expand_labels(label):

            plogging.info("Creating node '{}'".format(label))

            if self.get_node(label):
                plogging.info("- already there")
                continue

            description = '#plumbery'
            if 'description' in settings:
                description = settings['description'] + ' #plumbery'

            if 'appliance' in settings:
                imageName = settings['appliance']
            else:
                imageName = None

            image = self.facility.get_image(imageName)
            if image is None:
                raise PlumberyException("Error: unable to find image "
                                        "for '{}'!".format(imageName))
            plogging.debug("- using image '{}'".format(image.name))

            # parse 'cpu' as '<count> [<cores per socket>] [<speed>]'
            cpu = None
            if 'cpu' in settings:
                tokens = str(settings['cpu']).split(' ')
                if len(tokens) < 2:
                    tokens.append('1')
                if len(tokens) < 3:
                    tokens.append('standard')

                if (int(tokens[0]) < 1
                        or int(tokens[0]) > 32):
                    plogging.info("- cpu should be between 1 and 32")

                elif (int(tokens[1]) < 1
                        or int(tokens[1]) > 2):
                    plogging.info("- core per cpu should be either 1 or 2")

                elif tokens[2].upper() not in ('STANDARD',
                                               'HIGHPERFORMANCE'):
                    plogging.info("- cpu speed should be either 'standard'"
                                  " or 'highspeed'")

                else:
                    cpu = DimensionDataServerCpuSpecification(
                        cpu_count=tokens[0],
                        cores_per_socket=tokens[1],
                        performance=tokens[2].upper())
                    plogging.debug("- assigning {} cpus".format(
                        cpu.cpu_count))
                    plogging.debug("- core per cpu: {}".format(
                        cpu.cores_per_socket))
                    plogging.debug("- cpu performance: {}".format(
                        cpu.performance.lower()))

            # memory is expressed in GB; out-of-range values are ignored
            memory = None
            if 'memory' in settings:
                memory = int(settings['memory'])

                if memory < 1 or memory > 256:
                    plogging.info("- memory should be between 1 and 256")
                    memory = None
                else:
                    plogging.debug("- assigning {} GB of memory".format(
                        memory))

            if self.plumbery.safeMode:
                plogging.info("- skipped - safe mode")
                continue

            if container.domain is None:
                plogging.info("- missing network domain")
                continue

            if container.network is None:
                plogging.info("- missing Ethernet network")
                continue

            # look for a fixed primary address in the 'glue' directives
            primary_ipv4 = None
            if 'glue' in settings:
                for line in settings['glue']:

                    tokens = line.strip(' ').split(' ')
                    token = tokens.pop(0)

                    if token.lower() == 'primary':
                        token = container.network.name

                    # only the blueprint's own network can carry the
                    # primary address
                    if token != container.network.name:
                        continue

                    if len(tokens) < 1:
                        break

                    plogging.info("Glueing node '{}' to network '{}'"
                                  .format(label, token))

                    # complete a partial address with the subnet prefix
                    numbers = tokens.pop(0).strip('.').split('.')
                    subnet = container.network.private_ipv4_range_address.split('.')
                    while len(numbers) < 4:
                        numbers.insert(0, subnet[3-len(numbers)])
                    primary_ipv4 = '.'.join(numbers)

                    plogging.debug("- using address '{}'"
                                   .format(primary_ipv4))

                    break

            retries = 2
            # some images with software labels cannot be deployed in the
            # stopped state; in that case the node is started, awaited,
            # then shut down gracefully (see INVALID_INPUT_DATA below)
            should_start = False
            while True:
                try:
                    if primary_ipv4 is not None:
                        self.region.create_node(
                            name=label,
                            image=image,
                            auth=NodeAuthPassword(
                                self.plumbery.get_shared_secret()),
                            ex_network_domain=container.domain,
                            ex_primary_ipv4=primary_ipv4,
                            ex_cpu_specification=cpu,
                            ex_memory_gb=memory,
                            ex_is_started=should_start,
                            ex_description=description)
                    else:
                        self.region.create_node(
                            name=label,
                            image=image,
                            auth=NodeAuthPassword(
                                self.plumbery.get_shared_secret()),
                            ex_network_domain=container.domain,
                            ex_vlan=container.network,
                            ex_cpu_specification=cpu,
                            ex_memory_gb=memory,
                            ex_is_started=should_start,
                            ex_description=description)

                    plogging.info("- in progress")

                    if should_start:  # stop the node after start
                        plogging.info("- waiting for node to be deployed")

                        node = None
                        while True:
                            node = self.get_node(label)
                            if node is None:
                                plogging.error("- aborted - missing node '{}'".format(label))
                                return

                            # deployment is over when no action is pending
                            if node.extra['status'].action is None:
                                break

                            if (node is not None
                                    and node.extra['status'].failure_reason is not None):

                                plogging.error("- aborted - failed deployment "
                                               "of node '{}'".format(label))
                                return

                            time.sleep(20)

                        if node is not None:
                            self.region.ex_shutdown_graceful(node)
                            plogging.info("- shutting down after deployment")

                except SocketError as feedback:

                    # retry a couple of times on connection reset
                    if feedback.errno == errno.ECONNRESET and retries > 0:
                        retries -= 1
                        time.sleep(10)
                        continue

                    else:
                        plogging.info("- unable to create node")
                        plogging.error(str(feedback))

                except Exception as feedback:

                    if 'RESOURCE_BUSY' in str(feedback):
                        time.sleep(10)
                        continue

                    elif 'RESOURCE_NOT_FOUND' in str(feedback):
                        plogging.info("- not now")
                        plogging.error(str(feedback))

                    elif 'RESOURCE_LOCKED' in str(feedback):
                        plogging.info("- not now - locked")
                        plogging.error(str(feedback))

                    elif ('INVALID_INPUT_DATA: Cannot deploy server '
                          'with Software Labels in the "Stopped" state.'
                          in str(feedback)):
                        # redo the creation with the node started
                        should_start = True
                        continue

                    else:
                        plogging.info("- unable to create node")
                        plogging.error(str(feedback))

                break
def _get_prepares(self, node, settings, container):
    """
    Defines the set of actions to be done on a node

    :param node: the node to be polished
    :type node: :class:`libcloud.compute.base.Node`

    :param settings: the fittings plan for this node
    :type settings: ``dict``

    :param container: the container of this node
    :type container: :class:`plumbery.PlumberyInfrastructure`

    :return: a list of actions to be performed, and related descriptions
    :rtype: a ``list`` of `{ 'description': ..., 'genius': ... }``

    """

    if not isinstance(settings, dict):
        return []

    # template context used to expand {{ }} in scripts and files
    environment = PlumberyNodeContext(node=node,
                                      container=container,
                                      context=self.facility)

    prepares = []

    # deploy every shared SSH public key that can be read locally
    for key_file in self.key_files:
        try:
            path = os.path.expanduser(key_file)

            with open(path) as stream:
                key = stream.read()
                stream.close()  # redundant under 'with', but harmless

            prepares.append({
                'description': 'deploy SSH public key',
                'genius': SSHKeyDeployment(key=key)
            })
        except IOError:
            plogging.warning("no ssh key in {}".format(key_file))

    if ('prepare' in settings
            and isinstance(settings['prepare'], list)
            and len(settings['prepare']) > 0):

        plogging.info('- using prepare commands')

        # each directive is '<verb> <argument> ...'; a bare word
        # defaults to the 'run' verb
        for script in settings['prepare']:

            tokens = script.split(' ')
            if len(tokens) == 1:
                tokens.insert(0, 'run')

            if tokens[0] in ['run', 'run_raw']:  # send and run a script

                script = tokens[1]
                if len(tokens) > 2:
                    args = tokens[2:]
                else:
                    args = []

                plogging.debug("- {} {} {}".format(
                    tokens[0],
                    script,
                    ' '.join(args)))

                try:
                    with open(script) as stream:
                        text = stream.read()

                    # 'run' expands templates; 'run_raw' sends as-is
                    if (tokens[0] == 'run'
                            and PlumberyText.could_expand(text)):

                        plogging.debug(
                            "- expanding script '{}'".format(script))
                        text = PlumberyText.expand_string(
                            text, environment)

                    if len(text) > 0:

                        plogging.info("- running '{}'".format(script))

                        prepares.append({
                            'description': ' '.join(tokens),
                            'genius': ScriptDeployment(script=text,
                                                       args=args,
                                                       name=script)
                        })

                    else:
                        plogging.error(
                            "- script '{}' is empty".format(script))

                except IOError:
                    plogging.error(
                        "- unable to read script '{}'".format(script))

            elif tokens[0] in ['put', 'put_raw']:  # send a file

                file = tokens[1]
                if len(tokens) > 2:
                    destination = tokens[2]
                else:
                    destination = './' + file

                plogging.debug("- {} {} {}".format(
                    tokens[0],
                    file,
                    destination))

                try:
                    with open(file) as stream:
                        content = stream.read()

                    # 'put' expands templates; 'put_raw' sends as-is
                    if (tokens[0] == 'put'
                            and PlumberyText.could_expand(content)):

                        plogging.debug(
                            "- expanding file '{}'".format(file))
                        content = PlumberyText.expand_string(
                            content, environment)

                    plogging.info("- putting file '{}'".format(file))

                    prepares.append({
                        'description': ' '.join(tokens),
                        'genius': FileContentDeployment(content=content,
                                                        target=destination)
                    })

                except IOError:
                    plogging.error(
                        "- unable to read file '{}'".format(file))

            else:  # echo a sensible message eventually

                if tokens[0] == 'echo':
                    tokens.pop(0)

                message = ' '.join(tokens)
                message = PlumberyText.expand_string(message, environment)
                plogging.info("- {}".format(message))

    if ('cloud-config' in settings
            and isinstance(settings['cloud-config'], dict)
            and len(settings['cloud-config']) > 0):

        plogging.info('- using cloud-config')

        # mandatory, else cloud-init will not consider user-data
        plogging.debug('- preparing meta-data')
        meta_data = 'instance_id: dummy\n'

        destination = '/var/lib/cloud/seed/nocloud-net/meta-data'
        prepares.append({
            'description': 'put meta-data',
            'genius': FileContentDeployment(content=meta_data,
                                            target=destination)
        })

        plogging.debug('- preparing user-data')

        expanded = PlumberyText.expand_string(settings['cloud-config'],
                                              environment)

        user_data = '#cloud-config\n' + expanded
        plogging.debug(user_data)

        destination = '/var/lib/cloud/seed/nocloud-net/user-data'
        prepares.append({
            'description': 'put user-data',
            'genius': FileContentDeployment(content=user_data,
                                            target=destination)
        })

        plogging.debug('- preparing remote install of cloud-init')

        # the installer script ships next to this module
        script = 'prepare.cloud-init.sh'
        try:
            path = os.path.dirname(__file__) + '/' + script
            with open(path) as stream:
                text = stream.read()
            if text:
                prepares.append({
                    'description': 'run ' + script,
                    'genius': ScriptDeployment(script=text,
                                               name=script)
                })
        except IOError:
            raise PlumberyException(
                "Error: cannot read '{}'".format(script))

        # a reboot is needed for cloud-init to pick up the seed files
        plogging.debug('- preparing reboot to trigger cloud-init')
        prepares.append({
            'description': 'reboot node',
            'genius': RebootDeployment(container=container)
        })

    return prepares
def destroy_blueprint(self, blueprint):
    """
    Destroys nodes of a given blueprint

    :param blueprint: the blueprint to build
    :type blueprint: ``dict``

    :return: ``False`` when destruction was aborted, ``None`` otherwise

    """

    self.facility.power_on()

    infrastructure = PlumberyInfrastructure(self.facility)
    container = infrastructure.get_container(blueprint)

    if ('nodes' not in blueprint
            or not isinstance(blueprint['nodes'], list)):
        return

    # destroy in reverse order
    for item in reversed(blueprint['nodes']):

        # each item is either a bare label or a {label: settings} mapping
        if type(item) is dict:
            label = list(item)[0]
            settings = item[label]
        else:
            label = str(item)
            settings = {}

        for label in self.expand_labels(label):

            node = self.get_node(label)
            if node is None:
                plogging.info("Destroying node '{}'".format(label))
                plogging.info("- not found")
                continue

            # NOTE(review): returns and aborts the whole blueprint,
            # rather than skipping this one node — confirm intent
            if 'destroy' in settings and settings['destroy'] == 'never':
                plogging.info("Destroying node '{}'".format(label))
                plogging.info("- this node can never be destroyed")
                return False

            # wait up to ~300s for a pending shutdown to complete
            timeout = 300
            tick = 6
            while node.extra['status'].action == 'SHUTDOWN_SERVER':
                time.sleep(tick)
                node = self.get_node(label)
                timeout -= tick
                if timeout < 0:
                    break

            if node.state == NodeState.RUNNING:
                plogging.info("Destroying node '{}'".format(label))
                plogging.info("- skipped - node is up and running")
                continue

            if self.plumbery.safeMode:
                plogging.info("Destroying node '{}'".format(label))
                plogging.info("- skipped - safe mode")
                continue

            # remove monitoring before tearing the node down
            configuration = MonitoringConfiguration(
                engine=container.facility.plumbery,
                facility=container.facility)
            configuration.deconfigure(node, settings)

            # detach secondary interfaces and public NAT first
            self._detach_node(node, settings)
            container._detach_node_from_internet(node)

            plogging.info("Destroying node '{}'".format(label))
            # retry while the resource is busy; other driver errors,
            # matched by message substring, end the loop
            while True:
                try:
                    self.region.destroy_node(node)
                    plogging.info("- in progress")

                except Exception as feedback:

                    if 'RESOURCE_BUSY' in str(feedback):
                        time.sleep(10)
                        continue

                    elif 'RESOURCE_NOT_FOUND' in str(feedback):
                        plogging.info("- not found")

                    elif 'SERVER_STARTED' in str(feedback):
                        plogging.info("- skipped - node is up and running")

                    elif 'RESOURCE_LOCKED' in str(feedback):
                        plogging.info("- not now - locked")
                        return False

                    else:
                        plogging.info("- unable to destroy node")
                        plogging.error(str(feedback))

                break
def _apply_prepares(self, node, steps):
    """
    Does the actual job over SSH

    :param node: the node to be polished
    :type node: :class:`libcloud.compute.base.Node`

    :param steps: the various steps of the preparing
    :type steps: ``list`` of ``dict``

    :return: ``True`` if everything went fine, ``False`` otherwise
    :rtype: ``bool``

    """

    if node is None or node.state != NodeState.RUNNING:
        plogging.warning("- skipped - node is not running")
        return False

    # select the address to use: public first, then ipv6, then private
    if len(node.public_ips) > 0:
        target_ip = node.public_ips[0]
    elif node.extra['ipv6']:
        target_ip = node.extra['ipv6']
    else:
        target_ip = node.private_ips[0]

    # use libcloud to communicate with remote nodes
    session = SSHClient(hostname=target_ip,
                        port=22,
                        username=self.user,
                        password=self.secret,
                        key_files=self.key_files,
                        timeout=10)

    # up to 6 connection attempts, 10s apart
    repeats = 0
    while True:
        try:
            session.connect()
            break

        except Exception as feedback:
            repeats += 1
            if repeats > 5:
                plogging.error(
                    "Error: can not connect to '{}'!".format(target_ip))
                plogging.error("- failed to connect")
                return False

            plogging.debug(str(feedback))
            plogging.debug(
                "- connection {} failed, retrying".format(repeats))
            time.sleep(10)
            continue

    # run every step over the session; retry the whole sequence only
    # when the remote resource is busy
    while True:
        try:
            if self.engine.safeMode:
                plogging.info(
                    "- skipped - no ssh interaction in safe mode")

            else:
                for step in steps:
                    plogging.info('- {}'.format(step['description']))
                    step['genius'].run(node, session)

        except Exception as feedback:
            if 'RESOURCE_BUSY' in str(feedback):
                time.sleep(10)
                continue

            plogging.error("Error: unable to prepare '{}' at '{}'!".format(
                node.name, target_ip))
            plogging.error(str(feedback))
            plogging.error("- failed")
            result = False

        else:
            result = True

        break

    # best-effort close; failures to close are deliberately ignored
    try:
        session.close()
    except:
        pass

    return result
def _configure_backup(self, node, backup):
    """
    Configures backup on a node

    :param node: the target node
    :type node: :class:`libcloud.compute.base.Node`

    :param backup: The backup settings
    :type backup: ``dict`` or ``str``

    :return: ``True`` when backup clients have been configured,
        ``False`` when backup could not be enabled

    :raises ConfigurationError: when a requested storage or schedule
        policy does not exist for the target

    """
    default_email = self.facility.backup.connection.get_account_details(
        ).email

    # a plain string is a shorthand for a full settings dictionary
    if isinstance(backup, string_types):
        backup = {
            'plan': backup,
            'email': default_email,
            'clients': [{
                'type': 'filesystem'
            }]
        }

    # the API expects a capitalized service plan, e.g. 'Essentials'
    plan = backup['plan'].lower().capitalize()

    plogging.info("Starting {} backup of node '{}'".format(
        plan.lower(), node.name))

    backup_details = None
    try:
        self.facility.backup.create_target_from_node(
            node, extra={'servicePlan': plan})
    except Exception as feedback:
        # NOTE(review): assumes driver exceptions carry a 'msg'
        # attribute; getattr keeps this safe for other exception types
        if getattr(feedback, 'msg', None) == (
                'Cloud backup for this server is already '
                'enabled or being enabled (state: NORMAL).'):
            plogging.info("- already there")
            backup_details = \
                self.facility.backup.ex_get_backup_details_for_target(
                    node.id)
        else:
            plogging.info("- unable to start backup")
            plogging.error(str(feedback))
            return False

    # poll until the backup target reaches its normal state
    # bug fix: the original tested "status is not 'NORMAL'", an identity
    # comparison with a string literal that is almost always true, so the
    # loop could never detect completion; use value equality instead
    while (backup_details is not None
            and backup_details.status != 'NORMAL'):
        try:
            backup_details = \
                self.facility.backup.ex_get_backup_details_for_target(
                    node.id)
            plogging.info("- in progress, found asset %s",
                          backup_details.asset_id)
            # pause between polls to avoid hammering the API
            time.sleep(10)

        except Exception as feedback:

            if 'RESOURCE_BUSY' in str(feedback):
                time.sleep(10)
                continue

            elif 'RETRYABLE_SYSTEM_ERROR' in str(feedback):
                time.sleep(10)
                continue

            elif 'NO_CHANGE' in str(feedback):
                plogging.info("- already there")

            elif 'RESOURCE_LOCKED' in str(feedback):
                plogging.info("- unable to start backup "
                              "- node has been locked")

            else:
                plogging.info("- unable to start backup")
                plogging.error(str(feedback))

            break

    target = self.facility.backup.ex_get_target_by_id(node.id)

    storage_policies = \
        self.facility.backup.ex_list_available_storage_policies(
            target=target)
    schedule_policies = \
        self.facility.backup.ex_list_available_schedule_policies(
            target=target)
    client_types = self.facility.backup.ex_list_available_client_types(
        target=target)

    clients = backup.get('clients', [{'type': 'filesystem'}])
    for client in clients:
        plogging.info("- adding backup client")

        client_type = client.get('type', 'filesystem').lower()
        storage_policy_name = client.get('storagePolicy',
                                         '14 Day Storage Policy').lower()
        schedule_policy_name = client.get('schedulePolicy',
                                          '12AM - 6AM').lower()
        trigger = client.get('trigger', 'ON_FAILURE')
        email = client.get('email', default_email)

        # resolve policy names (case-insensitive) against what the
        # target actually offers
        try:
            storage_policy = next(
                x for x in storage_policies
                if x.name.lower() == storage_policy_name)
        except StopIteration:
            raise ConfigurationError(
                "Could not find matching storage policy '%s'" %
                storage_policy_name)

        try:
            schedule_policy = next(
                x for x in schedule_policies
                if x.name.lower() == schedule_policy_name)
        except StopIteration:
            raise ConfigurationError(
                "Could not find matching schedule policy '%s'" %
                schedule_policy_name)

        # pick the backup client type; renamed so the 'client' loop
        # variable is no longer shadowed
        if client_type in ['file', 'filesystem']:
            selected_client = [x for x in client_types
                               if x.is_file_system][0]
        else:
            selected_client = [x for x in client_types
                               if x.description.startswith(client_type)][0]

        self.facility.backup.ex_add_client_to_target(
            target=target,
            client_type=selected_client,
            storage_policy=storage_policy,
            schedule_policy=schedule_policy,
            trigger=trigger,
            email=email)

    return True
def configure(self, node, settings):
    """
    Prepares a Windows node over WinRM

    :param node: the node to be polished
    :type node: :class:`libcloud.compute.base.Node`

    :param settings: the fittings plan for this node
    :type settings: ``dict``

    :return: ``False`` when the settings do not concern this polisher,
        ``None`` otherwise

    """
    if self._element_name_ in settings:
        plogging.info("preparing node '{}'".format(settings['name']))
        if node is None:
            plogging.info("- not found")
            return

        # wait up to ~300s for a pending start to complete
        timeout = 300
        tick = 6
        while node.extra['status'].action == 'START_SERVER':
            time.sleep(tick)
            node = self.nodes.get_node(node.name)
            timeout -= tick
            if timeout < 0:
                break

        if node.state != NodeState.RUNNING:
            plogging.info("- skipped - node is not running")
            return

        ipv6 = node.extra['ipv6']
        ip = node.private_ips[0]
        if ipv6 is None:
            plogging.error('No ipv6 address for node, cannot configure')
            return

        # Check to see if WinRM works..
        # on failure, try to enable WinRM remotely, then probe again
        try:
            self._try_winrm(node)
        except winrm.exceptions.InvalidCredentialsError:
            plogging.warn('initial login to %s failed, trying to setup winrm remotely', ip)
            self._setup_winrm(node)
            self._try_winrm(node)
        except requests.exceptions.ConnectionError:
            plogging.warn('initial connection to %s failed, trying to setup winrm remotely', ip)
            self._setup_winrm(node)
            self._try_winrm(node)

        # OK, we're all ready. Let's look at the node config and start commands
        cmds = []

        hostname = settings[self._element_name_].get('hostname', None)
        if hostname is not None and isinstance(hostname, str):
            cmds.append(('powershell.exe', ['Rename-Computer', '-NewName', hostname]))

        # each extra command is '<program> <arg> <arg> ...'
        extra_cmds = settings[self._element_name_].get('cmds', [])
        for command in extra_cmds:
            command = command.rstrip()
            command_parts = command.split(' ')
            cmds.append((command_parts[0], command_parts[1:]))

        out, err = self._winrm_commands(node, cmds)
        plogging.info(out)
        plogging.warning(err)

        # re-restrict WinRM access once configuration is done
        plogging.debug('locking down winrm')
        self._lockdown_winrm(node)
    else:
        return False
def build_blueprint(self, blueprint, container):
    """
    Create missing nodes

    :param blueprint: the blueprint to build
    :type blueprint: ``dict``

    :param container: the container where nodes will be built
    :type container: :class:`plumbery.PlumberyInfrastructure`

    """

    plogging.debug("Building nodes of blueprint '{}'".format(
        blueprint['target']))

    self.facility.power_on()

    # normalise the blueprint so the loop below always sees a list
    if ('nodes' not in blueprint
            or not isinstance(blueprint['nodes'], list)):
        plogging.debug("No nodes have been defined in '{}'".format(
            blueprint['target']))
        blueprint['nodes'] = []

    for item in blueprint['nodes']:

        # each item is either a bare label or a {label: settings} mapping
        if type(item) is dict:
            label = list(item.keys())[0]
            settings = list(item.values())[0]

        else:
            label = item
            settings = {}

        # expand e.g. 'web[10..12]' into individual node names
        for label in self.expand_labels(label):

            plogging.info("Creating node '{}'".format(label))

            if self.get_node(label):
                plogging.info("- already there")
                continue

            description = '#plumbery'
            if 'description' in settings:
                description = settings['description'] + ' #plumbery'

            if 'appliance' in settings:
                imageName = settings['appliance']
            else:
                imageName = None

            image = self.facility.get_image(imageName)
            if image is None:
                raise PlumberyException("Error: unable to find image "
                                        "for '{}'!".format(imageName))
            plogging.debug("- using image '{}'".format(image.name))

            # parse 'cpu' as '<count> [<cores per socket>] [<speed>]'
            cpu = None
            if 'cpu' in settings:
                tokens = str(settings['cpu']).split(' ')
                if len(tokens) < 2:
                    tokens.append('1')
                if len(tokens) < 3:
                    tokens.append('standard')

                if (int(tokens[0]) < 1
                        or int(tokens[0]) > 32):
                    plogging.info("- cpu should be between 1 and 32")

                elif (int(tokens[1]) < 1
                        or int(tokens[1]) > 2):
                    plogging.info("- core per cpu should be either 1 or 2")

                elif tokens[2].upper() not in ('STANDARD',
                                               'HIGHPERFORMANCE'):
                    plogging.info("- cpu speed should be either 'standard'"
                                  " or 'highspeed'")

                else:
                    cpu = DimensionDataServerCpuSpecification(
                        cpu_count=tokens[0],
                        cores_per_socket=tokens[1],
                        performance=tokens[2].upper())
                    plogging.debug("- assigning {} cpus".format(
                        cpu.cpu_count))
                    plogging.debug("- core per cpu: {}".format(
                        cpu.cores_per_socket))
                    plogging.debug("- cpu performance: {}".format(
                        cpu.performance.lower()))

            # memory is expressed in GB; out-of-range values are ignored
            memory = None
            if 'memory' in settings:
                memory = int(settings['memory'])

                if memory < 1 or memory > 256:
                    plogging.info("- memory should be between 1 and 256")
                    memory = None
                else:
                    plogging.debug(
                        "- assigning {} GB of memory".format(memory))

            if self.plumbery.safeMode:
                plogging.info("- skipped - safe mode")
                continue

            if container.domain is None:
                plogging.info("- missing network domain")
                continue

            if container.network is None:
                plogging.info("- missing Ethernet network")
                continue

            # look for a fixed primary address in the 'glue' directives
            primary_ipv4 = None
            if 'glue' in settings:
                for line in settings['glue']:

                    tokens = line.strip(' ').split(' ')
                    token = tokens.pop(0)

                    if token.lower() == 'primary':
                        token = container.network.name

                    # only the blueprint's own network can carry the
                    # primary address
                    if token != container.network.name:
                        continue

                    if len(tokens) < 1:
                        break

                    plogging.info(
                        "Glueing node '{}' to network '{}'".format(
                            label, token))

                    # complete a partial address with the subnet prefix
                    numbers = tokens.pop(0).strip('.').split('.')
                    subnet = container.network.private_ipv4_range_address.split(
                        '.')
                    while len(numbers) < 4:
                        numbers.insert(0, subnet[3 - len(numbers)])
                    primary_ipv4 = '.'.join(numbers)

                    plogging.debug(
                        "- using address '{}'".format(primary_ipv4))

                    break

            retries = 2
            # some images with software labels cannot be deployed in the
            # stopped state; in that case the node is started, awaited,
            # then shut down gracefully (see INVALID_INPUT_DATA below)
            should_start = False
            while True:
                try:
                    if primary_ipv4 is not None:
                        self.region.create_node(
                            name=label,
                            image=image,
                            auth=NodeAuthPassword(
                                self.plumbery.get_shared_secret()),
                            ex_network_domain=container.domain,
                            ex_primary_ipv4=primary_ipv4,
                            ex_cpu_specification=cpu,
                            ex_memory_gb=memory,
                            ex_is_started=should_start,
                            ex_description=description)
                    else:
                        self.region.create_node(
                            name=label,
                            image=image,
                            auth=NodeAuthPassword(
                                self.plumbery.get_shared_secret()),
                            ex_network_domain=container.domain,
                            ex_vlan=container.network,
                            ex_cpu_specification=cpu,
                            ex_memory_gb=memory,
                            ex_is_started=should_start,
                            ex_description=description)

                    plogging.info("- in progress")

                    if should_start:  # stop the node after start
                        plogging.info("- waiting for node to be deployed")

                        node = None
                        while True:
                            node = self.get_node(label)
                            if node is None:
                                plogging.error(
                                    "- aborted - missing node '{}'".format(
                                        label))
                                return

                            # deployment is over when no action is pending
                            if node.extra['status'].action is None:
                                break

                            if (node is not None
                                    and node.extra['status'].failure_reason
                                    is not None):

                                plogging.error(
                                    "- aborted - failed deployment "
                                    "of node '{}'".format(label))
                                return

                            time.sleep(20)

                        if node is not None:
                            self.region.ex_shutdown_graceful(node)
                            plogging.info(
                                "- shutting down after deployment")

                except SocketError as feedback:

                    # retry a couple of times on connection reset
                    if feedback.errno == errno.ECONNRESET and retries > 0:
                        retries -= 1
                        time.sleep(10)
                        continue

                    else:
                        plogging.info("- unable to create node")
                        plogging.error(str(feedback))

                except Exception as feedback:

                    if 'RESOURCE_BUSY' in str(feedback):
                        time.sleep(10)
                        continue

                    elif 'RESOURCE_NOT_FOUND' in str(feedback):
                        plogging.info("- not now")
                        plogging.error(str(feedback))

                    elif 'RESOURCE_LOCKED' in str(feedback):
                        plogging.info("- not now - locked")
                        plogging.error(str(feedback))

                    elif ('INVALID_INPUT_DATA: Cannot deploy server '
                          'with Software Labels in the "Stopped" state.'
                          in str(feedback)):
                        # redo the creation with the node started
                        should_start = True
                        continue

                    else:
                        plogging.info("- unable to create node")
                        plogging.error(str(feedback))

                break
def _apply_prepares(self, node, steps):
    """
    Does the actual job over SSH

    :param node: the node to be polished
    :type node: :class:`libcloud.compute.base.Node`

    :param steps: the various steps of the preparing
    :type steps: :class:`libcloud.compute.deployment.MultiStepDeployment`

    :return: ``True`` if everything went fine, ``False`` otherwise
    :rtype: ``bool``

    """

    if node is None or node.state != NodeState.RUNNING:
        plogging.warning("- skipped - node is not running")
        return False

    # select the address to use: public first, then ipv6, then private
    if len(node.public_ips) > 0:
        target_ip = node.public_ips[0]
    elif node.extra['ipv6']:
        target_ip = node.extra['ipv6']
    else:
        target_ip = node.private_ips[0]

    # guess location of user key
    path = os.path.expanduser('~/.ssh/id_rsa')

    # use libcloud to communicate with remote nodes
    session = SSHClient(hostname=target_ip,
                        port=22,
                        username=self.user,
                        password=self.secret,
                        key_files=path,
                        timeout=9)

    # a single connection attempt — no retries in this variant
    try:
        session.connect()

    except Exception as feedback:
        plogging.error("Error: unable to prepare '{}' at '{}'!".format(
            node.name, target_ip))
        plogging.error(str(feedback))
        plogging.error("- failed")
        return False

    # run the deployment steps; retry only while the resource is busy
    while True:
        try:
            if self.engine.safeMode:
                plogging.info("- skipped - no ssh interaction in safe mode")

            else:
                node = steps.run(node, session)

        except Exception as feedback:
            if 'RESOURCE_BUSY' in str(feedback):
                time.sleep(10)
                continue

            plogging.error("Error: unable to prepare '{}' at '{}'!".format(
                node.name, target_ip))
            plogging.error(str(feedback))
            plogging.error("- failed")
            result = False

        else:
            result = True

        break

    # best-effort close; failures to close are deliberately ignored
    try:
        session.close()
    except:
        pass

    return result
def set_node_compute(self, node, cpu, memory):
    """
    Sets compute capability

    Compares the target cpu and memory settings with what the node
    currently has, and submits a reconfiguration request when at least
    one attribute differs.

    :param node: the node to be polished
    :type node: :class:`libcloud.compute.base.Node`

    :param cpu: the cpu specification
    :type cpu: ``DimensionDataServerCpuSpecification``

    :param memory: the memory size, expressed in Giga bytes
    :type memory: ``int``

    """
    pending_update = False

    # diff the desired cpu settings against the node's current ones
    if cpu is not None and 'cpu' in node.extra:
        current_cpu = node.extra['cpu']

        if int(cpu.cpu_count) != int(current_cpu.cpu_count):
            plogging.info("- changing to {} cpu".format(cpu.cpu_count))
            pending_update = True

        if int(cpu.cores_per_socket) != int(current_cpu.cores_per_socket):
            plogging.info("- changing to {} core(s) per socket".format(
                cpu.cores_per_socket))
            pending_update = True

        if cpu.performance != current_cpu.performance:
            plogging.info("- changing to '{}' cpu performance".format(
                cpu.performance.lower()))
            pending_update = True

    # 'memoryMb' is reported in MB while the target is expressed in GB
    if memory is not None and 'memoryMb' in node.extra:
        if memory != int(node.extra['memoryMb'] / 1024):
            plogging.info("- changing to {} GB memory".format(memory))
            pending_update = True

    if not pending_update:
        plogging.debug("- no change in compute")
        return

    if self.engine.safeMode:
        plogging.info("- skipped - safe mode")
        return

    # NOTE(review): if only memory changed while cpu is None, the cpu.*
    # accesses below would raise — confirm callers always supply cpu
    # submit the reconfiguration, retrying while the resource is busy
    while True:
        try:
            self.region.ex_reconfigure_node(
                node=node,
                memory_gb=memory,
                cpu_count=cpu.cpu_count,
                cores_per_socket=cpu.cores_per_socket,
                cpu_performance=cpu.performance)

            plogging.info("- in progress")

        except Exception as feedback:
            message = str(feedback)

            if 'RESOURCE_BUSY' in message:
                time.sleep(10)
                continue

            if 'Please try again later' in message:
                time.sleep(10)
                continue

            plogging.info("- unable to reconfigure node")
            plogging.error(message)

        break
def _get_prepares(self, node, settings, container):
    """
    Defines the set of actions to be done on a node

    :param node: the node to be polished
    :type node: :class:`libcloud.compute.base.Node`

    :param settings: the fittings plan for this node
    :type settings: ``dict``

    :param container: the container of this node
    :type container: :class:`plumbery.PlumberyInfrastructure`

    :return: a list of actions to be performed, and related descriptions
    :rtype: a ``list`` of `{ 'description': ..., 'genius': ... }``

    """

    if not isinstance(settings, dict):
        return []

    # template context used to expand {{ }} in scripts and files
    environment = PlumberyNodeContext(node=node,
                                      container=container,
                                      context=self.facility)

    prepares = []

    # deploy the shared SSH public key, when one has been loaded
    if self.key is not None:
        prepares.append({
            'description': 'deploy SSH public key',
            'genius': SSHKeyDeployment(self.key)})

    if ('prepare' in settings
            and isinstance(settings['prepare'], list)
            and len(settings['prepare']) > 0):

        plogging.info('- using prepare commands')

        # each directive is '<verb> <argument> ...'; a bare word
        # defaults to the 'run' verb
        for script in settings['prepare']:

            tokens = script.split(' ')
            if len(tokens) == 1:
                tokens.insert(0, 'run')

            if tokens[0] in ['run', 'run_raw']:  # send and run a script

                script = tokens[1]
                if len(tokens) > 2:
                    args = tokens[2:]
                else:
                    args = []

                plogging.debug("- {} {} {}".format(
                    tokens[0], script, ' '.join(args)))

                try:
                    with open(script) as stream:
                        text = stream.read()

                    # 'run' expands templates; 'run_raw' sends as-is
                    if(tokens[0] == 'run'
                            and PlumberyText.could_expand(text)):

                        plogging.debug("- expanding script '{}'"
                                       .format(script))
                        text = PlumberyText.expand_string(
                            text, environment)

                    if len(text) > 0:

                        plogging.info("- running '{}'"
                                      .format(script))

                        prepares.append({
                            'description': ' '.join(tokens),
                            'genius': ScriptDeployment(
                                script=text,
                                args=args,
                                name=script)})

                    else:
                        plogging.error("- script '{}' is empty"
                                       .format(script))

                except IOError:
                    plogging.error("- unable to read script '{}'"
                                   .format(script))

            elif tokens[0] in ['put', 'put_raw']:  # send a file

                file = tokens[1]
                if len(tokens) > 2:
                    destination = tokens[2]
                else:
                    destination = './'+file

                plogging.debug("- {} {} {}".format(
                    tokens[0], file, destination))

                try:
                    with open(file) as stream:
                        content = stream.read()

                    # 'put' expands templates; 'put_raw' sends as-is
                    if(tokens[0] == 'put'
                            and PlumberyText.could_expand(content)):

                        plogging.debug("- expanding file '{}'"
                                       .format(file))
                        content = PlumberyText.expand_string(
                            content, environment)

                    plogging.info("- putting file '{}'"
                                  .format(file))

                    prepares.append({
                        'description': ' '.join(tokens),
                        'genius': FileContentDeployment(
                            content=content,
                            target=destination)})

                except IOError:
                    plogging.error("- unable to read file '{}'"
                                   .format(file))

            else:  # echo a sensible message eventually

                if tokens[0] == 'echo':
                    tokens.pop(0)

                message = ' '.join(tokens)
                message = PlumberyText.expand_string(
                    message, environment)
                plogging.info("- {}".format(message))

    if ('cloud-config' in settings
            and isinstance(settings['cloud-config'], dict)
            and len(settings['cloud-config']) > 0):

        plogging.info('- using cloud-config')

        # mandatory, else cloud-init will not consider user-data
        plogging.debug('- preparing meta-data')
        meta_data = 'instance_id: dummy\n'

        destination = '/var/lib/cloud/seed/nocloud-net/meta-data'
        prepares.append({
            'description': 'put meta-data',
            'genius': FileContentDeployment(
                content=meta_data,
                target=destination)})

        plogging.debug('- preparing user-data')

        expanded = PlumberyText.expand_string(
            settings['cloud-config'], environment)

        user_data = '#cloud-config\n'+expanded
        plogging.debug(user_data)

        destination = '/var/lib/cloud/seed/nocloud-net/user-data'
        prepares.append({
            'description': 'put user-data',
            'genius': FileContentDeployment(
                content=user_data,
                target=destination)})

        plogging.debug('- preparing remote install of cloud-init')

        # the installer script ships next to this module
        script = 'prepare.cloud-init.sh'
        try:
            path = os.path.dirname(__file__)+'/'+script
            with open(path) as stream:
                text = stream.read()
            if text:
                prepares.append({
                    'description': 'run '+script,
                    'genius': ScriptDeployment(
                        script=text,
                        name=script)})
        except IOError:
            raise PlumberyException("Error: cannot read '{}'"
                                    .format(script))

        # a reboot is needed for cloud-init to pick up the seed files
        plogging.debug('- preparing reboot to trigger cloud-init')
        prepares.append({
            'description': 'reboot node',
            'genius': RebootDeployment(
                container=container)})

    return prepares
def attach_node_to_internet(self, node, ports=None):
    """
    Adds address translation for one node

    :param node: node that has to be reachable from the internet
    :type node: :class:`libcloud.common.Node`

    :param ports: the ports that have to be opened, e.g. ['80', '443'];
        defaults to no open port
    :type ports: a list of ``str``

    """
    # fix: the default used to be the mutable literal `ports=[]`, which
    # Python evaluates once and shares across every call to this method
    if ports is None:
        ports = []

    plogging.info("Making node '{}' reachable from the internet".format(
        node.name))

    domain = self.container.get_network_domain(
        self.container.blueprint['domain']['name'])

    internal_ip = node.private_ips[0]

    # look for an existing NAT rule for this node
    external_ip = None
    for rule in self.region.ex_list_nat_rules(domain):
        if rule.internal_ip == internal_ip:
            external_ip = rule.external_ip
            plogging.info(
                "- node is reachable at '{}'".format(external_ip))

    if self.engine.safeMode:
        plogging.info("- skipped - safe mode")
        return

    if external_ip is None:

        # reserve a public address, then create the NAT rule
        external_ip = self.container._get_ipv4()
        if external_ip is None:
            plogging.info(
                "- no more ipv4 address available -- assign more")
            return

        while True:
            try:
                self.region.ex_create_nat_rule(domain,
                                               internal_ip,
                                               external_ip)
                plogging.info(
                    "- node is reachable at '{}'".format(external_ip))
            except Exception as feedback:
                # the API is eventually consistent -- retry while busy
                if 'RESOURCE_BUSY' in str(feedback):
                    time.sleep(10)
                    continue
                elif 'RESOURCE_LOCKED' in str(feedback):
                    plogging.info("- not now - locked")
                    return
                else:
                    plogging.info("- unable to add address translation")
                    plogging.error(str(feedback))
            break

    # open firewall ports, skipping rules that already exist
    candidates = self.container._list_candidate_firewall_rules(node, ports)

    for rule in self.container._list_firewall_rules():
        if rule.name in candidates.keys():
            plogging.info("Creating firewall rule '{}'".format(rule.name))
            plogging.info("- already there")
            candidates = {k: candidates[k]
                          for k in candidates if k != rule.name}

    for name, rule in candidates.items():
        plogging.info("Creating firewall rule '{}'".format(name))

        if self.engine.safeMode:
            plogging.info("- skipped - safe mode")
        else:
            try:
                self.container._ex_create_firewall_rule(
                    network_domain=domain,
                    rule=rule,
                    position='LAST')
                plogging.info("- in progress")
            except Exception as feedback:
                if 'NAME_NOT_UNIQUE' in str(feedback):
                    plogging.info("- already there")
                else:
                    plogging.info("- unable to create firewall rule")
                    plogging.error(str(feedback))
def _configure_backup(self, node, backup):
    """
    Configure backup on a node

    :param node: the target node
    :type node: :class:`libcloud.compute.base.Node`

    :param backup: The backup settings
    :type backup: ``dict`` or ``str``

    :return: ``True`` when backup has been configured, ``False`` otherwise
    :rtype: ``bool``

    """
    default_email = \
        self.facility.backup.connection.get_account_details().email

    # a bare string is shorthand for a plan name with default settings
    if isinstance(backup, string_types):
        backup = {
            'plan': backup,
            'email': default_email,
            'clients': [{
                'type': 'filesystem'
            }]
        }

    plan = backup['plan'].lower().capitalize()
    plogging.info("Starting {} backup of node '{}'".format(
        plan.lower(), node.name))

    backup_details = None
    try:
        self.facility.backup.create_target_from_node(
            node,
            extra={'servicePlan': plan})
    except Exception as feedback:
        if feedback.msg == 'Cloud backup for this server is already enabled or being enabled (state: NORMAL).':
            plogging.info("- already there")
            backup_details = \
                self.facility.backup.ex_get_backup_details_for_target(
                    node.id)
        else:
            plogging.info("- unable to start backup")
            plogging.error(str(feedback))
            return False

    # poll until the backup target reaches a normal status
    # fix: this used `is not 'NORMAL'`, which compares string identity and
    # is virtually always true for API-returned strings; use `!=` instead
    while (backup_details is not None
            and backup_details.status != 'NORMAL'):
        try:
            backup_details = \
                self.facility.backup.ex_get_backup_details_for_target(
                    node.id)
            plogging.info("- in progress, found asset %s",
                          backup_details.asset_id)
        except Exception as feedback:
            if 'RESOURCE_BUSY' in str(feedback):
                time.sleep(10)
                continue
            elif 'RETRYABLE_SYSTEM_ERROR' in str(feedback):
                time.sleep(10)
                continue
            elif 'NO_CHANGE' in str(feedback):
                plogging.info("- already there")
            elif 'RESOURCE_LOCKED' in str(feedback):
                plogging.info("- unable to start backup "
                              "- node has been locked")
            else:
                plogging.info("- unable to start backup")
                plogging.error(str(feedback))
            break

    # collect the policies available for this target
    target = self.facility.backup.ex_get_target_by_id(node.id)
    storage_policies = \
        self.facility.backup.ex_list_available_storage_policies(
            target=target)
    schedule_policies = \
        self.facility.backup.ex_list_available_schedule_policies(
            target=target)
    client_types = \
        self.facility.backup.ex_list_available_client_types(
            target=target)

    # register each requested backup client against the target
    clients = backup.get('clients', [{'type': 'filesystem'}])
    for client in clients:
        plogging.info("- adding backup client")
        client_type = client.get('type', 'filesystem').lower()
        storage_policy = client.get(
            'storagePolicy', '14 Day Storage Policy').lower()
        schedule_policy = client.get(
            'schedulePolicy', '12AM - 6AM').lower()
        trigger = client.get('trigger', 'ON_FAILURE')
        email = client.get('email', default_email)

        try:
            storage_policy = [x for x in storage_policies
                              if x.name.lower() == storage_policy][0]
        except IndexError:
            raise ConfigurationError(
                "Could not find matching storage policy '%s'"
                % storage_policy)

        try:
            schedule_policy = [x for x in schedule_policies
                               if x.name.lower() == schedule_policy][0]
        except IndexError:
            raise ConfigurationError(
                "Could not find matching schedule policy '%s'"
                % schedule_policy)

        if client_type in ['file', 'filesystem']:
            client = [x for x in client_types if x.is_file_system][0]
        else:
            client = [x for x in client_types
                      if x.description.startswith(client_type)][0]

        self.facility.backup.ex_add_client_to_target(
            target=target,
            client_type=client,
            storage_policy=storage_policy,
            schedule_policy=schedule_policy,
            trigger=trigger,
            email=email)

    return True
def shine_node(self, node, settings, container):
    """
    Prepares a node for use

    :param node: the node to be polished
    :type node: :class:`libcloud.compute.base.Node`

    :param settings: the fittings plan for this node
    :type settings: ``dict``

    :param container: the container of this node
    :type container: :class:`plumbery.PlumberyInfrastructure`

    """
    plogging.info("Preparing node '{}'".format(settings['name']))

    if node is None:
        plogging.error("- not found")
        return

    # give a pending start action up to ~300 seconds to complete
    remaining = 300
    period = 6
    while node.extra['status'].action == 'START_SERVER':
        time.sleep(period)
        node = self.nodes.get_node(node.name)
        remaining -= period
        if remaining < 0:
            break

    if node.state != NodeState.RUNNING:
        plogging.error("- skipped - node is not running")
        return

    self.upgrade_vmware_tools(node)

    prepares = self._get_prepares(node, settings, container)
    if not prepares:
        plogging.info('- nothing to do')
        self.report.append({node.name: {
            'status': 'skipped - nothing to do'
            }})
        return

    # make sure the node can actually be reached over the network
    if node.public_ips:
        plogging.info("- node is reachable at '{}'".format(
            node.public_ips[0]))
    elif not self.beachheading:
        plogging.error('- node is unreachable')
        self.report.append({node.name: {
            'status': 'unreachable'
            }})
        return

    descriptions = [item['description'] for item in prepares]
    steps = [item['genius'] for item in prepares]

    if self._apply_prepares(node, MultiStepDeployment(steps)):
        plogging.info('- rebooting')
        self.report.append({node.name: {
            'status': 'completed',
            'prepares': descriptions
            }})
    else:
        self.report.append({node.name: {
            'status': 'failed',
            'prepares': descriptions
            }})
def main(args=None, engine=None):
    """
    Runs plumbery from the command line

    :param args: arguments to be considered for this invocation
    :type args: a list of ``str``

    :param engine: an instance of the plumbery engine
    :type engine: :class:`plumbery.PlumberEngine`

    Example::

        $ python -m plumbery fittings.yaml build web

    In this example, plumbery loads fittings plan from ``fittings.yaml``,
    then it builds the blueprint named ``web``.

    If no blueprint is mentioned, then plumbery looks at all blueprint
    definitions in the fittings plan. In other terms, the following command
    builds the entire fittings plan, eventually across multiple facilities::

        $ python -m plumbery fittings.yaml build

    Of course, plumbery can be invoked through the entire life cycle of your
    fittings::

        $ python -m plumbery fittings.yaml build
        $ python -m plumbery fittings.yaml start
        $ python -m plumbery fittings.yaml polish

        ... nodes are up and running ...

        $ python -m plumbery fittings.yaml stop

        ... nodes have been stopped ...

        $ python -m plumbery fittings.yaml wipe

        ... nodes have been destroyed, but the infrastructure remains ...

        $ python -m plumbery fittings.yaml destroy

        ... every virtual resources has been removed ...

    To focus at a single location, put the character '@' followed by the id.
    For example, to build fittings only at 'NA12' you would type::

        $ python -m plumbery fittings.yaml build @NA12

    To focus on one blueprint just mention its name on the command line.
    For example, if fittings plan has a blueprint for nodes running Docker,
    then you may use following statements to bootstrap each node::

        $ python -m plumbery fittings.yaml build docker
        $ python -m plumbery fittings.yaml start docker
        $ python -m plumbery fittings.yaml prepare docker

        ... Docker is up and running at multiple nodes ...

    If you create a new polisher and put it in the directory
    ``plumbery\polishers``, then it will become automatically available::

        $ python -m plumbery fittings.yaml my_special_stuff

    To get some help, you can type::

        $ python -m plumbery -h

    """

    # part 1 - understand what the user wants
    if args is None:
        args = sys.argv[1:]

    try:
        args = parse_args(args)
    except Exception as feedback:
        plogging.error("Incorrect arguments. "
                       "Maybe the following can help: python -m plumbery -h")
        # in debug mode, surface the full traceback instead of a summary
        if plogging.getEffectiveLevel() == logging.DEBUG:
            raise
        plogging.error("{}: {}".format(
            feedback.__class__.__name__, str(feedback)))
        sys.exit(2)

    # part 2 - get a valid and configured engine
    if engine is None:
        try:
            engine = PlumberyEngine(args.fittings, args.parameters)
            if args.safe:
                engine.safeMode = True
        except Exception as feedback:
            if plogging.getEffectiveLevel() == logging.DEBUG:
                plogging.error("Cannot read fittings plan from '{}'".format(
                    args.fittings))
                raise
            plogging.error("Cannot read fittings plan from '{}'"
                           ", run with -d for debug".format(args.fittings))
            plogging.error("{}: {}".format(
                feedback.__class__.__name__, str(feedback)))
            sys.exit(2)

    # part 3 - do the job
    try:
        engine.do(args.action, args.blueprints, args.facilities)
        plogging.info(engine.document_elapsed())
    except Exception as feedback:
        if plogging.getEffectiveLevel() == logging.DEBUG:
            plogging.error("Unable to do '{}'".format(args.action))
            raise
        plogging.error("Unable to do '{}', run with -d for debug".format(
            args.action))
        plogging.error("{}: {}".format(
            feedback.__class__.__name__, str(feedback)))
        sys.exit(1)
def _apply_prepares(self, node, steps):
    """
    Does the actual job over SSH

    :param node: the node to be polished
    :type node: :class:`libcloud.compute.base.Node`

    :param steps: the various steps of the preparing
    :type steps: :class:`libcloud.compute.deployment.MultiStepDeployment`

    :return: ``True`` if everything went fine, ``False`` otherwise
    :rtype: ``bool``

    """
    if node is None or node.state != NodeState.RUNNING:
        plogging.warning("- skipped - node is not running")
        return False

    # pick the best address: public first, then ipv6, then private
    if node.public_ips:
        address = node.public_ips[0]
    elif node.extra['ipv6']:
        address = node.extra['ipv6']
    else:
        address = node.private_ips[0]

    # guess location of user key
    key_path = os.path.expanduser('~/.ssh/id_rsa')

    # use libcloud to communicate with remote nodes
    session = SSHClient(hostname=address,
                        port=22,
                        username=self.user,
                        password=self.secret,
                        key_files=key_path,
                        timeout=9)

    try:
        session.connect()
    except Exception as feedback:
        plogging.error("Error: unable to prepare '{}' at '{}'!".format(
            node.name, address))
        plogging.error(str(feedback))
        plogging.error("- failed")
        return False

    result = True
    while True:
        try:
            if self.engine.safeMode:
                plogging.info(
                    "- skipped - no ssh interaction in safe mode")
            else:
                node = steps.run(node, session)
        except Exception as feedback:
            # the API is eventually consistent -- retry while busy
            if 'RESOURCE_BUSY' in str(feedback):
                time.sleep(10)
                continue
            plogging.error("Error: unable to prepare '{}' at '{}'!".format(
                node.name, address))
            plogging.error(str(feedback))
            plogging.error("- failed")
            result = False
        break

    try:
        session.close()
    except:
        pass

    return result
def set_node_disk(self, node, id, size, speed='standard'):
    """
    Sets a virtual disk

    :param node: the node to be polished
    :type node: :class:`libcloud.compute.base.Node`

    :param id: the disk id, starting at 0 and growing
    :type id: ``int``

    :param size: the disk size, expressed in Giga bytes
    :type size: ``int``

    :param speed: storage type, either 'standard', 'highperformance'
        or 'economy'
    :type speed: ``str``

    """
    # validate the requested size and speed before touching the node
    if size < 1:
        plogging.info("- minimum disk size is 1 GB")
        return

    if size > 1000:
        plogging.info("- disk size cannot exceed 1000 GB")
        return

    if speed not in ('standard', 'highperformance', 'economy'):
        plogging.info("- disk speed should be either 'standard' "
                      "or 'highperformance' or 'economy'")
        return

    # when the disk already exists, resize and/or change its speed
    if 'disks' in node.extra:
        for disk in node.extra['disks']:
            if disk['scsiId'] != id:
                continue

            touched = False

            # never shrink a disk -- the node could be corrupted
            if disk['size'] > size:
                plogging.info("- disk shrinking could break the node")
                plogging.info(
                    "- skipped - disk {} will not be reduced".format(id))

            if disk['size'] < size:
                plogging.info("- expanding disk {} to {} GB".format(
                    id, size))
                self.change_node_disk_size(node, disk['id'], size)
                touched = True

            if disk['speed'].lower() != speed.lower():
                plogging.info("- changing disk {} to '{}'".format(
                    id, speed))
                self.change_node_disk_speed(node, disk['id'], speed)
                touched = True

            if not touched:
                plogging.debug("- no change in disk {}".format(id))

            return

    # no such disk yet -- create it
    plogging.info("- adding {} GB '{}' disk".format(size, speed))

    while True:
        try:
            self.facility.region.ex_add_storage_to_node(
                node=node,
                amount=size,
                speed=speed.upper())
            plogging.info("- in progress")
        except Exception as feedback:
            # the API is eventually consistent -- retry while busy
            if 'RESOURCE_BUSY' in str(feedback):
                time.sleep(10)
                continue
            if 'Please try again later' in str(feedback):
                time.sleep(10)
                continue
            plogging.info("- unable to add disk {} GB '{}'".format(
                size, speed))
            plogging.error(str(feedback))
        break
def _apply_prepares(self, node, steps):
    """
    Does the actual job over SSH

    :param node: the node to be polished
    :type node: :class:`libcloud.compute.base.Node`

    :param steps: the various steps of the preparing
    :type steps: ``list`` of ``dict``

    :return: ``True`` if everything went fine, ``False`` otherwise
    :rtype: ``bool``

    """
    if node is None or node.state != NodeState.RUNNING:
        plogging.warning("- skipped - node is not running")
        return False

    # pick the best address: public first, then ipv6, then private
    if node.public_ips:
        address = node.public_ips[0]
    elif node.extra['ipv6']:
        address = node.extra['ipv6']
    else:
        address = node.private_ips[0]

    # use libcloud to communicate with remote nodes
    session = SSHClient(hostname=address,
                        port=22,
                        username=self.user,
                        password=self.secret,
                        key_files=self.key_files,
                        timeout=10)

    # retry the connection a few times before giving up
    attempts = 0
    while True:
        try:
            session.connect()
            break
        except Exception as feedback:
            attempts += 1
            if attempts > 5:
                plogging.error("Error: can not connect to '{}'!".format(
                    address))
                plogging.error("- failed to connect")
                return False
            plogging.debug(str(feedback))
            plogging.debug(
                "- connection {} failed, retrying".format(attempts))
            time.sleep(10)

    result = True
    while True:
        try:
            if self.engine.safeMode:
                plogging.info(
                    "- skipped - no ssh interaction in safe mode")
            else:
                for step in steps:
                    plogging.info('- {}'.format(step['description']))
                    step['genius'].run(node, session)
        except Exception as feedback:
            # the API is eventually consistent -- retry while busy
            if 'RESOURCE_BUSY' in str(feedback):
                time.sleep(10)
                continue
            plogging.error("Error: unable to prepare '{}' at '{}'!".format(
                node.name, address))
            plogging.error(str(feedback))
            plogging.error("- failed")
            result = False
        break

    try:
        session.close()
    except:
        pass

    return result
def attach_node_to_internet(self, node, ports=None):
    """
    Adds address translation for one node

    :param node: node that has to be reachable from the internet
    :type node: :class:`libcloud.common.Node`

    :param ports: the ports that have to be opened, e.g. ['80', '443'];
        defaults to no open port
    :type ports: a list of ``str``

    """
    # fix: the default used to be the mutable literal `ports=[]`, which
    # Python evaluates once and shares across every call to this method
    if ports is None:
        ports = []

    plogging.info("Making node '{}' reachable from the internet"
                  .format(node.name))

    domain = self.container.get_network_domain(
        self.container.blueprint['domain']['name'])

    internal_ip = node.private_ips[0]

    # look for an existing NAT rule for this node
    external_ip = None
    for rule in self.region.ex_list_nat_rules(domain):
        if rule.internal_ip == internal_ip:
            external_ip = rule.external_ip
            plogging.info("- node is reachable at '{}'".format(external_ip))

    if self.engine.safeMode:
        plogging.info("- skipped - safe mode")
        return

    if external_ip is None:

        # reserve a public address, then create the NAT rule
        external_ip = self.container._get_ipv4()
        if external_ip is None:
            plogging.info("- no more ipv4 address available -- assign more")
            return

        while True:
            try:
                self.region.ex_create_nat_rule(
                    domain, internal_ip, external_ip)
                plogging.info("- node is reachable at '{}'".format(
                    external_ip))
            except Exception as feedback:
                # the API is eventually consistent -- retry while busy
                if 'RESOURCE_BUSY' in str(feedback):
                    time.sleep(10)
                    continue
                elif 'RESOURCE_LOCKED' in str(feedback):
                    plogging.info("- not now - locked")
                    return
                else:
                    plogging.info("- unable to add address translation")
                    plogging.error(str(feedback))
            break

    # open firewall ports, skipping rules that already exist
    candidates = self.container._list_candidate_firewall_rules(node, ports)

    for rule in self.container._list_firewall_rules():
        if rule.name in candidates.keys():
            plogging.info("Creating firewall rule '{}'"
                          .format(rule.name))
            plogging.info("- already there")
            candidates = {k: candidates[k]
                          for k in candidates if k != rule.name}

    for name, rule in candidates.items():
        plogging.info("Creating firewall rule '{}'"
                      .format(name))

        if self.engine.safeMode:
            plogging.info("- skipped - safe mode")
        else:
            try:
                self.container._ex_create_firewall_rule(
                    network_domain=domain,
                    rule=rule,
                    position='LAST')
                plogging.info("- in progress")
            except Exception as feedback:
                if 'NAME_NOT_UNIQUE' in str(feedback):
                    plogging.info("- already there")
                else:
                    plogging.info("- unable to create firewall rule")
                    plogging.error(str(feedback))