def configure(self):
    #
    # Write config file
    #
    fp = open(CONFIG_FILE, 'w')

    try:
        print("# File generated by genconfig", file=fp)

        installer = self.kit_installer.config_manager.getInstaller()

        all_node_list = getNodeApi().getNodeList()
        node_list = [
            node for node in all_node_list
            if node.getName() != installer and
            node.getState() != 'Deleted' and
            not node.getIsIdle()
        ]

        for node in node_list:
            print('{}'.format(node), file=fp)
    finally:
        fp.close()

    #
    # Write /etc/netgroup
    #
    fp = open('/etc/netgroup', 'w')

    dbm = DbManager()
    session = dbm.openSession()

    try:
        software_profiles = \
            SoftwareProfilesDbHandler().getSoftwareProfileList(session)

        for software_profile in software_profiles:
            if not software_profile.nodes:
                continue

            software_profile_node_list = [
                node.name for node in software_profile.nodes
                if node.state != 'Deleted'
            ]

            if not software_profile_node_list:
                continue

            fp.write(
                '{} {}\n\n'.format(
                    software_profile.name,
                    ' '.join(['({},,)'.format(node)
                              for node in software_profile_node_list])
                )
            )
    finally:
        fp.close()
        dbm.closeSession()
def configure(self):
    fp = open(CONFIG_FILE, 'w')

    dbm = DbManager()
    session = dbm.openSession()

    try:
        print("# ", file=fp)
        print("# Dynamically generated by: genconfig (Do not edit!)",
              file=fp)
        print("#", file=fp)
        print("", file=fp)

        dnszone = self.kit_installer.get_db_parameter_value('DNSZone')

        for db_nic in session.query(Nics).order_by(Nics.ip).all():
            if db_nic.node.state == 'Deleted':
                continue

            if not db_nic.ip:
                continue

            name = db_nic.node.name.split('.')[0]

            print('Host {}'.format(db_nic.ip), file=fp)
            print('\tStrictHostKeyChecking no', file=fp)

            print('Host {}.{}'.format(name, dnszone), file=fp)
            print('\tStrictHostKeyChecking no', file=fp)

            print('Host {}'.format(name), file=fp)
            print('\tStrictHostKeyChecking no', file=fp)

        print("", file=fp)
        print('Host *', file=fp)
        print('\t# ssh_config defaults', file=fp)
        print('\tGSSAPIAuthentication yes', file=fp)
        print('\tForwardX11Trusted yes', file=fp)
        print('\t# tortuga defaults', file=fp)
        print('\tNoHostAuthenticationForLocalhost yes', file=fp)
        print('\tStrictHostKeyChecking no', file=fp)
    finally:
        fp.close()
        dbm.closeSession()
def initDatabase(self) -> Tuple[Any, Session]:
    msg = _('Initializing database')

    self._logger.info(msg)

    print_('\n' + msg + '... ', end='')

    # This cannot be a global import since the database configuration
    # may be set in this script.
    from tortuga.db.dbManager import DbManager

    dbm = DbManager()

    # create database
    dbm.init_database()

    session = dbm.openSession()

    # Prime the database previously created as part of the bootstrap
    try:
        dbUtility.primeDb(session, self._settings)
        dbUtility.init_global_parameters(session, self._settings)

        print_(_('done'))

        session.commit()
    except Exception as exc:  # pylint: disable=broad-except
        session.rollback()

        print_(_('failed.'))

        print_(_('Exception raised initializing database:') +
               ' {0}'.format(exc), file=sys.stderr)

    self._logger.debug('Done initializing database')

    return dbm, session
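
# Usage sketch (illustrative only): initDatabase() returns both the
# DbManager and an open Session, and the caller is responsible for
# releasing them. `setup` stands in for an instance of whatever bootstrap
# class defines initDatabase() above -- the name is hypothetical.

def _example_bootstrap(setup) -> None:
    dbm, session = setup.initDatabase()
    try:
        # subsequent bootstrap steps would operate on `session` here
        pass
    finally:
        dbm.closeSession()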
class AWSSpotdAppClass:
    """
    App for checking on Spot Instance Requests and cleaning up requests
    that don't follow the proper lifecycle.
    """

    def __init__(self, logger,
                 polling_interval=SPOT_INSTANCE_POLLING_INTERVAL,
                 max_register_duration=SPOT_INSTANCE_REGISTER_MAX_WAIT,
                 dbm=None, one_time=False):
        self.logger = logger
        self.metadata_ws_api = MetadataWsApi()

        self.__one_time = one_time
        self.__polling_interval = polling_interval
        self.__max_register_duration = max_register_duration

        if dbm is None:
            self.__dbm = DbManager()
        else:
            self.__dbm = dbm

        self.__done = False
        self.__next_poll = 0
        self.__bad_requests = []
        self.__bad_request_lock = asyncio.Lock()

    def run(self):
        """
        Main body of the application.  Read, and optionally continue to
        read, known spot requests and compare against known instances.
        """
        loop = asyncio.get_event_loop()

        queue = asyncio.Queue()

        max_tasks = 3

        poller = None
        tasks = []

        self.logger.debug('Creating %d worker tasks', max_tasks)

        try:
            # create worker coroutines
            tasks = [
                asyncio.ensure_future(self.__worker(f'worker-{i}', queue))
                for i in range(max_tasks)
            ]

            poller = asyncio.ensure_future(self.__poller(queue, tasks))

            loop.run_until_complete(poller)
        except KeyboardInterrupt:
            pass
        except Exception as ex:  # pylint: disable=broad-except
            self.logger.error('Error running event loop: %s', ex)
        finally:
            if not self.__done:
                self.__done = True

                if poller:
                    loop.run_until_complete(poller)

            self.logger.debug('Cancelling worker tasks')

            for task in tasks:
                task.cancel()

            self.logger.debug('Closing asyncio loop')

            self.__dbm.closeSession()

    async def __poller(self, queue, tasks) -> None:
        resource_adapter = get_api('AWS')

        while not self.__done:
            if self.__next_poll < time.time():
                self.logger.debug('Polling spot instance requests')

                results = self.metadata_ws_api.list()

                resource_adapter.session = self.__dbm.openSession()

                resource_adapter_cfgs = {}

                try:
                    for result in results:
                        if not result['key'].startswith('sir-'):
                            # ignore any unrelated entries
                            self.logger.debug('Ignoring metadata key [%s]',
                                              result['key'])
                            continue

                        async with self.__bad_request_lock:
                            if result['key'] in self.__bad_requests:
                                self.logger.warning(
                                    'Invalid spot instance request [%s]'
                                    ' will not be queued', result['key'])
                                continue

                        # Parse the embedded value to get the corresponding
                        # resource adapter configuration
                        value = json.loads(result['value'])

                        # Store the request id in the value; it is used by
                        # the handlers later on
                        value['spot_instance_request_id'] = result['key']

                        resource_adapter_cfg = \
                            value['resource_adapter_configuration']

                        # Cache resource adapter configurations; first check
                        # whether this one has already been cached
                        if resource_adapter_cfg not in resource_adapter_cfgs:
                            self.logger.debug(
                                'Loading adapter configuration: [%s]',
                                resource_adapter_cfg)

                            # Get the configuration for the spot instance
                            # request
                            config = resource_adapter.get_config(
                                resource_adapter_cfg)

                            # Save the fields that may be needed for other
                            # requests in this loop
                            resource_adapter_cfgs[resource_adapter_cfg] = {
                                'cfg': config,
                                'ec2_conn':
                                    resource_adapter.getEC2Connection(config),
                                'max_register_duration': config.get(
                                    'spot_provision_timeout',
                                    self.__max_register_duration),
                            }

                        # Update the record to be queued with the appropriate
                        # cached data for the request
                        result['value'] = value
                        result['ec2_conn'] = resource_adapter_cfgs[
                            resource_adapter_cfg]['ec2_conn']
                        result['max_register_duration'] = \
                            resource_adapter_cfgs[
                                resource_adapter_cfg]['max_register_duration']

                        # enqueue spot instance request
                        queue.put_nowait(result)
                except Exception as ex:  # pylint: disable=broad-except
                    self.logger.error(
                        'Unable to poll spot instance requests: %s', ex)
                finally:
                    resource_adapter.session.close()

                if self.__one_time:
                    await queue.join()
                    self.__done = True
                    break

                self.logger.debug('Sleeping for %ds',
                                  self.__polling_interval)

                self.__next_poll = time.time() + self.__polling_interval

            await asyncio.sleep(1)

        try:
            for task in tasks:
                await asyncio.wait_for(task, timeout=30.0)

            self.logger.debug('Exiting poller')
        except Exception as ex:  # pylint: disable=broad-except
            self.logger.error('Unable to wait for worker tasks: %s', ex)

    async def __worker(self, name: str, queue: asyncio.Queue) -> None:
        self.logger.debug('Worker [%s] initializing...', name)

        while not self.__done:
            try:
                # Allow other pending co-routines to run
                # await asyncio.sleep(0.0)
                item = queue.get_nowait()
            except asyncio.QueueEmpty:
                await asyncio.sleep(0.01)
                continue

            session = self.__dbm.openSession()

            try:
                # Unpack the queued request
                sir_id = item['key']
                spot_instance_request = item['value']
                instance = item['instance']
                ec2_conn = item['ec2_conn']
                max_register_duration = item['max_register_duration']

                node = None

                # Attempt to fetch the node matching the instance in the
                # spot request
                if instance and 'id' in instance:
                    try:
                        node = self.__get_node_by_instance(
                            session, instance['id'])
                    except Exception as ex:  # pylint: disable=broad-except
                        self.logger.debug('Unable to fetch node: %s', ex)

                self.logger.info(
                    'Worker [%s] processing spot instance request id [%s]',
                    name,
                    sir_id,
                )

                async with self.__bad_request_lock:
                    if sir_id in self.__bad_requests:
                        self.logger.warning(
                            'Ignoring invalid spot instance request: [%s]',
                            sir_id,
                        )
                        continue

                try:
                    await self.process_spot_instance_request(
                        ec2_conn,
                        session,
                        node,
                        spot_instance_request,
                        max_register_duration,
                    )
                except Exception:  # pylint: disable=broad-except
                    self.logger.exception(
                        'Error processing spot instance request [%s]',
                        spot_instance_request,
                    )
            finally:
                session.close()
                queue.task_done()

        self.logger.debug('Exiting worker')

    async def process_spot_instance_request(
            self,
            ec2_conn: EC2Connection,
            session: Session,
            instance: Optional[Node],
            spot_instance_request: dict,
            max_register_duration: float,
    ) -> None:
        """
        :raises EC2ResponseError:
        """
        sir_id = spot_instance_request.get('spot_instance_request_id')
        if sir_id is None:
            async with self.__bad_request_lock:
                self.__bad_requests.append(sir_id)

            return

        if instance and instance.state == 'Installed':
            self.logger.debug(
                'Installed node [%s] already associated with spot instance'
                ' request [%s]', instance.name, sir_id)

            return

        try:
            result = ec2_conn.get_all_spot_instance_requests(
                request_ids=[sir_id],
            )
        except boto.exception.EC2ResponseError as exc:
            if exc.status == 400 and \
                    exc.error_code in (
                        'InvalidSpotInstanceRequestID.NotFound',):
                spot_instance_request['status'] = 'notfound'

            raise

        create_time = dateutil.parser.isoparse(result[0].create_time)

        self.logger.debug(
            'sir: [%s], state: [%s], status code: [%s], created at: [%s]',
            sir_id,
            result[0].state,
            result[0].status.code,
            create_time,
        )

        jump_table = {
            'active': self.__handle_active_spot_requests,
            'open': self.__handle_open_spot_requests,
            'closed': self.__handle_closed_spot_requests,
            'cancelled': self.__handle_cancelled_spot_requests,
            'failed': self.__handle_failed_spot_requests,
        }

        handler = jump_table.get(result[0].state)
        if handler is None:
            self.logger.error(
                'Ignoring unknown spot instance request state: [%s]',
                result[0].state)

            return

        self.logger.debug('Calling handler for state: [%s]',
                          result[0].state)

        await handler(
            result[0].status.code,
            sir_id,
            ec2_conn,
            result[0].instance_id,
            instance,
            spot_instance_request,
            create_time,
            session,
            max_register_duration,
        )

    async def __handle_active_spot_requests(
            self, status_code, sir_id, ec2_conn, instance_id,
            instance,  # pylint: disable=unused-argument
            spot_instance_request, create_time, session,
            max_register_duration):
        if status_code != 'fulfilled':
            return

        self.logger.debug('Waiting for node for spot instance'
                          ' request [%s]', sir_id)

        await self.__fulfilled_request_handler(
            ec2_conn,
            session,
            instance_id,
            spot_instance_request,
            create_time,
            max_register_duration,
        )

    async def __handle_open_spot_requests(
            self, status_code, sir_id,
            ec2_conn,  # pylint: disable=unused-argument
            instance_id, instance,
            spot_instance_request,  # pylint: disable=unused-argument
            create_time, session,
            max_register_duration):  # pylint: disable=unused-argument
        """Handle open spot instance requests"""
        if status_code in ('pending-fulfillment', 'price-too-low'):
            return

        if status_code not in (
                'capacity-oversubscribed',
                'instance-terminated-by-price',
                'instance-terminated-no-capacity',
                'instance-terminated-capacity-oversubscribed',
                'instance-terminated-launch-group-constraint'):
            # unknown status code
            self.logger.warning(
                'Unrecognized open spot request status code: [%s]',
                status_code)

            return

        if status_code == 'capacity-oversubscribed':
            self.logger.info(
                'spot instance request [%s] not fulfilled due to'
                ' oversubscription; request will remain open',
                sir_id,
            )

            return

    async def __handle_closed_spot_requests(
            self, status_code, sir_id,
            ec2_conn,  # pylint: disable=unused-argument
            instance_id,
            instance,  # pylint: disable=unused-argument
            spot_instance_request,
            create_time,  # pylint: disable=unused-argument
            session,
            max_register_duration):  # pylint: disable=unused-argument
        if status_code == 'marked-for-termination':
            self.logger.info(
                'Instance [%s] marked for termination',
                instance_id,
            )

            return

        if status_code == 'system-error':
            self.logger.warning(
                'Reported AWS/EC2 system error for spot instance request id'
                ' [%s]', sir_id)

            return

        if status_code not in (
                'instance-terminated-by-user',
                'instance-terminated-by-price',
                'instance-terminated-no-capacity',
                'instance-terminated-capacity-oversubscribed',
                'instance-terminated-launch-group-constraint',
        ):
            # unknown status code
            self.logger.warning(
                'Unrecognized closed spot request status code: [%s]',
                status_code)

            return

        # Instance is terminated. We can remove the request.
        self.logger.info(
            'Deleting spot instance request id [%s] for terminated'
            ' instance', sir_id)

        self.metadata_ws_api.deleteMetadata(filter_key=sir_id)

    async def __handle_cancelled_spot_requests(
            self, status_code, sir_id, ec2_conn, instance_id, instance,
            spot_instance_request, create_time, session,
            max_register_duration):
        if status_code == 'canceled-before-fulfillment':
            # Never had an instance so we must remove the request.
            self.logger.info(
                'Deleting spot instance request id [%s]',
                sir_id,
            )

            self.metadata_ws_api.deleteMetadata(filter_key=sir_id)

            return

        if status_code == 'request-canceled-and-instance-running':
            if instance is None:
                # Need to check if launch has instance registered.
                await self.__fulfilled_request_handler(
                    ec2_conn,
                    session,
                    instance_id,
                    spot_instance_request,
                    create_time,
                    max_register_duration,
                )

        if status_code in (
                'instance-terminated-by-user',
                'instance-terminated-capacity-oversubscribed',
        ):
            self.logger.info(
                'Deleting spot instance request id [%s] for terminated'
                ' instance [%s]', sir_id, instance_id)

            self.metadata_ws_api.deleteMetadata(filter_key=sir_id)

    async def __handle_failed_spot_requests(
            self, status_code, sir_id,
            ec2_conn,  # pylint: disable=unused-argument
            instance_id, instance,
            spot_instance_request,  # pylint: disable=unused-argument
            create_time, session,
            max_register_duration):  # pylint: disable=unused-argument
        # This request is dead in the water; nothing more can happen
        return

    async def __fulfilled_request_handler(
            self,
            ec2_conn: EC2Connection,
            session: Session,
            instance_id: str,
            spot_instance_request: dict,
            create_time: datetime.datetime,
            max_register_duration: float,
    ):
        """Called when processing valid spot instance request"""
        sir_id = spot_instance_request.get('spot_instance_request_id')

        node = self.__get_node_by_instance(session, instance_id)

        if not node or node.state != 'Installed':
            waiting_seconds = time.time() - create_time.timestamp()
            if waiting_seconds > max_register_duration:
                self.logger.warning(
                    'Terminating instance [%s] as it failed to register'
                    ' in [%d] second(s)', instance_id,
                    max_register_duration)

                ec2_conn.terminate_instances(instance_ids=[instance_id])
            else:
                self.logger.info(
                    'Unable to find instance in database: [%s]; instance'
                    ' will be terminated in [%d] second(s) if it fails to'
                    ' register', instance_id,
                    max_register_duration - waiting_seconds)

            return None

        result = self.__get_spot_instance_metadata(session, sir_id)
        if not result:
            self.logger.error(
                'Unable to find matching spot instance request: %s',
                sir_id,
            )

            return None

        self.logger.info(
            'Matching spot instance request [%s] to instance id [%s]',
            sir_id, instance_id)

        node.instance.instance_metadata.append(result)

        session.commit()

    def __get_spot_instance_metadata(
            self, session: Session,
            sir_id: str) -> Optional[InstanceMetadata]:
        try:
            return session.query(InstanceMetadata).filter(
                InstanceMetadata.key == sir_id).one()  # noqa
        except NoResultFound:
            pass

        return None

    def __get_node_by_instance(self, session: Session,
                               instance_id: str) -> Optional[Node]:
        try:
            return session.query(InstanceMapping).filter(
                InstanceMapping.instance == instance_id  # noqa
            ).one().node
        except NoResultFound:
            pass

        return None
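
# --- Illustrative usage sketch (not part of the original daemon) ---
# A minimal sketch of how AWSSpotdAppClass might be driven, assuming the
# module-level defaults referenced by __init__ above are in scope. With
# one_time=True, run() performs a single polling pass and exits rather
# than looping. The logger name is hypothetical.

def _example_single_pass() -> None:
    import logging

    logging.basicConfig(level=logging.DEBUG)

    app = AWSSpotdAppClass(logging.getLogger('awsspotd.example'),
                           one_time=True)
    app.run()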
class SetPrivateDnsZoneApp(TortugaCli):
    def __init__(self):
        super().__init__()

        self.dbm = DbManager()

        self.dns_conf = {}

        self.cfg = configparser.ConfigParser()

        self.cfgFileName = os.path.join(
            self._cm.getRoot(), 'config/base/dns-component.conf')

    def parseArgs(self, usage=None):
        self.addOption('--force', action='store_true', default=False,
                       dest='bForce', help='Force update of domain name')

        self.addOption('zone', nargs='?')

        super().parseArgs(usage=usage)

    def _loadDNSConfig(self):
        self.cfg.read(self.cfgFileName)

        # Read/parse existing DNS settings
        if not self.cfg.has_section('dns'):
            self.cfg.add_section('dns')

        if self.cfg.has_option('dns', 'domain'):
            self.dns_conf['domain'] = self.cfg.get('dns', 'domain')

        if self.cfg.has_option('dns', 'type'):
            dns_type = self.cfg.get('dns', 'type')

            self.dns_conf['type'] = dns_type

            if self.dns_conf['type'].lower() not in ('named', 'dnsmasq'):
                # Invalid DNS type
                self.dns_conf['type'] = DEFAULT_DNS_TYPE
        else:
            # Default to 'named'
            self.dns_conf['type'] = DEFAULT_DNS_TYPE

    def _getOldDnsZone(self):
        """
        Returns None if DNS zone previously undefined
        """
        with self.dbm.session() as session:
            try:
                result = GlobalParametersDbHandler().getParameter(
                    session, 'DNSZone')

                return result.value.lower() if result.value else None
            except ParameterNotFound:
                return None

    def _updateDatabase(self, dnsZone):
        with self.dbm.session() as session:
            try:
                dbValue = GlobalParametersDbHandler().getParameter(
                    session, 'DNSZone')

                # Update existing value
                dbValue.value = dnsZone
            except ParameterNotFound:
                dbValue = GlobalParameter(name='DNSZone', value=dnsZone)

                session.add(dbValue)

            session.commit()

    def _updateDnsComponentConf(self, dnsZone):
        if not os.path.exists(self.cfgFileName):
            return

        shutil.copy(self.cfgFileName, self.cfgFileName + '.orig')

        self.cfg.set('dns', 'domain', dnsZone)

        with open(self.cfgFileName + '.modified', 'w') as fpOut:
            self.cfg.write(fpOut)

        shutil.copy(self.cfgFileName + '.modified', self.cfgFileName)

        os.unlink(self.cfgFileName + '.modified')

    def _updatePuppetExtData(self, dnsZone):  # pylint: disable=no-self-use
        # Read existing 'DNSZone' setting from Hiera
        fn = ('/etc/puppetlabs/code/environments/production/data'
              '/tortuga-common.yaml')

        srcDataDict = {}

        with open(fn) as fpIn:
            srcDataDict = yaml.safe_load(fpIn)

        srcDataDict['DNSZone'] = dnsZone

        # Write updated file
        with open(fn + '.new', 'w') as fpOut:
            fpOut.write(
                yaml.safe_dump(srcDataDict, default_flow_style=False,
                               explicit_start=True))

        # Move new file into place
        if not os.path.exists(fn + '.orig'):
            shutil.copyfile(fn, fn + '.orig')

        shutil.copyfile(fn + '.new', fn)

        os.unlink(fn + '.new')

    def isDnsComponentEnabled(self):
        session = self.dbm.openSession()

        dbInstallerNode = NodesDbHandler().getNode(
            session, self._cm.getInstaller())

        bDnsComponentEnabled = False

        # Iterate over components in software profile looking for one
        # matching name 'dns'
        for dbComponent in dbInstallerNode.softwareprofile.components:
            if dbComponent.name == 'dns':
                bDnsComponentEnabled = True
                break

        self.dbm.closeSession()

        return bDnsComponentEnabled

    def runCommand(self):
        self.parseArgs()

        self._loadDNSConfig()

        oldDnsZone = self._getOldDnsZone()

        if not self.getArgs().zone:
            # Output current DNS zone and exit
            print(oldDnsZone)

            sys.exit(0)

        dnsZone = self.getArgs().zone.lower()

        if oldDnsZone == dnsZone and not self.getArgs().bForce:
            # Nothing changed. Nothing to do!
            sys.exit(0)

        # Update database
        self._updateDatabase(dnsZone)

        # Update dns-component.conf
        self._updateDnsComponentConf(dnsZone)

        # Update Puppet extdata file
        self._updatePuppetExtData(dnsZone)

        if oldDnsZone and 'type' in self.dns_conf and \
                self.dns_conf['type'] == 'named':
            oldZoneFileName = '/var/named/%s.zone' % (oldDnsZone.lower())

            if os.path.exists(oldZoneFileName):
                # Attempt to remove old named configuration
                os.unlink(oldZoneFileName)

        bDnsComponentEnabled = self.isDnsComponentEnabled()

        # Regenerate DNS configuration
        if bDnsComponentEnabled:
            tortugaSubprocess.executeCommand('genconfig dns')

        # Schedule Puppet update
        tortugaSubprocess.executeCommand(
            'schedule-update "DNS zone changed from \"%s\" to \"%s\""' % (
                oldDnsZone, dnsZone))

        if bDnsComponentEnabled and 'type' in self.dns_conf:
            if self.dns_conf['type'] == 'named':
                cmd = 'service named restart'
            else:
                cmd = 'service dnsmasq restart'

            tortugaSubprocess.executeCommand(cmd)
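
# --- Illustrative entry-point sketch ---
# Assumption: as with other Tortuga CLI tools, a TortugaCli subclass is
# launched via its run() method, which dispatches to runCommand().

def main():
    SetPrivateDnsZoneApp().run()


if __name__ == '__main__':
    main()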