Beispiel #1
0
    def configure(self):
        """
        Generate the component configuration file and /etc/netgroup.

        ``CONFIG_FILE`` receives a header line followed by one line per node
        returned by the node API, skipping the installer node, nodes in
        state 'Deleted' and idle nodes.

        ``/etc/netgroup`` receives one entry per software profile that has
        at least one node whose state is not 'Deleted'.
        """
        #
        # Write config file
        #
        with open(CONFIG_FILE, 'w') as fp:
            print("# File generated by genconfig", file=fp)

            installer = self.kit_installer.config_manager.getInstaller()

            node_list = [
                node
                for node in getNodeApi().getNodeList()
                if node.getName() != installer
                and node.getState() != 'Deleted'
                and not node.getIsIdle()
            ]
            for node in node_list:
                print('{}'.format(node), file=fp)

        #
        # Write /etc/netgroup
        #
        # The file is still opened before the database session is created;
        # the context manager guarantees it is closed exactly once, even if
        # DbManager() or openSession() raises.
        with open('/etc/netgroup', 'w') as fp:
            dbm = DbManager()
            session = dbm.openSession()

            try:
                software_profiles = \
                    SoftwareProfilesDbHandler().getSoftwareProfileList(session)
                for software_profile in software_profiles:
                    if not software_profile.nodes:
                        continue

                    software_profile_node_list = [
                        node.name
                        for node in software_profile.nodes
                        if node.state != 'Deleted'
                    ]
                    if not software_profile_node_list:
                        continue

                    # netgroup triple syntax: (host,user,domain); only the
                    # host field is filled in
                    fp.write(
                        '{} {}\n\n'.format(
                            software_profile.name,
                            ' '.join(
                                '({},,)'.format(node)
                                for node in software_profile_node_list
                            )
                        )
                    )
            finally:
                dbm.closeSession()
Beispiel #2
0
    def configure(self):
        """
        Generate an ssh client configuration in ``CONFIG_FILE``.

        For every NIC that has an IP address and belongs to a node whose
        state is not 'Deleted', three ``Host`` stanzas are written (IP
        address, short host name qualified with the 'DNSZone' parameter, and
        the short host name), each disabling strict host key checking.  A
        trailing ``Host *`` stanza carries the defaults.
        """
        # The context manager closes the file even if creating the database
        # manager or opening the session fails.
        with open(CONFIG_FILE, 'w') as fp:
            dbm = DbManager()
            session = dbm.openSession()

            try:
                print("# ", file=fp)
                print("# Dynamically generated by: genconfig (Do not edit!)",
                      file=fp)
                print("#", file=fp)
                print("", file=fp)

                dnszone = self.kit_installer.get_db_parameter_value('DNSZone')

                for db_nic in session.query(Nics).order_by(Nics.ip).all():
                    if db_nic.node.state == 'Deleted':
                        continue

                    if not db_nic.ip:
                        continue

                    # short host name: everything before the first dot
                    name = db_nic.node.name.split('.')[0]

                    host_patterns = (
                        '{}'.format(db_nic.ip),
                        '{}.{}'.format(name, dnszone),
                        '{}'.format(name),
                    )
                    for host_pattern in host_patterns:
                        print('Host {}'.format(host_pattern), file=fp)
                        print('\tStrictHostKeyChecking no', file=fp)

                    print("", file=fp)

                print('Host *', file=fp)
                print('\t# ssh_config defaults', file=fp)
                print('\tGSSAPIAuthentication yes', file=fp)
                print('\tForwardX11Trusted yes', file=fp)
                print('\t# tortuga defaults', file=fp)
                print('\tNoHostAuthenticationForLocalhost yes', file=fp)
                print('\tStrictHostKeyChecking no', file=fp)
            finally:
                dbm.closeSession()
Beispiel #3
0
    def initDatabase(self) -> Tuple[Any, Session]:
        """
        Create the database and prime it with the initial data.

        Progress is reported on stdout.  Any exception raised while priming
        is rolled back and reported on stderr; it is NOT re-raised, so the
        caller receives the manager and session in either case.

        :return: tuple of (database manager, open session)
        """
        msg = _('Initializing database')

        self._logger.info(msg)

        print_('\n' + msg + '... ', end='')

        # This cannot be a global import since the database configuration
        # may be set in this script.
        from tortuga.db.dbManager import DbManager

        dbm = DbManager()

        # create database
        dbm.init_database()

        session = dbm.openSession()

        # Prime the database previously created as part of the bootstrap
        try:
            dbUtility.primeDb(session, self._settings)

            dbUtility.init_global_parameters(session, self._settings)

            # Commit before reporting success so that a failing commit is
            # not announced as 'done' followed by 'failed.'
            session.commit()

            print_(_('done'))
        except Exception as exc:  # pylint: disable=broad-except
            session.rollback()

            print_(_('failed.'))

            print_(_('Exception raised initializing database:') +
                   ' {0}'.format(exc),
                   file=sys.stderr)

        self._logger.debug('Done initializing database')

        return dbm, session
class AWSSpotdAppClass:
    """
    App for checking on Spot Instance Requests and cleaning up requests
    that don't follow the proper lifecycle.

    One poller coroutine periodically lists the metadata entries whose key
    starts with 'sir-' and puts them on a queue; a fixed number of worker
    coroutines take entries off the queue and dispatch on the request state
    reported by EC2.
    """
    def __init__(self,
                 logger,
                 polling_interval=SPOT_INSTANCE_POLLING_INTERVAL,
                 max_register_duration=SPOT_INSTANCE_REGISTER_MAX_WAIT,
                 dbm=None,
                 one_time=False):
        """
        :param logger: logger used for all output of this app
        :param polling_interval: seconds between two polls of the metadata
        :param max_register_duration: seconds a fulfilled instance may take
            to register; used when the resource adapter configuration has
            no 'spot_provision_timeout'
        :param dbm: database manager; a new DbManager is created when None
        :param one_time: poll once, wait for the queue to drain, then stop
        """
        self.logger = logger
        self.metadata_ws_api = MetadataWsApi()
        self.__one_time = one_time
        self.__polling_interval = polling_interval
        self.__max_register_duration = max_register_duration

        self.__dbm = DbManager() if dbm is None else dbm
        self.__done = False
        self.__next_poll = 0
        # request ids found to be invalid; guarded by __bad_request_lock
        self.__bad_requests = []
        self.__bad_request_lock = asyncio.Lock()

    def run(self):
        """ Main body of the application.  Read, and optionally continue to read,
        known spot requests and compare against known instances.  """
        loop = asyncio.get_event_loop()

        queue = asyncio.Queue()

        max_tasks = 3

        poller = None
        tasks = []

        self.logger.debug('Creating %d worker tasks', max_tasks)

        try:
            # create worker coroutines
            tasks = [
                asyncio.ensure_future(self.__worker(f'worker-{i}', queue))
                for i in range(max_tasks)
            ]

            poller = asyncio.ensure_future(self.__poller(queue, tasks))

            loop.run_until_complete(poller)
        except KeyboardInterrupt:
            pass
        except Exception as ex:  # pylint: disable=broad-except
            self.logger.error('Error running event loop: %s', ex)
        finally:
            try:
                if not self.__done:
                    # Interrupted: flag shutdown and let the poller finish
                    # waiting for the workers.  A poller that has already
                    # finished (e.g. with an exception logged above) must
                    # not be awaited again; doing so would re-raise here
                    # and skip the cleanup below.
                    self.__done = True
                    if poller is not None and not poller.done():
                        loop.run_until_complete(poller)
            finally:
                self.logger.debug('Cancelling worker tasks')
                for task in tasks:
                    task.cancel()

                self.logger.debug('Closing asyncio loop')

                self.__dbm.closeSession()

    async def __poller(self, queue, tasks) -> None:
        """
        Periodically list the spot instance request metadata and queue each
        valid entry for the workers.

        Runs until shutdown is flagged (or after one poll in one-time mode),
        then waits up to 30 seconds for each worker task to finish.

        :param queue: queue the workers consume from
        :param tasks: worker tasks to wait for on shutdown
        """
        resource_adapter = get_api('AWS')
        while not self.__done:
            if self.__next_poll < time.time():
                self.logger.debug('Polling spot instance requests')

                results = self.metadata_ws_api.list()
                resource_adapter.session = self.__dbm.openSession()
                resource_adapter_cfgs = {}
                try:
                    for result in results:
                        if not result['key'].startswith('sir-'):
                            # ignore any unrelated entries
                            self.logger.debug('Ignoring metadata key [%s]',
                                              result['key'])
                            continue

                        # 'with await lock' was removed in Python 3.9
                        async with self.__bad_request_lock:
                            if result['key'] in self.__bad_requests:
                                self.logger.warning(
                                    'Invalid spot instance request [%s] will not be'
                                    ' queued', result['key'])

                                continue

                        # Parse the embedded value to get the corresponding
                        # resource adapter configuration
                        value = json.loads(result['value'])
                        # Store the request ID in the value.  This is used by the handlers later on.
                        value['spot_instance_request_id'] = result['key']

                        resource_adapter_cfg = value[
                            'resource_adapter_configuration']

                        # Cache resource adapter configurations.
                        # First check if this one has been cached
                        if resource_adapter_cfg not in resource_adapter_cfgs:
                            self.logger.debug(
                                'Loading adapter configuration: [%s]',
                                resource_adapter_cfg)
                            # Get the configuration for the spot instance request
                            config = resource_adapter.get_config(
                                resource_adapter_cfg)
                            # Save the fields that we may need for other requests in this loop
                            resource_adapter_cfgs[resource_adapter_cfg] = {
                                'cfg':
                                config,
                                'ec2_conn':
                                resource_adapter.getEC2Connection(config),
                                'max_register_duration':
                                config.get('spot_provision_timeout',
                                           self.__max_register_duration),
                            }

                        cached_cfg = resource_adapter_cfgs[resource_adapter_cfg]

                        # Update the record to queue to have the appropriate
                        # cached data for the request.
                        result['value'] = value
                        result['ec2_conn'] = cached_cfg['ec2_conn']
                        result['max_register_duration'] = \
                            cached_cfg['max_register_duration']

                        # enqueue spot instance request
                        queue.put_nowait(result)
                except Exception as ex:  # pylint: disable=broad-except
                    self.logger.error(
                        'Unable to poll spot instance requests: %s', ex)
                finally:
                    resource_adapter.session.close()

                if self.__one_time:
                    await queue.join()
                    self.__done = True
                    break
                self.logger.debug('Sleeping for %ds', self.__polling_interval)
                self.__next_poll = time.time() + self.__polling_interval
            # wake up once a second so a shutdown request is noticed promptly
            await asyncio.sleep(1)
        try:
            for task in tasks:
                await asyncio.wait_for(task, timeout=30.0)
            self.logger.debug('Exiting poller')
        except Exception as ex:  # pylint: disable=broad-except
            self.logger.error('Unable to wait for worker tasks: %s', ex)

    async def __worker(self, name: str, queue: asyncio.Queue) -> None:
        """
        Take queued spot instance requests and process them until shutdown
        is flagged.

        :param name: worker name, used in log messages only
        :param queue: queue filled by the poller
        """

        self.logger.debug('Worker [%s] initializing...', name)

        while not self.__done:
            try:
                # Allow other pending co-routines to run
                # await asyncio.sleep(0.0)
                item = queue.get_nowait()
            except asyncio.QueueEmpty:
                await asyncio.sleep(0.01)
                continue
            session = self.__dbm.openSession()
            try:
                # Unpack the queued request
                sir_id = item['key']
                spot_instance_request = item['value']
                instance = item['instance']
                ec2_conn = item['ec2_conn']
                max_register_duration = item['max_register_duration']
                node = None

                # Attempt to fetch the node matching the instance in the spot request.
                if instance and 'id' in instance:
                    try:
                        # NOTE(review): the guard tests for 'id' but the
                        # lookup reads 'instance'; the item schema is not
                        # visible here - confirm which key holds the EC2
                        # instance id.
                        node = self.__get_node_by_instance(
                            session, instance['instance'])
                    except Exception as ex:  # pylint: disable=broad-except
                        self.logger.debug('Unable to fetch node: %s', ex)

                self.logger.info(
                    'Worker [%s] processing spot instance request id [%s]',
                    name,
                    sir_id,
                )

                # 'with await lock' was removed in Python 3.9
                async with self.__bad_request_lock:
                    if sir_id in self.__bad_requests:
                        self.logger.warning(
                            'Ignoring invalid spot instance request: [%s]',
                            sir_id,
                        )

                        continue

                try:
                    await self.process_spot_instance_request(
                        ec2_conn,
                        session,
                        node,
                        spot_instance_request,
                        max_register_duration,
                    )
                except Exception:  # pylint: disable=broad-except
                    self.logger.exception(
                        'Error processing spot instance request [%s]',
                        spot_instance_request,
                    )
            finally:
                # runs for the 'continue' above as well, so every dequeued
                # item is marked done
                session.close()
                queue.task_done()
        self.logger.debug('Exiting worker')

    async def process_spot_instance_request(
        self,
        ec2_conn: EC2Connection,
        session: Session,
        instance: Optional[Node],
        spot_instance_request: dict,
        max_register_duration: float,
    ) -> None:
        """
        Look up the current state of a spot instance request in EC2 and
        dispatch to the handler for that state.

        :param ec2_conn: EC2 connection used to query the request
        :param session: database session handed on to the handlers
        :param instance: node already associated with the request, if any
            (only its 'state' and 'name' attributes are read here)
        :param spot_instance_request: parsed metadata value; must contain
            'spot_instance_request_id'
        :param max_register_duration: seconds an instance may take to
            register, handed on to the handlers

        :raises EC2ResponseError:
        """

        sir_id = spot_instance_request.get('spot_instance_request_id')
        if sir_id is None:
            async with self.__bad_request_lock:
                # do not let the list grow on every malformed request
                if sir_id not in self.__bad_requests:
                    self.__bad_requests.append(sir_id)

            return

        if instance and instance.state == 'Installed':
            self.logger.debug(
                'Installed node [%s] already associated with spot instance'
                ' request [%s]', instance.name, sir_id)
            return
        try:
            result = ec2_conn.get_all_spot_instance_requests(
                request_ids=[sir_id], )
        except boto.exception.EC2ResponseError as exc:
            if exc.status == 400 and \
                    exc.error_code in (
                            'InvalidSpotInstanceRequestID.NotFound',
                    ):
                spot_instance_request['status'] = 'notfound'

            raise

        sir = result[0]

        create_time = dateutil.parser.isoparse(sir.create_time)
        self.logger.debug(
            'sir: [%s], state: [%s], status code: [%s], created at: [%s]',
            sir_id,
            sir.state,
            sir.status.code,
            create_time,
        )

        jump_table = {
            'active': self.__handle_active_spot_requests,
            'open': self.__handle_open_spot_requests,
            'closed': self.__handle_closed_spot_requests,
            'cancelled': self.__handle_cancelled_spot_requests,
            'failed': self.__handle_failed_spot_requests,
        }

        handler = jump_table.get(sir.state)
        if handler is None:
            self.logger.error(
                'Ignoring unknown spot instance request state: [%s]',
                sir.state)

            return

        self.logger.debug('Calling handler for state: [%s]', sir.state)
        await handler(
            sir.status.code,
            sir_id,
            ec2_conn,
            sir.instance_id,
            instance,
            spot_instance_request,
            create_time,
            session,
            max_register_duration,
        )

    async def __handle_active_spot_requests(
            self,
            status_code,
            sir_id,
            ec2_conn,
            instance_id,
            instance,  # pylint: disable=unused-argument
            spot_instance_request,
            create_time,
            session,
            max_register_duration
    ):
        """Handle active requests; only status 'fulfilled' is acted upon"""
        if status_code != 'fulfilled':
            return

        self.logger.debug('Waiting for node for spot instance'
                          ' request [%s]', sir_id)

        await self.__fulfilled_request_handler(
            ec2_conn,
            session,
            instance_id,
            spot_instance_request,
            create_time,
            max_register_duration,
        )

    async def __handle_open_spot_requests(
            self,
            status_code,
            sir_id,
            ec2_conn,  # pylint: disable=unused-argument
            instance_id,
            instance,
            spot_instance_request,  # pylint: disable=unused-argument
            create_time,
            session,
            max_register_duration  # pylint: disable=unused-argument
    ):
        """Handle open spot instance requests (logging only)"""
        if status_code in ('pending-fulfillment', 'price-too-low'):
            return

        if status_code not in ('capacity-oversubscribed',
                               'instance-terminated-by-price',
                               'instance-terminated-no-capacity',
                               'instance-terminated-capacity-oversubscribed',
                               'instance-terminated-launch-group-constraint'):
            # unknown status code
            self.logger.warning(
                'Unrecognized open spot request status code: [%s]',
                status_code)

            return

        if status_code == 'capacity-oversubscribed':
            self.logger.info(
                'spot instance request [%s] not fulfilled due to'
                ' oversubscription; request will remain open',
                sir_id,
            )

    async def __handle_closed_spot_requests(
            self,
            status_code,
            sir_id,
            ec2_conn,  # pylint: disable=unused-argument
            instance_id,
            instance,  # pylint: disable=unused-argument
            spot_instance_request,
            create_time,  # pylint: disable=unused-argument
            session,
            max_register_duration  # pylint: disable=unused-argument
    ):
        """
        Handle closed requests: delete the request metadata once the
        instance is reported terminated; other status codes are only logged
        """
        if status_code == 'marked-for-termination':
            self.logger.info(
                'Instance [%s] marked for termination',
                instance_id,
            )

            return

        if status_code == 'system-error':
            self.logger.warning(
                'Reported AWS/EC2 system error for spot instance request id'
                ' [%s]', sir_id)

            return

        if status_code not in (
                'instance-terminated-by-user',
                'instance-terminated-by-price',
                'instance-terminated-no-capacity',
                'instance-terminated-capacity-oversubscribed',
                'instance-terminated-launch-group-constraint',
        ):
            # unknown status code
            self.logger.warning(
                'Unrecognized closed spot request status code: [%s]',
                status_code)

            return

        # Instance is terminated.  We can remove the request.
        self.logger.info(
            'Deleting spot instance request id [%s] for terminated instance',
            sir_id)
        self.metadata_ws_api.deleteMetadata(filter_key=sir_id, )

    async def __handle_cancelled_spot_requests(self, status_code, sir_id,
                                               ec2_conn, instance_id, instance,
                                               spot_instance_request,
                                               create_time, session,
                                               max_register_duration):
        """
        Handle cancelled requests: delete the request metadata when no
        instance remains, or check registration of a still-running instance
        """
        if status_code == 'canceled-before-fulfillment':
            # Never had a instance so we must remove the request.
            self.logger.info(
                'Deleting spot instance request id [%s]',
                sir_id,
            )

            self.metadata_ws_api.deleteMetadata(filter_key=sir_id, )
            return

        if status_code == 'request-canceled-and-instance-running':
            if instance is None:
                # Need to check if launch has instance registered.
                await self.__fulfilled_request_handler(
                    ec2_conn,
                    session,
                    instance_id,
                    spot_instance_request,
                    create_time,
                    max_register_duration,
                )

        if status_code in (
                'instance-terminated-by-user',
                'instance-terminated-capacity-oversubscribed',
        ):
            self.logger.info(
                'Deleting spot instance request id [%s] for terminated instance [%s]',
                sir_id, instance_id)
            self.metadata_ws_api.deleteMetadata(filter_key=sir_id, )

    async def __handle_failed_spot_requests(
            self,
            status_code,
            sir_id,
            ec2_conn,  # pylint: disable=unused-argument
            instance_id,
            instance,
            spot_instance_request,  # pylint: disable=unused-argument
            create_time,
            session,
            max_register_duration):  # pylint: disable=unused-argument
        """Handle failed requests (no action is taken)"""
        # This request is dead in the water; nothing more can happen
        return

    async def __fulfilled_request_handler(
        self,
        ec2_conn: EC2Connection,
        session: Session,
        instance_id: str,
        spot_instance_request: dict,
        create_time: datetime.datetime,
        max_register_duration: float,
    ):
        """
        Called when processing valid spot instance request

        Associates the request metadata with the node registered for
        `instance_id`.  If no node in state 'Installed' exists yet, the
        instance is terminated once more than `max_register_duration`
        seconds have passed since `create_time`.
        """

        sir_id = spot_instance_request.get('spot_instance_request_id')
        node = self.__get_node_by_instance(session, instance_id)
        if not node or node.state != 'Installed':
            waiting_seconds = time.time() - create_time.timestamp()

            if waiting_seconds > max_register_duration:
                # Logger.warn() is a deprecated alias of warning()
                self.logger.warning(
                    'Terminating instance [%s] as it failed to register in [%d] second(s)',
                    instance_id, max_register_duration)
                ec2_conn.terminate_instances(instance_ids=[instance_id])
            else:
                self.logger.info(
                    'Unable to find instance in database: [%s], instance will be terminated '
                    'in [%d] second(s) if it fails to register.', instance_id,
                    max_register_duration - waiting_seconds)

            return None

        result = self.__get_spot_instance_metadata(session, sir_id)
        if not result:
            self.logger.error(
                'Unable to find matching spot instance request: %s',
                sir_id,
            )

            return None

        self.logger.info(
            'Matching spot instance request [%s] to instance id [%s]', sir_id,
            instance_id)

        node.instance.instance_metadata.append(result)
        session.commit()

    def __get_spot_instance_metadata(
            self, session: Session, sir_id: str) -> Optional[InstanceMetadata]:
        """Return the metadata row whose key is `sir_id`, or None"""
        try:
            return session.query(InstanceMetadata).filter(
                InstanceMetadata.key == sir_id).one()  # noqa
        except NoResultFound:
            return None

    def __get_node_by_instance(self, session: Session,
                               instance_id: str) -> Optional[Node]:
        """Return the node mapped to `instance_id`, or None"""
        try:
            return session.query(InstanceMapping).filter(
                InstanceMapping.instance == instance_id  # noqa
            ).one().node
        except NoResultFound:
            return None
Beispiel #5
0
class SetPrivateDnsZoneApp(TortugaCli):
    """
    CLI that prints the private DNS zone or changes it.

    Changing the zone updates the 'DNSZone' global parameter in the
    database, dns-component.conf and the Hiera data file, then regenerates
    the DNS configuration and restarts the DNS service when the 'dns'
    component is enabled on the installer.
    """
    def __init__(self):
        super().__init__()

        self.dbm = DbManager()

        # settings read from the [dns] section of dns-component.conf
        self.dns_conf = {}

        self.cfg = configparser.ConfigParser()

        self.cfgFileName = os.path.join(self._cm.getRoot(),
                                        'config/base/dns-component.conf')

    def parseArgs(self, usage=None):
        """Register --force and the optional positional 'zone' argument"""
        # The default must be the boolean False: the string 'false' is
        # truthy and made --force appear to be always set.
        self.addOption('--force',
                       action='store_true',
                       default=False,
                       dest='bForce',
                       help='Force update of domain name')

        self.addOption('zone', nargs='?')

        super().parseArgs(usage=usage)

    def _loadDNSConfig(self):
        """
        Read 'domain' and 'type' from the [dns] section into self.dns_conf;
        a missing or unrecognized type falls back to DEFAULT_DNS_TYPE
        """
        self.cfg.read(self.cfgFileName)

        # Read/parse existing DNS settings

        if not self.cfg.has_section('dns'):
            self.cfg.add_section('dns')

        if self.cfg.has_option('dns', 'domain'):
            self.dns_conf['domain'] = self.cfg.get('dns', 'domain')

        dns_type = DEFAULT_DNS_TYPE

        if self.cfg.has_option('dns', 'type'):
            configured_type = self.cfg.get('dns', 'type')

            # the configured value is kept as written when it is valid
            if configured_type.lower() in ('named', 'dnsmasq'):
                dns_type = configured_type

        self.dns_conf['type'] = dns_type

    def _getOldDnsZone(self):
        """
        Returns None if DNS zone previously undefined
        """

        with self.dbm.session() as session:
            try:
                result = GlobalParametersDbHandler().getParameter(
                    session, 'DNSZone')

                return result.value.lower() if result.value else None
            except ParameterNotFound:
                return None

    def _updateDatabase(self, dnsZone):
        """Set the 'DNSZone' global parameter, creating it if necessary"""
        with self.dbm.session() as session:
            try:
                dbValue = GlobalParametersDbHandler().getParameter(
                    session, 'DNSZone')

                # Update existing value
                dbValue.value = dnsZone
            except (ParameterNotFound, NoResultFound):
                # _getOldDnsZone() shows that the handler signals a missing
                # parameter with ParameterNotFound; NoResultFound is still
                # accepted for backward compatibility.
                dbValue = GlobalParameter(name='DNSZone', value=dnsZone)

                # a session has add(), not append()
                session.add(dbValue)

            session.commit()

    def _updateDnsComponentConf(self, dnsZone):
        """
        Write the new domain to dns-component.conf, keeping a '.orig' copy
        of the previous file.  Does nothing when the file does not exist.
        """
        if not os.path.exists(self.cfgFileName):
            return

        shutil.copy(self.cfgFileName, self.cfgFileName + '.orig')

        self.cfg.set('dns', 'domain', dnsZone)

        modifiedFileName = self.cfgFileName + '.modified'

        with open(modifiedFileName, 'w') as fpOut:
            self.cfg.write(fpOut)

        shutil.copy(modifiedFileName, self.cfgFileName)

        os.unlink(modifiedFileName)

    def _updatePuppetExtData(self, dnsZone):  # pylint: disable=no-self-use
        """
        Set 'DNSZone' in the Hiera data file tortuga-common.yaml, keeping a
        '.orig' copy of the file the first time it is changed
        """

        # Read existing 'DNSZone' setting from Hiera

        fn = ('/etc/puppetlabs/code/environments/production/data'
              '/tortuga-common.yaml')

        with open(fn) as fpIn:
            # safe_load: yaml.load() without a Loader can construct
            # arbitrary objects.  An empty file loads as None.
            srcDataDict = yaml.safe_load(fpIn) or {}

        srcDataDict['DNSZone'] = dnsZone

        # Write updated file
        with open(fn + '.new', 'w') as fpOut:
            fpOut.write(
                yaml.safe_dump(srcDataDict,
                               default_flow_style=False,
                               explicit_start=True))

        # Move new file into place
        if not os.path.exists(fn + '.orig'):
            shutil.copyfile(fn, fn + '.orig')

        shutil.copyfile(fn + '.new', fn)

        os.unlink(fn + '.new')

    def isDnsComponentEnabled(self):
        """
        Return True if the installer node's software profile has a
        component named 'dns'
        """
        session = self.dbm.openSession()

        try:
            dbInstallerNode = NodesDbHandler().getNode(
                session, self._cm.getInstaller())

            # Iterate over components in software profile looking for one
            # matching name 'dns'
            return any(
                dbComponent.name == 'dns'
                for dbComponent in dbInstallerNode.softwareprofile.components
            )
        finally:
            # release the session even when the lookup fails
            self.dbm.closeSession()

    def runCommand(self):
        """
        Print the current DNS zone when no zone argument is given;
        otherwise apply the new zone everywhere it is stored
        """
        self.parseArgs()

        self._loadDNSConfig()

        # Remove remnants
        oldDnsZone = self._getOldDnsZone()

        if not self.getArgs().zone:
            # Output current DNS zone and exit
            print(f'{oldDnsZone}')

            sys.exit(0)

        dnsZone = self.getArgs().zone.lower()

        if oldDnsZone == dnsZone and not self.getArgs().bForce:
            # Nothing changed. Nothing to do!
            sys.exit(0)

        # Update database
        self._updateDatabase(dnsZone)

        # Update dns-component.conf
        self._updateDnsComponentConf(dnsZone)

        # Update Puppet extdata file
        self._updatePuppetExtData(dnsZone)

        # oldDnsZone is None when no zone was defined before; there is no
        # old zone file to remove in that case
        if self.dns_conf.get('type') == 'named' and oldDnsZone:
            oldZoneFileName = '/var/named/%s.zone' % (oldDnsZone.lower())

            if os.path.exists(oldZoneFileName):
                # Attempt to remove old named configuration
                os.unlink(oldZoneFileName)

        bDnsComponentEnabled = self.isDnsComponentEnabled()

        # TODO: update (genconfig dns)
        if bDnsComponentEnabled:
            tortugaSubprocess.executeCommand('genconfig dns')

        # TODO: schedule puppet update
        # NOTE(review): '\"' in this non-raw literal is a plain double
        # quote, so the zone names are not actually quoted inside the
        # message argument - confirm whether escaped quotes were intended.
        tortugaSubprocess.executeCommand(
            'schedule-update "DNS zone changed from \"%s\" to \"%s\""' %
            (oldDnsZone, dnsZone))

        if bDnsComponentEnabled and 'type' in self.dns_conf:
            if self.dns_conf['type'] == 'named':
                cmd = 'service named restart'
            else:
                cmd = 'service dnsmasq restart'

            tortugaSubprocess.executeCommand(cmd)