def create_apps(zkclient, app_id, app, count):
    """Schedules new apps.

    Creates ``count`` sequenced app nodes and, for each one, a trace node
    recording the initial "pending" state.

    :param zkclient: Zookeeper client connection.
    :param app_id: Application name; instance ids are derived from it.
    :param app: App data stored in the scheduled node.
    :param count: Number of instances to schedule.
    :return: List of newly created instance ids.
    """
    instance_ids = []
    # Servers need read/write/create/delete on the scheduled nodes.
    acl = zkutils.make_role_acl('servers', 'rwcd')
    for _idx in range(0, count):
        # sequence=True makes Zookeeper append a unique numeric suffix;
        # the resulting node basename becomes the instance id.
        node_path = zkutils.put(zkclient,
                                _app_node(app_id, existing=False),
                                app,
                                sequence=True,
                                acl=[acl])
        instance_id = os.path.basename(node_path)

        # Create task for the app, and put it in pending state.
        # TODO: probably need to create PendingEvent and use to_data method.
        task_node = z.path.trace(
            instance_id,
            '{time},{hostname},pending,{data}'.format(
                time=time.time(),
                hostname=sysinfo.hostname(),
                data='created'))
        try:
            zkclient.create(task_node, b'', acl=[_SERVERS_ACL], makepath=True)
        except kazoo.client.NodeExistsError:
            # Trace node already present (e.g. on retry) - nothing to do.
            pass

        instance_ids.append(instance_id)

    return instance_ids
def create_zk(
        ec2_conn,
        ipa_client,
        master,
        subnet_id=None,
        ip_address=None,
        instance_type=None):
    """ Create new Zookeeper

    Provisions an EC2 instance for the given ``master`` record using the
    cell's configured image, security group and subnets.

    :param ec2_conn: EC2 connection.
    :param ipa_client: IPA client, forwarded to ``create_host``.
    :param master: Cell master entry with ``hostname`` and ``idx`` keys.
    :param subnet_id: Optional subnet id; a random cell subnet is used when
        not given.
    :param ip_address: Optional fixed IP address for the instance.
    :param instance_type: EC2 instance type, defaults to ``m5.large``.
    :return: Fully qualified hostname of the master.
    """
    sts_conn = awscontext.GLOBAL.sts
    ipa_domain = awscontext.GLOBAL.ipa_domain

    admin_cell = admin.Cell(context.GLOBAL.ldap.conn)
    cell = admin_cell.get(context.GLOBAL.cell)
    data = cell['data']

    # Cell data may hold either an AMI id or an image name; resolve image
    # names to an id owned by the current account.
    image_id = data['image']
    if not image_id.startswith('ami-'):
        account = sts_conn.get_caller_identity().get('Account')
        image_id = ec2client.get_image(
            ec2_conn, owners=[account], name=image_id
        )['ImageId']

    # FIXME; subnet not unique among ZK, not AZ aware
    if not subnet_id:
        subnet_id = random.choice(data['subnets'])

    # Register the short name (DNS domain stripped) with IPA.
    shortname = master['hostname'].replace('.' + context.GLOBAL.dns_domain, '')

    if not instance_type:
        instance_type = 'm5.large'

    # Instance vars - rendered into the instance user-data.
    instance_vars = {
        'treadmill_cell': context.GLOBAL.cell,
        'treadmill_ldap': ','.join(context.GLOBAL.ldap.url),
        'treadmill_ldap_suffix': context.GLOBAL.ldap_suffix,
        'treadmill_dns_domain': context.GLOBAL.dns_domain,
        'treadmill_isa': 'zookeeper',
        'treadmill_profile': 'aws',
        'treadmill_krb_realm': krb5.get_host_realm(sysinfo.hostname())[0],
        'treadmill_zookeeper_myid': str(master['idx']),
    }

    # Build user-data and start new instance
    create_host(ec2_conn=ec2_conn,
                ipa_client=ipa_client,
                image_id=image_id,
                count=1,
                domain=ipa_domain,
                secgroup_ids=data['secgroup'],
                instance_type=instance_type,
                subnets=[subnet_id],
                disk=30,
                instance_vars=instance_vars,
                role='zookeeper',
                hostgroups=['zookeepers'],
                hostname=shortname,
                ip_address=ip_address)

    return master['hostname']
def version_monitor():
    """Node version monitor.

    Waits for the version namespace to exist, records the installed
    Treadmill distributions (plus any plugin-provided attributes) for this
    host, then parks forever.
    """
    _LOGGER.info('Initializing version monitor')
    zkclient = context.GLOBAL.zk.conn
    zkclient.add_listener(zkutils.exit_on_lost)

    # Block until both version nodes exist.
    while True:
        if zkclient.exists(z.VERSION) and zkclient.exists(z.VERSION_HISTORY):
            break
        _LOGGER.warning('namespace not ready.')
        time.sleep(30)

    hostname = sysinfo.hostname()

    distributions = {}
    for dist in iter(pkg_resources.working_set):
        if dist.project_name.startswith(_TREADMILL_DIST_PREFIX):
            distributions[dist.project_name] = dist.version

    version = {'distributions': distributions, 'since': int(time.time())}

    # Let version-monitor plugins contribute extra attributes.
    for name in plugin_manager.names('treadmill.version_monitor'):
        plugin = plugin_manager.load('treadmill.version_monitor', name)
        version.update(plugin())

    _save_version(zkclient, hostname, version)

    subproc.safe_exec(['sleep', 'inf'])
def _create_environ(app):
    """Builds the environment mapping for the app container.

    Starts from the manifest-declared variables, layers on the standard
    TREADMILL_* variables, then adds endpoint and ephemeral port entries.
    """
    appenv = {}
    for envvar in app.environ:
        appenv[envvar.name] = envvar.value

    appenv.update({
        'TREADMILL_CPU': app.cpu,
        'TREADMILL_DISK': app.disk,
        'TREADMILL_MEMORY': app.memory,
        'TREADMILL_CELL': app.cell,
        'TREADMILL_APP': app.app,
        'TREADMILL_INSTANCEID': app.task,
        'TREADMILL_IDENTITY': app.identity,
        'TREADMILL_IDENTITY_GROUP': app.identity_group,
        'TREADMILL_PROID': app.proid,
        'TREADMILL_ENV': app.environment,
        'TREADMILL_HOSTNAME': sysinfo.hostname()
    })

    for endpoint in app.endpoints:
        envname = 'TREADMILL_ENDPOINT_{0}'.format(endpoint.name.upper())
        # real_port is not available at this time on Windows, docker will
        # randomly pick one for us, we will query it later
        if hasattr(endpoint, 'real_port'):
            appenv[envname] = str(endpoint.real_port)

    appenv['TREADMILL_EPHEMERAL_TCP_PORTS'] = ' '.join(
        str(port) for port in app.ephemeral_ports.tcp)
    appenv['TREADMILL_EPHEMERAL_UDP_PORTS'] = ' '.join(
        str(port) for port in app.ephemeral_ports.udp)

    return appenv
def _configure_locker(tkt_spool_dir, scandir, cell, celluser):
    """Configure ticket forwarding service.

    Creates a supervised "tickets locker" service for the cell, unless one
    already exists in ``scandir``.

    :param tkt_spool_dir: Directory where forwarded tickets are spooled.
    :param scandir: Supervision scan directory to create the service in.
    :param cell: Cell name; also used as the service name.
    :param celluser: User whose credential cache file is used.
    """
    if os.path.exists(os.path.join(scandir, cell)):
        # Service already configured - nothing to do.
        return

    _LOGGER.info('Configuring ticket locker: %s/%s', scandir, cell)
    name = cell
    realms = krb5.get_host_realm(sysinfo.hostname())
    # Credential cache of the cell user in the host's (first) realm.
    krb5ccname = 'FILE:{tkt_spool_dir}/{celluser}@{realm}'.format(
        tkt_spool_dir=tkt_spool_dir,
        celluser=celluser,
        realm=realms[0],
    )
    supervisor.create_service(
        scandir,
        name=name,
        app_run_script=(
            '{treadmill}/bin/treadmill sproc '
            'tickets locker --tkt-spool-dir {tkt_spool_dir}'.format(
                treadmill=subproc.resolve('treadmill'),
                tkt_spool_dir=tkt_spool_dir)),
        userid='root',
        environ_dir=os.path.join(scandir, name, 'env'),
        environ={
            'KRB5CCNAME': krb5ccname,
            'TREADMILL_CELL': cell,
        },
        downed=False,
        trace=None,
        monitor_policy=None)
def zookeeper(ctx, run, master_id, data_dir):
    """Installs Treadmill master.

    Populates bootstrap parameters (zookeeper/ldap urls, kerberos realm,
    the matching master record) and installs the 'zookeeper' bootstrap
    into the destination directory, optionally running it.
    """
    ctx.obj['PARAMS']['zookeeper'] = context.GLOBAL.zk.url
    ctx.obj['PARAMS']['ldap'] = context.GLOBAL.ldap.url
    ctx.obj['PARAMS']['master_id'] = master_id

    realms = krb5.get_host_realm(sysinfo.hostname())
    if realms:
        ctx.obj['PARAMS']['krb_realm'] = realms[0]

    if data_dir:
        ctx.obj['PARAMS']['data_dir'] = data_dir

    dst_dir = ctx.obj['PARAMS']['dir']
    profile = ctx.obj['PARAMS'].get('profile')

    # Record the master entry whose idx matches master_id as "me".
    for master in ctx.obj['PARAMS']['masters']:  # pylint: disable=E1136
        if int(master['idx']) == int(master_id):
            ctx.obj['PARAMS'].update({'me': master})

    run_sh = None
    if run:
        run_sh = os.path.join(dst_dir, 'treadmill', 'bin', 'run.sh')

    bootstrap.install(
        'zookeeper',
        dst_dir,
        ctx.obj['PARAMS'],
        run=run_sh,
        profile=profile,
    )
def test_hostname(self):
    """Verify sysinfo.hostname() returns the expected host name."""
    hostname = sysinfo.hostname()
    self.assertEqual('foo.bar', hostname)
def test_hostname_env(self):
    """Verify sysinfo.hostname() honors TREADMILL_HOSTNAME when set."""
    hostname = sysinfo.hostname()
    self.assertEqual('foo.bar', hostname)
def create_endpoint_file(approot, port, appname, endpoint):
    """Create and link local endpoint file."""
    hostport = '%s:%s' % (sysinfo.hostname(), port)
    zkclient = context.GLOBAL.zk.conn

    endpoint_proid_path = z.path.endpoint_proid(appname)
    acl = zkclient.make_servers_acl()
    _LOGGER.info(
        'Ensuring %s exists with ACL %r', endpoint_proid_path, acl)
    zkutils.ensure_exists(zkclient, endpoint_proid_path, acl=[acl])

    endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
    _LOGGER.info('Registering %s %s', endpoint_path, hostport)

    # Need to delete/create endpoints for the discovery to pick it up in
    # case of master restart.
    zkutils.ensure_deleted(zkclient, endpoint_path)
    time.sleep(5)
    zkutils.put(zkclient, endpoint_path, hostport)

    tm_env = appenv.AppEnvironment(approot)
    endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
    endpoints_mgr.unlink_all(appname=appname, endpoint=endpoint, proto='tcp')
    endpoints_mgr.create_spec(
        appname=appname,
        endpoint=endpoint,
        proto='tcp',
        real_port=port,
        pid=os.getpid(),
        port=port,
        owner='/proc/{}'.format(os.getpid()),
    )
def server(register, port, auth, modules, title, cors_origin):
    """Runs nodeinfo server."""
    if port == 0:
        # Pick a free ephemeral port by binding port 0 and releasing it.
        # NOTE(review): another process could grab the port between close()
        # and the REST server bind below - confirm this race is acceptable.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.bind(('0.0.0.0', 0))
        port = sock.getsockname()[1]
        sock.close()

    hostname = sysinfo.hostname()
    hostport = '%s:%s' % (hostname, port)

    if register:
        zkclient = context.GLOBAL.zk.conn
        zkclient.add_listener(zkutils.exit_on_lost)

        # Register an ephemeral endpoint under a pid-unique app name so the
        # registration disappears with the session.
        appname = 'root.%s#%010d' % (hostname, os.getpid())
        path = z.path.endpoint(appname, 'tcp', 'nodeinfo')
        _LOGGER.info('register endpoint: %s %s', path, hostport)
        zkutils.create(zkclient, path, hostport,
                       acl=[_SERVERS_ACL],
                       ephemeral=True)

    _LOGGER.info('Starting nodeinfo server on port: %s', port)

    utils.drop_privileges()

    api_paths = []
    if modules:
        api_paths = api.init(modules, title.replace('_', ' '), cors_origin)

    rest_server = rest.TcpRestServer(port, auth_type=auth, protect=api_paths)
    rest_server.run()
def make_lock(zkclient, path):
    """Creates a Zookeeper lock on the given path.

    The lock identity is '<hostname>.<pid>' so the holder is identifiable.
    """
    _LOGGER.debug('Creating lock on: %s', path)
    zkclient.ensure_path(path)
    zkclient.add_listener(exit_on_disconnect)
    identity = '%s.%d' % (sysinfo.hostname(), os.getpid())
    return zkclient.Lock(path, identity)
def __init__(self, endpoints_dir, zkclient, instance):
    """Initializes the object.

    :param endpoints_dir: Directory holding endpoint spec files.
    :param zkclient: Zookeeper client connection.
    :param instance: Instance identifier.
    """
    self.endpoints_dir = endpoints_dir
    self.zkclient = zkclient
    # Tracks whether local state matches what has been published.
    self.up_to_date = True
    # Set of currently known endpoint entries.
    self.state = set()
    self.hostname = sysinfo.hostname()
    # ACL granting this host's principal rwcd on its own nodes.
    self.node_acl = self.zkclient.make_host_acl(self.hostname, 'rwcd')
    self.instance = instance
def __init__(self, zkclient, manifest, hostname=None, appname=None):
    """Initializes the object from the given manifest.

    Hostname and appname default to the local host and the manifest's
    'name' entry respectively.
    """
    self.zkclient = zkclient
    self.manifest = manifest
    if hostname:
        self.hostname = hostname
    else:
        self.hostname = sysinfo.hostname()
    self.appname = appname if appname else self.manifest.get('name')
def __init__(self, endpoints_dir, zkclient, scan_interval, instance=None):
    """Initializes the object.

    :param endpoints_dir: Directory holding endpoint spec files.
    :param zkclient: Zookeeper client connection.
    :param scan_interval: Interval between scans of the endpoints dir.
    :param instance: Optional instance identifier.
    """
    self.endpoints_dir = endpoints_dir
    self.zkclient = zkclient
    self.scan_interval = scan_interval
    self.hostname = sysinfo.hostname()
    # Per-key dict of current endpoint state.
    self.state = collections.defaultdict(dict)
    # ACL granting this host's principal rwcd on its own nodes.
    self.node_acl = self.zkclient.make_host_acl(self.hostname, 'rwcd')
    self.instance = instance
def root():
    """Root handler - returns basic host info as JSON."""
    mem = sysinfo.mem_info()
    payload = {
        'hostname': sysinfo.hostname(),
        'cpu': sysinfo.cpu_count(),
        'memory': mem.total,
    }
    return flask.jsonify(payload)
def __init__(self, krb5keytab_servers, keytab_dir):
    """Initializes the object.

    :param krb5keytab_servers: List of krb5keytab server endpoints.
    :param keytab_dir: Directory where keytabs are stored.
    """
    self.hostname = sysinfo.hostname()
    self.keytab_dir = keytab_dir
    # IPA domain resolved from the global AWS context.
    self.domain = awscontext.GLOBAL.ipa_domain
    self.krb5keytab_servers = krb5keytab_servers
    # Zero-argument super() is the Python 3 idiom; behaves identically to
    # the previous super(__class__, self) form.
    super().__init__()
def __init__(self, zkclient, tkt_spool_dir, trusted=None):
    """Initializes the object.

    :param zkclient: Zookeeper client connection.
    :param tkt_spool_dir: Directory where tickets are spooled.
    :param trusted: Optional mapping of trusted entries; defaults to {}.
    """
    self.zkclient = zkclient
    self.tkt_spool_dir = tkt_spool_dir
    self.zkclient.add_listener(zkutils.exit_on_lost)
    self.hostname = sysinfo.hostname()
    # Fall back to an empty mapping when nothing (or a falsy value) is
    # supplied.
    self.trusted = trusted or {}
def server(approot, register, port, auth, modules, title, cors_origin):
    """Runs nodeinfo server."""
    if port == 0:
        # Pick a free ephemeral port by binding port 0 and releasing it.
        # NOTE(review): another process could grab the port between close()
        # and the REST server bind below - confirm this race is acceptable.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.bind(('0.0.0.0', 0))
        port = sock.getsockname()[1]
        sock.close()

    hostname = sysinfo.hostname()
    hostport = '%s:%s' % (hostname, port)

    if register:
        zkclient = context.GLOBAL.zk.conn
        zkclient.add_listener(zkutils.exit_on_lost)

        # Register an ephemeral endpoint under a pid-unique app name so the
        # registration disappears with the session.
        appname = 'root.%s#%010d' % (hostname, os.getpid())
        app_pattern = 'root.%s#*' % (hostname)
        path = z.path.endpoint(appname, 'tcp', 'nodeinfo')
        _LOGGER.info('register endpoint: %s %s', path, hostport)
        zkutils.create(zkclient, path, hostport,
                       acl=[zkclient.make_servers_acl()],
                       ephemeral=True)

        # TODO: remove "legacy" endpoint registration once conversion is
        #       complete.
        tm_env = appenv.AppEnvironment(approot)
        # TODO: need to figure out how to handle windows.
        assert os.name != 'nt'
        endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
        # Drop any stale spec left by a previous nodeinfo process for this
        # host before registering the new one.
        endpoints_mgr.unlink_all(
            app_pattern, endpoint='nodeinfo', proto='tcp'
        )
        endpoints_mgr.create_spec(
            appname=appname,
            endpoint='nodeinfo',
            proto='tcp',
            real_port=port,
            pid=os.getpid(),
            port=port,
            owner='/proc/{}'.format(os.getpid()),
        )

    _LOGGER.info('Starting nodeinfo server on port: %s', port)

    utils.drop_privileges()

    api_paths = []
    if modules:
        api_modules = {module: None for module in modules}
        api_paths = api.init(
            api_modules, title.replace('_', ' '), cors_origin
        )

    rest_server = rest.TcpRestServer(port, auth_type=auth, protect=api_paths)
    rest_server.run()
def __init__(self, manifest, container_dir, appevents_dir, hostname=None):
    """Initializes the object from the app manifest.

    :param manifest: App manifest dict with at least 'name' and 'uniqueid'.
    :param container_dir: Root directory of the container.
    :param appevents_dir: Directory where app events are posted.
    :param hostname: Optional hostname override; defaults to local host.
    """
    self.manifest = manifest
    self.container_dir = container_dir
    self.services_dir = os.path.join(container_dir, 'services')
    self.appevents_dir = appevents_dir
    self.hostname = hostname if hostname else sysinfo.hostname()
    self.appname = self.manifest['name']
    self.uniqueid = self.manifest['uniqueid']
    # Service objects derived from the manifest.
    self.services = self._services()
def make_self_acl(self, perm):
    """Constructs an ACL for the current user.

    Root maps to the host principal; anyone else maps to their own user
    principal.
    """
    if not utils.is_root():
        return self.make_user_acl(krb5.get_principal(), perm)
    return self.make_host_acl(sysinfo.hostname(), perm)
def cleanup(expiration, interval):
    """Cleans up old tasks.

    Acquires a leader lock, then loops forever: purge expired tasks, sleep
    for ``interval`` seconds.
    """
    zkconn = context.GLOBAL.zk.conn
    zkconn.ensure_path('/task-cleanup-election')
    me = '%s' % (sysinfo.hostname())
    lock = zkconn.Lock('/task-cleanup-election', me)
    _LOGGER.info('Waiting for leader lock.')
    with lock:
        while True:
            zk.cleanup(zkconn, expiration)
            _LOGGER.info('Finished cleanup, sleep %s sec', interval)
            time.sleep(interval)
def make_self_acl(perm):
    """Constructs an ACL for the current user.

    Root maps to the host/<hostname> principal; anyone else maps to their
    own user principal.
    """
    assert _is_valid_perm(perm)

    if not userutil.is_root():
        return make_user_acl(userutil.get_current_username(), perm)
    return make_host_acl(sysinfo.hostname(), perm)
def top(no_lock):
    """Sync LDAP data with Zookeeper data."""
    context.GLOBAL.zk.conn.ensure_path('/appmonitor-election')
    me = '%s.%d' % (sysinfo.hostname(), os.getpid())
    lock = context.GLOBAL.zk.conn.Lock('/appmonitor-election', me)

    # Guard clause: without the lock, just run the sync directly.
    if no_lock:
        _LOGGER.info('Running without lock.')
        _run_sync()
        return

    _LOGGER.info('Waiting for leader lock.')
    with lock:
        _run_sync()
def _register_endpoint(zkclient, port):
    """Register policy server endpoint in Zookeeper."""
    hostname = sysinfo.hostname()
    zkclient.ensure_path(z.path.warpgate())

    endpoint_node = z.path.warpgate('%s:%s' % (hostname, port))
    _LOGGER.info('registering locker: %s', endpoint_node)
    if zkclient.exists(endpoint_node):
        # Remove the stale node left by a previous run before re-creating.
        _LOGGER.info('removing previous node %s', endpoint_node)
        zkutils.ensure_deleted(zkclient, endpoint_node)

    zkutils.put(zkclient, endpoint_node, {}, acl=None, ephemeral=True)
def register_endpoint(self, port):
    """Register ticket locker endpoint in Zookeeper."""
    hostname = sysinfo.hostname()
    self.zkclient.ensure_path(z.TICKET_LOCKER)

    locker_node = z.path.ticket_locker('%s:%s' % (hostname, port))
    _LOGGER.info('registering locker: %s', locker_node)
    if self.zkclient.exists(locker_node):
        # Remove the stale node left by a previous run before re-creating.
        _LOGGER.info('removing previous node %s', locker_node)
        zkutils.ensure_deleted(self.zkclient, locker_node)

    zkutils.put(self.zkclient, locker_node, {}, acl=None, ephemeral=True)
def run(interval):
    """Periodically run host lookup checks, exiting on check failure."""
    hostname = sysinfo.hostname()
    dns_domain = context.GLOBAL.dns_domain
    while True:
        _LOGGER.info('Checking host lookups: %s, %s', hostname, dns_domain)
        try:
            ok = _check(hostname, dns_domain)
            if not ok:
                _LOGGER.error('Check failed - exiting.')
                return
        except Exception as err:  # pylint: disable=broad-except
            # Best effort: log unexpected errors and keep checking.
            _LOGGER.exception('Error while checking host lookups: %r', err)
        time.sleep(interval)
def warpgate_policy_server(port, tun_dev, tun_addr, tun_cidrs,
                           policies_dir, state_dir):
    """Run warpgate policy server."""
    local_hostname = sysinfo.hostname()
    # Default the tunnel address to this host's resolved IP.
    if tun_addr:
        tun_address = tun_addr
    else:
        tun_address = socket.gethostbyname(local_hostname)

    policy_server.run_server(
        admin_address=local_hostname,
        admin_port=port,
        tun_devname=tun_dev,
        tun_address=tun_address,
        tun_cidrs=tun_cidrs,
        policies_dir=policies_dir,
        state_dir=state_dir)
def sync_relations(spool_dir, database, query_proid_func):
    """Sync VIP host keytab/proid relation if it does not exist.

    :param spool_dir: Path to keep keytabs fetched from keytab locker.
    :param database: Path to SQLite3 db file which stores VIP keytab/proid
        relationships.
    :param query_proid_func:
        Function object with signature `func(ktname: str) -> str`.
    """
    conn = sqlite3.connect(database)
    try:
        cur = conn.cursor()
        try:
            # NOTE: _TABLE is an internal constant, so the %s interpolation
            # does not expose the query to external input.
            keytab2proid = dict(
                cur.execute('SELECT keytab, proid FROM %s ' % _TABLE))
        except sqlite3.OperationalError:
            # table may not exist yet, try sync in the next execution
            _LOGGER.warning('wait for keytab locker starting.')
            return

        hostname = sysinfo.hostname()
        missed = set()
        for ktname in glob.glob(os.path.join(spool_dir, '*')):
            ktname = os.path.basename(ktname)
            # ignore local host keytab
            if (ktname not in keytab2proid and
                    _keytab2hostname(ktname) != hostname):
                missed.add(ktname)

        _LOGGER.debug('keytabs without proid: %r', sorted(missed))
        if missed:
            values = []
            for ktname in missed:
                proid = query_proid_func(ktname)
                _LOGGER.debug('keytab %s matches proid %s', ktname, proid)
                values.append((ktname, proid))

            cur.executemany(
                """
                INSERT INTO %s (keytab, proid) VALUES (?, ?)
                """ % _TABLE,
                values,
            )
            conn.commit()
    finally:
        # Always release the connection - the original leaked it when
        # query_proid_func or the insert raised.
        conn.close()
def krb5keytab(krb5keytab_server, principal, keytab, owner):
    """krb5keytab client

    Resolves defaults for principal/owner/keytab, discovers krb5keytab
    servers via DNS SRV records when none are given, then requests the
    keytab from each server in turn until one returns entries.
    """
    username = pwd.getpwuid(os.getuid())[0]
    hostname = sysinfo.hostname()

    treadmill.logging.set_log_level(logging.INFO)

    # Default principal is <user>/<hostname>.
    if not principal:
        principal = '{}/{}'.format(username, hostname)
    if not owner:
        owner = username
    if not keytab:
        keytab = os.path.join(_DEFAULT_KEYTAB_DIR, owner)

    if not krb5keytab_server:
        # Discover servers from the _krb5keytab._tcp SRV record in the
        # IPA domain.
        krb5keytab_server = []
        domain = awscontext.GLOBAL.ipa_domain
        try:
            srvrecs = dns.resolver.query(
                '_krb5keytab._tcp.{}'.format(domain), 'SRV'
            )
        except dns.resolver.NXDOMAIN:
            srvrecs = []
        for result in srvrecs:
            # SRV record text: '<priority> <weight> <port> <target>'.
            _, _, port, server = result.to_text().split()
            krb5keytab_server.append('{}:{}'.format(server, port))

    if not krb5keytab_server:
        treadmill.cli.bad_exit(
            'Configuration/usage error: '
            '--krb5keytab-server not specified/DNS not configured'
            ' - exiting.'
        )

    _LOGGER.info('Principal : %s', principal)
    _LOGGER.info('Keytab : %s', keytab)
    _LOGGER.info('Owner : %s', owner)

    kt_entries = None
    for endpoint in krb5keytab_server:
        _LOGGER.info('Connecting to %s', endpoint)
        server, port = endpoint.split(':')
        kt_entries = _request_keytab(server, int(port), principal)
        if kt_entries:
            # First successful fetch wins.
            _write_keytab(kt_entries, keytab, owner)
            return
    # NOTE(review): if no server returns entries, the function returns
    # silently without writing a keytab - confirm this is intended.
def configure(self, manifest):
    """Appends the krb5keytab system service to the app manifest.

    :param manifest: App manifest dict; ``system_services`` is modified
        in place.
    """
    _LOGGER.info('Configuring krb5keytab.')

    unique_name = appcfg.manifest_unique_name(manifest)
    # Root of the app instance's container filesystem.
    appdir = os.path.join(self._tm_env.apps_dir, unique_name, 'data',
                          'root')

    krb5keytab_svc = {
        'name': 'krb5keytab',
        'restart': {
            'limit': 5,
            'interval': 60,
        },
        'proid': 'root',
        'root': True,
        # Fetch the proid keytab into the container spool, then park so
        # the supervised service stays up.
        'command': ('{treadmill}/bin/treadmill --debug admin krb5keytab'
                    ' --owner {user}'
                    ' --principal {user}/{hostname}'
                    ' --keytab {appdir}/var/spool/keytabs/{user}'
                    ' --cachedir /tmp'
                    ' --lockdir /tmp'
                    '; exec sleep inf').format(
                        treadmill=subproc.resolve('treadmill'),
                        user=manifest['proid'],
                        hostname=sysinfo.hostname(),
                        appdir=appdir,
                    ),
        'environ': [{
            'name': 'KRB5CCNAME',
            'value': os.path.expandvars('FILE:${TREADMILL_HOST_TICKET}'),
        }],
        'config': None,
        'downed': False,
        'trace': False,
        'logger': 's6.logger.run',
    }

    manifest['system_services'].append(krb5keytab_svc)