def delete_bucket(zkclient, bucket_id):
    """Delete the bucket definition node from Zookeeper."""
    bucket_path = z.path.bucket(bucket_id)
    zkutils.ensure_deleted(zkclient, bucket_path)
def top(exit_on_fail, zkid, approot):
    """Run treadmill init process.

    Connects to Zookeeper, waits until the server presence namespace and
    this host's server node exist, and then, unless the host is already
    blacked out, starts the node and runs the main loop. When the main
    loop returns a reason, the host is blacked out with that reason as the
    node data. Afterwards the presence node is deleted, the Zookeeper
    client is shut down, the network is cleaned up and running apps are
    terminated.

    :param exit_on_fail:
        If true, call ``utils.sys_exit(-1)`` once shutdown is complete;
        otherwise sleep forever.
    :param zkid:
        Passed to ``zkutils.connect`` as ``idpath``.
    :param approot:
        Root directory used to construct the ``AppEnvironment``.
    """
    _LOGGER.info('Initializing Treadmill: %s', approot)

    tm_env = appenv.AppEnvironment(approot)
    zkclient = zkutils.connect(
        context.GLOBAL.zk.url,
        idpath=zkid,
        listener=_exit_clear_watchdog_on_lost
    )

    utils.report_ready()

    # Poll until the server presence namespace exists.
    while not zkclient.exists(z.SERVER_PRESENCE):
        _LOGGER.warning('namespace not ready.')
        time.sleep(30)

    hostname = sysinfo.hostname()

    zk_blackout_path = z.path.blackedout_server(hostname)
    zk_presence_path = z.path.server_presence(hostname)
    zk_server_path = z.path.server(hostname)

    # Poll until this host has a server node in the cell.
    while not zkclient.exists(zk_server_path):
        _LOGGER.warning('server %s not defined in the cell.', hostname)
        time.sleep(30)

    _LOGGER.info('Checking blackout list.')
    blacklisted = bool(zkclient.exists(zk_blackout_path))

    if not blacklisted:
        # Node startup.
        _node_start(tm_env, zkclient, hostname,
                    zk_server_path, zk_presence_path)

        # Cleanup the watchdog directory
        tm_env.watchdogs.initialize()

        _init_network()

        _LOGGER.info('Ready.')

        down_reason = _main_loop(tm_env, zkclient, zk_presence_path)

        if down_reason is not None:
            _LOGGER.warning('Shutting down: %s', down_reason)

            # Blackout the server.
            zkutils.ensure_exists(
                zkclient,
                zk_blackout_path,
                acl=[zkutils.make_host_acl(hostname, 'rwcda')],
                data=down_reason
            )
    else:
        # Node was already blacked out.
        _LOGGER.warning('Shutting down blacked out node.')

    # This is the shutdown phase.

    # Delete the node
    zkutils.ensure_deleted(zkclient, zk_presence_path)
    zkclient.remove_listener(_exit_clear_watchdog_on_lost)
    zkclient.stop()
    zkclient.close()

    _cleanup_network()

    # Terminate all the running apps.
    _blackout_terminate(tm_env)

    if exit_on_fail:
        utils.sys_exit(-1)
    else:
        # Sit forever in a broken state
        while True:
            time.sleep(1000000)
def accept(tkt_spool_dir, approot, port, appname, endpoint, use_v2, keytab):
    """Run ticket locker acceptor.

    Registers ``hostname:port`` under the app's TCP endpoint node in
    Zookeeper, publishes a local endpoint spec, and then execs into the
    ticket receiver binary.

    :param tkt_spool_dir:
        Directory handed to the ticket receiver binary.
    :param approot:
        Root directory used to construct the ``AppEnvironment``.
    :param port:
        Port to register; ``0`` asks the OS for a free port.
    :param appname:
        Application name used to build the endpoint paths.
    :param endpoint:
        Endpoint name to register.
    :param use_v2:
        If true exec ``tkt_recv_v2``, otherwise ``tkt_recv``.
    :param keytab:
        If truthy, passed to ``_construct_keytab`` before anything else.
    """
    if keytab:
        _construct_keytab(keytab)

    if port == 0:
        # Bind to port 0 so the OS assigns a free port, read it back, and
        # always release the probe socket, even if bind() fails.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.bind(('0.0.0.0', 0))
            port = sock.getsockname()[1]
        finally:
            sock.close()

    hostname = sysinfo.hostname()
    hostport = '%s:%s' % (hostname, port)

    endpoint_proid_path = z.path.endpoint_proid(appname)
    acl = context.GLOBAL.zk.conn.make_servers_acl()
    _LOGGER.info(
        'Ensuring %s exists with ACL %r',
        endpoint_proid_path,
        acl
    )
    zkutils.ensure_exists(
        context.GLOBAL.zk.conn,
        endpoint_proid_path,
        acl=[acl]
    )

    endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
    _LOGGER.info('Registering %s %s', endpoint_path, hostport)

    # Need to delete/create endpoints for the discovery to pick it up in
    # case of master restart.
    #
    # Unlike typical endpoint, we cannot make the node ephemeral as we
    # exec into tkt-recv.
    zkutils.ensure_deleted(context.GLOBAL.zk.conn, endpoint_path)
    time.sleep(5)
    zkutils.put(context.GLOBAL.zk.conn, endpoint_path, hostport)

    context.GLOBAL.zk.conn.stop()

    tm_env = appenv.AppEnvironment(approot)
    endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
    endpoints_mgr.unlink_all(
        appname=appname,
        endpoint=endpoint,
        proto='tcp'
    )
    endpoints_mgr.create_spec(
        appname=appname,
        endpoint=endpoint,
        proto='tcp',
        real_port=port,
        pid=os.getpid(),
        port=port,
        owner='/proc/{}'.format(os.getpid()),
    )

    # Exec into tickets acceptor. If race condition will not allow it to
    # bind to the provided port, it will exit and registration will
    # happen again.
    if use_v2:
        subproc.safe_exec(['tkt_recv_v2',
                           '-p{}'.format(port),
                           '-d{}'.format(tkt_spool_dir)])
    else:
        subproc.safe_exec(['tkt_recv',
                           'tcp://*:{}'.format(port),
                           tkt_spool_dir])
def _clear_server_blackout(zkclient, server):
    """Delete the blackout node of the given server."""
    zkutils.ensure_deleted(zkclient, z.path.blackedout_server(server))
def _delete_all(zkclient, to_be_deleted, recursive=True):
    """Delete each path in ``to_be_deleted``, logging it first."""
    for doomed_path in to_be_deleted:
        _LOGGER.info('Removing: %s (cleanup)', doomed_path)
        zkutils.ensure_deleted(zkclient, doomed_path, recursive)
def delete_identity_group(zkclient, ident_group_id):
    """Delete an identity group node and emit an 'identity_groups' event."""
    zkutils.ensure_deleted(zkclient, z.path.identity_group(ident_group_id))
    affected_groups = [ident_group_id]
    create_event(zkclient, 0, 'identity_groups', affected_groups)
def unregister_server(zkclient, hostname):
    """Delete the presence node of ``hostname`` if ``find_server`` finds one."""
    _LOGGER.info('Unregistering server %s', hostname)

    presence_node = find_server(zkclient, hostname)
    if not presence_node:
        # Nothing registered for this host, nothing to delete.
        return

    zkutils.ensure_deleted(zkclient, presence_node)
def cell_remove_bucket(zkclient, bucket_id):
    """Detach a bucket from the cell, emitting a 'cell' event on removal."""
    cell_bucket_path = z.path.cell(bucket_id)
    if zkclient.exists(cell_bucket_path):
        zkutils.ensure_deleted(zkclient, cell_bucket_path)
        create_event(zkclient, 0, 'cell', None)
def delete_appmonitor(zkclient, monitor_id):
    """Delete the node of the given app monitor."""
    monitor_path = z.path.appmonitor(monitor_id)
    zkutils.ensure_deleted(zkclient, monitor_path)
def top(ctx, exit_on_fail, zkid, notification_fd, approot, runtime):
    """Run treadmill init process.

    Connects to Zookeeper, waits until the server presence namespace and
    this host's server node exist, and then, unless the host is already
    blacked out, starts the node and runs the main loop. After the main
    loop returns, a postmortem may be run (see below). Finally the presence
    node is deleted, the Zookeeper client is shut down, the network is
    cleaned up and running apps are terminated.

    :param ctx:
        Context object; ``ctx.obj['ROOT_CGROUP']`` is read from it.
    :param exit_on_fail:
        If true, call ``utils.sys_exit(-1)`` once shutdown is complete;
        otherwise sleep forever.
    :param zkid:
        Passed to ``zkutils.connect`` as ``idpath``.
    :param notification_fd:
        Passed to ``utils.report_ready`` after node startup.
    :param approot:
        Root directory used to construct the ``AppEnvironment``; also
        passed to ``postmortem.run``.
    :param runtime:
        Runtime identifier, logged and passed to ``_node_start``.
    """
    _LOGGER.info('Initializing Treadmill: %s (%s)', approot, runtime)

    tm_env = appenv.AppEnvironment(approot)
    # Bind tm_env into the listener; the same object is removed from the
    # client during shutdown below.
    stop_on_lost = functools.partial(_stop_on_lost, tm_env)
    zkclient = zkutils.connect(
        context.GLOBAL.zk.url,
        idpath=zkid,
        listener=stop_on_lost,
        session_timeout=context.GLOBAL.zk.session_timeout)

    # Poll until the server presence namespace exists.
    while not zkclient.exists(z.SERVER_PRESENCE):
        _LOGGER.warning('namespace not ready.')
        time.sleep(30)

    hostname = sysinfo.hostname()

    zk_blackout_path = z.path.blackedout_server(hostname)
    zk_server_path = z.path.server(hostname)
    zk_presence_path = z.path.server_presence(hostname)

    # Poll until this host has a server node in the cell.
    while not zkclient.exists(zk_server_path):
        _LOGGER.warning('server %s not defined in the cell.', hostname)
        time.sleep(30)

    _LOGGER.info('Checking blackout list.')
    blacklisted = bool(zkclient.exists(zk_blackout_path))

    root_cgroup = ctx.obj['ROOT_CGROUP']
    os_args = {}
    # The cgroup prefix is only passed along on posix systems.
    if os.name == 'posix':
        os_args['cgroup_prefix'] = root_cgroup

    if not blacklisted:
        # Node startup.
        _node_start(tm_env, runtime, zkclient, hostname, zk_server_path,
                    zk_presence_path, os_args)

        utils.report_ready(notification_fd)

        _init_network()

        _start_init1(tm_env)
        _LOGGER.info('Ready.')

        down_reason = _main_loop(tm_env, zkclient, zk_presence_path)

        if down_reason is not None:
            _LOGGER.warning('Shutting down: %s', down_reason)
            # Blackout the server.
            zkutils.ensure_exists(
                zkclient,
                zk_blackout_path,
                acl=[zkclient.make_host_acl(hostname, 'rwcda')],
                data=down_reason)
            trigger_postmortem = True
        else:
            # Blacked out manually
            # No reason was returned: run the postmortem only if a
            # blackout node exists for this host now.
            trigger_postmortem = bool(zkclient.exists(zk_blackout_path))

        if trigger_postmortem:
            postmortem.run(approot, root_cgroup)

    else:
        # Node was already blacked out.
        _LOGGER.warning('Shutting down blacked out node.')

    # This is the shutdown phase.

    # Delete the node
    if zk_presence_path:
        zkutils.ensure_deleted(zkclient, zk_presence_path)
    zkclient.remove_listener(stop_on_lost)
    zkclient.stop()
    zkclient.close()

    _cleanup_network()

    # Terminate all the running apps.
    _blackout_terminate(tm_env)

    if exit_on_fail:
        utils.sys_exit(-1)
    else:
        # Sit forever in a broken state
        while True:
            time.sleep(1000000)
def delete(self, path):
    """Delete the object at ``path`` and return the helper's result."""
    outcome = zkutils.ensure_deleted(self.zkclient, path)
    return outcome
def delete_server(zkclient, server_id):
    """Delete a server's node and placement node, then emit a 'servers' event."""
    # Server node first, placement node second.
    for build_path in (z.path.server, z.path.placement):
        zkutils.ensure_deleted(zkclient, build_path(server_id))

    create_event(zkclient, 0, 'servers', [server_id])
def delete_apps(zkclient, app_ids):
    """Unschedule apps by deleting the node of every id in ``app_ids``."""
    for instance_id in app_ids:
        app_path = _app_node(instance_id)
        zkutils.ensure_deleted(zkclient, app_path)
def delete(endpoint, proto, app):
    """Delete the endpoint node for the given app, protocol and endpoint."""
    endpoint_node = z.path.endpoint(app, proto, endpoint)
    zkutils.ensure_deleted(context.GLOBAL.zk.conn, endpoint_node)
def delete(server):
    """Delete the presence node of the given server."""
    zkutils.ensure_deleted(
        context.GLOBAL.zk.conn,
        z.path.server_presence(server)
    )
def accept_cmd(tkt_spool_dir, approot, port, appname, endpoint, keytab):
    """Run ticket locker acceptor.

    Registers ``hostname:port`` under the app's TCP endpoint node in
    Zookeeper, publishes a local endpoint spec, and then execs into
    ``tkt_recv_v2``.

    :param tkt_spool_dir:
        Directory handed to ``tkt_recv_v2`` via ``-d``.
    :param approot:
        Root directory used to construct the ``AppEnvironment``.
    :param port:
        Port to register; ``0`` asks the OS for a free port.
    :param appname:
        Application name used to build the endpoint paths.
    :param endpoint:
        Endpoint name to register.
    :param keytab:
        If truthy, passed to ``_construct_keytab`` before anything else.
    """
    if keytab:
        _construct_keytab(keytab)

    if port == 0:
        # Bind to port 0 so the OS assigns a free port, read it back, and
        # always release the probe socket, even if bind() fails.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.bind(('0.0.0.0', 0))
            port = sock.getsockname()[1]
        finally:
            sock.close()

    hostname = sysinfo.hostname()
    hostport = '%s:%s' % (hostname, port)

    endpoint_proid_path = z.path.endpoint_proid(appname)
    acl = context.GLOBAL.zk.conn.make_servers_acl()
    _LOGGER.info(
        'Ensuring %s exists with ACL %r',
        endpoint_proid_path,
        acl
    )
    zkutils.ensure_exists(
        context.GLOBAL.zk.conn,
        endpoint_proid_path,
        acl=[acl]
    )

    endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
    _LOGGER.info('Registering %s %s', endpoint_path, hostport)

    # Need to delete/create endpoints for the discovery to pick it up in
    # case of master restart.
    #
    # Unlike typical endpoint, we cannot make the node ephemeral as we
    # exec into tkt-recv.
    zkutils.ensure_deleted(context.GLOBAL.zk.conn, endpoint_path)
    time.sleep(5)
    zkutils.put(context.GLOBAL.zk.conn, endpoint_path, hostport)

    context.GLOBAL.zk.conn.stop()

    # TODO: this will publish information about the endpoint state
    #       under /discovery. Once discovery is refactored (if it will be)
    #       we can remove the "manual" zookeeper manipulation.
    tm_env = appenv.AppEnvironment(approot)
    endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
    endpoints_mgr.unlink_all(
        appname=appname,
        endpoint=endpoint,
        proto='tcp'
    )
    endpoints_mgr.create_spec(
        appname=appname,
        endpoint=endpoint,
        proto='tcp',
        real_port=port,
        pid=os.getpid(),
        port=port,
        owner='/proc/{}'.format(os.getpid()),
    )

    # Replace this process with the ticket receiver on the chosen port.
    subproc.safe_exec(['tkt_recv_v2',
                       '-p{}'.format(port),
                       '-d{}'.format(tkt_spool_dir)])