Example #1
0
    def load_schedule(self):
        """Run the scheduler once and reconcile Zookeeper with its result.

        For every cell member, the children of its placement node are
        compared with the apps assigned to that server: children that are no
        longer assigned are deleted, missing ones are created and their task
        is updated. The value returned by the scheduler is finally stored at
        the root placement node and the instance is flagged as up to date.
        """
        placement = self.cell.schedule()

        for servername, server in self.cell.members().items():
            server_node = z.path.placement(servername)
            zkutils.ensure_exists(
                self.zkclient, server_node, acl=[_SERVERS_ACL]
            )

            in_zk = set(self.zkclient.get_children(server_node))
            assigned = set(server.apps.keys())

            # Present in Zookeeper but not assigned to this server anymore.
            for app in in_zk - assigned:
                _LOGGER.info('Unscheduling: %s - %s', servername, app)
                zkutils.ensure_deleted(
                    self.zkclient, os.path.join(server_node, app)
                )

            # Assigned to this server but not present in Zookeeper yet.
            for app in assigned - in_zk:
                identity = self.cell.apps[app].identity
                _LOGGER.info('Scheduling: %s - %s,%s',
                             servername, app, identity)

                data = self._placement_data(app)
                app_node = os.path.join(server_node, app)
                zkutils.put(
                    self.zkclient, app_node, data, acl=[_SERVERS_ACL]
                )

                self._update_task(app, servername, why=None)

        # Store latest placement as reference.
        zkutils.put(self.zkclient, z.path.placement(), placement)
        self.up_to_date = True
Example #2
0
def create_endpoint_file(approot, port, appname, endpoint):
    """Register a TCP endpoint in Zookeeper and write its local spec file.

    The endpoint node is deleted and re-created with ``<hostname>:<port>`` as
    payload. Afterwards the existing local links for the endpoint are removed
    and a new spec owned by the current process is created.
    """
    hostport = '%s:%s' % (sysinfo.hostname(), port)
    zkclient = context.GLOBAL.zk.conn

    proid_path = z.path.endpoint_proid(appname)
    servers_acl = zkclient.make_servers_acl()
    _LOGGER.info('Ensuring %s exists with ACL %r', proid_path, servers_acl)
    zkutils.ensure_exists(zkclient, proid_path, acl=[servers_acl])

    endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
    _LOGGER.info('Registering %s %s', endpoint_path, hostport)

    # Need to delete/create endpoints for the discovery to pick it up in
    # case of master restart.
    zkutils.ensure_deleted(zkclient, endpoint_path)
    time.sleep(5)
    zkutils.put(zkclient, endpoint_path, hostport)

    # Replace the local endpoint spec with one owned by this process.
    own_pid = os.getpid()
    tm_env = appenv.AppEnvironment(approot)
    manager = endpoints.EndpointsMgr(tm_env.endpoints_dir)
    manager.unlink_all(appname=appname, endpoint=endpoint, proto='tcp')
    manager.create_spec(
        appname=appname,
        endpoint=endpoint,
        proto='tcp',
        real_port=port,
        pid=own_pid,
        port=port,
        owner='/proc/{}'.format(own_pid),
    )
Example #3
0
def create_server(zkclient, server_id, parent_id):
    """Create or update a server definition in Zookeeper.

    The server node is ensured to exist with a host ACL and its current
    content is read back. When ``parent_id`` is truthy the ``parent`` key of
    that content is set. A ``servers`` event is created only when
    ``zkutils.put`` returns a truthy value.
    """
    node = z.path.server(server_id)
    host_acl = zkutils.make_host_acl(server_id, 'rwcd')

    zkutils.ensure_exists(zkclient, node, acl=[host_acl])

    # zkutils.get return dict/tuple if need_metadata is true.
    #
    # pylint: disable=R0204
    data = zkutils.get(zkclient, node)
    if parent_id:
        # Start from an empty mapping when the node holds no data yet.
        data = data or {}
        data['parent'] = parent_id

    _LOGGER.info('Creating server node %s with data %r and ACL %r',
                 node, data, host_acl)
    changed = zkutils.put(
        zkclient, node, data, acl=[host_acl], check_content=True
    )
    if changed:
        create_event(zkclient, 0, 'servers', [server_id])
Example #4
0
def _blackout_app(zkclient, app, clear):
    """Set or clear the blackout marker node of an app.

    With a truthy ``clear`` the app's blacked-out node is deleted, otherwise
    it is ensured to exist.
    """
    node = z.path.blackedout_app(app)
    action = zkutils.ensure_deleted if clear else zkutils.ensure_exists
    action(zkclient, node)
Example #5
0
def sync_servers():
    """Publish the ids of all admin servers under the globals servers node."""
    _LOGGER.info('Sync servers.')
    servers = context.GLOBAL.admin.server().list({})

    zkclient = context.GLOBAL.zk.conn
    servers_node = z.path.globals('servers')
    server_ids = [entry['_id'] for entry in servers]
    zkutils.ensure_exists(zkclient, servers_node, data=server_ids)
Example #6
0
def sync_traits():
    """Publish the ``traits`` of the current cell under the traits node."""
    _LOGGER.info('Sync traits.')
    cell_record = context.GLOBAL.admin.cell().get(context.GLOBAL.cell)
    traits = cell_record['traits']

    zkclient = context.GLOBAL.zk.conn
    zkutils.ensure_exists(zkclient, z.path.traits(), data=traits)
Example #7
0
def _blackout_server(zkclient, server, reason):
    """Black out a server and kill its presence node.

    The blackout node is ensured with a host ACL and ``str(reason)`` as data.

    :raises click.UsageError: when ``reason`` is empty.
    """
    if not reason:
        raise click.UsageError('--reason is required.')

    node = z.path.blackedout_server(server)
    host_acl = zkutils.make_host_acl(server, 'rwcda')
    zkutils.ensure_exists(zkclient, node, acl=[host_acl], data=str(reason))
    presence.kill_node(zkclient, server)
Example #8
0
    def test_ensure_exists(self):
        """Verify the create call made by ensure_exists, with/without data."""
        zkclient = kazoo.client.KazooClient()

        # (extra keyword arguments, payload expected in the create call)
        scenarios = (
            ({'data': 'foo'}, b'foo'),  # with data
            ({}, b''),                  # non-data
        )
        for extra_kwargs, payload in scenarios:
            zkutils.ensure_exists(zkclient, '/foo/bar', **extra_kwargs)
            kazoo.client.KazooClient.create.assert_called_with(
                '/foo/bar', payload, acl=mock.ANY, makepath=True,
                sequence=False)
    def test_ensure_exists_existing(self):
        """Check ensure_exists when create reports the node already exists."""
        def _already_exists(*_args, **_kwargs):
            """Side effect for create: always raise NodeExistsError."""
            raise kazoo.client.NodeExistsError()

        zk_cls = treadmill.zkutils.ZkClient
        zkclient = zk_cls()
        zk_cls.create.side_effect = _already_exists

        # Without data: the ACLs must be applied to the existing node.
        zkutils.ensure_exists(zkclient, '/foo/bar')
        zk_cls.set_acls.assert_called_with('/foo/bar', mock.ANY)

        # ensure with data
        zkutils.ensure_exists(zkclient, '/foo/bar', data='foo')
        zk_cls.set.assert_called_with('/foo/bar', b'foo')
        zk_cls.set_acls.assert_called_with('/foo/bar', mock.ANY)
Example #10
0
    def load_server(self, servername, readonly=False):
        """Load an individual server into the cell model.

        Reads the server node from Zookeeper, builds a ``scheduler.Server``
        from its content and attaches it to its parent bucket. Nothing is
        loaded when the node holds no data, when the parent bucket is not
        known or when the node does not exist; each case is only logged.

        :param servername: name of the server to load.
        :param readonly: when true the server placement node is not created
            in Zookeeper; the flag is also passed on to
            ``adjust_server_state``.
        """
        try:
            server_path = z.path.server(servername)
            data = zkutils.get(self.zkclient, server_path)
            if not data:
                # The server is configured, but never reported its capacity.
                _LOGGER.info('No capacity detected: %s', server_path)
                return

            assert 'parent' in data
            parentname = data['parent']
            # TODO: it will be better to have separate module for constants
            #       and avoid unnecessary cross imports.
            label = data.get('partition') or admin.DEFAULT_PARTITION
            # Servers that did not report 'up_since' are treated as up since
            # the moment they are loaded.
            up_since = data.get('up_since', int(time.time()))

            partition = self.cell.partitions[label]
            server = scheduler.Server(
                servername,
                resources(data),
                valid_until=partition.valid_until(up_since),
                label=label,
                traits=data.get('traits', 0))

            parent = self.buckets.get(parentname)
            if not parent:
                _LOGGER.warning('Server parent does not exist: %s/%s',
                                servername, parentname)
                return

            parent.add_node(server)
            self.servers[servername] = server
            assert server.parent == parent

            if not readonly:
                zkutils.ensure_exists(self.zkclient,
                                      z.path.placement(servername),
                                      acl=[_SERVERS_ACL])

            self.adjust_server_state(servername, readonly)

        except kazoo.client.NoNodeError:
            _LOGGER.warning('Server node not found: %s', servername)
Example #11
0
    def accept(tkt_spool_dir, port, appname, endpoint, use_v2):
        """Run ticket locker acceptor.

        Registers ``<hostname>:<port>`` under the app's tcp endpoint node in
        Zookeeper, stops the Zookeeper connection and then execs into the
        ticket receiver (``tkt_recv_v2`` when ``use_v2`` is true, ``tkt_recv``
        otherwise). A ``port`` of 0 means a port is picked automatically.
        """
        if port == 0:
            # Bind a throwaway socket to port 0 and read back the port number
            # that was assigned. The socket is closed right away, so the
            # receiver started below has to bind the port again itself.
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.bind(('0.0.0.0', 0))
            port = sock.getsockname()[1]
            sock.close()

        hostname = sysinfo.hostname()
        hostport = '%s:%s' % (hostname, port)

        endpoint_proid_path = z.path.endpoint_proid(appname)
        _LOGGER.info('Ensuring %s exists with ACL %r', endpoint_proid_path,
                     _SERVERS_ACL)
        zkutils.ensure_exists(context.GLOBAL.zk.conn,
                              endpoint_proid_path,
                              acl=[_SERVERS_ACL])

        endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
        _LOGGER.info('Registering %s %s', endpoint_path, hostport)

        # Need to delete/create endpoints for the discovery to pick it up in
        # case of master restart.
        #
        # Unlike typical endpoint, we cannot make the node ephemeral as we
        # exec into tkt-recv.
        zkutils.ensure_deleted(context.GLOBAL.zk.conn, endpoint_path)
        time.sleep(5)
        zkutils.put(context.GLOBAL.zk.conn, endpoint_path, hostport)

        # The Zookeeper connection is stopped before handing off to the
        # receiver.
        context.GLOBAL.zk.conn.stop()

        # Exec into tickets acceptor. If race condition will not allow it to
        # bind to the provided port, it will exit and registration will
        # happen again.
        if use_v2:
            subproc.safe_exec([
                'tkt_recv_v2', '-p{}'.format(port),
                '-d{}'.format(tkt_spool_dir)
            ])
        else:
            subproc.safe_exec(
                ['tkt_recv', 'tcp://*:{}'.format(port), tkt_spool_dir])
Example #12
0
    def publish_tickets(self, realms, once=False):
        """Publish list of all tickets present on the locker.

        Every ticket file found in the spool directory is described with
        ``klist`` and the output is stored in an ephemeral Zookeeper node
        named after the file and this host. Stale tickets are pruned after
        the initial scan. Unless ``once`` is set, the method then loops
        forever, publishing newly created files and pruning periodically.

        :param realms: collection of suffixes; a ticket file is published
            only when its path ends with one of them. It is iterated once per
            file, so it must be re-iterable.
        :param once: when true, return after the initial publish and prune.
        """
        zkutils.ensure_exists(self.zkclient, z.TICKETS)
        # NOTE(review): the watcher is presumably created before the initial
        # scan so that files appearing in the meantime are not missed -
        # confirm.
        watcher = dirwatch.DirWatcher(self.tkt_spool_dir)

        def _publish_ticket(tkt_file):
            """Publish ticket details."""
            # Skip hidden files. The check is done on the file name: the
            # paths handed in include the spool directory, so testing the
            # whole path would look at the directory prefix instead.
            if os.path.basename(tkt_file).startswith('.'):
                return

            if not any(tkt_file.endswith(realm) for realm in realms):
                _LOGGER.info('Ignore tkt_file: %s', tkt_file)
                return

            try:
                tkt_details = subproc.check_output(
                    ['klist', '-5', '-e', '-f', tkt_file])
                tkt_node = z.path.tickets(os.path.basename(tkt_file),
                                          self.hostname)
                zkutils.put(self.zkclient,
                            tkt_node,
                            tkt_details,
                            ephemeral=True)
            except subproc.CalledProcessError:
                _LOGGER.warning('Unable to get tickets details.')

        for tkt_file in glob.glob(os.path.join(self.tkt_spool_dir, '*')):
            _publish_ticket(tkt_file)

        self.prune_tickets()
        last_prune = time.time()

        if once:
            return

        watcher.on_created = _publish_ticket
        while True:
            # Prune on a fixed interval, no matter how busy the watcher is.
            if time.time() - last_prune > _STALE_TKTS_PRUNE_INTERVAL:
                self.prune_tickets()
                last_prune = time.time()

            if watcher.wait_for_events(timeout=_STALE_TKTS_PRUNE_INTERVAL):
                watcher.process_events(max_events=_DIRWATCH_EVENTS_COUNT)
Example #13
0
def create_server(zkclient, server_id, parent_id, partition):
    """Create or update a server definition in Zookeeper.

    The server node is ensured to exist with a host ACL, then its content is
    read back and its ``parent`` and ``partition`` keys are set. A
    ``servers`` event is created only when ``zkutils.put`` returns a truthy
    value.
    """
    node = z.path.server(server_id)
    host_acl = zkclient.make_host_acl(server_id, 'rwcd')

    zkutils.ensure_exists(zkclient, node, acl=[host_acl])

    # Start from an empty mapping when the node holds no data yet.
    data = zkutils.get(zkclient, node) or {}
    data['parent'] = parent_id
    data['partition'] = partition

    _LOGGER.info('Creating server node %s with data %r and ACL %r',
                 node, data, host_acl)
    changed = zkutils.put(
        zkclient, node, data, acl=[host_acl], check_content=True
    )
    if changed:
        create_event(zkclient, 0, 'servers', [server_id])
Example #14
0
def _run_sync():
    """Sync Zookeeper with LDAP, runs with lock held.

    Loops forever. Each pass syncs app groups, partitions, allocations and
    the global servers list from LDAP into Zookeeper, then runs the servers
    plugin if it can be imported, and finally sleeps for 60 seconds.
    """
    while True:
        # Sync app groups
        admin_app_group = admin.AppGroup(context.GLOBAL.ldap.conn)
        app_groups = admin_app_group.list({})
        _sync_collection(context.GLOBAL.zk.conn, app_groups, z.path.appgroup(),
                         _match_appgroup)

        # Sync partitions
        admin_cell = admin.Cell(context.GLOBAL.ldap.conn)
        partitions = admin_cell.partitions(context.GLOBAL.cell)
        _sync_partitions(context.GLOBAL.zk.conn, partitions)

        # Sync allocations.
        admin_alloc = admin.CellAllocation(context.GLOBAL.ldap.conn)

        # Only the allocations of the current cell are requested.
        allocations = admin_alloc.list({'cell': context.GLOBAL.cell})
        _sync_allocations(context.GLOBAL.zk.conn, allocations)

        # Global servers: the ids of all servers are stored as the data of
        # the globals 'servers' node.
        admin_srv = admin.Server(context.GLOBAL.ldap.conn)
        global_servers = admin_srv.list({})
        zkutils.ensure_exists(
            context.GLOBAL.zk.conn,
            z.path.globals('servers'),
            data=[server['_id'] for server in global_servers])

        # Servers - because they can have custom topology - are loaded
        # from the plugin.
        try:
            servers_plugin = importlib.import_module(
                'treadmill.plugins.sproc.servers')
            servers_plugin.init()
        except ImportError as err:
            # A missing plugin is not fatal; the rest of the sync still ran.
            _LOGGER.warning(
                'Unable to load treadmill.plugins.sproc.servers: %s', err)

        time.sleep(60)
Example #15
0
    def create_rootns(self):
        """Ensure the root namespace nodes exist with their ACLs.

        Nodes mapped to ``None`` are ensured without an explicit ACL list,
        the others with the ACLs listed for them. Every trace shard node is
        then ensured with the servers ACL.
        """
        zkclient = self.zkclient

        # node path -> ACL list handed to ensure_exists (None: no ACL given)
        namespace = {
            '/': None,
            z.ALLOCATIONS: None,
            z.APPMONITORS: None,
            z.BUCKETS: None,
            z.CELL: None,
            z.IDENTITY_GROUPS: None,
            z.PLACEMENT: None,
            z.PARTITIONS: None,
            z.SCHEDULED: [_SERVERS_ACL_DEL],
            z.SCHEDULER: None,
            z.SERVERS: None,
            z.STRATEGIES: None,
            z.FINISHED: [_SERVERS_ACL],
            z.FINISHED_HISTORY: None,
            z.TRACE: None,
            z.TRACE_HISTORY: None,
            z.VERSION_ID: None,
            z.ZOOKEEPER: None,
            z.BLACKEDOUT_SERVERS: [_SERVERS_ACL],
            z.ENDPOINTS: [_SERVERS_ACL],
            z.path.endpoint_proid('root'): [_SERVERS_ACL],
            z.EVENTS: [_SERVERS_ACL],
            z.RUNNING: [_SERVERS_ACL],
            z.SERVER_PRESENCE: [_SERVERS_ACL],
            z.VERSION: [_SERVERS_ACL],
            z.REBOOTS: [_SERVERS_ACL],
        }

        for node, node_acl in namespace.items():
            zkutils.ensure_exists(zkclient, node, node_acl)

        for shard in z.trace_shards():
            zkutils.ensure_exists(zkclient, shard, acl=[_SERVERS_ACL])
Example #16
0
    def top(ctx, exit_on_fail, zkid, notification_fd, approot, runtime):
        """Run treadmill init process.

        Connects to Zookeeper, waits until the server presence namespace and
        this host's server node exist, and then - unless the host is blacked
        out - starts the node and runs the main loop. When the main loop
        returns, the node is shut down: its presence node is deleted, the
        Zookeeper connection is closed, the network is cleaned up and the
        running apps are terminated. Finally the process either exits with
        -1 (``exit_on_fail``) or sleeps forever.
        """
        _LOGGER.info('Initializing Treadmill: %s (%s)', approot, runtime)

        tm_env = appenv.AppEnvironment(approot)
        # The listener is kept in a local so it can be removed at shutdown.
        stop_on_lost = functools.partial(_stop_on_lost, tm_env)
        zkclient = zkutils.connect(context.GLOBAL.zk.url,
                                   idpath=zkid,
                                   listener=stop_on_lost)

        # Poll every 30 seconds until the presence namespace exists.
        while not zkclient.exists(z.SERVER_PRESENCE):
            _LOGGER.warning('namespace not ready.')
            time.sleep(30)

        hostname = sysinfo.hostname()

        zk_blackout_path = z.path.blackedout_server(hostname)
        zk_server_path = z.path.server(hostname)
        zk_presence_path = z.path.server_presence(hostname)

        # Poll every 30 seconds until this host is defined in the cell.
        while not zkclient.exists(zk_server_path):
            _LOGGER.warning('server %s not defined in the cell.', hostname)
            time.sleep(30)

        _LOGGER.info('Checking blackout list.')
        blacklisted = bool(zkclient.exists(zk_blackout_path))

        root_cgroup = ctx.obj['ROOT_CGROUP']
        os_args = {}
        # The cgroup prefix is only passed on posix systems.
        if os.name == 'posix':
            os_args['cgroup_prefix'] = root_cgroup

        if not blacklisted:
            # Node startup.
            _node_start(tm_env, runtime, zkclient, hostname, zk_server_path,
                        zk_presence_path, os_args)

            utils.report_ready(notification_fd)

            _init_network()

            _start_init1(tm_env)
            _LOGGER.info('Ready.')

            # Returns once the node has to go down; a value other than None
            # is the reason stored in the blackout node below.
            down_reason = _main_loop(tm_env, zkclient, zk_presence_path)

            if down_reason is not None:
                _LOGGER.warning('Shutting down: %s', down_reason)
                # Blackout the server.
                zkutils.ensure_exists(
                    zkclient,
                    zk_blackout_path,
                    acl=[zkclient.make_host_acl(hostname, 'rwcda')],
                    data=down_reason)
                trigger_postmortem = True
            else:
                # Blacked out manually
                trigger_postmortem = bool(zkclient.exists(zk_blackout_path))

            if trigger_postmortem:
                postmortem.run(approot, root_cgroup)

        else:
            # Node was already blacked out.
            _LOGGER.warning('Shutting down blacked out node.')

        # This is the shutdown phase.

        # Delete the node
        if zk_presence_path:
            zkutils.ensure_deleted(zkclient, zk_presence_path)
        # Remove the listener before stopping the client.
        zkclient.remove_listener(stop_on_lost)
        zkclient.stop()
        zkclient.close()

        _cleanup_network()

        # to terminate all the running apps
        _blackout_terminate(tm_env)

        if exit_on_fail:
            utils.sys_exit(-1)
        else:
            # Sit forever in a broken state
            while True:
                time.sleep(1000000)
    def reboot_monitor(command):
        """Runs node reboot monitor.

        Watches this host's reboot node in Zookeeper. A reboot node created
        after the host came up is a reboot request; one created before is
        treated as a completed reboot and is deleted. Once a request is seen,
        the monitor waits for any blackout of the host to be lifted and then
        either execs ``command`` or - when the host has been up for too short
        a time - blacks the host out instead.

        :param command: the reboot command and its arguments.
        """
        reboot_cmd = list(command)
        _LOGGER.info('Initializing reboot monitor: %r', reboot_cmd)

        zkclient = context.GLOBAL.zk.conn
        zkclient.add_listener(zkutils.exit_on_lost)

        # Poll every 30 seconds until the reboots namespace exists.
        while not zkclient.exists(z.REBOOTS):
            _LOGGER.warning('%r node not created yet. Cell masters running?',
                            z.REBOOTS)
            time.sleep(30)

        hostname = sysinfo.hostname()
        up_since = sysinfo.up_since()

        _LOGGER.info('Server: %s, up since: %s', hostname, up_since)
        reboot_path = z.path.reboot(hostname)

        # Event set by the watch callback when a reboot is requested.
        reboot_trigger = zkclient.handler.event_object()
        reboot_trigger.clear()

        @zkclient.DataWatch(reboot_path)
        @utils.exit_on_unhandled
        def _watch_reboot(data, stat, event):
            """Watch reboot node."""
            # NOTE(review): every branch returns True, presumably to keep the
            # watch registered - confirm against the kazoo DataWatch docs.

            if data is None and event is None:
                _LOGGER.info('Reboot node does not exist, ignore.')
                return True

            elif event is not None and event.type == 'DELETED':
                _LOGGER.info('Reboot Node deleted, ignore.')
                return True

            # We have a reboot request node
            if stat.created > up_since:
                # The node is newer than the last boot: a pending request.
                _LOGGER.info('Reboot requested at: %s, up since: %s',
                             time.ctime(stat.created), time.ctime(up_since))

                reboot_trigger.set()
            else:
                # The node predates the last boot: the reboot already
                # happened, so the request node is removed.
                _LOGGER.info('Reboot success, requested at %s, up since: %s',
                             time.ctime(stat.created), time.ctime(up_since))

                _LOGGER.info('Deleting zknode: %r', reboot_path)
                zkutils.ensure_deleted(zkclient, reboot_path)
            return True

        # We now wait for the reboot trigger
        reboot_trigger.wait()

        # Actual reboot procedure below

        _LOGGER.info('service shutdown.')
        # Strictly speaking this is not enough for graceful shutdown.
        #
        # We need a proper shutdown procedure developed.

        _LOGGER.info('Checking blackout list.')
        zk_blackout_path = z.path.blackedout_server(hostname)
        # A blacked out node is not rebooted; re-check every 60 seconds.
        while zkclient.exists(zk_blackout_path):
            _LOGGER.info('Node blacked out - will wait.')
            time.sleep(60)

        if time.time() - up_since > _MIN_UPTIME_BEFORE_REBOOT:
            _LOGGER.info('exec: %r', reboot_cmd)
            utils.sane_execvp(reboot_cmd[0], reboot_cmd)
        else:
            # Up for less than the minimum uptime: black the node out rather
            # than reboot it again.
            _LOGGER.info('Possible reboot loop detected, blackout the node.')
            zkutils.ensure_exists(
                zkclient,
                zk_blackout_path,
                acl=[zkclient.make_host_acl(hostname, 'rwcda')],
                data='Possible reboot loop detected.')
Example #18
0
def reboot_server(zkclient, server_id):
    """Request a server reboot by ensuring its reboot node exists."""
    reboot_node = z.path.reboot(server_id)
    node_acl = [_SERVERS_ACL_DEL]
    zkutils.ensure_exists(zkclient, reboot_node, acl=node_acl)
Example #19
0
    def accept_cmd(tkt_spool_dir, approot, port, appname, endpoint, keytab):
        """Run ticket locker acceptor.

        Registers ``<hostname>:<port>`` under the app's tcp endpoint node in
        Zookeeper, stops the Zookeeper connection, writes the local endpoint
        spec and then execs into ``tkt_recv_v2``. A ``port`` of 0 means a
        port is picked automatically; a truthy ``keytab`` is passed to
        ``_construct_keytab`` first.
        """
        if keytab:
            _construct_keytab(keytab)

        if port == 0:
            # Bind a throwaway socket to port 0 and read back the port number
            # that was assigned. The socket is closed right away, so the
            # receiver started below has to bind the port again itself.
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.bind(('0.0.0.0', 0))
            port = sock.getsockname()[1]
            sock.close()

        hostname = sysinfo.hostname()
        hostport = '%s:%s' % (hostname, port)

        endpoint_proid_path = z.path.endpoint_proid(appname)
        acl = context.GLOBAL.zk.conn.make_servers_acl()
        _LOGGER.info(
            'Ensuring %s exists with ACL %r',
            endpoint_proid_path,
            acl
        )
        zkutils.ensure_exists(
            context.GLOBAL.zk.conn,
            endpoint_proid_path,
            acl=[acl]
        )

        endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
        _LOGGER.info('Registering %s %s', endpoint_path, hostport)

        # Need to delete/create endpoints for the discovery to pick it up in
        # case of master restart.
        #
        # Unlike typical endpoint, we cannot make the node ephemeral as we
        # exec into tkt-recv.
        zkutils.ensure_deleted(context.GLOBAL.zk.conn, endpoint_path)
        time.sleep(5)
        zkutils.put(context.GLOBAL.zk.conn, endpoint_path, hostport)

        # The Zookeeper connection is stopped before handing off to the
        # receiver.
        context.GLOBAL.zk.conn.stop()

        # TODO: this will publish information about the endpoint state
        #       under /discovery. Once discovery is refactored (if it will be)
        #       we can remove the "manual" zookeeper manipulation.
        tm_env = appenv.AppEnvironment(approot)
        endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
        # Drop the existing links for this endpoint before creating the spec.
        endpoints_mgr.unlink_all(
            appname=appname,
            endpoint=endpoint,
            proto='tcp'
        )
        endpoints_mgr.create_spec(
            appname=appname,
            endpoint=endpoint,
            proto='tcp',
            real_port=port,
            pid=os.getpid(),
            port=port,
            owner='/proc/{}'.format(os.getpid()),
        )

        subproc.safe_exec(['tkt_recv_v2',
                           '-p{}'.format(port),
                           '-d{}'.format(tkt_spool_dir)])
Example #20
0
def cell_insert_bucket(zkclient, bucket_id):
    """Add a bucket to the cell unless its node is already there.

    A ``cell`` event is created only when the bucket node had to be added.
    """
    bucket_node = z.path.cell(bucket_id)
    if zkclient.exists(bucket_node):
        return

    zkutils.ensure_exists(zkclient, bucket_node)
    create_event(zkclient, 0, 'cell', None)
Example #21
0
def reboot_server(zkclient, server_id):
    """Request a server reboot by ensuring its reboot node exists.

    The node is ensured with the ACL returned by
    ``zkclient.make_servers_del_acl()``.
    """
    reboot_node = z.path.reboot(server_id)
    del_acl = zkclient.make_servers_del_acl()
    zkutils.ensure_exists(zkclient, reboot_node, acl=[del_acl])
Example #22
0
    def top(exit_on_fail, zkid, approot):
        """Run treadmill init process.

        Connects to Zookeeper, reports readiness, waits until the server
        presence namespace and this host's server node exist, and then -
        unless the host is blacked out - starts the node and runs the main
        loop. When the main loop returns, the node is shut down: its presence
        node is deleted, the Zookeeper connection is closed, the network is
        cleaned up and the running apps are terminated.

        :param exit_on_fail: when true the process exits with -1 after the
            shutdown, otherwise it sleeps forever.
        :param zkid: passed as ``idpath`` to ``zkutils.connect``.
        :param approot: application root used to build the app environment.
        """
        _LOGGER.info('Initializing Treadmill: %s', approot)

        tm_env = appenv.AppEnvironment(approot)
        zkclient = zkutils.connect(context.GLOBAL.zk.url,
                                   idpath=zkid,
                                   listener=_exit_clear_watchdog_on_lost)

        utils.report_ready()

        # Poll every 30 seconds until the presence namespace exists.
        while not zkclient.exists(z.SERVER_PRESENCE):
            _LOGGER.warning('namespace not ready.')
            time.sleep(30)

        hostname = sysinfo.hostname()

        zk_blackout_path = z.path.blackedout_server(hostname)
        zk_presence_path = z.path.server_presence(hostname)
        zk_server_path = z.path.server(hostname)

        # Poll every 30 seconds until this host is defined in the cell.
        while not zkclient.exists(zk_server_path):
            _LOGGER.warning('server %s not defined in the cell.', hostname)
            time.sleep(30)

        _LOGGER.info('Checking blackout list.')
        blacklisted = bool(zkclient.exists(zk_blackout_path))

        if not blacklisted:
            # Node startup.
            _node_start(tm_env, zkclient, hostname, zk_server_path,
                        zk_presence_path)

            # Cleanup the watchdog directory
            tm_env.watchdogs.initialize()

            _init_network()

            _LOGGER.info('Ready.')

            # Returns once the node has to go down; a value other than None
            # is the reason stored in the blackout node below.
            down_reason = _main_loop(tm_env, zkclient, zk_presence_path)

            if down_reason is not None:
                _LOGGER.warning('Shutting down: %s', down_reason)

                # Blackout the server.
                zkutils.ensure_exists(
                    zkclient,
                    zk_blackout_path,
                    acl=[zkutils.make_host_acl(hostname, 'rwcda')],
                    data=down_reason)

        else:
            # Node was already blacked out.
            _LOGGER.warning('Shutting down blacked out node.')

        # This is the shutdown phase.

        # Delete the node
        zkutils.ensure_deleted(zkclient, zk_presence_path)
        # Remove the listener before stopping the client.
        zkclient.remove_listener(_exit_clear_watchdog_on_lost)
        zkclient.stop()
        zkclient.close()

        _cleanup_network()

        # to terminate all the running apps
        _blackout_terminate(tm_env)

        if exit_on_fail:
            utils.sys_exit(-1)
        else:
            # Sit forever in a broken state
            while True:
                time.sleep(1000000)
Example #23
0
 def _schedule_reboot(self, servername):
     """Request a reboot of a server by ensuring its reboot node exists."""
     reboot_node = z.path.reboot(servername)
     node_acl = [_SERVERS_ACL_DEL]
     zkutils.ensure_exists(self.zkclient, reboot_node, acl=node_acl)
Example #24
0
 def ensure_exists(self, path):
     """Ensure the storage path exists, using the ACL computed for it.

     Returns whatever ``zkutils.ensure_exists`` returns.
     """
     zkclient = self.zkclient
     path_acl = self._acl(path)
     return zkutils.ensure_exists(zkclient, path, acl=path_acl)
Example #25
0
    def accept(tkt_spool_dir, approot, port, appname, endpoint, use_v2,
               keytab):
        """Run ticket locker acceptor.

        Registers ``<hostname>:<port>`` under the app's tcp endpoint node in
        Zookeeper, stops the Zookeeper connection, writes the local endpoint
        spec and then execs into the ticket receiver (``tkt_recv_v2`` when
        ``use_v2`` is true, ``tkt_recv`` otherwise). A ``port`` of 0 means a
        port is picked automatically; a truthy ``keytab`` is passed to
        ``_construct_keytab`` first.
        """
        if keytab:
            _construct_keytab(keytab)

        if port == 0:
            # Bind a throwaway socket to port 0 and read back the port number
            # that was assigned. The socket is closed right away, so the
            # receiver started below has to bind the port again itself.
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.bind(('0.0.0.0', 0))
            port = sock.getsockname()[1]
            sock.close()

        hostname = sysinfo.hostname()
        hostport = '%s:%s' % (hostname, port)

        endpoint_proid_path = z.path.endpoint_proid(appname)
        acl = context.GLOBAL.zk.conn.make_servers_acl()
        _LOGGER.info(
            'Ensuring %s exists with ACL %r',
            endpoint_proid_path,
            acl
        )
        zkutils.ensure_exists(
            context.GLOBAL.zk.conn,
            endpoint_proid_path,
            acl=[acl]
        )

        endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
        _LOGGER.info('Registering %s %s', endpoint_path, hostport)

        # Need to delete/create endpoints for the discovery to pick it up in
        # case of master restart.
        #
        # Unlike typical endpoint, we cannot make the node ephemeral as we
        # exec into tkt-recv.
        zkutils.ensure_deleted(context.GLOBAL.zk.conn, endpoint_path)
        time.sleep(5)
        zkutils.put(context.GLOBAL.zk.conn, endpoint_path, hostport)

        # The Zookeeper connection is stopped before handing off to the
        # receiver.
        context.GLOBAL.zk.conn.stop()

        tm_env = appenv.AppEnvironment(approot)
        endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
        # Drop the existing links for this endpoint before creating the spec.
        endpoints_mgr.unlink_all(
            appname=appname,
            endpoint=endpoint,
            proto='tcp'
        )
        endpoints_mgr.create_spec(
            appname=appname,
            endpoint=endpoint,
            proto='tcp',
            real_port=port,
            pid=os.getpid(),
            port=port,
            owner='/proc/{}'.format(os.getpid()),
        )

        # Exec into tickets acceptor. If race condition will not allow it to
        # bind to the provided port, it will exit and registration will
        # happen again.
        if use_v2:
            subproc.safe_exec(['tkt_recv_v2',
                               '-p{}'.format(port),
                               '-d{}'.format(tkt_spool_dir)])
        else:
            subproc.safe_exec(['tkt_recv',
                               'tcp://*:{}'.format(port),
                               tkt_spool_dir])