コード例 #1
0
def _node_initialize(tm_env, runtime, zkclient, hostname,
                     zk_server_path, zk_presence_path):
    """Register this node in Zookeeper; intended for cold starts only.

    Collects local node information (including detected traits), merges it
    into the record stored at ``zk_server_path``, writes the merged record
    back and creates the presence node at ``zk_presence_path``.  On POSIX
    hosts the iptables rules are then initialized.

    Any exception raised along the way is logged and the Zookeeper client is
    stopped; the exception is not propagated to the caller.
    """
    try:
        detected = sysinfo.node_info(tm_env, runtime)
        detected['traits'] = traits.detect(
            zkutils.get(zkclient, z.path.traits())
        )

        # Overlay the freshly detected facts on top of whatever the
        # scheduler already stored for this server.
        node_info = zkutils.get(zkclient, zk_server_path)
        node_info.update(detected)
        _LOGGER.info('Registering node: %s: %s, %r',
                     zk_server_path, hostname, node_info)
        zkutils.update(zkclient, zk_server_path, node_info)

        # The presence node is requested as ephemeral and restricted to an
        # ACL built for this host.
        presence_acl = zkutils.make_host_acl(hostname, 'rwcda')
        _LOGGER.debug('host_acl: %r', presence_acl)
        zkutils.put(
            zkclient,
            zk_presence_path,
            {'seen': False},
            acl=[presence_acl],
            ephemeral=True,
        )

        # TODO: Fix the network initialization. Then the below can be part of
        # appenv.initialize()
        if os.name == 'posix':
            # Per the original note: the nat and mangle tables are assumed to
            # be managed by Treadmill alone, so they are flushed and the
            # static Treadmill rules are bulk loaded.
            external_ip = node_info['network']['external_ip']
            iptables.initialize(external_ip)

    except Exception:  # pylint: disable=W0703
        _LOGGER.exception('Node initialization failed')
        zkclient.stop()
コード例 #2
0
ファイル: init.py プロジェクト: sarveshsparab/treadmill
def _node_initialize(tm_env, runtime, zkclient, hostname, zk_server_path,
                     zk_presence_path):
    """Register this node in Zookeeper; intended for cold starts only.

    Merges locally collected node information into the record stored at
    ``zk_server_path``, writes it back, creates the presence node at
    ``zk_presence_path`` and finally runs ``tm_env.initialize`` with the
    merged record.

    Any exception is logged and answered by stopping the Zookeeper client;
    it is not re-raised.
    """
    try:
        # Overlay the freshly detected facts on top of the scheduler's data.
        detected = sysinfo.node_info(tm_env, runtime)
        node_info = zkutils.get(zkclient, zk_server_path)
        node_info.update(detected)
        _LOGGER.info('Registering node: %s: %s, %r',
                     zk_server_path, hostname, node_info)
        zkutils.update(zkclient, zk_server_path, node_info)

        # The presence node is requested as ephemeral and restricted to an
        # ACL built for this host.
        presence_acl = zkutils.make_host_acl(hostname, 'rwcda')
        _LOGGER.debug('host_acl: %r', presence_acl)
        zkutils.put(
            zkclient,
            zk_presence_path,
            {'seen': False},
            acl=[presence_acl],
            ephemeral=True,
        )

        # Hand the merged record to the local environment initialization.
        tm_env.initialize(node_info)

    except Exception:  # pylint: disable=W0703
        _LOGGER.exception('Node initialization failed')
        zkclient.stop()
コード例 #3
0
def create_server(zkclient, server_id, parent_id):
    """Create or refresh the Zookeeper definition of a server.

    Ensures the server node exists, records ``parent_id`` in its payload when
    one is given, and writes the payload back.  A ``servers`` event is
    created when ``zkutils.put`` returns a truthy value (presumably meaning
    the stored content changed -- confirm against ``zkutils.put``).
    """
    node_path = z.path.server(server_id)
    acl = zkutils.make_host_acl(server_id, 'rwcd')
    zkutils.ensure_exists(zkclient, node_path, acl=[acl])

    # zkutils.get return dict/tuple if need_metadata is true.
    #
    # pylint: disable=R0204
    payload = zkutils.get(zkclient, node_path)
    if parent_id:
        # An empty/missing payload is replaced by a fresh dict before the
        # parent is recorded.
        payload = payload or {}
        payload['parent'] = parent_id

    _LOGGER.info('Creating server node %s with data %r and ACL %r',
                 node_path, payload, acl)
    written = zkutils.put(
        zkclient,
        node_path,
        payload,
        acl=[acl],
        check_content=True,
    )
    if written:
        create_event(zkclient, 0, 'servers', [server_id])
コード例 #4
0
ファイル: endpoints.py プロジェクト: sattvic108/treadmill
 def __init__(self, endpoints_dir, zkclient, instance):
     """Store the collaborators and set up the initial bookkeeping.

     The host name is read once from ``sysinfo`` and reused to build the
     host ACL kept in ``node_acl``.
     """
     # Caller-supplied collaborators.
     self.endpoints_dir = endpoints_dir
     self.zkclient = zkclient
     self.instance = instance

     # Initial bookkeeping: flagged up to date, with an empty state set.
     self.up_to_date = True
     self.state = set()

     # Host identity and the ACL derived from it.
     local_host = sysinfo.hostname()
     self.hostname = local_host
     self.node_acl = zkutils.make_host_acl(local_host, 'rwcd')
コード例 #5
0
ファイル: endpoints.py プロジェクト: sattvic108/treadmill
 def __init__(self, endpoints_dir, zkclient, scan_interval, instance=None):
     """Store the collaborators and set up the initial bookkeeping.

     The host name is read once from ``sysinfo`` and reused to build the
     host ACL kept in ``node_acl``.
     """
     # Caller-supplied settings.
     self.endpoints_dir = endpoints_dir
     self.zkclient = zkclient
     self.scan_interval = scan_interval
     self.instance = instance

     # Host identity and the ACL derived from it.
     local_host = sysinfo.hostname()
     self.hostname = local_host
     self.node_acl = zkutils.make_host_acl(local_host, 'rwcd')

     # State starts empty; missing keys default to a new dict.
     self.state = collections.defaultdict(dict)
コード例 #6
0
def _blackout_server(zkclient, server, reason):
    """Mark ``server`` as blacked out and kill its presence node.

    Raises ``click.UsageError`` when ``reason`` is empty.  Otherwise ensures
    the blackout node exists (storing ``str(reason)`` as its data) and then
    calls ``presence.kill_node`` for the server.
    """
    if not reason:
        raise click.UsageError('--reason is required.')

    blackout_node = z.path.blackedout_server(server)
    host_acl = zkutils.make_host_acl(server, 'rwcda')
    zkutils.ensure_exists(
        zkclient,
        blackout_node,
        acl=[host_acl],
        data=str(reason),
    )
    presence.kill_node(zkclient, server)
コード例 #7
0
ファイル: presence.py プロジェクト: cesaragarcia/treadmill
def register_server(zkclient, hostname, node_info):
    """Publish ``node_info`` for ``hostname`` and create its presence node.

    The stored server record is read, updated with ``node_info`` and written
    back.  A presence node is then requested with ``ephemeral=True`` and
    ``sequence=True`` under the path built from ``hostname + '#'``.

    Returns whatever ``zkutils.put`` returns for the presence node.
    """
    node_path = z.path.server(hostname)

    # Overlay the supplied info on the record already in Zookeeper.
    merged = zkutils.get(zkclient, node_path)
    merged.update(node_info)
    _LOGGER.info('Registering server %s: %r', hostname, merged)
    zkutils.update(zkclient, node_path, merged)

    presence_acl = zkutils.make_host_acl(hostname, 'rwcda')
    presence_prefix = z.path.server_presence(hostname + '#')
    return zkutils.put(
        zkclient,
        presence_prefix,
        {'seen': False},
        acl=[presence_acl],
        ephemeral=True,
        sequence=True,
    )
コード例 #8
0
ファイル: init.py プロジェクト: gaocegege/treadmill
def _node_initialize(tm_env, zkclient, hostname, zk_server_path,
                     zk_presence_path):
    """Register this node in Zookeeper; intended for cold starts only.

    Initializes the local environment first, then merges locally collected
    node information into the record stored at ``zk_server_path``, writes it
    back and creates the presence node at ``zk_presence_path``.  Exceptions
    are not handled here and propagate to the caller.
    """
    tm_env.initialize()
    detected = sysinfo.node_info(tm_env)

    # XXX: Why a get/update dance instead of set
    node_info = zkutils.get(zkclient, zk_server_path)
    node_info.update(detected)
    _LOGGER.info('Registering node: %s: %s, %r',
                 zk_server_path, hostname, node_info)
    zkutils.update(zkclient, zk_server_path, node_info)

    # The presence node is requested as ephemeral and restricted to an ACL
    # built for this host.
    presence_acl = zkutils.make_host_acl(hostname, 'rwcda')
    _LOGGER.debug('host_acl: %r', presence_acl)
    zkutils.put(
        zkclient,
        zk_presence_path,
        {'seen': False},
        acl=[presence_acl],
        ephemeral=True,
    )
コード例 #9
0
ファイル: masterapi.py プロジェクト: sattvic108/treadmill
def create_server(zkclient, server_id, parent_id, partition):
    """Creates server definition in Zookeeper.

    Ensures the server node exists, records ``parent_id`` and ``partition``
    in its payload and writes the payload back.  A ``servers`` event is
    created when ``zkutils.put`` returns a truthy value.

    :param zkclient: Zookeeper client handed to the ``zkutils`` helpers.
    :param server_id: Server name; used for the node path and the host ACL.
    :param parent_id: Value stored under the ``parent`` key.
    :param partition: Value stored under the ``partition`` key.
    """
    server_node = z.path.server(server_id)
    server_acl = zkutils.make_host_acl(server_id, 'rwcd')

    zkutils.ensure_exists(zkclient, server_node, acl=[server_acl])

    # A node that carries no payload yet must not break the update below, so
    # fall back to an empty dict when nothing usable is read back.
    # NOTE(review): assumes zkutils.get yields a falsy value (e.g. None) for
    # an empty node -- confirm against zkutils.get.
    data = zkutils.get(zkclient, server_node) or {}
    data.update({
        'parent': parent_id,
        'partition': partition,
    })

    _LOGGER.info('Creating server node %s with data %r and ACL %r',
                 server_node, data, server_acl)
    if zkutils.put(zkclient,
                   server_node,
                   data,
                   acl=[server_acl],
                   check_content=True):
        create_event(zkclient, 0, 'servers', [server_id])
コード例 #10
0
    def top(exit_on_fail, zkid, approot):
        """Run treadmill init process.

        Connects to Zookeeper, waits until the server presence namespace and
        this host's server node exist, and then -- unless the host is already
        blacked out -- starts the node and runs the main loop.  When the main
        loop returns a reason, the host is blacked out with that reason.

        The shutdown phase always follows: the presence node is deleted, the
        Zookeeper client is stopped and closed, the network is cleaned up and
        running apps are terminated.

        :param exit_on_fail: When truthy, exit the process with ``-1`` after
            shutdown; otherwise sleep forever in the broken state.
        :param zkid: Passed to ``zkutils.connect`` as ``idpath``.
        :param approot: Root directory used to build the ``AppEnvironment``.
        """
        _LOGGER.info('Initializing Treadmill: %s', approot)

        tm_env = appenv.AppEnvironment(approot)
        zkclient = zkutils.connect(context.GLOBAL.zk.url,
                                   idpath=zkid,
                                   listener=_exit_clear_watchdog_on_lost)

        utils.report_ready()

        # Poll until the presence namespace has been created.
        while not zkclient.exists(z.SERVER_PRESENCE):
            _LOGGER.warning('namespace not ready.')
            time.sleep(30)

        hostname = sysinfo.hostname()

        zk_blackout_path = z.path.blackedout_server(hostname)
        zk_presence_path = z.path.server_presence(hostname)
        zk_server_path = z.path.server(hostname)

        # Poll until this host has been defined as a server in the cell.
        while not zkclient.exists(zk_server_path):
            _LOGGER.warning('server %s not defined in the cell.', hostname)
            time.sleep(30)

        _LOGGER.info('Checking blackout list.')
        blacklisted = bool(zkclient.exists(zk_blackout_path))

        if not blacklisted:
            # Node startup.
            _node_start(tm_env, zkclient, hostname, zk_server_path,
                        zk_presence_path)

            # Cleanup the watchdog directory
            tm_env.watchdogs.initialize()

            _init_network()

            _LOGGER.info('Ready.')

            down_reason = _main_loop(tm_env, zkclient, zk_presence_path)

            if down_reason is not None:
                _LOGGER.warning('Shutting down: %s', down_reason)

                # Blackout the server.
                zkutils.ensure_exists(
                    zkclient,
                    zk_blackout_path,
                    acl=[zkutils.make_host_acl(hostname, 'rwcda')],
                    data=down_reason)

        else:
            # Node was already blacked out.
            _LOGGER.warning('Shutting down blacked out node.')

        # This is the shutdown phase.

        # Delete the node
        zkutils.ensure_deleted(zkclient, zk_presence_path)
        # Drop the listener before stopping the client.
        # NOTE(review): presumably so the deliberate disconnect does not
        # trigger it -- confirm.
        zkclient.remove_listener(_exit_clear_watchdog_on_lost)
        zkclient.stop()
        zkclient.close()

        _cleanup_network()

        # Terminate all the running apps.
        _blackout_terminate(tm_env)

        if exit_on_fail:
            utils.sys_exit(-1)
        else:
            # Sit forever in a broken state
            while True:
                time.sleep(1000000)
コード例 #11
0
    def reboot_monitor(command):
        """Runs node reboot monitor.

        Watches this host's reboot node in Zookeeper.  Once a reboot request
        newer than the host's boot time is seen, the monitor waits for any
        blackout of the host to clear and then either executes ``command``
        or, when the host has been up for no longer than
        ``_MIN_UPTIME_BEFORE_REBOOT``, blacks the host out instead.

        :param command: Reboot command as an iterable of arguments.
        """
        reboot_cmd = list(command)
        _LOGGER.info('Initializing reboot monitor: %r', reboot_cmd)

        zkclient = context.GLOBAL.zk.conn
        zkclient.add_listener(zkutils.exit_on_lost)

        # Poll until the reboots namespace has been created.
        while True:
            if zkclient.exists(z.REBOOTS):
                break
            _LOGGER.warning('%r node not created yet. Cell masters running?',
                            z.REBOOTS)
            time.sleep(30)

        hostname = sysinfo.hostname()
        up_since = sysinfo.up_since()
        _LOGGER.info('Server: %s, up since: %s', hostname, up_since)

        reboot_path = z.path.reboot(hostname)

        # Event the watcher sets once a genuine reboot request is seen.
        reboot_trigger = zkclient.handler.event_object()
        reboot_trigger.clear()

        @zkclient.DataWatch(reboot_path)
        @utils.exit_on_unhandled
        def _watch_reboot(data, stat, event):
            """Watch reboot node."""
            # Every path returns True.
            # NOTE(review): presumably this keeps the data watch registered
            # -- confirm against the DataWatch contract.
            if event is None:
                if data is None:
                    _LOGGER.info('Reboot node does not exist, ignore.')
                    return True
            elif event.type == 'DELETED':
                _LOGGER.info('Reboot Node deleted, ignore.')
                return True

            # We have a reboot request node
            requested_after_boot = stat.created > up_since
            requested_at = time.ctime(stat.created)
            booted_at = time.ctime(up_since)

            if requested_after_boot:
                _LOGGER.info('Reboot requested at: %s, up since: %s',
                             requested_at, booted_at)
                reboot_trigger.set()
                return True

            # The request predates the current boot, so it was already
            # satisfied; remove the stale request node.
            _LOGGER.info('Reboot success, requested at %s, up since: %s',
                         requested_at, booted_at)
            _LOGGER.info('Deleting zknode: %r', reboot_path)
            zkutils.ensure_deleted(zkclient, reboot_path)
            return True

        # We now wait for the reboot trigger
        reboot_trigger.wait()

        # Actual reboot procedure below

        _LOGGER.info('service shutdown.')
        # Strictly speaking this is not enough for graceful shutdown.
        #
        # We need a proper shutdown procedure developed.

        _LOGGER.info('Checking blackout list.')
        zk_blackout_path = z.path.blackedout_server(hostname)
        while True:
            if not zkclient.exists(zk_blackout_path):
                break
            _LOGGER.info('Node blacked out - will wait.')
            time.sleep(60)

        uptime = time.time() - up_since
        if uptime > _MIN_UPTIME_BEFORE_REBOOT:
            _LOGGER.info('exec: %r', reboot_cmd)
            utils.sane_execvp(reboot_cmd[0], reboot_cmd)
        else:
            # Rebooting again this soon after boot suggests a reboot loop;
            # black the node out instead of rebooting.
            _LOGGER.info('Possible reboot loop detected, blackout the node.')
            blackout_acl = zkutils.make_host_acl(hostname, 'rwcda')
            zkutils.ensure_exists(
                zkclient,
                zk_blackout_path,
                acl=[blackout_acl],
                data='Possible reboot loop detected.')