コード例 #1
0
    def _cache(self, zkclient, app):
        """Read the manifest from Zk and store it as YAML in <cache>/<app>.

        The scheduled manifest is merged with this host's placement data
        (when the placement node holds any) and dumped to a temporary file
        in the cache directory, which is then renamed to the final manifest
        file. If the dump or the rename fails, the temporary file is removed
        before the error propagates.

        :param zkclient:
            Zookeeper client handed to ``zkutils.get``.
        :param ``str`` app:
            Instance name; everything after the first ``#`` is stored as the
            manifest ``task``.
        """
        appnode = z.path.scheduled(app)
        placement_node = z.path.placement(self._hostname, app)
        try:
            manifest = zkutils.get(zkclient, appnode)
            # TODO: need a function to parse instance id from name.
            manifest['task'] = app[app.index('#') + 1:]

            placement_info = zkutils.get(zkclient, placement_node)
            if placement_info is not None:
                manifest.update(placement_info)

            manifest_file = os.path.join(self.tm_env.cache_dir, app)
            temp_manifest = tempfile.NamedTemporaryFile(
                dir=self.tm_env.cache_dir,
                prefix='.%s-' % app,
                delete=False,
                mode='w'
            )
            renamed = False
            try:
                with temp_manifest:
                    yaml.dump(manifest, stream=temp_manifest)
                os.rename(temp_manifest.name, manifest_file)
                renamed = True
            finally:
                if not renamed:
                    # The file was created with delete=False, so it has to be
                    # removed explicitly or it lingers in the cache directory.
                    try:
                        os.unlink(temp_manifest.name)
                    except OSError:
                        pass
            _LOGGER.info('Created cache manifest: %s', manifest_file)

        except kazoo.exceptions.NoNodeError:
            _LOGGER.warning('App %r not found', app)
コード例 #2
0
    def endpoints():
        """Show endpoints and their status.

        Reads the per-host discovery state and the per-host discovery
        entries from Zookeeper, pairs every discovered endpoint with the
        state recorded for its port (``None`` when there is none) and prints
        the merged list through the ``endpoint`` formatter.
        """
        zkclient = context.GLOBAL.zk.conn
        discovery_state = zkclient.get_children(z.DISCOVERY_STATE)
        state = collections.defaultdict(dict)
        for hostname in discovery_state:
            # A node without content is read back as None; treat it as a host
            # with no recorded port state.
            state[hostname] = zkutils.get(
                zkclient, z.path.discovery_state(hostname)
            ) or {}

        discovery = zkclient.get_children(z.DISCOVERY)
        all_endpoints = []
        for hostname in discovery:
            host_endpoints = []
            entries = zkutils.get(zkclient, z.path.discovery(hostname)) or []
            for entry in entries:
                app, endpoint, proto, port = entry.split(':')
                port = int(port)
                endpoint_state = state[hostname].get(port)
                hostport = '{}:{}'.format(hostname, port)
                host_endpoints.append(
                    (app, proto, endpoint, hostport, endpoint_state))
            # heapq.merge only yields sorted output when every input is
            # already sorted. The state is left out of the sort key because
            # it may be None and would not compare against other values.
            host_endpoints.sort(key=lambda item: item[:4])
            all_endpoints.append(host_endpoints)

        merged = heapq.merge(*all_endpoints)

        formatter = cli.make_formatter('endpoint')
        cli.out(
            formatter([{
                'name': name,
                'endpoint': endpoint,
                'proto': proto,
                'hostport': hostport,
                'state': endpoint_state,
            } for name, proto, endpoint, hostport, endpoint_state in merged]))
コード例 #3
0
ファイル: presence.py プロジェクト: cesaragarcia/treadmill
def kill_node(zkclient, node):
    """Remove the presence of a node and of the apps placed on it.

    Returns early when the server node does not exist. Otherwise, for every
    child of the node's placement, the running state and the endpoints of
    the app are unregistered (apps that are no longer scheduled are
    skipped), and finally the server itself is unregistered.
    """
    _LOGGER.info('killing node: %s', node)
    try:
        # Only probing for existence; the content is not used.
        zkutils.get(zkclient, z.path.server(node))
    except kazoo.client.NoNodeError:
        _LOGGER.info('node does not exist.')
        return

    for app in zkclient.get_children(z.path.placement(node)):
        _LOGGER.info('removing app presence: %s', app)
        try:
            presence = EndpointPresence(
                zkclient,
                zkutils.get(zkclient, z.path.scheduled(app)),
                hostname=node,
                appname=app
            )
            presence.unregister_running()
            presence.unregister_endpoints()
        except kazoo.client.NoNodeError:
            _LOGGER.info('app %s no longer scheduled.', app)

    _LOGGER.info('removing server presence: %s', node)
    unregister_server(zkclient, node)
コード例 #4
0
ファイル: eventmgr.py プロジェクト: sarveshsparab/treadmill
    def _cache(self, zkclient, app):
        """Fetch the app manifest from Zk and cache it as <cache>/<app>.

        The scheduled manifest is combined with this host's placement data
        (when the placement node holds any) and written as YAML through
        ``fs.write_safe``. A missing Zookeeper node is logged as a warning.
        """
        scheduled_path = z.path.scheduled(app)
        placement_path = z.path.placement(self._hostname, app)
        try:
            manifest = zkutils.get(zkclient, scheduled_path)
            # TODO: need a function to parse instance id from name.
            instance_start = app.index('#') + 1
            manifest['task'] = app[instance_start:]

            placement = zkutils.get(zkclient, placement_path)
            if placement is not None:
                manifest.update(placement)

            def _dump(stream):
                """Serialize the merged manifest into the given stream."""
                return yaml.dump(manifest, stream=stream)

            cache_path = os.path.join(self.tm_env.cache_dir, app)
            fs.write_safe(
                cache_path,
                _dump,
                prefix='.%s-' % app,
                mode='w',
                permission=0o644
            )
            _LOGGER.info('Created cache manifest: %s', cache_path)

        except kazoo.exceptions.NoNodeError:
            _LOGGER.warning('App %r not found', app)
コード例 #5
0
def _node_initialize(tm_env, runtime, zkclient, hostname,
                     zk_server_path, zk_presence_path):
    """Register this node in Zookeeper; meant for a cold start only.

    Detects the node information and traits, merges them into the data
    already stored at ``zk_server_path``, creates the ephemeral presence
    node and, on posix systems, initializes iptables. Any failure is logged
    and the Zookeeper client is stopped.
    """
    try:
        detected_info = sysinfo.node_info(tm_env, runtime)
        detected_info['traits'] = traits.detect(
            zkutils.get(zkclient, z.path.traits())
        )

        # What is already stored for the server is kept; detected values
        # overwrite it on conflicting keys.
        server_data = zkutils.get(zkclient, zk_server_path)
        server_data.update(detected_info)
        _LOGGER.info('Registering node: %s: %s, %r',
                     zk_server_path, hostname, server_data)
        zkutils.update(zkclient, zk_server_path, server_data)

        presence_acl = zkutils.make_host_acl(hostname, 'rwcda')
        _LOGGER.debug('host_acl: %r', presence_acl)
        zkutils.put(
            zkclient,
            zk_presence_path,
            {'seen': False},
            acl=[presence_acl],
            ephemeral=True
        )

        # TODO: Fix the network initialization. Then the below can be part of
        # appenv.initialize()
        if os.name == 'posix':
            # Flush the iptables nat and mangle tables (Treadmill is assumed
            # to be their only manager) and bulk load the static rules.
            external_ip = server_data['network']['external_ip']
            iptables.initialize(external_ip)

    except Exception:  # pylint: disable=W0703
        _LOGGER.exception('Node initialization failed')
        zkclient.stop()
コード例 #6
0
    def test_get(self):
        """Check how zkutils.get decodes the payload of a node."""
        zkclient = treadmill.zkutils.ZkClient()

        # Well-formed YAML is parsed.
        treadmill.zkutils.ZkClient.get.return_value = ('{xxx: 123}', None)
        parsed = zkutils.get(zkclient, '/foo')
        self.assertEqual({'xxx': 123}, parsed)

        # Malformed YAML: returned untouched when not strict, raises
        # otherwise.
        treadmill.zkutils.ZkClient.get.return_value = ('{xxx: 123', None)
        lenient = zkutils.get(zkclient, '/foo', strict=False)
        self.assertEqual('{xxx: 123', lenient)
        with self.assertRaises(yaml.YAMLError):
            zkutils.get(zkclient, '/foo')

        # No payload at all.
        treadmill.zkutils.ZkClient.get.return_value = (None, None)
        self.assertIsNone(zkutils.get(zkclient, '/foo'))
コード例 #7
0
ファイル: sysapp.py プロジェクト: ywong587/treadmill
def _get_identity_group(app):
    """Return the ``count`` stored in the identity group node of ``app``.
    """
    conn = context.GLOBAL.zk.conn
    return zkutils.get(conn, z.path.identity_group(app))['count']
コード例 #8
0
ファイル: sysapp.py プロジェクト: ywong587/treadmill
def _get_appmonitor(app):
    """Return the ``count`` stored in the app monitor node of ``app``.
    """
    conn = context.GLOBAL.zk.conn
    return zkutils.get(conn, z.path.appmonitor(app))['count']
コード例 #9
0
ファイル: init.py プロジェクト: sarveshsparab/treadmill
def _node_initialize(tm_env, runtime, zkclient, hostname, zk_server_path,
                     zk_presence_path):
    """Register this node in Zookeeper; meant for a cold start only.

    Merges the detected node information into the data already stored at
    ``zk_server_path``, creates the ephemeral presence node and then runs
    the local ``tm_env`` initialization with the merged data. Any failure is
    logged and the Zookeeper client is stopped.
    """
    try:
        detected_info = sysinfo.node_info(tm_env, runtime)

        # What is already stored for the server is kept; detected values
        # overwrite it on conflicting keys.
        server_data = zkutils.get(zkclient, zk_server_path)
        server_data.update(detected_info)
        _LOGGER.info('Registering node: %s: %s, %r',
                     zk_server_path, hostname, server_data)
        zkutils.update(zkclient, zk_server_path, server_data)

        presence_acl = zkutils.make_host_acl(hostname, 'rwcda')
        _LOGGER.debug('host_acl: %r', presence_acl)
        zkutils.put(
            zkclient,
            zk_presence_path,
            {'seen': False},
            acl=[presence_acl],
            ephemeral=True
        )

        # Local node initialization comes last, fed with the merged data.
        tm_env.initialize(server_data)

    except Exception:  # pylint: disable=W0703
        _LOGGER.exception('Node initialization failed')
        zkclient.stop()
コード例 #10
0
def create_server(zkclient, server_id, parent_id):
    """Create the server definition node in Zookeeper.

    Ensures the node exists with the host ACL, sets ``parent`` in its data
    when ``parent_id`` is given, and creates a ``servers`` event when
    ``zkutils.put`` returns a truthy result.
    """
    server_node = z.path.server(server_id)
    server_acl = zkutils.make_host_acl(server_id, 'rwcd')
    zkutils.ensure_exists(zkclient, server_node, acl=[server_acl])

    # zkutils.get return dict/tuple if need_metadata is true.
    #
    # pylint: disable=R0204
    data = zkutils.get(zkclient, server_node)
    if parent_id:
        if data:
            data['parent'] = parent_id
        else:
            data = {'parent': parent_id}

    _LOGGER.info('Creating server node %s with data %r and ACL %r',
                 server_node, data, server_acl)
    stored = zkutils.put(
        zkclient,
        server_node,
        data,
        acl=[server_acl],
        check_content=True
    )
    if stored:
        create_event(zkclient, 0, 'servers', [server_id])
コード例 #11
0
def update_server_features(zkclient, server_id, features):
    """Store ``features`` in the server node.

    A ``servers`` event is created when ``zkutils.update`` returns a truthy
    result.
    """
    server_node = z.path.server(server_id)
    server = zkutils.get(zkclient, server_node)
    server['features'] = features

    updated = zkutils.update(zkclient, server_node, server,
                             check_content=True)
    if updated:
        create_event(zkclient, 0, 'servers', [server_id])
コード例 #12
0
def update_server_parent(zkclient, server_id, parent_id):
    """Store ``parent_id`` as the ``parent`` of the server node.

    A ``servers`` event is created when ``zkutils.update`` returns a truthy
    result.
    """
    server_node = z.path.server(server_id)
    server = zkutils.get(zkclient, server_node)
    server['parent'] = parent_id

    updated = zkutils.update(zkclient, server_node, server,
                             check_content=True)
    if updated:
        create_event(zkclient, 0, 'servers', [server_id])
コード例 #13
0
def update_server_attrs(zkclient, server_id, partition):
    """Store ``partition`` in the server node.

    A ``servers`` event is created when ``zkutils.update`` returns a truthy
    result.
    """
    server_node = z.path.server(server_id)
    server = zkutils.get(zkclient, server_node)
    server['partition'] = partition

    updated = zkutils.update(zkclient, server_node, server,
                             check_content=True)
    if updated:
        create_event(zkclient, 0, 'servers', [server_id])
コード例 #14
0
ファイル: masterapi.py プロジェクト: sattvic108/treadmill
def get_server(zkclient, server_id, placement=False):
    """Return the data of a server node.

    With ``placement`` set, the data of the server's placement node (an
    empty dict by default) is merged into the result.
    """
    server = zkutils.get(zkclient, z.path.server(server_id))
    if not placement:
        return server

    server.update(
        zkutils.get_default(zkclient, z.path.placement(server_id), {})
    )
    return server
コード例 #15
0
    def reload_server(self, servername):
        """Reload individual server.

        A server that was never loaded is simply loaded. Otherwise its
        definition is read again from Zookeeper and compared with the loaded
        one: when nothing changed only ``valid_until`` is refreshed, when
        something changed the server is removed and loaded from scratch. A
        server whose node is missing or holds no data is removed.

        :param servername:
            Name of the server to reload.
        """
        _LOGGER.info('reloading server: %s', servername)
        if servername not in self.servers:
            # This server was never loaded.
            self.load_server(servername)
            return

        current_server = self.servers[servername]
        # Check if server is same
        try:
            data = zkutils.get(self.zkclient, z.path.server(servername))
            if not data:
                # The server is configured, but never reported its capacity.
                self.remove_server(servername)
                return

            # TODO: need better error handling.
            assert 'parent' in data
            assert data['parent'] in self.buckets

            # TODO: seems like this is cut/paste code from load_server.
            label = data.get('partition')
            if not label:
                label = admin.DEFAULT_PARTITION
            up_since = data.get('up_since', time.time())

            partition = self.cell.partitions[label]
            server = scheduler.Server(
                servername,
                resources(data),
                valid_until=partition.valid_until(up_since),
                label=label,
                traits=data.get('traits', 0))

            parent = self.buckets[data['parent']]
            # TODO: assume that bucket topology is constant, e.g.
            #                rack can never change building. If this does not
            #                hold, comparing parents is not enough, need to
            #                compare recursively all the way up.
            if (current_server.is_same(server)
                    and current_server.parent == parent):
                # Nothing changed, no need to update anything.
                _LOGGER.info('server is same, keeping old.')
                current_server.valid_until = server.valid_until
            else:
                # Something changed - clear everything and re-register server
                # as new.
                _LOGGER.info('server modified, replacing.')
                self.remove_server(servername)
                self.load_server(servername)

        except kazoo.client.NoNodeError:
            self.remove_server(servername)
            # Logger.warn is a deprecated alias of Logger.warning.
            _LOGGER.warning('Server node not found: %s', servername)
コード例 #16
0
def get_appmonitor(zkclient, monitor_id, raise_notfound=False):
    """Return the app monitor data, tagged with its id under ``_id``.

    When the monitor node does not exist, the error is re-raised if
    ``raise_notfound`` is set and ``None`` is returned otherwise.
    """
    try:
        monitor = zkutils.get(zkclient, z.path.appmonitor(monitor_id))
        monitor['_id'] = monitor_id
        return monitor
    except kazoo.client.NoNodeError:
        _LOGGER.info('App monitor does not exist: %s', monitor_id)
        if not raise_notfound:
            return None
        raise
コード例 #17
0
    def load_partition(self, partition):
        """Load partition.

        Reads the partition node and registers a ``scheduler.Partition``
        built from its ``server_uptime``, ``max_lease`` and ``threshold``
        values under ``self.cell.partitions``. A missing node is logged as
        a warning and nothing is registered.

        :param partition:
            Name of the partition to load.
        """
        try:
            data = zkutils.get(self.zkclient, z.path.partition(partition))
            self.cell.partitions[partition] = scheduler.Partition(
                max_server_uptime=data.get('server_uptime'),
                max_lease=data.get('max_lease'),
                threshold=data.get('threshold'),
            )

        except kazoo.client.NoNodeError:
            # Logger.warn is a deprecated alias of Logger.warning.
            _LOGGER.warning('Partition node not found: %s', partition)
コード例 #18
0
ファイル: masterapi.py プロジェクト: rlonstein-ms/treadmill
def update_server_capacity(zkclient, server_id,
                           memory=None, cpu=None, disk=None):
    """Store the given capacity values in the server node.

    Only truthy values among ``memory``, ``cpu`` and ``disk`` are written.
    A ``servers`` event is created when ``zkutils.update`` returns a truthy
    result.
    """
    server_node = z.path.server(server_id)
    server = zkutils.get(zkclient, server_node)

    capacity = (('memory', memory), ('cpu', cpu), ('disk', disk))
    for resource, amount in capacity:
        if amount:
            server[resource] = amount

    updated = zkutils.update(zkclient, server_node, server,
                             check_content=True)
    if updated:
        create_event(zkclient, 0, 'servers', [server_id])
コード例 #19
0
    def _cache(self, zkclient, app, check_existing=False):
        """Cache the manifest and placement data of an app as YAML in
        <cache>/<app>.

        Nothing is written when the placement node is missing, when the
        scheduled app node is missing, or when ``check_existing`` is set and
        the cached file is at least as recent as the placement.

        :param ``str`` app:
            Instance name.
        :param ``bool`` check_existing:
            Whether to check if the file already exists and is up to date.
        """
        placement_path = z.path.placement(self._hostname, app)
        try:
            placement, placement_meta = zkutils.get_with_metadata(
                zkclient, placement_path
            )
            # The node ctime is divided by 1000 before it is compared with
            # the file ctime below.
            placed_at = placement_meta.ctime / 1000.0
        except kazoo.exceptions.NoNodeError:
            _LOGGER.info('Placement %s/%s not found', self._hostname, app)
            return

        cache_path = os.path.join(self.tm_env.cache_dir, app)
        if check_existing:
            try:
                cached_at = os.stat(cache_path).st_ctime
            except FileNotFoundError:
                # No cached file yet, fall through and create it.
                pass
            else:
                if cached_at and cached_at >= placed_at:
                    _LOGGER.info('%s is up to date', cache_path)
                    return

        scheduled_path = z.path.scheduled(app)
        try:
            manifest = zkutils.get(zkclient, scheduled_path)
            # TODO: need a function to parse instance id from name.
            instance_start = app.index('#') + 1
            manifest['task'] = app[instance_start:]

            if placement is not None:
                manifest.update(placement)

            def _dump(stream):
                """Serialize the merged manifest into the given stream."""
                return yaml.dump(manifest, stream=stream)

            fs.write_safe(
                cache_path,
                _dump,
                prefix='.%s-' % app,
                mode='w',
                permission=0o644
            )
            _LOGGER.info('Created cache manifest: %s', cache_path)

        except kazoo.exceptions.NoNodeError:
            _LOGGER.info('App %s not found', app)
コード例 #20
0
ファイル: masterapi.py プロジェクト: rlonstein-ms/treadmill
def get_appmonitor(zkclient, monitor_id,
                   raise_notfound=False, suspended_monitors=None):
    """Return the app monitor data with ``_id`` and ``suspend_until`` set.

    ``suspended_monitors`` is looked up through
    ``get_suspended_appmonitors`` when it is not supplied. When the monitor
    node does not exist, the error is re-raised if ``raise_notfound`` is set
    and ``None`` is returned otherwise.
    """
    try:
        monitor = zkutils.get(zkclient, z.path.appmonitor(monitor_id))
        monitor['_id'] = monitor_id

        suspended = suspended_monitors
        if suspended is None:
            suspended = get_suspended_appmonitors(zkclient)
        monitor['suspend_until'] = suspended.get(monitor_id)

        return monitor
    except kazoo.client.NoNodeError:
        _LOGGER.info('App monitor does not exist: %s', monitor_id)
        if not raise_notfound:
            return None
        raise
コード例 #21
0
    def load_server(self, servername, readonly=False):
        """Load individual server.

        Reads the server node, builds a ``scheduler.Server`` from it and
        attaches it to its parent bucket. Nothing is loaded when the node is
        missing, holds no data, or names a parent bucket that does not
        exist.

        :param servername:
            Name of the server to load.
        :param ``bool`` readonly:
            When set, the placement node of the server is not created; the
            flag is also passed on to ``adjust_server_state``.
        """
        try:
            data = zkutils.get(self.zkclient, z.path.server(servername))
            if not data:
                # The server is configured, but never reported its capacity.
                _LOGGER.info('No capacity detected: %s',
                             z.path.server(servername))
                return

            assert 'parent' in data
            parentname = data['parent']
            label = data.get('partition')
            if not label:
                # TODO: it will be better to have separate module for constants
                #       and avoid unnecessary cross imports.
                label = admin.DEFAULT_PARTITION
            up_since = data.get('up_since', int(time.time()))

            partition = self.cell.partitions[label]
            server = scheduler.Server(
                servername,
                resources(data),
                valid_until=partition.valid_until(up_since),
                label=label,
                traits=data.get('traits', 0))

            parent = self.buckets.get(parentname)
            if not parent:
                # Logger.warn is a deprecated alias of Logger.warning.
                _LOGGER.warning('Server parent does not exist: %s/%s',
                                servername, parentname)
                return

            parent.add_node(server)
            self.servers[servername] = server
            assert server.parent == parent

            if not readonly:
                zkutils.ensure_exists(self.zkclient,
                                      z.path.placement(servername),
                                      acl=[_SERVERS_ACL])

            self.adjust_server_state(servername, readonly)

        except kazoo.client.NoNodeError:
            _LOGGER.warning('Server node not found: %s', servername)
コード例 #22
0
ファイル: presence.py プロジェクト: cesaragarcia/treadmill
def register_server(zkclient, hostname, node_info):
    """Register a server and create its presence node.

    ``node_info`` is merged into the data already stored in the server node,
    then an ephemeral, sequential presence node is created with the host
    ACL. Returns the result of that ``zkutils.put`` call.
    """
    server_path = z.path.server(hostname)

    merged = zkutils.get(zkclient, server_path)
    merged.update(node_info)
    _LOGGER.info('Registering server %s: %r', hostname, merged)
    zkutils.update(zkclient, server_path, merged)

    presence_acl = zkutils.make_host_acl(hostname, 'rwcda')
    presence_path = z.path.server_presence(hostname + '#')
    return zkutils.put(
        zkclient,
        presence_path,
        {'seen': False},
        acl=[presence_acl],
        ephemeral=True,
        sequence=True
    )
コード例 #23
0
ファイル: cell.py プロジェクト: trapexit/treadmill
    def upgrade(cell, ldap, ldap_search_base, batch, timeout, treadmill_root,
                continue_on_error, dry_run, force, servers):
        """Upgrade the supplied cell.

        ``servers`` is an iterable of comma separated server lists. When it
        yields no server at all, the list stored in Zookeeper under
        ``zkutils.SERVERS`` is used instead. With ``dry_run`` the servers
        are only verified against the checksum of ``treadmill_root``;
        otherwise they are upgraded. Whatever failed is printed as YAML.
        """
        context.GLOBAL.ldap.url = ldap
        context.GLOBAL.ldap.search_base = ldap_search_base

        # Flatten into a separate list: rebinding ``servers`` before the
        # loop would throw away the servers given by the caller.
        server_names = []
        for server_list in servers or ():
            server_names.extend(server_list.split(','))
        servers = server_names

        if not treadmill_root:
            admin_cell = admin.Cell(context.GLOBAL.ldap.conn)
            cell_info = admin_cell.get(cell)
            treadmill_root = cell_info.get('treadmill_root')

        _LOGGER.info('Treadmill root: %s', treadmill_root)
        digest = versionmgr.checksum_dir(treadmill_root).hexdigest()
        _LOGGER.info('Checksum: %s', digest)

        context.GLOBAL.resolve(cell)
        zkclient = context.GLOBAL.zk.conn

        if not servers:
            # pylint: disable=R0204
            servers = zkutils.get(zkclient, zkutils.SERVERS)

        if dry_run:
            failed = versionmgr.verify(zkclient, digest, servers)
        else:
            failed = versionmgr.upgrade(
                zkclient,
                digest,
                servers,
                batch,
                timeout,
                stop_on_error=(not continue_on_error),
                force_upgrade=force,
            )

        if not failed:
            _LOGGER.info('All servers are up to date.')
        else:
            _LOGGER.error('Upgrade failed.')

        utils.print_yaml(failed)
コード例 #24
0
ファイル: presence.py プロジェクト: cesaragarcia/treadmill
    def unregister_identity(self):
        """Unregister app identity.

        The identity node is deleted only when its ``host`` is this host. A
        manifest without an identity group, or a missing identity node,
        leaves Zookeeper untouched.
        """
        group = self.manifest.get('identity_group')
        if not group:
            # Identity group missing or None: there is nothing to remove.
            return

        identity = self.manifest.get('identity', _INVALID_IDENTITY)
        _LOGGER.info('Unregister identity: %s, %s', group, identity)

        path = z.path.identity_group(group, str(identity))
        try:
            owner = zkutils.get(self.zkclient, path)['host']
            if owner == self.hostname:
                zkutils.ensure_deleted(self.zkclient, path)
        except kazoo.client.NoNodeError:
            _LOGGER.info('identity node %s does not exist.', path)
コード例 #25
0
ファイル: init.py プロジェクト: gaocegege/treadmill
def _node_initialize(tm_env, zkclient, hostname, zk_server_path,
                     zk_presence_path):
    """Initialize the node and register it in Zookeeper (cold start only).

    Runs the local ``tm_env`` initialization, merges the detected node
    information into the data already stored at ``zk_server_path`` and
    creates the ephemeral presence node with the host ACL.
    """
    tm_env.initialize()
    detected_info = sysinfo.node_info(tm_env)

    # XXX: Why a get/update dance instead of set
    server_data = zkutils.get(zkclient, zk_server_path)
    server_data.update(detected_info)
    _LOGGER.info('Registering node: %s: %s, %r',
                 zk_server_path, hostname, server_data)
    zkutils.update(zkclient, zk_server_path, server_data)

    presence_acl = zkutils.make_host_acl(hostname, 'rwcda')
    _LOGGER.debug('host_acl: %r', presence_acl)
    zkutils.put(
        zkclient,
        zk_presence_path,
        {'seen': False},
        acl=[presence_acl],
        ephemeral=True
    )
コード例 #26
0
ファイル: masterapi.py プロジェクト: rlonstein-ms/treadmill
def create_server(zkclient, server_id, parent_id, partition):
    """Create the server definition node in Zookeeper.

    Ensures the node exists with the host ACL, sets ``parent`` and
    ``partition`` in its data (starting from an empty dict when the node
    holds nothing) and creates a ``servers`` event when ``zkutils.put``
    returns a truthy result.
    """
    server_node = z.path.server(server_id)
    server_acl = zkclient.make_host_acl(server_id, 'rwcd')
    zkutils.ensure_exists(zkclient, server_node, acl=[server_acl])

    data = zkutils.get(zkclient, server_node) or {}
    data['parent'] = parent_id
    data['partition'] = partition

    _LOGGER.info('Creating server node %s with data %r and ACL %r',
                 server_node, data, server_acl)
    stored = zkutils.put(
        zkclient,
        server_node,
        data,
        acl=[server_acl],
        check_content=True
    )
    if stored:
        create_event(zkclient, 0, 'servers', [server_id])
コード例 #27
0
def _list_server_blackouts(zkclient, fmt):
    """Print the blacked out servers, in descending order of the collected
    ``(created, server, data)`` tuples.

    Each line is rendered by the formatter generated from ``fmt``; a
    blackout without data is shown with ``-`` as its reason. A missing
    Zookeeper node stops the collection silently.
    """
    blackouts = []
    try:
        for server in zkclient.get_children(z.BLACKEDOUT_SERVERS):
            reason, metadata = zkutils.get(
                zkclient,
                z.path.blackedout_server(server),
                need_metadata=True
            )
            blackouts.append((metadata.created, server, reason))
    except kazoo.client.NoNodeError:
        pass

    # [%t] %h %r will be printed as below
    # [Thu, 05 May 2016 02:59:58 +0000] <hostname> -
    formatter = _gen_formatter({'t': 0, 'h': 1, 'r': 2}, fmt)

    for when, server, reason in reversed(sorted(blackouts)):
        if reason is None:
            reason = '-'
        print(formatter.format(utils.strftime_utc(when), server, reason))
コード例 #28
0
ファイル: zkbackend.py プロジェクト: vrautela/treadmill
 def get(self, path):
     """Return the object stored at ``path``.

     Raises ``backend.ObjectNotFoundError`` when the node does not exist.
     """
     try:
         stored = zkutils.get(self.zkclient, path)
     except kazoo.client.NoNodeError:
         raise backend.ObjectNotFoundError()
     return stored
コード例 #29
0
def get_identity_group(zkclient, ident_group_id):
    """Return the identity group data, tagged with its id under ``_id``."""
    group = zkutils.get(zkclient, z.path.identity_group(ident_group_id))
    group['_id'] = ident_group_id
    return group
コード例 #30
0
def get_suspended_appmonitors(zkclient):
    """Return the data stored in the app monitor root node.

    An empty dict is returned instead of ``None`` or any other falsy value.
    """
    suspended = zkutils.get(zkclient, z.path.appmonitor())
    if not suspended:
        return {}
    return suspended