Ejemplo n.º 1
0
    def load_schedule(self):
        """Run the scheduler for the first time and sync placement to Zookeeper.

        For every cell member, the children of its placement node are
        reconciled with the apps assigned to that server: children without a
        matching app are deleted and apps without a child node are created.
        The full placement is then stored on the root placement node and the
        instance is flagged as up to date.
        """
        placement = self.cell.schedule()

        for servername, server in self.cell.members().items():
            server_node = z.path.placement(servername)
            zkutils.ensure_exists(
                self.zkclient, server_node, acl=[_SERVERS_ACL]
            )

            in_zk = set(self.zkclient.get_children(server_node))
            scheduled = set(server.apps.keys())

            # Present in Zookeeper but no longer assigned to this server.
            for app in in_zk - scheduled:
                _LOGGER.info('Unscheduling: %s - %s', servername, app)
                stale_node = os.path.join(server_node, app)
                zkutils.ensure_deleted(self.zkclient, stale_node)

            # Assigned to this server but not yet present in Zookeeper.
            for app in scheduled - in_zk:
                _LOGGER.info('Scheduling: %s - %s,%s', servername, app,
                             self.cell.apps[app].identity)

                zkutils.put(
                    self.zkclient,
                    os.path.join(server_node, app),
                    self._placement_data(app),
                    acl=[_SERVERS_ACL],
                )
                self._update_task(app, servername, why=None)

        # Keep the latest placement around as a reference.
        zkutils.put(self.zkclient, z.path.placement(), placement)
        self.up_to_date = True
Ejemplo n.º 2
0
 def test_put(self):
     """Check that ``put`` creates the node with default arguments."""
     zk = kazoo.client.KazooClient()
     zkutils.put(zk, '/foo/bar')

     # No data was passed, so the node is expected to be created with an
     # empty payload, non-sequential and non-ephemeral, with makepath set.
     create_mock = kazoo.client.KazooClient.create
     create_mock.assert_called_with(
         '/foo/bar',
         b'',
         acl=mock.ANY,
         makepath=True,
         sequence=False,
         ephemeral=False,
     )
Ejemplo n.º 3
0
def _node_initialize(tm_env, runtime, zkclient, hostname,
                     zk_server_path, zk_presence_path):
    """Initialize the node; should only be done on a cold start.

    Collects local node information (including detected traits), merges it
    over the data already stored at ``zk_server_path``, writes the result
    back and creates the ephemeral presence node.  On POSIX hosts iptables is
    initialized afterwards.  Any failure is logged and the Zookeeper client
    is stopped.
    """
    try:
        local_info = sysinfo.node_info(tm_env, runtime)
        local_info['traits'] = traits.detect(
            zkutils.get(zkclient, z.path.traits())
        )

        # Locally collected values override whatever the server node holds.
        merged_info = zkutils.get(zkclient, zk_server_path)
        merged_info.update(local_info)
        _LOGGER.info('Registering node: %s: %s, %r',
                     zk_server_path, hostname, merged_info)
        zkutils.update(zkclient, zk_server_path, merged_info)

        presence_acl = zkutils.make_host_acl(hostname, 'rwcda')
        _LOGGER.debug('host_acl: %r', presence_acl)
        zkutils.put(
            zkclient,
            zk_presence_path,
            {'seen': False},
            acl=[presence_acl],
            ephemeral=True,
        )

        # TODO: Fix the network initialization. Then the below can be part of
        # appenv.initialize()
        if os.name == 'posix':
            # Flush all rules in iptables nat and mangle tables (it is assumed
            # that none but Treadmill manages these tables) and bulk load all
            # the Treadmill static rules
            external_ip = merged_info['network']['external_ip']
            iptables.initialize(external_ip)

    except Exception:  # pylint: disable=W0703
        _LOGGER.exception('Node initialization failed')
        zkclient.stop()
Ejemplo n.º 4
0
def create_endpoint_file(approot, port, appname, endpoint):
    """Register a TCP endpoint in Zookeeper and create its local spec file.

    The endpoint node is deleted and, after a pause, written again with the
    ``host:port`` of this process.  Any existing local endpoint files for the
    same app/endpoint are unlinked before the new spec is created.
    """
    hostport = '%s:%s' % (sysinfo.hostname(), port)
    zkclient = context.GLOBAL.zk.conn

    proid_node = z.path.endpoint_proid(appname)
    servers_acl = zkclient.make_servers_acl()
    _LOGGER.info('Ensuring %s exists with ACL %r', proid_node, servers_acl)
    zkutils.ensure_exists(zkclient, proid_node, acl=[servers_acl])

    endpoint_node = z.path.endpoint(appname, 'tcp', endpoint)
    _LOGGER.info('Registering %s %s', endpoint_node, hostport)

    # The endpoint has to be deleted and re-created for the discovery to
    # pick it up in case of master restart.
    zkutils.ensure_deleted(zkclient, endpoint_node)
    time.sleep(5)
    zkutils.put(zkclient, endpoint_node, hostport)

    tm_env = appenv.AppEnvironment(approot)
    manager = endpoints.EndpointsMgr(tm_env.endpoints_dir)
    manager.unlink_all(appname=appname, endpoint=endpoint, proto='tcp')

    # The spec is owned by the current process.
    pid = os.getpid()
    manager.create_spec(
        appname=appname,
        endpoint=endpoint,
        proto='tcp',
        real_port=port,
        pid=pid,
        port=port,
        owner='/proc/{}'.format(pid),
    )
Ejemplo n.º 5
0
def _node_initialize(tm_env, runtime, zkclient, hostname, zk_server_path,
                     zk_presence_path):
    """Initialize the node; should only be done on a cold start.

    Merges locally collected node information over the data stored at
    ``zk_server_path``, writes it back, creates the ephemeral presence node
    and finally runs the local environment initialization.  Any failure is
    logged and the Zookeeper client is stopped.
    """
    try:
        local_info = sysinfo.node_info(tm_env, runtime)

        # Locally collected values override whatever the server node holds.
        merged_info = zkutils.get(zkclient, zk_server_path)
        merged_info.update(local_info)
        _LOGGER.info('Registering node: %s: %s, %r', zk_server_path, hostname,
                     merged_info)
        zkutils.update(zkclient, zk_server_path, merged_info)

        presence_acl = zkutils.make_host_acl(hostname, 'rwcda')
        _LOGGER.debug('host_acl: %r', presence_acl)
        zkutils.put(
            zkclient,
            zk_presence_path,
            {'seen': False},
            acl=[presence_acl],
            ephemeral=True,
        )

        # Hand the merged data over to the local node initialization.
        tm_env.initialize(merged_info)

    except Exception:  # pylint: disable=W0703
        _LOGGER.exception('Node initialization failed')
        zkclient.stop()
Ejemplo n.º 6
0
def _save_appgroup_lookup(zkclient, db_file, proid, digest):
    """Store the raw bytes of ``db_file`` as the appgroup lookup node.

    After the node for ``proid``/``digest`` is written,
    ``_remove_extra_appgroup_lookup`` is called for the same pair.
    """
    with io.open(db_file, 'rb') as db:
        lookup_node = z.path.appgroup_lookup(proid, digest)
        payload = db.read()
        zkutils.put(zkclient, lookup_node, payload)

    _remove_extra_appgroup_lookup(zkclient, proid, digest)
Ejemplo n.º 7
0
def create_bucket(zkclient, bucket_id, parent_id, traits=0):
    """Write a bucket definition to Zookeeper and emit a 'buckets' event.

    The stored definition holds the bucket's ``traits`` and its ``parent``.
    """
    definition = {'traits': traits, 'parent': parent_id}
    bucket_node = z.path.bucket(bucket_id)
    zkutils.put(zkclient, bucket_node, definition, check_content=True)
    create_event(zkclient, 0, 'buckets', None)
Ejemplo n.º 8
0
def _save_version(zkclient, hostname, version):
    """Prepend ``version`` to the host's version history in Zookeeper.

    Only the first ``_MAX_VERSIONS`` entries of the history are stored.
    """
    history_node = z.path.version_history(hostname)
    # An empty or missing history starts from a fresh list.
    history = zkutils.get_default(zkclient, history_node) or []
    history.insert(0, version)
    zkutils.put(zkclient, history_node, history[:_MAX_VERSIONS])
Ejemplo n.º 9
0
def update_appmonitor(zkclient, monitor_id, count):
    """Configure an app monitor and return its data.

    The returned dict is the stored ``{'count': count}`` with ``'_id'`` set
    to ``monitor_id``.
    """
    monitor = {'count': count}
    zkutils.put(
        zkclient,
        z.path.appmonitor(monitor_id),
        monitor,
        check_content=True,
    )

    # The data is returned as-is rather than read back: with
    # check_content=True it is taken to be what the node now holds.
    monitor['_id'] = monitor_id
    return monitor
Ejemplo n.º 10
0
def _sync_allocations(zkclient, allocations):
    """Synchronize allocations to Zookeeper.

    Each allocation dict gets a ``'name'`` key derived from its ``'_id'``
    (modified in place) before the whole list is stored.
    """
    synced = []
    for allocation in allocations:
        _LOGGER.info('Sync allocation: %s', allocation)
        # Split on the last '/'; the trailing component is discarded.
        allocation['name'], _cell = allocation['_id'].rsplit('/', 1)
        synced.append(allocation)

    zkutils.put(zkclient, z.path.allocation(), synced, check_content=True)
Ejemplo n.º 11
0
 def _publish(self):
     """Write the sorted discovery state to an ephemeral Zookeeper node.

     The node is named after the hostname, suffixed with ``#<instance>``
     when an instance is set.
     """
     _LOGGER.info('Publishing discovery info')
     state = sorted(self.state)
     instance = (
         '#'.join([self.hostname, self.instance])
         if self.instance
         else self.hostname
     )
     zkutils.put(
         self.zkclient,
         z.path.discovery(instance),
         state,
         ephemeral=True,
         acl=[self.node_acl],
     )
Ejemplo n.º 12
0
 def _publish(self, result):
     """Write ``result`` to an ephemeral discovery state node in Zookeeper.

     The node is named after the hostname, suffixed with ``#<instance>``
     when an instance is set.
     """
     instance = (
         '#'.join([self.hostname, self.instance])
         if self.instance
         else self.hostname
     )
     state_node = z.path.discovery_state(instance)
     zkutils.put(
         self.zkclient, state_node, result,
         ephemeral=True, acl=[self.node_acl]
     )
Ejemplo n.º 13
0
    def test_put_check_content(self):
        """Verify put with check_content=True only writes changed content."""
        zk_cls = treadmill.zkutils.ZkClient
        # The node already exists and currently holds b'aaa'.
        zk_cls.create.side_effect = kazoo.client.NodeExistsError
        zk_cls.get.return_value = (b'aaa', {})
        zkclient = zk_cls()

        # Same content: no write expected.
        zkutils.put(zkclient, '/a', 'aaa', check_content=True)
        self.assertFalse(zk_cls.set.called)

        # Different content: the node must be set to the new value.
        zkutils.put(zkclient, '/a', 'bbb', check_content=True)
        zk_cls.set.assert_called_with('/a', b'bbb')
Ejemplo n.º 14
0
def _register_endpoint(zkclient, port):
    """Register the policy server endpoint in Zookeeper.

    Creates an ephemeral ``<hostname>:<port>`` node under the warpgate path,
    removing a previously existing node with the same name first.
    """
    hostname = sysinfo.hostname()
    zkclient.ensure_path(z.path.warpgate())

    endpoint_node = z.path.warpgate('%s:%s' % (hostname, port))
    _LOGGER.info('registering locker: %s', endpoint_node)

    already_there = zkclient.exists(endpoint_node)
    if already_there:
        _LOGGER.info('removing previous node %s', endpoint_node)
        zkutils.ensure_deleted(zkclient, endpoint_node)

    zkutils.put(zkclient, endpoint_node, {}, acl=None, ephemeral=True)
Ejemplo n.º 15
0
    def register_endpoint(self, port):
        """Register the ticket locker endpoint in Zookeeper.

        Creates an ephemeral ``<hostname>:<port>`` node under the ticket
        locker path, removing a previously existing node with the same name
        first.
        """
        hostname = sysinfo.hostname()
        self.zkclient.ensure_path(z.TICKET_LOCKER)

        locker_node = z.path.ticket_locker('%s:%s' % (hostname, port))
        _LOGGER.info('registering locker: %s', locker_node)

        already_there = self.zkclient.exists(locker_node)
        if already_there:
            _LOGGER.info('removing previous node %s', locker_node)
            zkutils.ensure_deleted(self.zkclient, locker_node)

        zkutils.put(self.zkclient, locker_node, {}, acl=None, ephemeral=True)
Ejemplo n.º 16
0
    def test_put_existing(self):
        """Check that put falls back to set/set_acls for an existing node."""
        zk_cls = treadmill.zkutils.ZkClient
        client = zk_cls()

        # Make create() fail as if the node were already there; a mock raises
        # an exception class given as side_effect.
        zk_cls.create.side_effect = kazoo.client.NodeExistsError

        zkutils.put(client, '/foo/bar')

        zk_cls.set.assert_called_with('/foo/bar', b'')
        zk_cls.set_acls.assert_called_with('/foo/bar', mock.ANY)
Ejemplo n.º 17
0
    def test_put_existing(self):
        """Check that put falls back to set/set_acls for an existing node."""
        zk_cls = kazoo.client.KazooClient
        client = zk_cls()

        # Make create() fail as if the node were already there; a mock raises
        # an exception class given as side_effect.
        zk_cls.create.side_effect = kazoo.client.NodeExistsError

        zkutils.put(client, '/foo/bar')

        zk_cls.set.assert_called_with('/foo/bar', '')
        zk_cls.set_acls.assert_called_with('/foo/bar', mock.ANY)
Ejemplo n.º 18
0
    def reschedule(self):
        """Run the scheduler and apply placement changes to Zookeeper.

        Placement records whose server and expiration are both unchanged are
        ignored.  For the rest, old placement nodes are removed first, then
        new placement nodes are written and the task of every changed app is
        updated.  Finally the full placement is stored on the root placement
        node and the instance is flagged as up to date.
        """
        placement = self.cell.schedule()

        # Only records whose server or expiration changed need any work.
        changes = []
        for app, before, exp_before, after, exp_after in placement:
            if before != after or exp_before != exp_after:
                changes.append((app, before, exp_before, after, exp_after))

        # Two passes: all old placements are removed before any new one is
        # created.  If the loop is interrupted for any reason (Zookeeper
        # connection lost, master restart) there are no duplicate placements.
        for app, before, _exp_before, after, _exp_after in changes:
            if not before or before == after:
                continue
            _LOGGER.info('Unscheduling: %s - %s', before, app)
            zkutils.ensure_deleted(self.zkclient,
                                   z.path.placement(before, app))

        for app, before, _exp_before, after, exp_after in changes:
            placement_data = self._placement_data(app)

            # Reason the app left its previous server, if it had one.
            if before is None:
                why = ''
            elif (before not in self.servers
                  or self.servers[before].state == scheduler.State.down):
                why = '{server}:down'.format(server=before)
            else:
                # TODO: it will be nice to put app utilization at the time
                #       of eviction, but this info is not readily
                #       available yet in the scheduler.
                why = 'evicted'

            if not after:
                self._update_task(app, None, why=why)
                continue

            _LOGGER.info('Scheduling: %s - %s,%s, expires at: %s', after,
                         app, self.cell.apps[app].identity, exp_after)
            zkutils.put(
                self.zkclient,
                z.path.placement(after, app),
                placement_data,
                acl=[_SERVERS_ACL],
            )
            self._update_task(app, after, why=why)

        self._unschedule_evicted()

        # Keep the latest placement around as a reference.
        zkutils.put(self.zkclient, z.path.placement(), placement)
        self.up_to_date = True
Ejemplo n.º 19
0
def update_allocations(zkclient, allocations):
    """Store allocations in Zookeeper.

    An 'allocations' event is created when ``zkutils.put`` returns a truthy
    value.
    """
    stored = zkutils.put(
        zkclient, z.path.allocation(), allocations, check_content=True
    )
    if stored:
        create_event(zkclient, 0, 'allocations', None)
Ejemplo n.º 20
0
def _sync_partitions(zkclient, entities):
    """Sync partitions to Zookeeper.

    Partition nodes with no matching entity are deleted; every entity is then
    written to its own node.  An entity's 'reboot-schedule' value is replaced
    by the result of ``utils.reboot_schedule``; if that raises ``ValueError``
    the value is left as it was.
    """
    partitions_root = z.path.partition()
    _LOGGER.info('Sync: %s', partitions_root)

    zkclient.ensure_path(z.path.partition())

    existing = set(zkclient.get_children(z.path.partition()))
    wanted = {entity['_id'] for entity in entities}

    for extra in existing - wanted:
        _LOGGER.debug('Delete: %s', extra)
        zkutils.ensure_deleted(zkclient, z.path.partition(extra))

    # Add or update current partitions
    for entity in entities:
        zkname = entity['_id']

        if 'reboot-schedule' in entity:
            raw_schedule = entity['reboot-schedule']
            try:
                entity['reboot-schedule'] = utils.reboot_schedule(raw_schedule)
            except ValueError:
                _LOGGER.info('Invalid reboot schedule, ignoring.')

        updated = zkutils.put(
            zkclient, z.path.partition(zkname), entity, check_content=True
        )
        if updated:
            _LOGGER.info('Update: %s', zkname)
        else:
            _LOGGER.info('Up to date: %s', zkname)
Ejemplo n.º 21
0
def create_apps(zkclient, app_id, app, count):
    """Schedule ``count`` new instances of an app.

    Each instance is written as a sequence node, and a 'pending' trace node
    is created for it.  Returns the list of instance ids (the base names of
    the created nodes).
    """
    role_acl = zkutils.make_role_acl('servers', 'rwcd')
    created = []

    for _ in range(count):
        app_node = zkutils.put(
            zkclient,
            _app_node(app_id, existing=False),
            app,
            sequence=True,
            acl=[role_acl],
        )
        instance_id = os.path.basename(app_node)

        # Create task for the app, and put it in pending state.
        # TODO: probably need to create PendingEvent and use to_data method.
        trace_event = '{time},{hostname},pending,{data}'.format(
            time=time.time(), hostname=sysinfo.hostname(), data='created')
        task_node = z.path.trace(instance_id, trace_event)
        try:
            zkclient.create(task_node, b'', acl=[_SERVERS_ACL], makepath=True)
        except kazoo.client.NodeExistsError:
            # The trace node is already there; nothing more to do.
            pass

        created.append(instance_id)

    return created
Ejemplo n.º 22
0
def create_server(zkclient, server_id, parent_id):
    """Create a server definition in Zookeeper.

    The server node is ensured to exist with a host ACL.  When ``parent_id``
    is given it is recorded under the 'parent' key of the node data.  A
    'servers' event is created when ``zkutils.put`` returns a truthy value.
    """
    node = z.path.server(server_id)
    host_acl = zkutils.make_host_acl(server_id, 'rwcd')

    zkutils.ensure_exists(zkclient, node, acl=[host_acl])

    # zkutils.get return dict/tuple if need_metadata is true.
    #
    # pylint: disable=R0204
    data = zkutils.get(zkclient, node)
    if parent_id:
        # Start from an empty dict when the node holds no data yet.
        data = data or {}
        data['parent'] = parent_id

    _LOGGER.info('Creating server node %s with data %r and ACL %r',
                 node, data, host_acl)
    changed = zkutils.put(
        zkclient, node, data, acl=[host_acl], check_content=True
    )
    if changed:
        create_event(zkclient, 0, 'servers', [server_id])
Ejemplo n.º 23
0
def sync_partitions():
    """Sync the partitions of the current cell to Zookeeper.

    Partitions are read through the admin API.  Partition nodes with no
    matching entry are deleted; every partition is then written to its own
    node.  A partition's 'reboot-schedule' value is replaced by the result of
    ``utils.reboot_schedule``; if that raises ``ValueError`` the value is
    left as it was.
    """
    _LOGGER.info('Sync: partitions.')
    zkclient = context.GLOBAL.zk.conn

    cell_admin = admin.Cell(context.GLOBAL.ldap.conn)
    partitions = cell_admin.partitions(context.GLOBAL.cell)

    zkclient.ensure_path(z.path.partition())

    existing = set(zkclient.get_children(z.path.partition()))
    wanted = {partition['_id'] for partition in partitions}

    for extra in existing - wanted:
        _LOGGER.debug('Delete: %s', extra)
        zkutils.ensure_deleted(zkclient, z.path.partition(extra))

    # Add or update current partitions
    for partition in partitions:
        zkname = partition['_id']

        if 'reboot-schedule' in partition:
            raw_schedule = partition['reboot-schedule']
            try:
                partition['reboot-schedule'] = utils.reboot_schedule(
                    raw_schedule)
            except ValueError:
                _LOGGER.info('Invalid reboot schedule, ignoring.')

        updated = zkutils.put(
            zkclient, z.path.partition(zkname), partition, check_content=True
        )
        if updated:
            _LOGGER.info('Update: %s', zkname)
        else:
            _LOGGER.info('Up to date: %s', zkname)
Ejemplo n.º 24
0
def _sync_collection(zkclient, entities, zkpath, match=None):
    """Sync a collection of entities to the children of ``zkpath``.

    Children of ``zkpath`` whose name is not an entity id are deleted.  Each
    entity is then written to a child node; when ``match`` is given it is
    called as ``match(name, entity)`` to obtain the node name, and a falsy
    result skips the entity.
    """
    _LOGGER.info('Sync: %s', zkpath)

    zkclient.ensure_path(zkpath)

    existing = zkclient.get_children(zkpath)

    # Ids are collected before _remove_id is applied to the entities.
    names = [entity['_id'] for entity in entities]
    for entity in entities:
        _remove_id(entity)

    for extra in set(existing) - set(names):
        _LOGGER.debug('Delete: %s', extra)
        extra_node = z.join_zookeeper_path(zkpath, extra)
        zkutils.ensure_deleted(zkclient, extra_node)

    # Add or update current app-groups
    for name, entity in zip(names, entities):
        zkname = match(name, entity) if match else name
        if not zkname and match:
            _LOGGER.debug('Skip: %s', name)
            continue

        entity_node = z.join_zookeeper_path(zkpath, zkname)
        if zkutils.put(zkclient, entity_node, entity, check_content=True):
            _LOGGER.info('Update: %s', zkname)
        else:
            _LOGGER.info('Up to date: %s', zkname)
Ejemplo n.º 25
0
def _sync_collection(zkclient, entities, zkpath, match=None):
    """Sync a collection of entities to the children of ``zkpath``.

    The '_id' key is popped from every entity and used as its node name.
    Entities rejected by ``match`` (when given) are skipped, children of
    ``zkpath`` that are not going to be synced are deleted, and the remaining
    entities are written to their nodes.
    """
    _LOGGER.info('Sync: %s', zkpath)
    zkclient.ensure_path(zkpath)

    existing = zkclient.get_children(zkpath)

    to_sync = {}
    for entity in entities:
        name = entity.pop('_id')
        if not match or match(entity):
            to_sync[name] = entity
        else:
            _LOGGER.debug('Skip: %s', name)

    for to_del in set(existing) - set(to_sync):
        _LOGGER.info('Delete: %s', to_del)
        stale_node = z.join_zookeeper_path(zkpath, to_del)
        zkutils.ensure_deleted(zkclient, stale_node)

    # Add or update current app-groups
    for name, entity in to_sync.items():
        entity_node = z.join_zookeeper_path(zkpath, name)
        updated = zkutils.put(zkclient, entity_node, entity,
                              check_content=True)
        if updated:
            _LOGGER.info('Update: %s', name)
        else:
            _LOGGER.info('Up to date: %s', name)
Ejemplo n.º 26
0
def update_appmonitor(zkclient, monitor_id, count, policy=None):
    """Configure an app monitor and return its data.

    Starts from the monitor's current data (or an empty dict when
    ``get_appmonitor`` returns ``None``) and overrides 'count' and 'policy'
    with the arguments that are not ``None``.  The returned dict is the
    stored data with ``'_id'`` set to ``monitor_id``.
    """
    monitor = get_appmonitor(zkclient, monitor_id)
    if monitor is None:
        monitor = {}

    # None means "leave the current value alone".
    for key, value in (('count', count), ('policy', policy)):
        if value is not None:
            monitor[key] = value

    zkutils.put(
        zkclient,
        z.path.appmonitor(monitor_id),
        monitor,
        check_content=True,
    )

    # The data is returned as-is rather than read back: with
    # check_content=True it is taken to be what the node now holds.
    monitor['_id'] = monitor_id
    return monitor
Ejemplo n.º 27
0
def create_event(zkclient, priority, event, payload):
    """Place an event on the event queue and return its node name.

    ``priority`` must be within 0..100.  The event is written as a sequence
    node whose name starts with the zero-padded priority and the event name;
    the base name of the created node is returned.
    """
    assert 0 <= priority <= 100

    prefix = '%(priority)03d-%(event)s-' % {'priority': priority,
                                            'event': event}
    event_node = zkutils.put(
        zkclient,
        z.path.event(prefix),
        payload,
        acl=[_SERVERS_ACL],
        sequence=True,
    )
    return os.path.basename(event_node)
Ejemplo n.º 28
0
def update_identity_group(zkclient, ident_group_id, count):
    """Store the count of an identity group in Zookeeper.

    An 'identity_groups' event for the group is created when ``zkutils.put``
    returns a truthy value.
    """
    group_node = z.path.identity_group(ident_group_id)
    changed = zkutils.put(
        zkclient,
        group_node,
        {'count': count},
        check_content=True,
        acl=[zkclient.make_servers_acl()],
    )
    if changed:
        create_event(zkclient, 0, 'identity_groups', [ident_group_id])
Ejemplo n.º 29
0
    def accept(tkt_spool_dir, port, appname, endpoint, use_v2):
        """Run ticket locker acceptor.

        Registers ``<hostname>:<port>`` as the TCP endpoint of ``appname`` in
        Zookeeper, stops the Zookeeper connection and then execs into the
        ticket receiver.

        :param tkt_spool_dir: Directory passed to the ticket receiver.
        :param port: Port to accept on; ``0`` picks a free port.
        :param appname: Application name the endpoint is registered under.
        :param endpoint: Name of the endpoint to register.
        :param use_v2: Exec ``tkt_recv_v2`` instead of ``tkt_recv``.
        """
        if port == 0:
            # Bind to port 0 to obtain a free port number.  The socket is
            # closed in ``finally`` so it is not leaked when bind() or
            # getsockname() fails.
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                sock.bind(('0.0.0.0', 0))
                port = sock.getsockname()[1]
            finally:
                sock.close()

        hostname = sysinfo.hostname()
        hostport = '%s:%s' % (hostname, port)

        zkclient = context.GLOBAL.zk.conn

        endpoint_proid_path = z.path.endpoint_proid(appname)
        _LOGGER.info('Ensuring %s exists with ACL %r', endpoint_proid_path,
                     _SERVERS_ACL)
        zkutils.ensure_exists(zkclient,
                              endpoint_proid_path,
                              acl=[_SERVERS_ACL])

        endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
        _LOGGER.info('Registering %s %s', endpoint_path, hostport)

        # Need to delete/create endpoints for the discovery to pick it up in
        # case of master restart.
        #
        # Unlike typical endpoint, we cannot make the node ephemeral as we
        # exec into tkt-recv.
        zkutils.ensure_deleted(zkclient, endpoint_path)
        time.sleep(5)
        zkutils.put(zkclient, endpoint_path, hostport)

        zkclient.stop()

        # Exec into tickets acceptor. If race condition will not allow it to
        # bind to the provided port, it will exit and registration will
        # happen again.
        if use_v2:
            subproc.safe_exec([
                'tkt_recv_v2', '-p{}'.format(port),
                '-d{}'.format(tkt_spool_dir)
            ])
        else:
            subproc.safe_exec(
                ['tkt_recv', 'tcp://*:{}'.format(port), tkt_spool_dir])
Ejemplo n.º 30
0
        def _publish_ticket(tkt_file):
            """Publish the ``klist`` details of a ticket file to Zookeeper.

            Files whose name starts with '.' or does not end with one of the
            realms are ignored.  A failing ``klist`` call is logged as a
            warning.
            """
            if tkt_file.startswith('.'):
                return

            in_known_realm = any(
                tkt_file.endswith(realm) for realm in realms
            )
            if not in_known_realm:
                _LOGGER.info('Ignore tkt_file: %s', tkt_file)
                return

            klist_cmd = ['klist', '-5', '-e', '-f', tkt_file]
            try:
                tkt_details = subproc.check_output(klist_cmd)
                tkt_node = z.path.tickets(os.path.basename(tkt_file),
                                          self.hostname)
                zkutils.put(
                    self.zkclient, tkt_node, tkt_details, ephemeral=True
                )
            except subproc.CalledProcessError:
                _LOGGER.warning('Unable to get tickets details.')