Example #1
0
def cleanup_trace(zkclient, batch_size, expires_after):
    """Move expired traces into history folder, compressed as sqlite db.

    An event is collected when its instance id is not among the children of
    ``z.SCHEDULED`` and its timestamp is more than ``expires_after`` seconds
    older than the moment the scan started. Collected events are sorted
    oldest first and passed to ``_upload_batch`` in slices of ``batch_size``.
    A trailing slice shorter than ``batch_size`` is not uploaded.

    :param zkclient: Zookeeper client providing ``get_children``.
    :param batch_size: Number of events handed to each upload.
    :param expires_after: Minimum age, in seconds, of an event to collect.
    """
    # A set gives constant-time membership checks; the raw children list
    # made every per-event check linear in the number of scheduled instances.
    scheduled = set(zkclient.get_children(z.SCHEDULED))

    # Compute the cutoff once so every event is judged against the same
    # instant instead of a clock that drifts while the shards are scanned.
    expired_before = time.time() - expires_after

    traces = []
    for shard in zkclient.get_children(z.TRACE):
        for event in zkclient.get_children(z.path.trace_shard(shard)):
            # Only the first two comma separated fields are needed here.
            instanceid, timestamp, _ = event.split(',', 2)
            timestamp = float(timestamp)
            if instanceid not in scheduled and timestamp < expired_before:
                traces.append((timestamp, shard, event))

    # Sort traces from older to latest.
    traces.sort()

    for idx in range(0, len(traces), batch_size):
        # Take a slice of batch_size
        batch = traces[idx:idx + batch_size]
        if len(batch) < batch_size:
            # Incomplete batches are not uploaded.
            _LOGGER.info('Traces: batch = %s, total = %s, exiting.',
                         batch_size, len(batch))
            break

        # Row layout: full node path, timestamp, None, shard path, event name.
        db_rows = [(z.join_zookeeper_path(z.TRACE, shard,
                                          event), timestamp, None,
                    z.join_zookeeper_path(z.TRACE, shard), event)
                   for timestamp, shard, event in batch]

        _upload_batch(zkclient, z.path.trace_history('trace.db.gzip-'),
                      'trace', db_rows)
Example #2
0
def _sync_collection(zkclient, entities, zkpath, match=None):
    """Mirror a collection of ldap entities under a Zookeeper path.

    The ``_id`` key is popped from every entity (mutating it) and used as
    the node name. When ``match`` is given, entities for which it returns a
    falsy value are skipped. Children of ``zkpath`` without a retained
    entity are deleted, and the retained entities are written with
    ``zkutils.put``.
    """
    _LOGGER.info('Sync: %s', zkpath)
    zkclient.ensure_path(zkpath)

    existing = zkclient.get_children(zkpath)

    wanted = {}
    for entity in entities:
        name = entity.pop('_id')
        if not match or match(entity):
            wanted[name] = entity
        else:
            _LOGGER.debug('Skip: %s', name)

    # Remove nodes that no longer correspond to a retained entity.
    for stale in set(existing) - set(wanted):
        _LOGGER.info('Delete: %s', stale)
        zkutils.ensure_deleted(zkclient, z.join_zookeeper_path(zkpath, stale))

    # Create or refresh a node for each retained entity.
    for name, entity in wanted.items():
        node = z.join_zookeeper_path(zkpath, name)
        changed = zkutils.put(zkclient, node, entity, check_content=True)
        if changed:
            _LOGGER.info('Update: %s', name)
        else:
            _LOGGER.info('Up to date: %s', name)
Example #3
0
def _sync_collection(zkclient, entities, zkpath, match=None):
    """Mirror a collection of ldap entities under a Zookeeper path.

    Entity ids are read from the ``_id`` key, after which ``_remove_id`` is
    applied to every entity. Children of ``zkpath`` whose name is not one of
    the ids are deleted. When ``match`` is given it is called with the id
    and the entity and returns the node name to use; a falsy result skips
    the entity.
    """
    _LOGGER.info('Sync: %s', zkpath)

    zkclient.ensure_path(zkpath)
    existing = zkclient.get_children(zkpath)

    # Collect every id first; the ids are stripped from the entities below.
    names = []
    for entity in entities:
        names.append(entity['_id'])

    for entity in entities:
        _remove_id(entity)

    for stale in set(existing) - set(names):
        _LOGGER.debug('Delete: %s', stale)
        zkutils.ensure_deleted(zkclient, z.join_zookeeper_path(zkpath, stale))

    # Add or update the node of each entity.
    for name, entity in zip(names, entities):
        if match:
            zkname = match(name, entity)
            if not zkname:
                _LOGGER.debug('Skip: %s', name)
                continue
        else:
            zkname = name

        node = z.join_zookeeper_path(zkpath, zkname)
        changed = zkutils.put(zkclient, node, entity, check_content=True)
        if changed:
            _LOGGER.info('Update: %s', zkname)
        else:
            _LOGGER.info('Up to date: %s', zkname)
Example #4
0
    def _children_watch(self, zkpath, children, watch_data, on_add, on_del):
        """Reconcile local files with the children reported by a watch.

        Names found on disk but not among ``children`` are passed to
        ``on_del``; names among ``children`` but not on disk are passed to
        ``on_add``. Names present on both sides are passed to ``on_add`` only
        the first time ``zkpath`` is processed. Always returns True.
        """
        def _register(name):
            """Record a data watch for the node if requested, then add it."""
            zknode = z.join_zookeeper_path(zkpath, name)
            if watch_data:
                self.watches.add(zknode)
            on_add(zknode)

        pattern = os.path.join(self.fpath(zkpath), '*')
        on_disk = {os.path.basename(found) for found in glob.glob(pattern)}
        in_zk = set(children)

        for name in on_disk - in_zk:
            _LOGGER.info('Delete: %s', name)
            zknode = z.join_zookeeper_path(zkpath, name)
            self.watches.discard(zknode)
            on_del(zknode)

        # Nodes known on both sides are only replayed on the first pass.
        if zkpath not in self.processed_once:
            self.processed_once.add(zkpath)
            for name in on_disk & in_zk:
                _LOGGER.info('Common: %s', name)
                _register(name)

        for name in in_zk - on_disk:
            _LOGGER.info('Add: %s', name)
            _register(name)

        return True
Example #5
0
    def test_join_zookeeper_path(self):
        """Checks zookeeper path construction."""
        # A single component is appended to the root.
        self.assertEqual('/root/node', z.join_zookeeper_path('/root', 'node'))

        # Several components are appended in order.
        self.assertEqual(
            '/root/node1/node2',
            z.join_zookeeper_path('/root', 'node1', 'node2'))
 def zk_get(fullpath):
     """Stand-in for zkclient.get() returning canned endpoint data.

     Returns a ``(data, None)`` tuple for paths under the two known endpoint
     prefixes and raises ValueError for any other path.
     """
     canned = (
         (z.join_zookeeper_path(z.ENDPOINTS, 'proid_A', 'foo'), b'xxx:123'),
         (z.join_zookeeper_path(z.ENDPOINTS, 'proid_B', 'bar'), b'yyy:987'),
     )
     for prefix, payload in canned:
         if fullpath.startswith(prefix):
             return (payload, None)

     raise ValueError(fullpath)
Example #7
0
    def _children_watch(self,
                        zkpath,
                        children,
                        watch_data,
                        on_add,
                        on_del,
                        cont_watch_predicate=None):
        """Reconcile local files with the children reported by a watch.

        The sorted children and sorted local file names are handed to
        ``_filter_children_actions``, which fills the add, remove and common
        lists. Removed nodes go to ``on_del`` and added nodes to ``on_add``.
        Common nodes go to ``on_add`` only the first time ``zkpath`` is
        processed. Returns the result of ``cont_watch_predicate`` when one is
        given, otherwise True.
        """
        def _register(name):
            """Record a data watch for the node if requested, then add it."""
            zknode = z.join_zookeeper_path(zkpath, name)
            if watch_data:
                self.watches.add(zknode)
            on_add(zknode)

        pattern = os.path.join(self.fpath(zkpath), '*')

        sorted_children = sorted(children)
        sorted_filenames = sorted(
            os.path.basename(found) for found in glob.glob(pattern))

        # Output lists populated in place by the helper below.
        to_add, to_remove, in_both = [], [], []
        self._filter_children_actions(sorted_children, sorted_filenames,
                                      to_add, to_remove, in_both)

        for name in to_remove:
            _LOGGER.info('Delete: %s', name)
            zknode = z.join_zookeeper_path(zkpath, name)
            self.watches.discard(zknode)
            on_del(zknode)

        # Nodes known on both sides are only replayed on the first pass.
        if zkpath not in self.processed_once:
            self.processed_once.add(zkpath)
            for name in in_both:
                _LOGGER.info('Common: %s', name)
                _register(name)

        for name in to_add:
            _LOGGER.info('Add: %s', name)
            _register(name)

        if not cont_watch_predicate:
            return True

        return cont_watch_predicate(zkpath, sorted_children)
Example #8
0
    def refresh_zk(self, zknodes=None):
        """Rebuild the cached Zookeeper data from a list of node names.

        Node names have the form ``name#chksum#seq``. Entries are grouped by
        name and each group is ordered by descending sequence number.

        When ``zknodes`` is None the children of ``self._zkpath`` are fetched
        first; a missing path is treated as having no children.
        """
        if zknodes is None:
            try:
                zknodes = self._zkclient.get_children(self._zkpath)
            except kazoo.exceptions.NoNodeError:
                zknodes = []

        grouped = {}
        for node in zknodes:
            name, chksum, seq = node.split('#', 2)
            if name not in grouped:
                grouped[name] = []
            grouped[name].append(
                ZkDataEntry(zname=z.join_zookeeper_path(self._zkpath, node),
                            chksum=chksum,
                            seq=int(seq)))

        # Highest sequence number first within each name.
        for entries in grouped.values():
            entries.sort(key=lambda entry: entry.seq, reverse=True)

        self._zkdata = grouped
Example #9
0
def watch_task(zkclient, cell_state, scheduled, task):
    """Watch individual task.

    Registers a children watch on the task node. For every instance not yet
    present in ``cell_state.tasks`` the callback either delegates to
    ``watch_task_instance`` (when the instance is scheduled) or stores the
    data read from the task node in ``cell_state.tasks``.

    :param zkclient: Zookeeper client.
    :param cell_state: Object whose ``tasks`` mapping is consulted and filled.
    :param scheduled: Collection of scheduled instances used as a lookup
        shortcut; when empty, Zookeeper is queried instead.
    :param task: Task name; instances are named ``<task>#<instance_id>``.
    """
    task_node = z.join_zookeeper_path(z.TASKS, task)

    # Establish watch on task instances.
    #
    # ChildrenWatch must be the outermost decorator: kazoo registers the
    # callable it receives, so the exit_on_unhandled wrapper has to be applied
    # first for the registered callback to be the guarded one. With the
    # reverse order the watch invoked the bare function.
    @zkclient.ChildrenWatch(task_node)
    @exc.exit_on_unhandled
    def _watch_task_instances(instance_ids):
        """Process the current list of instance ids of the task."""
        for instance_id in instance_ids:
            instance = '#'.join([task, instance_id])

            # Either watch is established or data is acquired.
            if instance in cell_state.tasks:
                continue

            # On first load, optimize lookup by preloading state
            # of all scheduled instances.
            #
            # Once initial load is done, scheduled will be cleared.
            if scheduled:
                need_watch = instance in scheduled
            else:
                need_watch = zkclient.exists(z.path.scheduled(instance))

            if need_watch:
                watch_task_instance(zkclient, cell_state, instance)
            else:
                data = zkutils.get_default(zkclient, z.path.task(instance))
                cell_state.tasks[instance] = data

        # Returning True keeps the children watch active.
        return True
Example #10
0
def make_endpoint_watcher(zkclient, state, proid):
    """Register a children watch that mirrors a proid's endpoints.

    The watch keeps ``state[proid]`` in step with the children of the proid
    endpoint node. The callback is returned to the caller.
    """
    proid_instances = z.join_zookeeper_path(z.ENDPOINTS, proid)

    @zkclient.ChildrenWatch(proid_instances)
    @utils.exit_on_unhandled
    def _watch_instances(children):
        """Apply the difference between known and reported endpoints."""

        # TODO: current implementation does not support instances, so
        #       state from masters will be stored, but will be never displayed.
        known = set(state[proid].keys())
        reported = set(children)

        for gone in known - reported:
            del state[proid][gone]

        fetched = {}
        for name in reported - known:
            node = z.join_zookeeper_path(proid_instances, name)
            try:
                payload, _metadata = zkclient.get(node)
            except kazoo.client.NoNodeError:
                # The node no longer exists; leave it out.
                continue
            fetched[name] = payload.decode()

        state[proid].update(fetched)
        return True

    return _watch_instances
Example #11
0
def prune_trace(zkclient, max_count):
    """Prune trace. Cleanup service (running/exited) events.

    For every trace shard, events are visited in reverse name order and
    counted per ``(instanceid, uniqueid, service)``. Once more than
    ``max_count`` service events have been seen for a key, every further
    event for that key is deleted.

    :param zkclient: Zookeeper client.
    :param max_count: Number of service events to keep per key.
    """
    shards = zkclient.get_children(z.TRACE)
    for shard in shards:
        service_events = collections.Counter()
        events = zkclient.get_children(z.path.trace_shard(shard))
        # NOTE(review): reverse name order presumably visits the newest
        # events of an instance first, so the oldest are pruned - confirm.
        for event in sorted(events, reverse=True):
            # Bound the split so that commas inside the trailing event data
            # stay part of it instead of aborting the prune with ValueError.
            instanceid, ts, src, event_type, event_data = event.split(',', 4)

            if event_type not in ('service_running', 'service_exited'):
                continue

            service_event = traceevents.AppTraceEvent.from_data(
                timestamp=ts,
                source=src,
                instanceid=instanceid,
                event_type=event_type,
                event_data=event_data,
            )
            if not service_event:
                continue

            key = (instanceid, service_event.uniqueid, service_event.service)
            service_events[key] += 1
            if service_events[key] > max_count:
                path = z.join_zookeeper_path(z.TRACE, shard, event)
                _LOGGER.info('Pruning trace: %s', path)
                zkutils.with_retry(zkutils.ensure_deleted, zkclient, path)
Example #12
0
def prune_trace_evictions(zkclient, max_count):
    """Cleanup excessive trace events caused by evictions.

    For every trace shard, events are visited in reverse name order.
    Eviction events (``pending``/``scheduled`` with reason ``evicted``) are
    counted per instance. Once ``max_count`` evictions have been counted for
    an instance, every further event of that instance is deleted, except
    ``pending`` events whose reason contains ``created``.

    :param zkclient: Zookeeper client.
    :param max_count: Number of evictions to keep per instance; must be > 0.
    """
    assert max_count > 0
    shards = zkclient.get_children(z.TRACE)
    for shard in shards:
        evictions = collections.Counter()
        events = zkclient.get_children(z.path.trace_shard(shard))
        for event in sorted(events, reverse=True):
            # Bound the split so that commas inside the trailing event data
            # stay part of it instead of aborting the prune with ValueError.
            instanceid, ts, src, event_type, event_data = event.split(',', 4)

            event_obj = traceevents.AppTraceEvent.from_data(
                timestamp=ts,
                source=src,
                instanceid=instanceid,
                event_type=event_type,
                event_data=event_data,
            )
            if not event_obj:
                continue

            # Leave pending/created events.
            if event_type == 'pending' and 'created' in event_obj.why:
                continue

            # Prune when number of evictions for an instance reached max_count.
            if evictions.get(instanceid, 0) >= max_count:
                path = z.join_zookeeper_path(z.TRACE, shard, event)
                _LOGGER.info('Pruning trace: %s', path)
                zkutils.with_retry(zkutils.ensure_deleted, zkclient, path)
            elif (event_type in ['pending', 'scheduled'] and
                  event_obj.why == 'evicted'):
                evictions[instanceid] += 1
Example #13
0
def _cleanup(zkclient, path, max_count):
    """Delete the lowest sorted children of path beyond max_count.

    Children are sorted by name and those at the front of that order are
    removed until at most ``max_count`` remain.
    """
    nodes = sorted(zkclient.get_children(path))
    surplus = len(nodes) - max_count
    if surplus <= 0:
        return

    for node in nodes[:surplus]:
        zkutils.ensure_deleted(zkclient, z.join_zookeeper_path(path, node))
Example #14
0
    def resolve_endpoint(self, endpoint):
        """Return the data stored for an endpoint, or None if it is missing.

        The endpoint node is looked up under ``z.ENDPOINTS`` and
        ``self.prefix``.
        """
        fullpath = z.join_zookeeper_path(z.ENDPOINTS, self.prefix, endpoint)
        try:
            hostport, _metadata = self.zkclient.get(fullpath)
        except kazoo.exceptions.NoNodeError:
            return None
        else:
            return hostport
Example #15
0
        def _is_up(self, server, server_endpoints):
            """Nodeinfo is up for server: {server}."""
            # NOTE: the docstring is kept verbatim; its ``{server}``
            # placeholder is presumably formatted elsewhere - confirm.
            hostport, _metadata = zkclient.get(z.join_zookeeper_path(
                z.ENDPOINTS, 'root', server_endpoints[server]))

            # Zookeeper payloads are bytes on Python 3, and splitting bytes on
            # a str separator raises TypeError; decode before parsing.
            if isinstance(hostport, bytes):
                hostport = hostport.decode()

            host, port = hostport.split(':')

            url = 'http://%s:%s' % (host, port)
            print(url)
            # The endpoint must accept connections and answer over HTTP.
            self.assertTrue(chk.connect(host, port))
            self.assertTrue(chk.url_check(url))
Example #16
0
def ensure_deleted(zkclient, path, recursive=True):
    """Delete a node, tolerating the case where it does not exist.

    When ``recursive`` is true the children of the node are deleted first
    through the same function.
    """
    _LOGGER.debug('Deleting %s', path)
    try:
        children = zkclient.get_children(path) if recursive else []
        for child in children:
            ensure_deleted(zkclient, z.join_zookeeper_path(path, child))

        zkclient.delete(path)
    except kazoo.client.NoNodeError:
        _LOGGER.debug('Node %s does not exist.', path)
Example #17
0
 def invoke_callback(self, path, node):
     """Call the registered function for a node found under path.

     The function receives the full node path plus the node data and stat,
     which are only fetched when ``self.include_data`` is set and are None
     otherwise. Any exception is logged instead of propagated.
     """
     try:
         fullpath = z.join_zookeeper_path(path, node)
         if self.include_data:
             data, stat = self.zkclient.get(fullpath)
         else:
             data, stat = None, None
         self.func(fullpath, data, stat)
     except Exception as err:  # pylint: disable=W0703
         # Only the exception class is reported.
         _LOGGER.critical('Unexpected error: %s', type(err))
Example #18
0
def cleanup_server_trace(zkclient, batch_size):
    """Upload the oldest server trace events in batches of batch_size.

    Each pass scans every server trace shard, keeps the ``batch_size``
    oldest events overall and passes them to ``_zk.upload_batch``. The loop
    ends as soon as a pass yields fewer than ``batch_size`` events.
    """
    def _as_trace(shard, event):
        """Build the sortable (timestamp, shard, event) tuple of an event."""
        _servername, timestamp, _ = event.split(',', 2)
        return (float(timestamp), shard, event)

    num_events = uploaded_events = 0

    while True:
        batch = []
        num_events = 0
        for shard in zkclient.get_children(z.SERVER_TRACE):
            events = zkclient.get_children(z.path.server_trace_shard(shard))
            num_events += len(events)
            # Sort traces from older to latest.
            traces = sorted(_as_trace(shard, event) for event in events)
            # Keep batch_size traces ordered by timestamp.
            batch = list(heapq.merge(batch, traces))[:batch_size]

        if len(batch) < batch_size:
            _LOGGER.info('Traces: batch = %s, total = %s, exiting.',
                         batch_size, len(batch))
            break

        # Row layout: full node path, timestamp, None, shard path, event name.
        db_rows = []
        for timestamp, shard, event in batch:
            db_rows.append((
                z.join_zookeeper_path(z.SERVER_TRACE, shard, event),
                timestamp,
                None,
                z.join_zookeeper_path(z.SERVER_TRACE, shard),
                event,
            ))

        _zk.upload_batch(zkclient,
                         z.path.server_trace_history('server_trace.db.gzip-'),
                         SERVER_TRACE_SOW_TABLE, db_rows)
        uploaded_events += len(db_rows)

    _LOGGER.info('Cleaned up %s server trace events, live events: %s',
                 uploaded_events, num_events)
Example #19
0
def snapshot(zkclient, root, zkpath='/'):
    """Recursively write the state of a ZK subtree to the filesystem.

    Paths listed in ``_ZK_BLACKLIST`` are skipped together with everything
    below them.
    """
    if zkpath in _ZK_BLACKLIST:
        return

    _LOGGER.debug('snapshot %s', zkpath)

    target = _fpath(root, zkpath)
    fs.mkdir_safe(os.path.dirname(target))

    data, stat = zkclient.get(zkpath)
    _write_data(target, data, stat)

    # Descend into every child node.
    for child in zkclient.get_children(zkpath):
        snapshot(zkclient, root, z.join_zookeeper_path(zkpath, child))
Example #20
0
    def get_endpoints_zk(self, watch_cb=None):
        """Return the set of endpoint names matching the configured pattern.

        Children of the endpoints node are matched against
        ``<pattern>:*:<endpoint>``. When the node does not exist an empty set
        is returned and, if ``watch_cb`` is given, an ``exists`` watch is
        placed on the node.
        """
        endpoints_path = z.join_zookeeper_path(z.ENDPOINTS, self.prefix)
        full_pattern = ':'.join([self.pattern, '*', self.endpoint])

        try:
            endpoints = self.zkclient.get_children(
                endpoints_path, watch=watch_cb
            )
        except kazoo.exceptions.NoNodeError:
            if watch_cb:
                self.zkclient.exists(endpoints_path, watch=watch_cb)
            return set()

        return {
            endpoint for endpoint in endpoints
            if fnmatch.fnmatch(endpoint, full_pattern)
        }
Example #21
0
def _instance_healthy(instance, endpoints):
    """Tell whether every endpoint of an instance accepts a connection.

    ``instance`` has the form ``<proid>.<instance_name>``. Endpoints whose
    name starts with the instance name are resolved to ``host:port`` through
    Zookeeper and probed with ``checkout.connect``.
    """
    proid, instance_name = instance.split('.', 1)
    candidates = [
        name for name in endpoints
        if name.startswith(instance_name)
    ]

    zkclient = context.GLOBAL.zk.conn

    def _reachable(endpoint):
        """Resolve one endpoint and try to connect to it."""
        fullpath = z.join_zookeeper_path(z.ENDPOINTS, proid, endpoint)
        hostport, _metadata = zkclient.get(fullpath)
        host, port = hostport.decode().split(':')
        return checkout.connect(host, port)

    # Stops at the first endpoint that cannot be reached.
    return all(_reachable(endpoint) for endpoint in candidates)
Example #22
0
    def refresh_zk(self, zknodes):
        """Rebuild the cached Zookeeper data from a list of node names.

        Node names have the form ``name#chksum#seq``. Entries are grouped by
        name and each group is ordered by descending sequence number.
        """
        grouped = {}
        for node in zknodes:
            name, chksum, seq = node.split('#', 2)
            if name not in grouped:
                grouped[name] = []
            grouped[name].append(
                ZkDataEntry(zname=z.join_zookeeper_path(self._zkpath, node),
                            chksum=chksum,
                            seq=int(seq)))

        # Highest sequence number first within each name.
        for entries in grouped.values():
            entries.sort(key=lambda entry: entry.seq, reverse=True)

        self._zkdata = grouped
Example #23
0
    def _watch_instances(children):
        """Bring state[proid] in line with the reported children."""

        known = set(state[proid].keys())
        reported = set(children)

        # Forget endpoints that are no longer reported.
        for gone in known - reported:
            del state[proid][gone]

        # Read the data of newly reported endpoints.
        fetched = {}
        for name in reported - known:
            node = z.join_zookeeper_path(proid_instances, name)
            try:
                payload, _metadata = zkclient.get(node)
            except kazoo.client.NoNodeError:
                # The node no longer exists; leave it out.
                continue
            fetched[name] = payload.decode()

        state[proid].update(fetched)
        return True
Example #24
0
    def _watch_instances(children):
        """Bring state[proid] in line with the reported children."""

        # TODO: current implementation does not support instances, so
        #       state from masters will be stored, but will be never displayed.
        known = set(state[proid].keys())
        reported = set(children)

        # Forget endpoints that are no longer reported.
        for gone in known - reported:
            del state[proid][gone]

        # Read the data of newly reported endpoints.
        fetched = {}
        for name in reported - known:
            node = z.join_zookeeper_path(proid_instances, name)
            try:
                payload, _metadata = zkclient.get(node)
            except kazoo.client.NoNodeError:
                # The node no longer exists; leave it out.
                continue
            fetched[name] = payload.decode()

        state[proid].update(fetched)
        return True
Example #25
0
    def run(no_lock, proid, root):
        """Run Treadmill DNS endpoint engine.
        """
        # The docstring is kept verbatim in case it is surfaced as help text.
        zkclient = context.GLOBAL.zk.conn

        zkendpointpath = z.join_zookeeper_path(z.ENDPOINTS, proid)
        zkclient.ensure_path(zkendpointpath)

        # Local directory corresponding to the proid endpoint node.
        zk2fs_endpointpath = '{}{}'.format(root, zkendpointpath)
        if not os.path.isabs(zk2fs_endpointpath):
            _LOGGER.error('Invalid path: %s', zk2fs_endpointpath)
            sys.exit(1)

        def _watch():
            """Start watching the endpoint directory."""
            _do_watch(zkclient=zkclient,
                      zkfs_dir=zk2fs_endpointpath)

        if no_lock:
            _watch()
            return

        # Only the holder of the election lock performs the watch.
        lock = zkutils.make_lock(zkclient, z.path.election(__name__))
        _LOGGER.info('Waiting for leader lock.')
        with lock:
            _watch()
Example #26
0
def _get_endpoints(proid):
    """Return the names of the endpoint nodes registered under a proid."""
    return context.GLOBAL.zk.conn.get_children(
        z.join_zookeeper_path(z.ENDPOINTS, proid))