Code Example #1
File: pingpong.py Project: ycaihua/twitter-commons
class PingPongServer(Observable):
    PING_DELAY = Amount(1, Time.SECONDS)

    def __init__(self, target_host, target_port, clock=time):
        self._clock = clock
        self._target = (target_host, target_port)
        self._pings = AtomicGauge('pings')
        self.metrics.register(self._pings)

    def send_request(self, endpoint, message, ttl):
        url_base = 'http://%s:%d' % self._target
        try:
            urllib2.urlopen('%s/%s/%s/%d' %
                            (url_base, endpoint, message, ttl)).read()
        except Exception as e:
            log.error('Failed to query %s: %s' % (url_base, e))

    @HttpServer.route('/ping/:message')
    @HttpServer.route('/ping/:message/:ttl')
    def ping(self, message, ttl=60):
        self._pings.increment()
        log.info('Got ping (ttl=%s): %s' % (ttl, message))
        ttl = int(ttl) - 1
        if ttl > 0:
            defer(partial(self.send_request, 'ping', message, ttl),
                  delay=self.PING_DELAY,
                  clock=self._clock)
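Note: the example above shows the common twitter.common.metrics pattern: an Observable registers an AtomicGauge on self.metrics and increments it per request. A minimal standalone sketch of that pattern, assuming AtomicGauge and Observable are importable from twitter.common.metrics as these listings suggest (the RequestCounter class and its names are made up for illustration):

from twitter.common.metrics import AtomicGauge, Observable

class RequestCounter(Observable):
    def __init__(self):
        # AtomicGauge starts at 0 unless an initial value is given.
        self.requests = AtomicGauge('requests')
        # Registering the gauge exposes it through the Observable's metrics registry.
        self.metrics.register(self.requests)

    def handle(self):
        self.requests.increment()

counter = RequestCounter()
counter.handle()
assert counter.requests.read() == 1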
Code Example #2
File: client.py Project: CodeWarltz/commons
 def _init_metrics(self):
   self._session_expirations = AtomicGauge('session_expirations')
   self._connection_losses = AtomicGauge('connection_losses')
   self.metrics.register(self._session_expirations)
   self.metrics.register(self._connection_losses)
   self.metrics.register(LambdaGauge('session_id', lambda: self.session_id))
   self.metrics.register(LambdaGauge('live', lambda: int(self._live.is_set())))
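Note: the LambdaGauge instances above wrap a callable that is evaluated each time the gauge is read, which is how 'live' tracks a threading.Event without explicit updates. A small sketch of that behavior, assuming LambdaGauge evaluates its callable on read() (the listings rely on this but do not show it directly):

import threading

from twitter.common.metrics import LambdaGauge

live = threading.Event()
live_gauge = LambdaGauge('live', lambda: int(live.is_set()))

assert live_gauge.read() == 0  # the event is not set yet
live.set()
assert live_gauge.read() == 1  # the callable is re-evaluated on this read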
Code Example #3
File: kazoo_client.py Project: wcauchois/commons-1
class TwitterKazooClient(KazooClient, Observable):
  @classmethod
  def make(cls, *args, **kw):
    # TODO(jcohen): Consider removing verbose option entirely in favor of just using loglevel.
    verbose = kw.pop('verbose', False)
    async = kw.pop('async', True)

    if verbose:
      loglevel = kw.pop('loglevel', logging.INFO)
    else:
      loglevel = kw.pop('loglevel', sys.maxsize)

    logger = logging.getLogger('kazoo.devnull')
    logger.setLevel(loglevel)
    kw['logger'] = logger

    zk = cls(*args, **kw)
    if async:
      zk.start_async()
      zk.connecting.wait()
    else:
      zk.start()

    return zk

  def __init__(self, *args, **kw):
    if 'connection_retry' not in kw:
      # The default backoff delay limit in kazoo is 3600 seconds, which is generally
      # too conservative for our use cases.  If not supplied by the caller, provide
      # a backoff that will truncate earlier.
      kw['connection_retry'] = KazooRetry(
          max_delay=DEFAULT_RETRY_MAX_DELAY_SECS, **DEFAULT_RETRY_DICT)

    super(TwitterKazooClient, self).__init__(*args, **kw)
    self.connecting = threading.Event()
    self.__session_expirations = AtomicGauge('session_expirations')
    self.__connection_losses = AtomicGauge('connection_losses')
    self.__session_id = LambdaGauge('session_id', lambda: (self._session_id or 0))
    self.metrics.register(self.__session_expirations)
    self.metrics.register(self.__connection_losses)
    self.metrics.register(self.__session_id)
    self.add_listener(self._observable_listener)

  def _observable_listener(self, state):
    if state == KazooState.LOST:
      self.__session_expirations.increment()
    elif state == KazooState.SUSPENDED:
      self.__connection_losses.increment()

  def _session_callback(self, state):
    rc = super(TwitterKazooClient, self)._session_callback(state)
    if state == KeeperState.CONNECTING:
      self.connecting.set()
    return rc

  @property
  def live(self):
    return self._live
Code Example #4
 def _init_metrics(self):
     self._session_expirations = AtomicGauge('session_expirations')
     self._connection_losses = AtomicGauge('connection_losses')
     self.metrics.register(self._session_expirations)
     self.metrics.register(self._connection_losses)
     self.metrics.register(
         LambdaGauge('session_id', lambda: self.session_id))
     self.metrics.register(
         LambdaGauge('live', lambda: int(self._live.is_set())))
Code Example #5
def test_named_gauge_types():
  with pytest.raises(TypeError):
    ag = AtomicGauge(0)
  with pytest.raises(TypeError):
    ag = AtomicGauge(None)
  with pytest.raises(TypeError):
    lb = Label(None, 3)
  with pytest.raises(TypeError):
    mg = MutatorGauge({})
Code Example #6
File: varz.py Project: CodeWarltz/commons
class StatusStats(Observable):
  def __init__(self):
    self._count = AtomicGauge('count')
    self._ns = AtomicGauge('total_ns')
    self.metrics.register(self._count)
    self.metrics.register(self._ns)

  def increment(self, ns):
    self._count.increment()
    self._ns.add(ns)
Code Example #7
def test_atomic_gauge_types():
  with pytest.raises(TypeError):
    ag = AtomicGauge('a', None)
  with pytest.raises(TypeError):
    ag = AtomicGauge('a', 'hello')
  ag = AtomicGauge('a', 23)
  with pytest.raises(TypeError):
    ag.add(None)
  with pytest.raises(TypeError):
    ag.add('hello')
Code Example #8
File: kazoo_client.py Project: CodeWarltz/commons
 def __init__(self, *args, **kw):
   super(TwitterKazooClient, self).__init__(*args, **kw)
   self.connecting = threading.Event()
   self.__session_expirations = AtomicGauge('session_expirations')
   self.__connection_losses = AtomicGauge('connection_losses')
   self.__session_id = LambdaGauge('session_id', lambda: (self._session_id or 0))
   self.metrics.register(self.__session_expirations)
   self.metrics.register(self.__connection_losses)
   self.metrics.register(self.__session_id)
   self.add_listener(self._observable_listener)
Code Example #9
File: varz.py Project: wcauchois/commons-1
class StatusStats(Observable):
    def __init__(self):
        self._count = AtomicGauge('count')
        self._ns = AtomicGauge('total_ns')
        self.metrics.register(self._count)
        self.metrics.register(self._ns)

    def increment(self, ns):
        self._count.increment()
        self._ns.add(ns)
Code Example #10
File: kazoo_client.py Project: testvidya11/commons
 def __init__(self, *args, **kw):
     super(TwitterKazooClient, self).__init__(*args, **kw)
     self.connecting = threading.Event()
     self.__session_expirations = AtomicGauge('session_expirations')
     self.__connection_losses = AtomicGauge('connection_losses')
     self.__session_id = LambdaGauge('session_id', lambda:
                                     (self._session_id or 0))
     self.metrics.register(self.__session_expirations)
     self.metrics.register(self.__connection_losses)
     self.metrics.register(self.__session_id)
     self.add_listener(self._observable_listener)
Code Example #11
def test_atomic_gauge():
  ag = AtomicGauge('a')
  assert ag.name() == 'a'
  assert ag.read() == 0
  assert ag.add(-2) == -2
  ag = AtomicGauge('a')
  assert ag.decrement() == -1
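Read together, the tests in examples #5, #7, and #11 pin down the counter contract: the gauge name must be a string, the value must be numeric (defaulting to 0), and mutators such as add() and decrement() apply atomically and return the new value. A minimal sketch assuming only that demonstrated behavior:

from twitter.common.metrics import AtomicGauge

requests = AtomicGauge('requests')  # name must be a string; the value defaults to 0
requests.add(4)                     # returns the new value, 4
requests.increment()                # bumps the counter by one
assert requests.read() == 5
assert requests.name() == 'requests'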
Code Example #12
File: kazoo_client.py Project: wcauchois/commons-1
  def __init__(self, *args, **kw):
    if 'connection_retry' not in kw:
      # The default backoff delay limit in kazoo is 3600 seconds, which is generally
      # too conservative for our use cases.  If not supplied by the caller, provide
      # a backoff that will truncate earlier.
      kw['connection_retry'] = KazooRetry(
          max_delay=DEFAULT_RETRY_MAX_DELAY_SECS, **DEFAULT_RETRY_DICT)

    super(TwitterKazooClient, self).__init__(*args, **kw)
    self.connecting = threading.Event()
    self.__session_expirations = AtomicGauge('session_expirations')
    self.__connection_losses = AtomicGauge('connection_losses')
    self.__session_id = LambdaGauge('session_id', lambda: (self._session_id or 0))
    self.metrics.register(self.__session_expirations)
    self.metrics.register(self.__connection_losses)
    self.metrics.register(self.__session_id)
    self.add_listener(self._observable_listener)
Code Example #13
    def _setup_metrics(self):
        self._metrics = self.Metrics()

        self._metrics.cluster_count = self.metrics.register(
            AtomicGauge('cluster_count', 0))

        # Total resources requested by the scheduler's clients. When a cluster is created its resources
        # are added to the total; when it's deleted its resources are subtracted from the total.
        # NOTE: These are 'requested' resources, independent of the resources offered by Mesos
        # or allocated to or used by Mysos tasks running on the Mesos cluster.
        self._metrics.total_requested_cpus = self.metrics.register(
            MutatorGauge('total_requested_cpus', 0.))
        self._metrics.total_requested_mem_mb = self.metrics.register(
            MutatorGauge('total_requested_mem_mb', 0.))
        self._metrics.total_requested_disk_mb = self.metrics.register(
            MutatorGauge('total_requested_disk_mb', 0.))

        # 1: registered; 0: not registered.
        self._metrics.framework_registered = self.metrics.register(
            MutatorGauge('framework_registered', 0))

        self._startup_time = datetime.utcnow()
        self._metrics.uptime = self.metrics.register(
            LambdaGauge(
                'uptime',
                lambda: (datetime.utcnow() - self._startup_time).total_seconds()))

        # Counters for tasks in terminal states.
        self._metrics.tasks_lost = self.metrics.register(
            AtomicGauge('tasks_lost', 0))
        self._metrics.tasks_finished = self.metrics.register(
            AtomicGauge('tasks_finished', 0))
        self._metrics.tasks_failed = self.metrics.register(
            AtomicGauge('tasks_failed', 0))
        self._metrics.tasks_killed = self.metrics.register(
            AtomicGauge('tasks_killed', 0))

        self._metrics.resource_offers = self.metrics.register(
            AtomicGauge('resource_offers', 0))
        self._metrics.offers_incompatible_role = self.metrics.register(
            AtomicGauge('offers_incompatible_role', 0))

        self._metrics.tasks_launched = self.metrics.register(
            AtomicGauge('tasks_launched', 0))

        # 'offers_unused' counts offers declined because the scheduler was idle or the
        # resources didn't fit, i.e.,
        # 'resource_offers' - 'tasks_launched' - 'offers_incompatible_role'.
        self._metrics.offers_unused = self.metrics.register(
            AtomicGauge('offers_unused', 0))
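The MutatorGauge metrics above hold totals that the scheduler rewrites as clusters are created and deleted, while the AtomicGauge counters are only ever incremented. A hedged sketch of how such a total might be maintained; the write()/read() calls on MutatorGauge are assumptions, since the listing only shows construction and registration:

from twitter.common.metrics import MutatorGauge

total_requested_cpus = MutatorGauge('total_requested_cpus', 0.)

def cluster_created(cpus):
    # Assumed API: MutatorGauge.write()/read() are not shown in the listings above.
    total_requested_cpus.write(total_requested_cpus.read() + cpus)

def cluster_deleted(cpus):
    total_requested_cpus.write(total_requested_cpus.read() - cpus)

cluster_created(2.5)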
Code Example #14
File: kazoo_client.py Project: testvidya11/commons
class TwitterKazooClient(KazooClient, Observable):
    @classmethod
    def make(cls, *args, **kw):
        verbose = kw.pop('verbose', False)
        async = kw.pop('async', True)
        if verbose is False:
            kw['logger'] = logging.Logger('kazoo.devnull', level=sys.maxsize)
        zk = cls(*args, **kw)
        if async:
            zk.start_async()
            zk.connecting.wait()
        else:
            zk.start()

        return zk

    def __init__(self, *args, **kw):
        super(TwitterKazooClient, self).__init__(*args, **kw)
        self.connecting = threading.Event()
        self.__session_expirations = AtomicGauge('session_expirations')
        self.__connection_losses = AtomicGauge('connection_losses')
        self.__session_id = LambdaGauge('session_id', lambda:
                                        (self._session_id or 0))
        self.metrics.register(self.__session_expirations)
        self.metrics.register(self.__connection_losses)
        self.metrics.register(self.__session_id)
        self.add_listener(self._observable_listener)

    def _observable_listener(self, state):
        if state == KazooState.LOST:
            self.__session_expirations.increment()
        elif state == KazooState.SUSPENDED:
            self.__connection_losses.increment()

    def _session_callback(self, state):
        rc = super(TwitterKazooClient, self)._session_callback(state)
        if state == KeeperState.CONNECTING:
            self.connecting.set()
        return rc

    @property
    def live(self):
        return self._live
Code Example #15
File: kazoo_client.py Project: CodeWarltz/commons
class TwitterKazooClient(KazooClient, Observable):
  @classmethod
  def make(cls, *args, **kw):
    verbose = kw.pop('verbose', False)
    async = kw.pop('async', True)
    if verbose is False:
      kw['logger'] = logging.Logger('kazoo.devnull', level=sys.maxsize)
    zk = cls(*args, **kw)
    if async:
      zk.start_async()
      zk.connecting.wait()
    else:
      zk.start()

    return zk

  def __init__(self, *args, **kw):
    super(TwitterKazooClient, self).__init__(*args, **kw)
    self.connecting = threading.Event()
    self.__session_expirations = AtomicGauge('session_expirations')
    self.__connection_losses = AtomicGauge('connection_losses')
    self.__session_id = LambdaGauge('session_id', lambda: (self._session_id or 0))
    self.metrics.register(self.__session_expirations)
    self.metrics.register(self.__connection_losses)
    self.metrics.register(self.__session_id)
    self.add_listener(self._observable_listener)

  def _observable_listener(self, state):
    if state == KazooState.LOST:
      self.__session_expirations.increment()
    elif state == KazooState.SUSPENDED:
      self.__connection_losses.increment()

  def _session_callback(self, state):
    rc = super(TwitterKazooClient, self)._session_callback(state)
    if state == KeeperState.CONNECTING:
      self.connecting.set()
    return rc

  @property
  def live(self):
    return self._live
Code Example #16
File: pingpong.py Project: EricCen/commons
class PingPongServer(Observable):
    PING_DELAY = Amount(1, Time.SECONDS)

    def __init__(self, target_host, target_port, clock=time):
        self._clock = clock
        self._target = (target_host, target_port)
        self._pings = AtomicGauge("pings")
        self.metrics.register(self._pings)

    def send_request(self, endpoint, message, ttl):
        url_base = "http://%s:%d" % self._target
        try:
            urllib2.urlopen("%s/%s/%s/%d" % (url_base, endpoint, message, ttl)).read()
        except Exception as e:
            log.error("Failed to query %s: %s" % (url_base, e))

    @HttpServer.route("/ping/:message")
    @HttpServer.route("/ping/:message/:ttl")
    def ping(self, message, ttl=60):
        self._pings.increment()
        log.info("Got ping (ttl=%s): %s" % (message, ttl))
        ttl = int(ttl) - 1
        if ttl > 0:
            defer(partial(self.send_request, "ping", message, ttl), delay=self.PING_DELAY, clock=self._clock)
Code Example #17
File: client.py Project: xianxu/pants
 def _init_stats(self):
     self._gauge_session_expirations = AtomicGauge('session-expirations')
     self._gauge_connection_losses = AtomicGauge('connection-losses')
Code Example #18
File: pingpong.py Project: ycaihua/twitter-commons
 def __init__(self, target_host, target_port, clock=time):
     self._clock = clock
     self._target = (target_host, target_port)
     self._pings = AtomicGauge('pings')
     self.metrics.register(self._pings)
Code Example #19
class ZooKeeper(Observable):
    """A convenience wrapper around the low-level ZooKeeper API.

  Blocks until the initial connection is established, and proxies method calls
  to the corresponding ZK functions, passing the handle.

  Supports both synchronous and asynchronous APIs.

  Synchronous API Notes:

    Synchronous calls will block across connection loss or session
    expiration until reconnected.

  Asynchronous API Notes:

    Asynchronous calls will queue up while the session/connection is
    unhealthy and only be dispatched while zookeeper is healthy.  It is
    still possible for asynchronous calls to fail should the session be
    severed after the call has been successfully dispatched.  In other
    words: don't assume your rc will always be zookeeper.OK.

    Watches will behave as normal assuming successful dispatch.  In general
    when using this wrapper, you should retry your call if your watch is
    fired with EXPIRED_SESSION_STATE and ignore anything else whose state is
    not CONNECTED_STATE.  This wrapper will never re-dispatch calls that
    have been sent to zookeeper without error.
  """
    class Error(Exception):
        pass

    class ConnectionTimeout(Error):
        pass

    class InvalidEnsemble(Error):
        pass

    class Stopped(Error):
        pass

    # White-list of methods that accept a ZK handle as their first argument
    _ZK_SYNC_METHODS = frozenset([
        'add_auth',
        'close',
        'create',
        'delete',
        'exists',
        'get',
        'get_acl',
        'get_children',
        'is_unrecoverable',
        'recv_timeout',
        'set',
        'set2',
        'set_acl',
        'set_watcher',
        'state',
    ])

    _ZK_ASYNC_METHODS = frozenset([
        'acreate', 'adelete', 'aexists', 'aget', 'aget_acl', 'aget_children',
        'aset', 'aset_acl', 'async'
    ])

    COMPLETION_RETRY = frozenset([
        zookeeper.CONNECTIONLOSS,
        zookeeper.OPERATIONTIMEOUT,
        zookeeper.SESSIONEXPIRED,
        zookeeper.CLOSING,
    ])

    @classmethod
    def expand_ensemble(cls, servers):
        """Expand comma-separated list of host:port to comma-separated, fully-resolved list of ip:port."""
        server_ports = []
        for server_port in servers.split(','):
            server_split = server_port.split(':', 2)
            if len(server_split) == 1:
                server, port = server_split[0], cls.DEFAULT_PORT
            else:
                try:
                    server, port = server_split[0], int(server_split[1])
                except ValueError:
                    raise cls.InvalidEnsemble('Invalid ensemble string: %s' %
                                              server_port)
            try:
                for ip in socket.gethostbyname_ex(server)[2]:
                    server_ports.append('%s:%s' % (ip, port))
            except socket.gaierror:
                raise cls.InvalidEnsemble('Could not resolve %s' % server)
        return ','.join(server_ports)

    DEFAULT_TIMEOUT_SECONDS = 30.0
    DEFAULT_ENSEMBLE = 'localhost:2181'
    DEFAULT_PORT = 2181
    DEFAULT_ACL = ZooDefs.Acls.OPEN_ACL_UNSAFE
    MAX_RECONNECTS = 1

    # (is live?, is stopped?) => human readable status
    STATUS_MATRIX = {
        (True, True): 'WTF',
        (True, False): 'OK',
        (False, True): 'STOPPED',
        (False, False): 'CONNECTING'
    }

    class Completion(object):
        def __init__(self, zk, function, *args, **kw):
            self._zk = zk
            self._cid = random.randint(0, sys.maxint - 1)
            self._logger = kw.pop('logger', log.debug)

            @wraps(function)
            def wrapper(zh):
                return function(zh, *args, **kw)

            self._fn = wrapper
            self._logger('Created %s args:(%s) kw:{%s}' % (
                self,
                ', '.join(map(repr, args)),
                ', '.join('%s: %r' % (key, val) for key, val in kw.items())))

        def __str__(self):
            return '%s(id:%s, zh:%s, %s)' % (self.__class__.__name__,
                                             self._cid, self._zk._zh,
                                             self._fn.__name__)

        def __call__(self):
            try:
                self._logger('%s start' % self)
                result = self._fn(self._zk._zh)
                self._logger('%s success' % self)
                return result
            except TypeError as e:
                # Raced; zh now dead, so re-enqueue.
                if self._zk._zh is not None:
                    raise
                self._logger('%s raced, re-enqueueing' % self)
                self._zk._add_completion(self._fn)
            except (zookeeper.ConnectionLossException,
                    zookeeper.InvalidStateException,
                    zookeeper.SessionExpiredException, SystemError) as e:
                self._logger('%s excepted (%s), re-enqueueing' % (self, e))
                self._zk._add_completion(self._fn)
            return zookeeper.OK

    # N.B.(wickman) This code is theoretically racy.  We cannot synchronize
    # events across the zookeeper C event loop, however we do everything in
    # our power to catch transitional latches.  These are almost always
    # exercised in tests and never in practice.
    #
    # TODO(wickman) ConnectionLoss probably does not encapsulate all the
    # exception states that arise on connection loss and/or session
    # expiration.  However, we don't want to blanket catch ZooKeeperException
    # because some things e.g.  get() will raise NoNodeException.  We should
    # partition the exception space in two: behavioral exceptions and, well,
    # exceptional exceptions.
    class BlockingCompletion(Completion):
        def __call__(self):
            while True:
                try:
                    self._logger('%s start' % self)
                    result = self._fn(self._zk._zh)
                    self._logger('%s success' % self)
                    return result
                except (zookeeper.ConnectionLossException,
                        zookeeper.InvalidStateException,
                        zookeeper.SessionExpiredException, TypeError) as e:
                    # TypeError because we raced on live latch from True=>False when _zh gets reinitialized.
                    if isinstance(e, TypeError) and self._zk._zh is not None:
                        self._logger('%s excepted, user error' % self)
                        raise
                    # We had the misfortune of the live latch being set but having a session event propagate
                    # before the BlockingCompletion could be executed.
                    while not self._zk._stopped.is_set():
                        self._logger(
                            '%s [live: %s] excepted on connection event: %s' %
                            (self, self._zk._live.is_set(), e))
                        self._zk._live.wait(timeout=0.1)
                        if self._zk._live.is_set():
                            break
                    if self._zk._stopped.is_set():
                        raise ZooKeeper.Stopped('ZooKeeper is stopped.')
                except Exception as e:
                    self._logger('%s excepted unexpectedly: %s' % (self, e))
                    raise

    def __init__(self,
                 servers=None,
                 timeout_secs=None,
                 watch=None,
                 max_reconnects=None,
                 authentication=None,
                 logger=log.debug):
        """Create new ZooKeeper object.

    Blocks until ZK negotiation completes, or the timeout expires. By default
    only tries to connect once.  Use a larger 'max_reconnects' if you want
    to be resilient to things such as DNS outages/changes.

    If watch is set to a function, it is called whenever the global
    zookeeper watch is dispatched using the same function signature, with the
    exception that this object is used in place of the zookeeper handle.

    If authentication is set, it should be a tuple of (scheme, credentials),
    for example, ('digest', 'username:password')
    """

        default_ensemble = self.DEFAULT_ENSEMBLE
        default_timeout = self.DEFAULT_TIMEOUT_SECONDS
        default_reconnects = self.MAX_RECONNECTS
        if WITH_APP:
            options = app.get_options()
            default_ensemble = options.twitter_common_zookeeper_ensemble
            default_timeout = options.twitter_common_zookeeper_timeout
            default_reconnects = options.twitter_common_zookeeper_reconnects
        self._servers = servers or default_ensemble
        self._timeout_secs = timeout_secs or default_timeout
        self._init_count = 0
        self._credentials = authentication
        self._authenticated = threading.Event()
        self._live = threading.Event()
        self._stopped = threading.Event()
        self._completions = Queue()
        self._zh = None
        self._watch = watch
        self._logger = logger
        self._max_reconnects = max_reconnects if max_reconnects is not None else default_reconnects
        self._init_metrics()
        self.reconnect()

    def __del__(self):
        self._safe_close()

    def _log(self, msg):
        self._logger('[zh:%s] %s' % (self._zh, msg))

    def _init_metrics(self):
        self._session_expirations = AtomicGauge('session_expirations')
        self._connection_losses = AtomicGauge('connection_losses')
        self.metrics.register(self._session_expirations)
        self.metrics.register(self._connection_losses)
        self.metrics.register(
            LambdaGauge('session_id', lambda: self.session_id))
        self.metrics.register(
            LambdaGauge('live', lambda: int(self._live.is_set())))

    @property
    def session_id(self):
        try:
            session_id, _ = zookeeper.client_id(self._zh)
            return session_id
        except:
            return None

    @property
    def session_expirations(self):
        return self._session_expirations.read()

    @property
    def connection_losses(self):
        return self._connection_losses.read()

    @property
    def live(self):
        return self._live

    def stop(self):
        """Gracefully stop this Zookeeper session."""
        self._log('Shutting down ZooKeeper')
        self._stopped.set()
        self._safe_close()
        self._completions = Queue()  # there is no .clear()

    def restart(self):
        """Stop and restart this Zookeeper session.  Unfinished completions will be retried
       on reconnection."""
        self._safe_close()
        self._stopped.clear()
        self.reconnect()

    def _safe_close(self):
        if self._zh is not None:
            zh, self._zh = self._zh, None
            try:
                zookeeper.close(zh)
            except zookeeper.ZooKeeperException:
                # the session has been corrupted or otherwise disconnected
                pass
            self._live.clear()

    def _add_completion(self, function, *args, **kw):
        self._completions.put(
            self.Completion(self, function, logger=self._log, *args, **kw))

    def _clear_completions(self):
        while self._live.is_set():
            try:
                completion = self._completions.get_nowait()
                completion()
                self._completions.task_done()
            except Empty:
                return

    def reconnect(self):
        """Attempt to reconnect to ZK."""
        if self._stopped.is_set():
            self._safe_close()
            return

        def safe_close(zh):
            try:
                zookeeper.close(zh)
            except:
                # TODO(wickman) When the SystemError bug is fixed in zkpython, narrow this except clause.
                pass

        def activate():
            self._authenticated.set()
            self._live.set()

        def on_authentication(zh, rc):
            if self._zh != zh:
                safe_close(zh)
                return
            if rc == zookeeper.OK:
                activate()

        def maybe_authenticate():
            if self._authenticated.is_set() or not self._credentials:
                activate()
                return
            try:
                scheme, credentials = self._credentials
                zookeeper.add_auth(self._zh, scheme, credentials,
                                   on_authentication)
            except zookeeper.ZooKeeperException as e:
                self._logger('Failed to authenticate: %s' % e)

        def connection_handler(handle, type, state, path):
            if self._zh != handle:
                safe_close(handle)
                return
            if self._stopped.is_set():
                return
            if self._watch:
                self._watch(self, type, state, path)
            if state == zookeeper.CONNECTED_STATE:
                self._logger('Connection started, setting live.')
                maybe_authenticate()
                self._clear_completions()
            elif state == zookeeper.EXPIRED_SESSION_STATE:
                self._logger('Session lost, clearing live state.')
                self._session_expirations.increment()
                self._live.clear()
                self._authenticated.clear()
                self._zh = None
                self._init_count = 0
                self.reconnect()
            else:
                self._logger('Connection lost, clearing live state.')
                self._connection_losses.increment()
                self._live.clear()

        # this closure is exposed for testing only -- in order to simulate session events.
        self._handler = connection_handler

        timeout_ms = int(self._timeout_secs * 1000)
        while True:
            self._safe_close()
            servers = self.expand_ensemble(self._servers)
            self._log('Connecting to ZK hosts at %s' % servers)
            self._zh = zookeeper.init(servers, connection_handler, timeout_ms)
            self._init_count += 1
            self._live.wait(self._timeout_secs + 1)
            if self._live.is_set():
                break
            elif self._max_reconnects > 0 and self._init_count >= self._max_reconnects:
                self._safe_close()
                raise ZooKeeper.ConnectionTimeout(
                    'Timed out waiting for ZK connection to %s' % servers)
        self._log('Successfully connected to ZK at %s' % servers)

    def _wrap_sync(self, function_name):
        """Wrap a zookeeper module function in an error-handling completion that injects the
       current zookeeper handle as the first parameter."""
        function = getattr(zookeeper, function_name)

        @wraps(function)
        def _curry(*args, **kwargs):
            return self.BlockingCompletion(self,
                                           function,
                                           logger=self._log,
                                           *args,
                                           **kwargs)()

        return _curry

    def _wrap_async(self, function_name):
        """Wrap an asynchronous zookeeper module function in an error-handling
       completion that injects the current zookeeper handle as the first
       parameter and puts it on a completion queue if the current connection
       state is unhealthy."""
        function = getattr(zookeeper, function_name)

        @wraps(function)
        def _curry(*args, **kwargs):
            completion = self.Completion(self,
                                         function,
                                         logger=self._log,
                                         *args,
                                         **kwargs)
            if self._live.is_set():
                return completion()
            else:
                # TODO(wickman)  This is racy, should it go from not live => live
                # prior to Queue.put.  Two solutions: a periodic background thread
                # that attempts to empty the completion queue, or use a mutex-protected
                # container for self._live.
                self._completions.put(
                    self.Completion(self,
                                    function,
                                    logger=self._log,
                                    *args,
                                    **kwargs))
                return zookeeper.OK  # proxy OK.

        return _curry

    def safe_create(self, path, acl=DEFAULT_ACL):
        child = '/'
        for component in filter(None, path.split('/')):
            child = posixpath.join(child, component)
            try:
                self.create(child, "", acl, 0)
            except zookeeper.NodeExistsException:
                continue
            except zookeeper.NoAuthException:
                if not self.exists(child):
                    raise
        return child

    def safe_delete(self, path):
        try:
            if not self.exists(path):
                return True
            for child in self.get_children(path):
                if not self.safe_delete(posixpath.join(path, child)):
                    return False
            self.delete(path)
        except zookeeper.ZooKeeperException:
            return False
        return True

    def __getattr__(self, function_name):
        """Proxy to underlying ZK functions."""
        if function_name in ZooKeeper._ZK_SYNC_METHODS:
            return self._wrap_sync(function_name)
        elif function_name in ZooKeeper._ZK_ASYNC_METHODS:
            return self._wrap_async(function_name)
        else:
            raise AttributeError('%r has no attribute %r' %
                                 (self, function_name))

    def __str__(self):
        return 'ZooKeeper(status=%s,queued=%d,servers=%r)' % (
            self.STATUS_MATRIX[(self._live.is_set(), self._stopped.is_set())],
            self._completions.qsize(), self._servers)

    def __repr__(self):
        return 'ZooKeeper(servers=%r)' % self._servers
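Usage note: the wrapper above proxies the whitelisted method names to the zkpython module with the live handle injected, so synchronous calls such as get() and set() block across connection loss until the session recovers. A hedged usage sketch; the paths and payload are made up, and the proxied zkpython signatures are assumptions inferred from the whitelist above:

# Blocks until connected; raises ZooKeeper.ConnectionTimeout on failure.
zk = ZooKeeper(servers='localhost:2181', timeout_secs=10)

zk.safe_create('/examples/demo')       # creates each path component, tolerating existing nodes
zk.set('/examples/demo', 'hello')      # proxied to zookeeper.set(zh, path, data); blocks while unhealthy
data, stat = zk.get('/examples/demo')  # proxied to zookeeper.get(zh, path); returns (data, stat)

print('%s (expirations=%s, losses=%s)' % (data, zk.session_expirations, zk.connection_losses))
zk.stop()  # gracefully closes the session; queued completions are discarded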
Code Example #20
File: client.py Project: BabyDuncan/commons
 def _init_stats(self):
   self._gauge_session_expirations = AtomicGauge('session-expirations')
   self._gauge_connection_losses = AtomicGauge('connection-losses')
Code Example #21
File: client.py Project: BabyDuncan/commons
class ZooKeeper(object):
  """A convenience wrapper around the low-level ZooKeeper API.

  Blocks until the initial connection is established, and proxies method calls
  to the corresponding ZK functions, passing the handle.

  Supports both synchronous and asynchronous APIs.

  Synchronous API Notes:

    Synchronous calls will block across connection loss or session
    expiration until reconnected.

  Asynchronous API Notes:

    Asynchronous calls will queue up while the session/connection is
    unhealthy and only be dispatched while zookeeper is healthy.  It is
    still possible for asynchronous calls to fail should the session be
    severed after the call has been successfully dispatched.  In other
    words: don't assume your rc will always be zookeeper.OK.

    Watches will behave as normal assuming successful dispatch.  In general
    when using this wrapper, you should retry your call if your watch is
    fired with EXPIRED_SESSION_STATE and ignore anything else whose state is
    not CONNECTED_STATE.  This wrapper will never re-dispatch calls that
    have been sent to zookeeper without error.
  """

  class Error(Exception): pass
  class ConnectionTimeout(Error): pass
  class InvalidEnsemble(Error): pass
  class Stopped(Error): pass

  # White-list of methods that accept a ZK handle as their first argument
  _ZK_SYNC_METHODS = frozenset([
      'add_auth', 'close', 'create', 'delete', 'exists', 'get', 'get_acl',
      'get_children', 'is_unrecoverable', 'recv_timeout', 'set', 'set2',
      'set_acl', 'set_watcher', 'state',
   ])

  _ZK_ASYNC_METHODS = frozenset([
      'acreate', 'adelete', 'aexists', 'aget', 'aget_acl', 'aget_children', 'aset',
      'aset_acl', 'async'
  ])

  COMPLETION_RETRY = frozenset([
    zookeeper.CONNECTIONLOSS,
    zookeeper.OPERATIONTIMEOUT,
    zookeeper.SESSIONEXPIRED,
    zookeeper.CLOSING,
  ])

  @classmethod
  def expand_ensemble(cls, servers):
    """Expand comma-separated list of host:port to comma-separated, fully-resolved list of ip:port."""
    server_ports = []
    for server_port in servers.split(','):
      server_split = server_port.split(':', 2)
      if len(server_split) == 1:
        server, port = server_split[0], cls.DEFAULT_PORT
      else:
        try:
          server, port = server_split[0], int(server_split[1])
        except ValueError:
          raise cls.InvalidEnsemble('Invalid ensemble string: %s' % server_port)
      try:
        for ip in set(socket.gethostbyname_ex(server)[2]):
          server_ports.append('%s:%s' % (ip, port))
      except socket.gaierror:
        raise cls.InvalidEnsemble('Could not resolve %s' % server)
    return ','.join(server_ports)

  DEFAULT_TIMEOUT_SECONDS = 30.0
  DEFAULT_ENSEMBLE = 'localhost:2181'
  DEFAULT_PORT = 2181
  DEFAULT_ACL = ZooDefs.Acls.OPEN_ACL_UNSAFE
  MAX_RECONNECTS = 1

  # (is live?, is stopped?) => human readable status
  STATUS_MATRIX = {
    (True, True): 'WTF',
    (True, False): 'OK',
    (False, True): 'STOPPED',
    (False, False): 'CONNECTING'
  }

  class Completion(object):
    def __init__(self, zk, function, *args, **kw):
      self._zk = zk
      self._cid = random.randint(0, sys.maxint - 1)
      self._logger = kw.pop('logger', log.debug)
      @wraps(function)
      def wrapper(zh):
        return function(zh, *args, **kw)
      self._fn = wrapper
      self._logger('Created %s args:(%s) kw:{%s}' % (
        self,
        ', '.join(map(repr, args)),
        ', '.join('%s: %r' % (key, val) for key, val in kw.items())))

    def __str__(self):
      return '%s(id:%s, zh:%s, %s)' % (
          self.__class__.__name__, self._cid, self._zk._zh, self._fn.__name__)

    def __call__(self):
      try:
        self._logger('%s start' % self)
        result = self._fn(self._zk._zh)
        self._logger('%s success' % self)
        return result
      except TypeError as e:
        # Raced; zh now dead, so re-enqueue.
        if self._zk._zh is not None:
          raise
        self._logger('%s raced, re-enqueueing' % self)
        self._zk._add_completion(self._fn)
      except (zookeeper.ConnectionLossException, zookeeper.InvalidStateException, SystemError) as e:
        self._logger('%s excepted (%s), re-enqueueing' % (self, e))
        self._zk._add_completion(self._fn)
      return zookeeper.OK

  # N.B.(wickman) This code is theoretically racy.  We cannot synchronize
  # events across the zookeeper C event loop, however we do everything in
  # our power to catch transitional latches.  These are almost always
  # exercised in tests and never in practice.
  #
  # TODO(wickman) ConnectionLoss probably does not encapsulate all the
  # exception states that arise on connection loss and/or session
  # expiration.  However, we don't want to blanket catch ZooKeeperException
  # because some things e.g.  get() will raise NoNodeException.  We should
  # partition the exception space in two: behavioral exceptions and, well,
  # exceptional exceptions.
  class BlockingCompletion(Completion):
    def __call__(self):
      while True:
        try:
          self._logger('%s start' % self)
          result = self._fn(self._zk._zh)
          self._logger('%s success' % self)
          return result
        except (zookeeper.ConnectionLossException,
                zookeeper.InvalidStateException,
                TypeError) as e:
          # TypeError because we raced on live latch from True=>False when _zh gets reinitialized.
          if isinstance(e, TypeError) and self._zk._zh is not None:
            self._logger('%s excepted, user error' % self)
            raise
          # We had the misfortune of the live latch being set but having a session event propagate
          # before the BlockingCompletion could be executed.
          while not self._zk._stopped.is_set():
            self._logger('%s [live: %s] excepted on connection event: %s' % (
                self, self._zk._live.is_set(), e))
            self._zk._live.wait(timeout=0.1)
            if self._zk._live.is_set():
              break
          if self._zk._stopped.is_set():
            raise ZooKeeper.Stopped('ZooKeeper is stopped.')
        except Exception as e:
          self._logger('%s excepted unexpectedly: %s' % (self, e))
          raise

  def __init__(self,
               servers=None,
               timeout_secs=None,
               watch=None,
               max_reconnects=None,
               authentication=None,
               logger=log.debug):
    """Create new ZooKeeper object.

    Blocks until ZK negotiation completes, or the timeout expires. By default
    only tries to connect once.  Use a larger 'max_reconnects' if you want
    to be resilient to things such as DNS outages/changes.

    If watch is set to a function, it is called whenever the global
    zookeeper watch is dispatched using the same function signature, with the
    exception that this object is used in place of the zookeeper handle.

    If authentication is set, it should be a tuple of (scheme, credentials),
    for example, ('digest', 'username:password')
    """

    default_ensemble = self.DEFAULT_ENSEMBLE
    default_timeout = self.DEFAULT_TIMEOUT_SECONDS
    default_reconnects = self.MAX_RECONNECTS
    if WITH_APP:
      options = app.get_options()
      default_ensemble = options.twitter_common_zookeeper_ensemble
      default_timeout = options.twitter_common_zookeeper_timeout
      default_reconnects = options.twitter_common_zookeeper_reconnects
    self._servers = servers or default_ensemble
    self._timeout_secs = timeout_secs or default_timeout
    self._init_count = 0
    self._credentials = authentication
    self._authenticated = threading.Event()
    self._live = threading.Event()
    self._stopped = threading.Event()
    self._completions = Queue()
    self._zh = None
    self._watch = watch
    self._logger = logger
    self._max_reconnects = max_reconnects if max_reconnects is not None else default_reconnects
    self._init_stats()
    self.reconnect()

  def __del__(self):
    self._safe_close()

  def _log(self, msg):
    self._logger('[zh:%s] %s' % (self._zh, msg))

  def _init_stats(self):
    self._gauge_session_expirations = AtomicGauge('session-expirations')
    self._gauge_connection_losses = AtomicGauge('connection-losses')

  def session_id(self):
    try:
      session_id, _ = zookeeper.client_id(self._zh)
      return session_id
    except:
      return None

  @property
  def session_expirations(self):
    return self._gauge_session_expirations.read()

  @property
  def connection_losses(self):
    return self._gauge_connection_losses.read()

  @property
  def live(self):
    return self._live.is_set()

  def stop(self):
    """Gracefully stop this Zookeeper session."""
    self._log('Shutting down ZooKeeper')
    self._stopped.set()
    self._safe_close()
    self._completions = Queue()  # there is no .clear()

  def restart(self):
    """Stop and restart this Zookeeper session.  Unfinished completions will be retried
       on reconnection."""
    self._safe_close()
    self._stopped.clear()
    self.reconnect()

  def _safe_close(self):
    if self._zh is not None:
      zh, self._zh = self._zh, None
      try:
        zookeeper.close(zh)
      except zookeeper.ZooKeeperException:
        # the session has been corrupted or otherwise disconnected
        pass
      self._live.clear()

  def _add_completion(self, function, *args, **kw):
    self._completions.put(self.Completion(self, function, logger=self._log, *args, **kw))

  def _clear_completions(self):
    while self._live.is_set():
      try:
        completion = self._completions.get_nowait()
        completion()
        self._completions.task_done()
      except Empty:
        return

  def reconnect(self):
    """Attempt to reconnect to ZK."""
    if self._stopped.is_set():
      self._safe_close()
      return

    def safe_close(zh):
      try:
        zookeeper.close(zh)
      except:
        # TODO(wickman) When the SystemError bug is fixed in zkpython, narrow this except clause.
        pass

    def activate():
      self._authenticated.set()
      self._live.set()

    def on_authentication(zh, rc):
      if self._zh != zh:
        safe_close(zh)
        return
      if rc == zookeeper.OK:
        activate()

    def maybe_authenticate():
      if self._authenticated.is_set() or not self._credentials:
        activate()
        return
      try:
        scheme, credentials = self._credentials
        zookeeper.add_auth(self._zh, scheme, credentials, on_authentication)
      except zookeeper.ZooKeeperException as e:
        self._logger('Failed to authenticate: %s' % e)

    def connection_handler(handle, type, state, path):
      if self._zh != handle:
        safe_close(handle)
        return
      if self._stopped.is_set():
        return
      if self._watch:
        self._watch(self, type, state, path)
      if state == zookeeper.CONNECTED_STATE:
        self._logger('Connection started, setting live.')
        maybe_authenticate()
        self._clear_completions()
      elif state == zookeeper.EXPIRED_SESSION_STATE:
        self._logger('Session lost, clearing live state.')
        self._gauge_session_expirations.increment()
        self._live.clear()
        self._authenticated.clear()
        self._zh = None
        self._init_count = 0
        self.reconnect()
      else:
        self._logger('Connection lost, clearing live state.')
        self._gauge_connection_losses.increment()
        self._live.clear()

    # this closure is exposed for testing only -- in order to simulate session events.
    self._handler = connection_handler

    timeout_ms = int(self._timeout_secs * 1000)
    while True:
      self._safe_close()
      servers = self.expand_ensemble(self._servers)
      self._log('Connecting to ZK hosts at %s' % servers)
      self._zh = zookeeper.init(servers, connection_handler, timeout_ms)
      self._init_count += 1
      self._live.wait(self._timeout_secs + 1)
      if self._live.is_set():
        break
      elif self._max_reconnects > 0 and self._init_count >= self._max_reconnects:
        self._safe_close()
        raise ZooKeeper.ConnectionTimeout('Timed out waiting for ZK connection to %s' % servers)
    self._log('Successfully connected to ZK at %s' % servers)

  def _wrap_sync(self, function_name):
    """Wrap a zookeeper module function in an error-handling completion that injects the
       current zookeeper handle as the first parameter."""
    function = getattr(zookeeper, function_name)
    @wraps(function)
    def _curry(*args, **kwargs):
      return self.BlockingCompletion(self, function, logger=self._log, *args, **kwargs)()
    return _curry

  def _wrap_async(self, function_name):
    """Wrap an asynchronous zookeeper module function in an error-handling
       completion that injects the current zookeeper handle as the first
       parameter and puts it on a completion queue if the current connection
       state is unhealthy."""
    function = getattr(zookeeper, function_name)
    @wraps(function)
    def _curry(*args, **kwargs):
      completion = self.Completion(self, function, logger=self._log, *args, **kwargs)
      if self._live.is_set():
        return completion()
      else:
        # TODO(wickman)  This is racy, should it go from not live => live
        # prior to Queue.put.  Two solutions: a periodic background thread
        # that attempts to empty the completion queue, or use a mutex-protected
        # container for self._live.
        self._completions.put(self.Completion(self, function, logger=self._log, *args, **kwargs))
        return zookeeper.OK  # proxy OK.
    return _curry

  def safe_create(self, path, acl=DEFAULT_ACL):
    child = '/'
    for component in filter(None, path.split('/')):
      child = posixpath.join(child, component)
      try:
        self.create(child, "", acl, 0)
      except zookeeper.NodeExistsException:
        continue
      except zookeeper.NoAuthException:
        if not self.exists(child):
          raise
    return child

  def safe_delete(self, path):
    try:
      if not self.exists(path):
        return True
      for child in self.get_children(path):
        if not self.safe_delete(posixpath.join(path, child)):
          return False
      self.delete(path)
    except zookeeper.ZooKeeperException:
      return False
    return True

  def __getattr__(self, function_name):
    """Proxy to underlying ZK functions."""
    if function_name in ZooKeeper._ZK_SYNC_METHODS:
      return self._wrap_sync(function_name)
    elif function_name in ZooKeeper._ZK_ASYNC_METHODS:
      return self._wrap_async(function_name)
    else:
      raise AttributeError('%r has no attribute %r' % (self, function_name))

  def __str__(self):
    return 'ZooKeeper(status=%s,queued=%d,servers=%r)' % (
      self.STATUS_MATRIX[(self._live.is_set(), self._stopped.is_set())],
      self._completions.qsize(), self._servers)

  def __repr__(self):
    return 'ZooKeeper(servers=%r)' % self._servers
Code Example #22
File: varz.py Project: wcauchois/commons-1
 def __init__(self):
     self._count = AtomicGauge('count')
     self._ns = AtomicGauge('total_ns')
     self.metrics.register(self._count)
     self.metrics.register(self._ns)
Code Example #23
File: pingpong.py Project: EricCen/commons
 def __init__(self, target_host, target_port, clock=time):
     self._clock = clock
     self._target = (target_host, target_port)
     self._pings = AtomicGauge("pings")
     self.metrics.register(self._pings)
Code Example #24
File: varz.py Project: CodeWarltz/commons
 def __init__(self):
   self._count = AtomicGauge('count')
   self._ns = AtomicGauge('total_ns')
   self.metrics.register(self._count)
   self.metrics.register(self._ns)
Code Example #25
File: scheduler.py Project: xinqianli/mysos
    def __init__(self,
                 state,
                 state_provider,
                 framework_user,
                 executor_uri,
                 executor_cmd,
                 kazoo,
                 zk_url,
                 election_timeout,
                 admin_keypath,
                 scheduler_key,
                 installer_args=None,
                 backup_store_args=None,
                 executor_environ=None,
                 executor_source_prefix=None,
                 framework_role='*'):
        """
      :param state: The Scheduler object.
      :param state_provider: The StateProvider instance that the scheduler should use to
                             restore/persist states.
      :param framework_user: See flags.
      :param executor_uri: See flags.
      :param executor_cmd: See flags.
      :param framework_role: See flags.
      :param election_timeout: See flags.
      :param admin_keypath: See flags.
      :param scheduler_key: Scheduler uses it to encrypt cluster passwords.
      :param installer_args: See flags.
      :param backup_store_args: See flags.
      :param executor_environ: See flags.
      :param executor_source_prefix: See flags.
      :param kazoo: The Kazoo client for communicating MySQL cluster information between the
                    scheduler and the executors.
      :param zk_url: ZooKeeper URL used by the scheduler and the executors to access ZooKeeper.
    """
        self._lock = threading.Lock()

        if not isinstance(state, Scheduler):
            raise TypeError("'state' should be an instance of Scheduler")
        self._state = state

        if not isinstance(state_provider, StateProvider):
            raise TypeError(
                "'state_provider' should be an instance of StateProvider")
        self._state_provider = state_provider

        self._framework_user = framework_user
        self._executor_uri = executor_uri
        self._executor_cmd = executor_cmd
        self._framework_role = framework_role
        self._election_timeout = election_timeout
        self._admin_keypath = admin_keypath
        self._installer_args = installer_args
        self._backup_store_args = backup_store_args
        self._executor_environ = executor_environ
        self._executor_source_prefix = executor_source_prefix

        self._driver = None  # Will be set by registered().

        # Use a subdir to avoid name collision with the state storage.
        self._discover_zk_url = posixpath.join(zk_url, "discover")
        self._kazoo = kazoo

        self._scheduler_key = scheduler_key
        self._password_box = PasswordBox(scheduler_key)

        self._tasks = {}  # {Task ID: cluster name} mappings.
        # Order-preserving {cluster name : MySQLClusterLauncher} mappings so cluster
        # requests are fulfilled on a first come, first serve (FCFS) basis.
        self._launchers = OrderedDict()

        # An event set when the scheduler is stopped.
        self.stopped = threading.Event()

        # An event set when the scheduler is first connected to Mesos.
        # The scheduler tolerates later disconnections.
        self.connected = threading.Event()

        self._cluster_count = self.metrics.register(
            AtomicGauge('cluster_count', 0))

        # Total resources requested by the scheduler's clients. When a cluster is created its resources
        # are added to the total; when it's deleted its resources are subtracted from the total.
        # NOTE: These are 'requested' resources, independent of the resources offered by Mesos
        # or allocated to or used by Mysos tasks running on the Mesos cluster.
        self._total_requested_cpus = self.metrics.register(
            MutatorGauge('total_requested_cpus', 0.))
        self._total_requested_mem_mb = self.metrics.register(
            MutatorGauge('total_requested_mem_mb', 0.))
        self._total_requested_disk_mb = self.metrics.register(
            MutatorGauge('total_requested_disk_mb', 0.))