Example #1
0
  class ZookeeperMasterDetector(FutureMasterDetector):
    """Master detector that follows the membership of a ZooKeeper group.

    The member that sorts first in the group is treated as the leader; its
    stored data is parsed as a ``MasterInfo`` message and handed to
    ``appoint``.
    """

    @classmethod
    def from_uri(cls, uri):
      """Build a detector from a ``zk://<ensemble>/<path>`` URI.

      :param uri: ensemble URI; the scheme must be ``zk`` (case-insensitive).
      :returns: a detector built from the URI's netloc and path.
      :raises InvalidUrl: if the URI has any other scheme.
      """
      url = urlparse(uri)
      if url.scheme.lower() != 'zk':
        # A classmethod has no ``self``; the exception class is reached via ``cls``.
        raise cls.InvalidUrl('ZookeeperMasterDetector got invalid ensemble URI %s' % uri)
      return cls(url.netloc, url.path)

    def __init__(self, ensemble, path):
      """Start a Kazoo client for ``ensemble`` and begin monitoring ``path``.

      :param ensemble: host string passed to ``KazooClient``.
      :param path: group path passed to ``MesosKazooGroup``.
      """
      super(ZookeeperMasterDetector, self).__init__()

      self._kazoo_client = KazooClient(ensemble)
      # Non-blocking start: the constructor does not wait for the connection.
      self._kazoo_client.start_async()

      self._group = MesosKazooGroup(self._kazoo_client, path)
      self._group.monitor(callback=self.on_change)

    def on_change(self, membership):
      """Group-membership callback: look up the leader and re-arm the monitor.

      :param membership: current collection of group members (may be empty).
      """
      if membership:
        # The smallest member is the leader (same element as sorted(...)[0]).
        leader = min(membership)
        self._group.info(leader, callback=self.on_appointment)
      # Ask to be called again when the membership differs from this one.
      self._group.monitor(membership, callback=self.on_change)

    def on_appointment(self, master_data):
      """Parse the leader's serialized ``MasterInfo`` and appoint it.

      :param master_data: serialized ``MasterInfo`` message of the leader.
      """
      master_info = MasterInfo()
      master_info.MergeFromString(master_data)
      self.appoint(master_info_to_pid(master_info))
Example #2
0
  class ZookeeperMasterDetector(FutureMasterDetector):
    """Master detector that follows the membership of a ZooKeeper group.

    The member that sorts first in the group is treated as the leader; its
    stored data is parsed as a ``MasterInfo`` message and handed to
    ``appoint``.
    """

    @classmethod
    def from_uri(cls, uri):
      """Build a detector from a ``zk://<ensemble>/<path>`` URI.

      :param uri: ensemble URI; the scheme must be ``zk`` (case-insensitive).
      :returns: a detector built from the URI's netloc and path.
      :raises InvalidUrl: if the URI has any other scheme.
      """
      url = urlparse(uri)
      if url.scheme.lower() != 'zk':
        # A classmethod has no ``self``; the exception class is reached via ``cls``.
        raise cls.InvalidUrl('ZookeeperMasterDetector got invalid ensemble URI %s' % uri)
      return cls(url.netloc, url.path)

    def __init__(self, ensemble, path):
      """Start a Kazoo client for ``ensemble`` and begin monitoring ``path``.

      :param ensemble: host string passed to ``KazooClient``.
      :param path: group path passed to ``MesosKazooGroup``.
      """
      super(ZookeeperMasterDetector, self).__init__()

      self._kazoo_client = KazooClient(ensemble)
      # Non-blocking start: the constructor does not wait for the connection.
      self._kazoo_client.start_async()

      self._group = MesosKazooGroup(self._kazoo_client, path)
      self._group.monitor(callback=self.on_change)

    def on_change(self, membership):
      """Group-membership callback: look up the leader and re-arm the monitor.

      :param membership: current collection of group members (may be empty).
      """
      if membership:
        # The smallest member is the leader (same element as sorted(...)[0]).
        leader = min(membership)
        self._group.info(leader, callback=self.on_appointment)
      # Ask to be called again when the membership differs from this one.
      self._group.monitor(membership, callback=self.on_change)

    def on_appointment(self, master_data):
      """Parse the leader's serialized ``MasterInfo`` and appoint it.

      :param master_data: serialized ``MasterInfo`` message of the leader.
      """
      master_info = MasterInfo()
      master_info.MergeFromString(master_data)
      self.appoint(master_info_to_pid(master_info))
Example #3
0
def main():
    """
    Entry point of the long-running worker daemon.

    Parses the command line, connects to ZooKeeper and AMQP, routes logging
    through an AMQP handler, declares the job queue and then blocks on the
    runner until it finishes. Any exception escaping the runner is logged and
    re-raised.
    """

    # Command-line options.
    arg_parser = argparse.ArgumentParser(description='Run an LSDA worker node.')
    arg_parser.add_argument('--zookeeper', action='append', required=True)
    arg_parser.add_argument('--amqp', required=True)
    arg_parser.add_argument('--queue', default='stable')
    args = arg_parser.parse_args()

    # ZooKeeper: every --zookeeper occurrence contributes one host entry.
    zk_hosts = ','.join(args.zookeeper)
    zk_client = KazooClient(hosts=zk_hosts, handler=SequentialGeventHandler())
    zk_client.start_async()

    # AMQP: one connection, separate channels for log records and for jobs.
    amqp = pika.BlockingConnection(pika.ConnectionParameters(args.amqp))
    log_channel = amqp.channel()
    work_channel = amqp.channel()

    # Send root-logger output (INFO and above) through the AMQP handler.
    amqp_log_handler = AMQPLoggingHandler(log_channel, 'lsda_logs')
    root_logger = logging.getLogger()
    root_logger.addHandler(amqp_log_handler)
    root_logger.setLevel(logging.INFO)

    # Kazoo's own logger is noisy at lower levels; keep only warnings.
    import kazoo.client
    kazoo.client.log.setLevel(logging.WARN)

    # The queue must exist before anything is consumed from it.
    work_channel.queue_declare(args.queue, durable=True)

    # Fixed start-up delay so rapidly restarting tasks do not hammer resources.
    gevent.sleep(10)

    # Hand over to the runner and wait for it.
    try:
        runner = EngineOrControllerRunner(zk_client, work_channel,
                                          args.queue, amqp_log_handler)
        runner.join()
    except Exception:
        logging.exception("Unhandled exception at root level.")
        raise
Example #4
0
    def test_session_callback_states(self):
        # Drives KazooClient._session_callback through several keeper states
        # and checks its return value and the resulting client.state.
        from kazoo.protocol.states import KazooState, KeeperState
        from kazoo.client import KazooClient

        # No connection is opened: the private handle and "live" flag are set
        # by hand before the callback is invoked.
        client = KazooClient()
        client._handle = 1
        client._live.set()

        # The callback is expected to return None.
        result = client._session_callback(KeeperState.CONNECTED)
        eq_(result, None)

        # Now with stopped
        client._stopped.set()
        result = client._session_callback(KeeperState.CONNECTED)
        eq_(result, None)

        # Test several state transitions
        client._stopped.clear()
        # start_async is replaced with a stub on this instance.
        # NOTE(review): presumably so no real connection attempt is made
        # during the transitions below - confirm.
        client.start_async = lambda: True
        client._session_callback(KeeperState.CONNECTED)
        eq_(client.state, KazooState.CONNECTED)

        # AUTH_FAILED is expected to end in the LOST client state.
        client._session_callback(KeeperState.AUTH_FAILED)
        eq_(client.state, KazooState.LOST)

        client._handle = 1
        # -250 is passed as a raw state code; the test only establishes that
        # it results in the SUSPENDED client state.
        client._session_callback(-250)
        eq_(client.state, KazooState.SUSPENDED)
Example #5
0
class ServicePublisher:
    """Connect to ZooKeeper and publish this host's status as an ephemeral node.

    The constructor waits up to ``timeout`` for the connection. On success
    the status is published immediately; on failure the client is stopped
    and an error is logged (no exception is raised).
    """

    def __init__(self, hosts, timeout, publish_port):
        """
        :param hosts: host string passed to ``KazooClient``.
        :param timeout: how long to wait for the connection.
        :param publish_port: port number stored in the published status.
        """
        self._logger = logging.getLogger(self.__class__.__name__)
        self._publish_port = publish_port
        self._zk = KazooClient(hosts=hosts)
        self._zk.start_async().wait(timeout=timeout)

        # Guard clause: give up quietly when the connection did not come up.
        if not self._zk.connected:
            self._zk.stop()
            self._logger.error('Kazoo client failed to connect')
            return

        self._logger.info('Kazoo client successfully connected')
        self._publish_status()

    def _publish_status(self):
        """Create ``PARENT_NODE/<hostname>`` holding a JSON status document."""
        node_path = '%s/%s' % (PARENT_NODE, socket.gethostname())
        # The first 19 characters of the timestamp drop the fractional seconds.
        started_at = str(datetime.datetime.now())[:19]
        status = {'started': started_at, 'port': self._publish_port}
        payload = json.dumps(status).encode(encoding='utf-8')
        self._logger.info('Publishing status %s to path %s' %
                          (status, node_path))
        # Created as an ephemeral node; missing parents are created too.
        self._zk.create(node_path, payload, ephemeral=True, makepath=True)
Example #6
0
    def test_session_callback_states(self):
        # Drives KazooClient._session_callback through several keeper states
        # and checks its return value and the resulting client.state.
        from kazoo.protocol.states import KazooState, KeeperState
        from kazoo.client import KazooClient

        # No connection is opened: the private handle and "live" flag are set
        # by hand before the callback is invoked.
        client = KazooClient()
        client._handle = 1
        client._live.set()

        # The callback is expected to return None.
        result = client._session_callback(KeeperState.CONNECTED)
        eq_(result, None)

        # Now with stopped
        client._stopped.set()
        result = client._session_callback(KeeperState.CONNECTED)
        eq_(result, None)

        # Test several state transitions
        client._stopped.clear()
        # start_async is replaced with a stub on this instance.
        # NOTE(review): presumably so no real connection attempt is made
        # during the transitions below - confirm.
        client.start_async = lambda: True
        client._session_callback(KeeperState.CONNECTED)
        eq_(client.state, KazooState.CONNECTED)

        # AUTH_FAILED is expected to end in the LOST client state.
        client._session_callback(KeeperState.AUTH_FAILED)
        eq_(client.state, KazooState.LOST)

        client._handle = 1
        # -250 is passed as a raw state code; the test only establishes that
        # it results in the SUSPENDED client state.
        client._session_callback(-250)
        eq_(client.state, KazooState.SUSPENDED)
Example #7
0
    def init_app(self, app):
        """
        Read kazoo settings from app configuration,
        setup kazoo client for application

        Configuration keys (defaults are filled in when missing):

        * ``KAZOO_HOSTS`` - host string for the client.
        * ``KAZOO_START_BLOCKING`` / ``KAZOO_START_TIMEOUT`` - whether to wait
          for the connection during start-up, and for how long.
        * ``KAZOO_SESSION_TIMEOUT`` - passed as the client's ``timeout``.
        * ``KAZOO_DEFAULT_RETRY`` / ``KAZOO_RETRY_MAX_DELAY_SECONDS`` - when
          retry is enabled, the same ``max_delay`` is used for both connection
          and command retries; otherwise no retry options are passed.

        The client is stored under ``app.extensions['kazoo']['client']``.

        :param app: Flask application instance.

        """
        app.config.setdefault('KAZOO_HOSTS', '127.0.0.1:2181')
        app.config.setdefault('KAZOO_START_TIMEOUT', 3)
        app.config.setdefault('KAZOO_START_BLOCKING', False)

        app.config.setdefault('KAZOO_SESSION_TIMEOUT', 10.0)  # kazoo default

        app.config.setdefault('KAZOO_DEFAULT_RETRY', True)
        app.config.setdefault('KAZOO_RETRY_MAX_DELAY_SECONDS', 60 * 60)  # kazoo default of 1hr.

        # Reserve this extension's slot in the application's extension registry
        if 'kazoo' not in app.extensions:
            app.extensions['kazoo'] = {}

        # Initialize connection and store it to extensions
        if app.config['KAZOO_DEFAULT_RETRY']:
            retry_kwargs = {
                'max_delay': app.config['KAZOO_RETRY_MAX_DELAY_SECONDS']
            }
        else:
            retry_kwargs = None

        kazoo_client = KazooClient(hosts=app.config['KAZOO_HOSTS'],
                                   timeout=app.config['KAZOO_SESSION_TIMEOUT'],
                                   connection_retry=retry_kwargs,
                                   command_retry=retry_kwargs)

        # Register the listener before starting: a listener added afterwards
        # misses the first connection-state transition (always in blocking
        # mode, and depending on timing in async mode).
        kazoo_client.add_listener(self.connection_state_listener)

        if app.config['KAZOO_START_BLOCKING']:
            kazoo_client.start(app.config['KAZOO_START_TIMEOUT'])
        else:
            kazoo_client.start_async()

        app.extensions['kazoo']['client'] = kazoo_client
Example #8
0
 def start_kazoo(host: str, credentials: str) -> KazooClient:
     """Start a connection to ZooKeeper and return the connected client.

     Registers ``credentials`` under the ``digest`` auth scheme, then waits up
     to 10 seconds for the connection.

     :param host: host string passed to ``KazooClient``.
     :param credentials: credential string for the ``digest`` scheme.
     :returns: the started client.
     :raises SystemExit: with ``ErrorCodes.KAZOO_TIMEOUT`` when the connection
         is not established in time.
     """
     zk_client = KazooClient(hosts=host)
     zk_client.add_auth_async("digest", credentials)
     try:
         # start() waits for the connection and raises the handler's timeout
         # exception when it is not established. Waiting on the event from
         # start_async() just returns on time-out, which left the except
         # branch unreachable and logged success unconditionally.
         zk_client.start(timeout=10)
     except KazooTimeoutError as err:
         ErrorCodes.make_graceful(err, "Zookeeper server timed out")
         sys.exit(ErrorCodes.KAZOO_TIMEOUT.value)
     logger.info("Zookeeper connection established")
     return zk_client
Example #9
0
class ZookeeperSession(BaseClient):
  '''Client session wrapping a gevent-based KazooClient.

  The session tracks the latest Kazoo connection state in ``_state`` and
  stops the underlying client when the session object is finalized.
  '''
  # NOTE(review): the name looks like a typo of "context_manager"; it is kept
  # unchanged because code outside this class may look it up by this name.
  conext_manager = ZookeeperResponseContextManager
  # Extra KazooClient keyword arguments per policy (both empty here); they are
  # only read, never mutated, by this class.
  loose_policy = {}
  strict_policy = {}

  def __init__(self,server_list='127.0.0.1:2181',*args,**kwargs):
    '''
    :param server_list: host string used as the default ``hosts`` for
      the Kazoo client.
    Remaining arguments are forwarded to ``BaseClient``.
    '''
    super(ZookeeperSession,self).__init__(*args,**kwargs)
    self.session_policy = "loose_policy"
    self._zookeeper_client = None
    self.server_list = server_list

  def set_session_policy(self,session_policy="loose"):
    '''prototype not currently used.

    Selects the class attribute ``<session_policy>_policy`` as the source
    of extra client options for the next ``connect``.
    '''
    self.session_policy = session_policy+"_policy"

  def connect(self,*args,**kwargs):
    '''See http://kazoo.readthedocs.org/en/latest/api/client.html
     for details regarding available options. Any provided client
     start() parameters provided will override defaults.

     Waits up to 30 seconds for the connection.

     :raises ResponseError: if the client is not connected after the wait.
    '''
    defaults = {
      "hosts" : self.server_list,
      "handler" : SequentialGeventHandler()
    }
    # Precedence: built-in defaults < selected policy < caller's kwargs.
    defaults.update(getattr(self,self.session_policy))
    defaults.update(kwargs)
    self._state = KazooState.LOST
    self._zookeeper_client = KazooClient(**defaults)
    self._zookeeper_client.add_listener(self._state_tracker)
    watchable = self._zookeeper_client.start_async()
    watchable.wait(30)
    if not self._zookeeper_client.connected:
      # Do not leave the client retrying in the background after giving up.
      self._zookeeper_client.stop()
      # The options dict has a "hosts" key (no "server_list" key), so the
      # message is built from that; a caller-supplied hosts override is
      # reported correctly as well.
      err = "Could not connect to Zookeeper server(s) %s" % defaults["hosts"]
      raise ResponseError(err)

  @require_state(KazooState.CONNECTED)
  @record_stats
  def ensure_path(self,path,watcher=None):
    '''Delegate to the Kazoo client's ``ensure_path`` for ``path``.'''
    self._zookeeper_client.ensure_path(path,watcher)

  def _state_tracker(self,state):
    '''Kazoo state listener: remember the most recent connection state.'''
    self._state = state

  def __del__(self):
    # getattr: the attribute is missing when __init__ failed before setting it.
    client = getattr(self, '_zookeeper_client', None)
    if isinstance(client, KazooClient):
      client.stop()
Example #10
0
    def test_session_callback_states(self):
        # Drives KazooClient._session_callback (handle, event type, state,
        # path) through several keeper states and checks its return value,
        # the client's handle and the resulting client.state.
        from kazoo.client import (KazooClient, KazooState, KeeperState,
            EventType)

        # No connection is opened: the private handle and "live" flag are set
        # by hand before the callback is invoked.
        client = KazooClient()
        client._handle = 1
        client._live.set()

        # The callback is expected to return None.
        result = client._session_callback(1, EventType.CREATED,
                                          KeeperState.CONNECTED, '/')
        eq_(result, None)

        # Now with stopped
        client._stopped.set()
        result = client._session_callback(1, EventType.SESSION,
                                          KeeperState.CONNECTED, '/')
        eq_(result, None)

        # Test several state transitions
        client._stopped.clear()
        # start_async is replaced with a stub on this instance.
        # NOTE(review): presumably so no real connection attempt is made
        # during the transitions below - confirm.
        client.start_async = lambda: True
        client._session_callback(1, EventType.SESSION, KeeperState.CONNECTED,
                                 None)
        eq_(client.state, KazooState.CONNECTED)

        # AUTH_FAILED is expected to clear the handle and end in LOST.
        client._session_callback(1, EventType.SESSION, KeeperState.AUTH_FAILED,
                                 None)
        eq_(client._handle, None)
        eq_(client.state, KazooState.LOST)

        client._handle = 1
        # -250 is passed as a raw state code; the test only establishes that
        # it results in the SUSPENDED client state.
        client._session_callback(1, EventType.SESSION, -250, None)
        eq_(client.state, KazooState.SUSPENDED)

        # handle mismatch
        client._handle = 0
        # This will be ignored due to handle mismatch
        client._session_callback(1, EventType.SESSION, KeeperState.CONNECTED,
                                 None)
        eq_(client.state, KazooState.SUSPENDED)
Example #11
0
class Coordinator(object):
    """Master/slave coordination of sender nodes through ZooKeeper.

    Mastership is decided by a Kazoo lock at ``/iris/sender_master``; cluster
    membership is tracked by a Kazoo party at ``/iris/sender_nodes``. Each
    node identifies itself as ``"<hostname>:<port>"``.
    """

    def __init__(self, zk_hosts, hostname, port, join_cluster):
        """
        :param zk_hosts: host string passed to ``KazooClient``.
        :param hostname: this node's host name.
        :param port: this node's port.
        :param join_cluster: when true, register a state listener and join
            the party; when false, the client is opened read-only.
        """
        self.me = '%s:%s' % (hostname, port)
        self.is_master = None
        self.started_shutdown = False
        self._reset_slaves()

        # Nodes that do not join the cluster open the client read-only.
        self.zk = KazooClient(hosts=zk_hosts,
                              handler=SequentialGeventHandler(),
                              read_only=not join_cluster)
        # Wait up to 5 seconds; the outcome is not inspected here.
        self.zk.start_async().wait(timeout=5)

        self.lock = self.zk.Lock(path='/iris/sender_master',
                                 identifier=self.me)

        # Used to keep track of slaves / senders present in cluster
        self.party = Party(client=self.zk,
                           path='/iris/sender_nodes',
                           identifier=self.me)

        if join_cluster:
            self.zk.add_listener(self.event_listener)
            self.party.join()

    def _reset_slaves(self):
        """Forget all known slaves (empty rotation, zero count)."""
        self.slaves = cycle([])
        self.slave_count = 0

    def am_i_master(self):
        """Return the current master flag (``True``, ``False`` or ``None``)."""
        return self.is_master

    # Used for API to get the current master
    def get_current_master(self):
        """Return ``(host, port)`` of the first lock contender, or ``None``."""
        try:
            contenders = self.lock.contenders()
        except kazoo.exceptions.KazooException:
            logger.exception('Failed getting contenders')
            return None

        return self.address_to_tuple(contenders[0]) if contenders else None

    # Used for API to get the current slaves if master can't be reached
    def get_current_slaves(self):
        """Return the address tuple of every party member."""
        return [self.address_to_tuple(member) for member in self.party]

    def address_to_tuple(self, address):
        """Split ``"host:port"`` into ``(host, int(port))``.

        Returns ``None`` (after logging an error) when the string does not
        have exactly two colon-separated parts or the port is not an integer.
        """
        try:
            host, port = address.split(':')
            parsed = (host, int(port))
        except (IndexError, ValueError):
            logger.error('Failed getting address tuple from %s', address)
            return None
        return parsed

    def _refresh_master_flag(self):
        """Recompute ``is_master`` from the connection state and the lock."""
        if self.zk.state != KazooState.CONNECTED:
            logger.error('ZK connection is in %s state', self.zk.state)
            self.is_master = False
            return

        if self.lock.is_acquired:
            self.is_master = True
            return

        try:
            self.is_master = self.lock.acquire(blocking=False,
                                               timeout=2)

        # This one is expected when we're recovering from ZK being down
        except kazoo.exceptions.CancelledError:
            self.is_master = False

        except kazoo.exceptions.LockTimeout:
            self.is_master = False
            logger.exception(
                'Failed trying to acquire lock (shouldn\'t happen as we\'re using nonblocking locks)'
            )

        except kazoo.exceptions.KazooException:
            self.is_master = False
            logger.exception(
                'ZK problem while Failed trying to acquire lock')

    def _refresh_slaves(self):
        """Recompute the slave rotation and make sure we are in the party."""
        if self.zk.state != KazooState.CONNECTED:
            self._reset_slaves()
            return

        if self.is_master:
            # Everyone in the party except ourselves is a slave.
            others = [
                self.address_to_tuple(member) for member in self.party
                if member != self.me
            ]
            self.slave_count = len(others)
            self.slaves = cycle(others)
        else:
            self._reset_slaves()

        # Keep us as part of the party, so the current master sees us as a slave
        if not self.party.participating:
            try:
                self.party.join()
            except kazoo.exceptions.KazooException:
                logger.exception('ZK problem while trying to join party')

    def update_status(self):
        """Refresh the master flag, then the slave list; no-op once shutting down."""
        if self.started_shutdown:
            return

        # The connection state is read again for the second step, after the
        # lock handling of the first step has run.
        self._refresh_master_flag()
        self._refresh_slaves()

    def update_forever(self):
        """Poll ``update_status`` every ``UPDATE_FREQUENCY`` until shutdown starts."""
        while not self.started_shutdown:
            previous = self.is_master
            self.update_status()

            # Role changes are logged at info level, steady state at debug.
            log = logger.info if previous != self.is_master else logger.debug

            if self.is_master:
                log('I am the master sender')
            else:
                log('I am a slave sender')

            metrics.set('slave_instance_count', self.slave_count)
            metrics.set('is_master_sender', int(self.is_master is True))

            sleep(UPDATE_FREQUENCY)

    def leave_cluster(self):
        """Begin shutdown: cancel lock attempts, leave the party, release the lock."""
        self.started_shutdown = True

        # cancel any attempts to acquire master lock which could make us hang
        self.lock.cancel()

        if self.zk.state != KazooState.CONNECTED:
            return

        if self.party and self.party.participating:
            logger.info('Leaving party')
            self.party.leave()
        if self.lock and self.lock.is_acquired:
            logger.info('Releasing lock')
            self.lock.release()

    def event_listener(self, state):
        """Kazoo state listener: drop master/party status on LOST or SUSPENDED."""
        if state not in (KazooState.LOST, KazooState.SUSPENDED):
            return

        logger.info(
            'ZK state transitioned to %s. Resetting master status.', state)

        # cancel pending attempts to acquire lock which will break and leave
        # us in bad state
        self.lock.cancel()

        # make us try to re-acquire lock during next iteration when we're connected
        if self.lock.is_acquired:
            self.lock.is_acquired = False

        # make us try to rejoin the party during next iteration when we're connected
        if self.party.participating:
            self.party.participating = False

        # in the meantime we're not master
        self.is_master = None
Example #12
0
class MainWindow(QMainWindow, ui_MainWindow.Ui_MainWindow):
    """Main window of a ZooKeeper tree browser.

    Nodes are shown in ``treeWidget``; each tree item carries three text
    columns: the display label (0), the node's full path (1) and the bare
    node name (2). Node contents open in read-only tabs, and log output is
    mirrored into the ``log`` widget through the ``mainWriteGui`` signal.
    """

    # Carries log text to slotMainWriteGui.
    mainWriteGui = pyqtSignal(str)

    @catchExept
    def __init__(self):
        """Build the UI, wire up actions/signals, configure logging and load
        the saved host list from ``config.txt`` if it exists."""
        super().__init__()
        self.setupUi(self)
        self.zk = KazooClient()
        # Polls the connection state every 100 ms while connecting.
        self.zkTimer = QTimer(self)
        self.zkTimer.setInterval(100)
        self.zkTimer.timeout.connect(self.zkTimeout)
        self.zkStartThread = threading.Thread(target=self.zkConnect)
        self.msgBox = QMessageBox(QMessageBox.NoIcon, "Connection",
                                  "Connecting...", QMessageBox.Cancel, self)
        self.treeWidget.itemClicked.connect(self.itemClicked)
        self.treeWidget.itemDoubleClicked.connect(self.itemOpen)
        self.tabWidget.tabCloseRequested.connect(self.closeTab)
        self.actionConnect.triggered.connect(self.msgBox.show)
        # Bound to a slot that creates a fresh thread per attempt: a Thread
        # object can be started only once, so binding the signal to one
        # thread's start() fails on every later connection attempt.
        self.actionConnect.triggered.connect(self.zkStartConnect)
        self.actionConnect.triggered.connect(self.zkTimer.start)
        self.actionDisconnect.triggered.connect(self.zkDisconnect)
        self.actionACLVersion.triggered.connect(self.aclVersion)
        self.actionCreated.triggered.connect(self.created)
        self.actionChildrenCount.triggered.connect(self.childrenCount)
        self.actionDataLength.triggered.connect(self.dataLength)
        self.actionLastModified.triggered.connect(self.lastModified)
        self.actionLastModifiedTransactionId.triggered.connect(
            self.lastModifiedTransactionId)
        self.actionOwnerSessionId.triggered.connect(self.ownerSessionId)
        self.actionVersion.triggered.connect(self.version)
        self.actionCreationTransactionId.triggered.connect(
            self.creationTransactionId)
        self.actionChangeServerAddress.triggered.connect(
            self.changeServerAddress)
        # Cancelling the "Connecting..." box aborts the attempt.
        self.msgBox.rejected.connect(self.zkTimer.stop)
        self.msgBox.rejected.connect(self.msgBox.hide)
        self.msgBox.rejected.connect(self.zkDisconnect)
        self.mainWriteGui.connect(self.slotMainWriteGui)
        self.log.setCenterOnScroll(True)
        self.dialog = SelectorDialog(self)

        class PlainTextWidgetHandler:
            """File-like adapter that forwards written text to a callable."""

            def __init__(self, logToWriteGui):
                self.logToWriteGui = logToWriteGui

            def write(self, text):
                self.logToWriteGui(text)

            def flush(self):
                pass

        # Log both to the GUI log widget and to stderr.
        logging.basicConfig(format='%(asctime)s.%(msecs)d: %(message)s',
                            datefmt='%H:%M:%S',
                            level=logging.DEBUG,
                            handlers=[
                                logging.StreamHandler(
                                    PlainTextWidgetHandler(
                                        self.logToWriteGui)),
                                logging.StreamHandler(sys.stderr)
                            ])

        self.treeWidget.setColumnCount(1)
        self.treeWidget.sortByColumn(0, Qt.AscendingOrder)

        # Add a progress bar with min == max == 0 to the message box layout.
        l = self.msgBox.layout()
        progress = QProgressBar()
        progress.setMaximum(0)
        progress.setMinimum(0)
        l.addWidget(progress, l.rowCount() - 2, 1, 1, l.columnCount())

        self.actionConnect.setEnabled(False)
        if os.path.exists("config.txt"):
            with open("config.txt", "r") as f:
                prelines = [string.strip() for string in f.readlines()]
            # Keep the first occurrence of every non-blank host, in file
            # order. An empty file yields an empty list instead of failing.
            lines = []
            for line in prelines:
                if line and line not in lines:
                    lines.append(line)
            self.dialog.comboBox.addItems(lines)
            # Connecting only makes sense once at least one host is known.
            self.actionConnect.setEnabled(bool(lines))

    @pyqtSlot(str)
    def slotMainWriteGui(self, text):
        """Insert ``text`` at the log widget's cursor."""
        self.log.ensureCursorVisible()
        self.log.textCursor().insertText(text)

    def logToWriteGui(self, text):
        """Forward ``text`` to the log widget via the ``mainWriteGui`` signal."""
        self.mainWriteGui.emit(text)

    @catchExept
    def getCurrentStat(self):
        """Return the ZooKeeper stat of the currently selected tree item."""
        _, stat = self.zk.get(self.treeWidget.currentItem().text(1))
        return stat

    @catchExept
    @pyqtSlot()
    def aclVersion(self):
        """Log the ACL version of the selected node."""
        self.print("ACL version: %s" % self.getCurrentStat().acl_version)

    @catchExept
    @pyqtSlot()
    def created(self):
        """Log the creation time of the selected node."""
        self.print("Created: %s" % self.getCurrentStat().created)

    @catchExept
    @pyqtSlot()
    def childrenCount(self):
        """Log the number of children of the selected node."""
        self.print("Children count: %s" % self.getCurrentStat().children_count)

    @catchExept
    @pyqtSlot()
    def dataLength(self):
        """Log the data length of the selected node."""
        self.print("Data length: %s" % self.getCurrentStat().data_length)

    @catchExept
    @pyqtSlot()
    def lastModified(self):
        """Log the last-modified time of the selected node."""
        self.print("Last modified: %s" % self.getCurrentStat().last_modified)

    @catchExept
    @pyqtSlot()
    def lastModifiedTransactionId(self):
        """Log the last-modified transaction id of the selected node."""
        self.print("Last modified transactionId: %s" %
                   self.getCurrentStat().last_modified_transaction_id)

    @catchExept
    @pyqtSlot()
    def ownerSessionId(self):
        """Log the owner session id of the selected node."""
        self.print("Owner sessionId: %s" %
                   self.getCurrentStat().owner_session_id)

    @catchExept
    @pyqtSlot()
    def version(self):
        """Log the data version of the selected node."""
        self.print("Version: %s" % self.getCurrentStat().version)

    @catchExept
    @pyqtSlot()
    def creationTransactionId(self):
        """Log the creation transaction id of the selected node."""
        self.print("Creation transactionId: %s" %
                   self.getCurrentStat().creation_transaction_id)

    @catchExept
    @pyqtSlot()
    def changeServerAddress(self):
        """Show the host selector; on accept, move the chosen host to the top
        of the list, rewrite ``config.txt`` and enable connecting."""
        code = self.dialog.exec_()
        if code == QDialog.Accepted:
            text = self.currentHost()
            with open("config.txt", "w") as f:
                # Every other known host, in its current order.
                hosts = [
                    self.dialog.comboBox.itemText(s)
                    for s in range(self.dialog.comboBox.count())
                    if text != self.dialog.comboBox.itemText(s)
                ]
                self.dialog.comboBox.clear()
                self.dialog.comboBox.addItems(hosts)
                hosts.insert(0, text)
                f.write('\n'.join(hosts))
            if text != "":
                if text != self.dialog.comboBox.itemText(0):
                    self.dialog.comboBox.insertItem(0, text)
                    self.dialog.comboBox.setCurrentText(text)
                self.print("Current host changed to %s" % self.currentHost())
                self.actionConnect.setEnabled(True)

    @catchExept
    @pyqtSlot()
    def zkStartConnect(self):
        """Start a background connection attempt.

        A new thread is created for every attempt; nothing happens while a
        previous attempt's thread is still running.
        """
        if self.zkStartThread.is_alive():
            return
        self.zkStartThread = threading.Thread(target=self.zkConnect)
        self.zkStartThread.start()

    @catchExept
    @pyqtSlot()
    def zkDisconnect(self):
        """Clear the views, stop and close the client and reset the actions."""
        self.tabWidget.clear()
        self.treeWidget.clear()
        self.zk.stop()
        self.zk.close()
        self.actionDisconnect.setEnabled(False)
        self.menuFileInfo.setEnabled(False)
        self.actionConnect.setEnabled(True)
        self.actionChangeServerAddress.setEnabled(True)
        # No signal re-binding here: actionConnect is bound once, in
        # __init__, to zkStartConnect, which creates the thread it needs.

    @catchExept
    @pyqtSlot()
    def zkTimeout(self):
        """Timer tick: once the client reports connected, finish the
        connection flow and stop polling."""
        if self.zk.connected:
            self.zkConnected()
            self.zkTimer.stop()

    @catchExept
    def zkConnected(self):
        """Hide the progress box, load the root nodes and switch the actions
        to their connected state."""
        self.msgBox.hide()
        self.init()
        self.menuFileInfo.setEnabled(True)
        self.actionDisconnect.setEnabled(True)
        self.actionConnect.setEnabled(False)
        self.actionChangeServerAddress.setEnabled(False)

    @catchExept
    def zkConnect(self):
        """Thread target: point the client at the selected host and start it
        asynchronously; failures are logged."""
        self.zk.set_hosts(self.currentHost())
        self.zk.add_listener(self.my_listener)
        try:
            self.zk.start_async()
        except Exception as e:
            logging.exception("error: {0}".format(e))

    def currentHost(self):
        """Return the host currently selected in the selector dialog."""
        return self.dialog.comboBox.currentText()

    @catchExept
    def init(self):
        """Add one top-level tree item per child of the root node."""
        for child in self.zk.get_children("/"):
            self.treeWidget.addTopLevelItem(
                QTreeWidgetItem([child, "/" + child, child]))

    @catchExept
    def my_listener(self, state):
        """Kazoo state listener: log the new connection state."""
        if state == KazooState.LOST:
            # Register somewhere that the session was lost
            self.print("state is LOST!")
        elif state == KazooState.SUSPENDED:
            # Handle being disconnected from Zookeeper
            self.print("state is SUSPENDED!")
        else:
            # Handle being connected/reconnected to Zookeeper
            self.print("state is CONNECTED!")

    @catchExept
    def print(self, text):
        """Log ``text`` at debug level."""
        logging.debug(text)

    @catchExept
    def printAllChildren(self, curPath, children, layer):
        """Recursively log each child's data, indented by its depth.

        :param curPath: path of the parent node.
        :param children: names of the parent's children.
        :param layer: current depth, used for indentation.
        """
        spaces = "  " * layer
        for child in children:
            newPath = curPath + "/" + child
            data, stat = self.zk.get(newPath)
            self.print("%s: %s" % (spaces + child, data))
            self.printAllChildren(newPath, self.zk.get_children(newPath),
                                  layer + 1)

    @catchExept
    @pyqtSlot(int)
    def closeTab(self, idx):
        """Remove the tab at index ``idx``."""
        self.tabWidget.removeTab(idx)

    @catchExept
    @pyqtSlot(QTreeWidgetItem, int)
    def itemOpen(self, item, column):
        """Show the node's data in a read-only tab, reusing an existing tab
        with the same name; does nothing if the node no longer exists."""
        if not self.zk.exists(item.text(1)):
            return
        tabName = item.text(2)
        for i in range(self.tabWidget.count()):
            if tabName == self.tabWidget.tabText(i):
                self.tabWidget.setCurrentIndex(i)
                return
        innerText = QPlainTextEdit()
        innerText.setReadOnly(True)
        data, stat = self.zk.get(item.text(1))
        innerText.setPlainText(data.decode("utf8"))
        pos = self.tabWidget.addTab(innerText, tabName)
        self.tabWidget.setCurrentIndex(pos)

    @catchExept
    def drawAllTree(self):
        """Log the whole node tree starting at the root."""
        if self.zk.exists("/"):
            root = self.zk.get_children("/")
            self.printAllChildren("/", root, 0)
        else:
            self.print("Really?.. How?.. Why?..")

    @catchExept
    @pyqtSlot(QTreeWidgetItem, int)
    def itemClicked(self, item, column):
        """Refresh the clicked item's label with its child count and re-sync
        its child items with ZooKeeper, reusing existing items by name."""
        item.setText(
            0,
            item.text(2) + " (%s)" % self.getCurrentStat().children_count)
        children = item.takeChildren()
        newChildren = []
        if self.zk.exists(item.text(1)):
            for child in self.zk.get_children(item.text(1)):
                for oldChild in children:
                    if oldChild.text(2) == child:
                        newChildren.append(oldChild)
                        break
                else:
                    # No existing item for this child: create one.
                    newChildren.append(
                        QTreeWidgetItem(
                            [child, item.text(1) + "/" + child, child]))
        item.addChildren(newChildren)
Example #13
0
class ScoutsDaemon(threading.Thread):
    """Daemon thread that keeps one ``ServiceScout`` per configured service.

    Service configurations are the children of ``CONFS_PATH`` in ZooKeeper;
    ZooKeeper watches re-run the set-up when the list of services or a single
    service's configuration changes. The constructor connects and starts the
    thread, which runs until SIGTERM is received.
    """

    def __init__(self, server, timeout):
        """
        :param server: ZooKeeper host string.
        :param timeout: used both as the client timeout and as the maximum
            time to wait for the initial connection.
        :raises ConnectException: if the connection cannot be established.
        """
        super(ScoutsDaemon, self).__init__()
        self.logger = base_logger.getChild(self.__class__.__name__)

        self._server = server
        self._timeout = timeout
        self._zk = None
        self._scouts = {}  # service name -> ServiceScout
        self.terminated = False

        # Set to request shutdown of the run loop.
        self._event = threading.Event()
        # Attribute form of the deprecated setDaemon(True).
        self.daemon = True
        signal.signal(signal.SIGTERM, self._terminate)

        self._connect()
        self.start()

    def _connect(self):
        """Create and start the Kazoo client unless one is already connected.

        :raises ConnectException: if the client is not connected after
            waiting ``timeout``; the client is stopped and shutdown requested.
        """
        if self._zk and self._zk.connected:
            self.logger.info('[Connection] Kazoo client is already running')
            return
        else:
            self.logger.info(
                '[Connection] Starting Kazoo client (server="%s")' %
                self._server)
            self._zk = KazooClient(hosts=self._server, timeout=self._timeout)
            self._zk.add_listener(self._conn_listener)
            event = self._zk.start_async()
            event.wait(timeout=self._timeout)

        if self._zk.connected:
            self.logger.info(
                '[Connection] Kazoo client successfully connected')
        else:
            self._zk.stop()
            self._event.set()
            raise ConnectException('Failed connecting to Zookeeper')

    def _conn_listener(self, state):
        """Kazoo state listener: log every connection-state change."""
        self.logger.info('[Connection] New state: %s' % state)

    def _terminate(self, signum, frame):
        """Signal handler: request shutdown on SIGTERM."""
        if signum == signal.SIGTERM:
            self.logger.info('[General] Received SIGTERM, stopping...')
            self._event.set()

    def run(self):
        """Set up the scouts, wait for the shutdown request, then stop them."""
        self._zk.ensure_path(CONFS_PATH)
        self._setup_scouts()

        while not self._event.is_set():
            self._event.wait(1)

        for scout in self._scouts.values():
            scout.stop()
        self.terminated = True
        self.logger.info('[General] Shutting down...')

    def _setup_scouts(self, event=None):
        """Synchronise the scouts with the services listed under CONFS_PATH.

        Also used as the children watch callback, hence the unused ``event``.
        """
        services = self._zk.get_children(CONFS_PATH, watch=self._setup_scouts)
        self.logger.info('[Scouts] Found confs for: %s' % services)

        # Iterate over a snapshot of the keys: entries are removed from the
        # dict inside the loop, which is not allowed while iterating it.
        for scouted_service in list(self._scouts):
            if scouted_service not in services:
                self.logger.info(
                    '[Scouts] Service "%s" not in confs, removing its scout' %
                    scouted_service)
                self._scouts[scouted_service].stop()
                self._scouts.pop(scouted_service)

        for service in services:
            self._setup_scout(service)

    def _setup_scout(self, service, event=None):
        """Create the scout for ``service`` or push a new configuration to it.

        The configuration is the JSON document stored at
        ``CONFS_PATH/<service>``; a data watch re-runs this method for the
        same service.
        """
        data, stat = self._zk.get(
            "%s/%s" % (CONFS_PATH, service),
            watch=lambda ev: self._setup_scout(service, ev))
        conf = json.loads(data)

        if service in self._scouts:
            self.logger.info('[Scouts] New conf for %s' % service)
            self._scouts[service].set_conf(conf)
        else:
            self.logger.info('[Scouts] Creating a scout for %s' % service)
            scout = ServiceScout(zk=self._zk,
                                 service=service,
                                 cmd=conf['cmd'],
                                 zk_path=conf['zk_path'],
                                 refresh=conf['refresh'])
            self._scouts[service] = scout
Example #14
0
# @Time     : 2018/11/19 21:37
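# @describe : Asynchronous usage of kazoo. The callback and CRUD examples from the official async documentation did not work as expected here; to be studied further.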

import sys
from kazoo.client import KazooClient
from kazoo.handlers.gevent import SequentialGeventHandler
# from kazoo.handlers.eventlet import SequentialEventletHandler
from kazoo.exceptions import ConnectionLossException
from kazoo.exceptions import NoAuthException

# Establish the connection. Kazoo does not rely on gevent's monkey patching,
# so an appropriate handler has to be passed in (SequentialGeventHandler() here).
# The same applies to eventlet.
zk = KazooClient(hosts='39.107.88.145:2181',
                 timeout=1,
                 handler=SequentialGeventHandler())
event = zk.start_async()

event.wait(timeout=1)  # block on the event object returned by start_async()

if not zk.connected:  # the connection may never succeed, so check the state and handle the failure
    zk.stop()
    raise Exception("Unable to connect")


def my_callback(async_obj):
    """Process the outcome of an asynchronous request.

    Prints a separator line, fetches the result from ``async_obj`` and passes
    it to ``do_something``. The process exits with status 1 when a
    ``ConnectionLossException`` or ``NoAuthException`` is raised.

    :param async_obj: object exposing ``get()`` that yields the result.
    """
    try:
        # Parenthesised form: valid on both Python 2 and Python 3.
        print('-------------------------')
        children = async_obj.get()
        do_something(children)
    except (ConnectionLossException, NoAuthException):
        sys.exit(1)
Example #15
0
class ZookeeperServiceRegistry(BaseServiceRegistry):
    """Service registry backed by ZooKeeper through a gevent-driven Kazoo client.

    Service instances are stored as znodes at
    ``<chroot>/services/<service_type>/<identity>``; the znode data is a
    UTF-8 encoded JSON object.
    """

    def __init__(self, hosts=DEFAULT_HOSTS, chroot=DEFAULT_CHROOT):
        """Build the Kazoo client; the connection is opened by ``on_start``.

        :param hosts: host string handed to ``KazooClient``.
        :param chroot: path prefix under which all registry znodes live.
        """
        super(ZookeeperServiceRegistry, self).__init__()
        self.chroot = chroot
        self.client = KazooClient(
            hosts=hosts,
            handler=SequentialGeventHandler(),
        )
        self.client.add_listener(self.on_kazoo_state_change)
        # Counts on_start() calls not yet balanced by on_stop(): only the
        # first start connects and only the last stop disconnects.
        self.start_count = 0

    @classmethod
    def from_config(cls, config, **kwargs):
        """Alternate constructor reading ``hosts`` and ``chroot`` from ``config``."""
        return cls(
            hosts=config.get('hosts', DEFAULT_HOSTS),
            chroot=config.get('chroot', DEFAULT_CHROOT),
            **kwargs
        )

    def on_start(self, timeout=10):
        """Connect to ZooKeeper on the first call; later calls only count.

        :param timeout: seconds to wait for the connection.
        :raises RuntimeError: if the client is not connected after the wait.
        """
        self.start_count += 1
        if self.start_count > 1:
            return
        started = self.client.start_async()
        started.wait(timeout=timeout)
        if not self.client.connected:
            raise RuntimeError('could not connect to zookeeper')
        logger.debug('connected to zookeeper (version=%s)', '.'.join(map(str, self.client.server_version())))

    def on_stop(self):
        """Stop the client once every ``on_start`` has been balanced."""
        self.start_count -= 1
        if self.start_count != 0:
            return
        self.client.stop()

    def on_kazoo_state_change(self, state):
        """Log every connection state change reported by Kazoo."""
        logger.info('kazoo connection state changed to %s', state)

    def on_service_type_watch(self, service, event):
        """Children watch callback: re-run ``lookup`` when the children change.

        Any exception is logged and swallowed.
        """
        try:
            if event.type == EventType.CHILD:
                # FIXME: figure out proper retry strategy
                self.client.retry(self.lookup, service.container, service)
        except Exception:
            logger.exception('error in service type watcher')

    def on_service_watch(self, service, event):
        """Data watch callback: drop an instance whose znode was deleted.

        The identity is the last component of ``event.path``. Any exception
        (including a path with fewer than three components) is logged and
        swallowed.
        """
        try:
            _prefix, _service_type, identity = event.path.rsplit('/', 2)
            if event.type == EventType.DELETED:
                service.remove(identity)
        except Exception:
            logger.exception('error in service watcher')

    def _get_service_znode(self, service, service_type, identity):
        """Read one instance znode and return its JSON payload as ``str`` pairs.

        The read also installs ``on_service_watch`` as a watch on the znode.
        """
        path = self._get_zk_path(service_type, identity)
        result = self.client.get_async(
            path, watch=functools.partial(self.on_service_watch, service))
        value, _znode = result.get()
        items = six.iteritems(json.loads(value.decode('utf-8')))
        return {str(k): str(v) for k, v in items}

    def discover(self, container):
        """Return the names of the children of ``<chroot>/services`` as a list."""
        result = self.client.get_children_async(
            path='%s/services' % self.chroot,
        )
        return list(result.get())

    def lookup(self, container, service, watch=True, timeout=1):
        """Refresh ``service`` from the instances currently registered.

        Every listed instance is passed to ``service.update``; identities the
        service knew before that are no longer listed are removed.

        :param container: not used by this method.
        :param service: service object to refresh; also the return value.
        :param watch: accepted for interface compatibility but not consulted;
            a children watch is always installed.
        :param timeout: seconds to wait for the children listing.
        :raises LookupFailure: if the service type node does not exist.
        """
        service_type = service.service_type
        result = self.client.get_children_async(
            path='%s/services/%s' % (self.chroot, service_type),
            watch=functools.partial(self.on_service_type_watch, service),
        )
        try:
            names = result.get(timeout=timeout)
        except NoNodeError:
            raise LookupFailure(None, "failed to resolve %s" % service.service_type)
        logger.info("lookup %s %r", service_type, names)
        # Start from everything known; whatever is left after the loop is stale.
        stale_identities = set(service.identities())
        for name in names:
            kwargs = self._get_service_znode(service, service_type, name)
            identity = kwargs.pop('identity')
            service.update(identity, **kwargs)
            stale_identities.discard(identity)
        for identity in stale_identities:
            service.remove(identity)
        return service

    def _get_zk_path(self, service_type, identity):
        """Return the znode path of one service instance."""
        return '%s/services/%s/%s' % (self.chroot, service_type, identity)

    def register(self, container, service_type, timeout=1):
        """Create an ephemeral znode describing ``container``.

        The payload holds the container's ``endpoint``, ``identity`` and
        ``log_endpoint``; missing parent nodes are created.

        :param timeout: seconds to wait for the create to complete.
        """
        path = self._get_zk_path(service_type, container.identity)
        value = json.dumps({
            'endpoint': container.endpoint,
            'identity': container.identity,
            'log_endpoint': container.log_endpoint,
        })
        result = self.client.create_async(
            path,
            value.encode('utf-8'),
            ephemeral=True, makepath=True)
        # FIXME: result.set_exception(RegistrationFailure())
        result.get(timeout=timeout)

    def unregister(self, container, service_type, timeout=1):
        """Delete the znode that ``register`` created for ``container``.

        :param timeout: seconds to wait for the delete to complete.
        """
        path = self._get_zk_path(service_type, container.identity)
        result = self.client.delete_async(path)
        # NOTE(review): the exception is set before the delete has completed,
        # so get() below appears to raise RegistrationFailure regardless of
        # the outcome (register() has the same call commented out) - confirm
        # the intent before relying on this.
        result.set_exception(RegistrationFailure())
        result.get(timeout=timeout)
Example #16
0
class Coordinator(object):
    """Tracks whether this sender is master, using a ZooKeeper lock and party.

    The lock at ``/iris/sender_master`` decides the master; the party at
    ``/iris/sender_nodes`` lists the senders present in the cluster.
    """

    def __init__(self, zk_hosts, hostname, port, join_cluster):
        """Connect to ZooKeeper and optionally join the sender party.

        :param zk_hosts: host string handed to ``KazooClient``.
        :param hostname: this sender's host name.
        :param port: this sender's port.
        :param join_cluster: when truthy the party is joined; otherwise the
            client is created read-only.
        """
        self.me = '%s:%s' % (hostname, port)
        self.is_master = None
        self.slaves = cycle([])
        self.slave_count = 0
        self.started_shutdown = False

        # Senders that stay outside the cluster get a read-only client.
        self.zk = KazooClient(hosts=zk_hosts,
                              handler=SequentialGeventHandler(),
                              read_only=not join_cluster)
        self.zk.start_async().wait(timeout=5)

        self.lock = self.zk.Lock(path='/iris/sender_master',
                                 identifier=self.me)

        # The party keeps track of slaves / senders present in the cluster.
        self.party = Party(client=self.zk,
                           path='/iris/sender_nodes',
                           identifier=self.me)

        if join_cluster:
            self.party.join()

    def am_i_master(self):
        """Return the last master status computed by ``update_status``."""
        return self.is_master

    def get_current_master(self):
        """Return the ``(host, port)`` of the first lock contender, or None.

        Used by the API. None is returned when there are no contenders or
        when querying them raises a Kazoo exception (which is logged).
        """
        try:
            contenders = self.lock.contenders()
        except kazoo.exceptions.KazooException:
            logger.exception('Failed getting contenders')
            return None

        if not contenders:
            return None
        return self.address_to_tuple(contenders[0])

    def get_current_slaves(self):
        """Return every party member as an address tuple.

        Used by the API when the master cannot be reached.
        """
        members = []
        for host in self.party:
            members.append(self.address_to_tuple(host))
        return members

    def address_to_tuple(self, address):
        """Split ``'host:port'`` into ``(host, int(port))``.

        Returns None, after logging an error, when the address cannot be
        parsed.
        """
        try:
            host, port = address.split(':')
            parsed = (host, int(port))
        except (IndexError, ValueError):
            logger.error('Failed getting address tuple from %s', address)
            return None
        return parsed

    def update_status(self):
        """Recompute ``is_master`` and refresh the slave rotation.

        Does nothing once shutdown has started.
        """
        if self.started_shutdown:
            return

        if self.zk.state != KazooState.CONNECTED:
            logger.error('ZK connection is not in connected state')
            self.is_master = False
        elif self.is_master:
            # Already master: just confirm the lock is still held.
            self.is_master = self.lock.is_acquired
        else:
            try:
                self.is_master = self.lock.acquire(blocking=False,
                                                   timeout=2)
            except kazoo.exceptions.LockTimeout:
                self.is_master = False
                logger.exception(
                    'Failed trying to acquire lock (shouldn\'t happen as we\'re using nonblocking locks)'
                )
            except kazoo.exceptions.KazooException:
                self.is_master = False
                logger.exception(
                    'ZK problem while Failed trying to acquire lock')

        if not self.is_master:
            self.slaves = cycle([])
            self.slave_count = 0
            return

        # Master: rotate over every other party member.
        others = [
            self.address_to_tuple(host) for host in self.party
            if host != self.me
        ]
        self.slave_count = len(others)
        self.slaves = cycle(others)

    def update_forever(self):
        """Loop: update the status, log it, publish metrics, sleep.

        Status changes are logged at info level, unchanged status at debug
        level. Returns once shutdown has started.
        """
        while not self.started_shutdown:
            previous = self.is_master
            self.update_status()
            current = self.is_master

            log = logger.debug if previous == current else logger.info

            if self.is_master:
                log('I am the master sender')
            else:
                log('I am a slave sender')

            metrics.set('slave_instance_count', self.slave_count)
            metrics.set('is_master_sender', int(self.is_master))

            sleep(UPDATE_FREQUENCY)

    def leave_cluster(self):
        """Flag shutdown, then leave the party and release the lock if held."""
        self.started_shutdown = True

        in_party = self.party and self.party.participating
        if in_party:
            logger.info('Leaving party')
            self.party.leave()

        holds_lock = self.lock and self.lock.is_acquired
        if holds_lock:
            logger.info('Releasing lock')
            self.lock.release()
Example #17
0
class Zookeeper:
    """Coroutine-style facade over a gevent-backed ``KazooClient``.

    Each coroutine method issues the matching Kazoo call, waits for its
    result and delivers it through ``raise Return(...)``. Exceptions raised
    by Kazoo propagate to the caller unchanged.
    """

    def __init__(self, hosts):
        """Create the client and wait up to 30 seconds for the connection.

        If the client is still not connected after the wait it is stopped;
        no exception is raised for that case.

        :param hosts: host string handed to ``KazooClient``.
        """
        self.zk = KazooClient(hosts=hosts,
                              handler=SequentialGeventHandler(),
                              logger=logger)
        # start_async() returns immediately with an event to wait on.
        event = self.zk.start_async()

        # Wait for 30 seconds and see if we're connected
        event.wait(timeout=30)
        if not self.zk.connected:
            # Not connected, stop trying to connect
            self.zk.stop()

    @coroutine
    def get_children(self, node):
        """Deliver the result of ``get_children_async(node)``."""
        children = self.zk.get_children_async(node)
        raise Return(children.get())

    @coroutine
    def get_node(self, node):
        """Deliver the result of ``get_async(node)``."""
        data = self.zk.get_async(node)
        raise Return(data.get())

    @coroutine
    def check_path_exist(self, path):
        """Deliver True when ``exists(path)`` is truthy, else False."""
        raise Return(bool(self.zk.exists(path)))

    @coroutine
    def create_path(self, path):
        """Deliver the result of ``ensure_path_async(path)``."""
        result = self.zk.ensure_path_async(path)
        raise Return(result.get())

    @coroutine
    def create_node(self, path, value):
        """Create an ephemeral node at ``path`` and deliver the create result."""
        result = self.zk.create_async(path=path,
                                      value=value,
                                      acl=None,
                                      ephemeral=True)
        raise Return(result.get())

    @coroutine
    def update_node(self, path, value, version=-1):
        """Set the data of ``path`` and deliver the result of ``set_async``.

        :param version: version passed through to ``set_async``.
        """
        result = self.zk.set_async(path, value, version)
        raise Return(result.get())

    @coroutine
    def del_node(self, node):
        """Delete ``node`` and deliver the result of ``delete_async``."""
        node_info = self.zk.delete_async(node)
        raise Return(node_info.get())

    def close(self):
        """Stop the underlying client."""
        self.zk.stop()
Example #18
0
class ZooHandler(object):
    """Registers this process as an ephemeral znode and re-registers on change.

    All configuration is read from ``settings.ZOOKEEPER_SETTING`` and
    ``settings.HTTP_SERVER_SETTING``.
    """

    def __init__(self):
        """Start the client asynchronously unless ZooKeeper is disabled.

        When ``settings.ZOOKEEPER_SETTING['enable']`` is falsy no client is
        created and ``zookeeper_client`` stays None.
        """
        self.zookeeper_client = None
        if not settings.ZOOKEEPER_SETTING['enable']:
            logging.info('zookeeper disabled')
            return
        self.zoo_hosts = settings.ZOOKEEPER_SETTING['server_address']
        logging.info('start zookeeper client, zoo hosts: %s' % self.zoo_hosts)
        self.base_dir = settings.ZOOKEEPER_SETTING['base_dir']
        self.zookeeper_client = KazooClient(hosts=self.zoo_hosts)
        self.zookeeper_client.add_listener(self.state_listener)
        self.zookeeper_client.start_async()

    def state_listener(self, state):
        """Connection state callback: log the state, register when connected."""
        # session was lost
        if state == KazooState.LOST:
            logging.error('zookeeper lost!')
        # disconnected from Zookeeper
        elif state == KazooState.SUSPENDED:
            logging.error('zookeeper disconnected!')
        # connected/reconnected to Zookeeper
        elif state == KazooState.CONNECTED:
            self.register_node()
            logging.warning('zookeeper reconnected! try to register')
        else:
            logging.error('unexpected zookeeper state!!!')
            logging.critical('unexpected zookeeper state!!!')

    def register_node(self):
        """Create the ephemeral registration node if missing and watch it.

        Does nothing (beyond logging) when there is no connected client.
        Errors are logged and never raised to the caller.
        """
        if not self.zookeeper_client or not self.zookeeper_client.connected:
            logging.error('zoo not connected, register cancel')
            return
        path = ZooHandler.get_register_path()
        try:
            # Try to register the node once the existence check completes.
            def try_to_create_node(result):
                logging.info('zoo try_to_create_noe called')
                try:
                    # Check for a failed request first: presumably a failed
                    # result also has value None, which would otherwise be
                    # mistaken for "node does not exist".
                    if result.exception:
                        logging.fatal(
                            'critical error when try to check node when reconnected, %s',
                            result.exception)
                    # None means the node does not exist
                    elif result.value is None:
                        self.zookeeper_client.create_async(path,
                                                           makepath=True,
                                                           ephemeral=True)
                    else:
                        logging.warning(
                            'node already exists when reconnect and try to register'
                        )
                except BaseException as e:
                    # Exceptions have no .message attribute on Python 3.
                    logging.exception('critical error, %s', e)

            # Watch for changes of the node.
            def node_watcher(watch_event):
                logging.info('zoo node_watcher called')
                try:
                    if EventType.DELETED == watch_event.type:
                        logging.warning('zoo nodes deleted, try recreate')
                        self.zookeeper_client.create_async(path,
                                                           makepath=True,
                                                           ephemeral=True)
                    if EventType.CHANGED == watch_event.type:
                        logging.warning('zoo nodes changed,do nothing')
                    if EventType.CHILD == watch_event.type:
                        logging.warning('zoo nodes childed,do nothing')
                    if EventType.CREATED == watch_event.type:
                        logging.info('zoo nodes success created')
                    if EventType.NONE == watch_event.type:
                        logging.error('zoo nodes status return None')
                finally:
                    # Install the watch again after every event.
                    self.zookeeper_client.exists_async(path,
                                                       watch=node_watcher)

            future = self.zookeeper_client.exists_async(path,
                                                        watch=node_watcher)
            future.rawlink(try_to_create_node)
        except ZookeeperError as e:
            logging.exception('zookeeper exception when register node: %s' %
                              e)
        except BaseException:
            logging.exception('critical error!')

    # 1. remove nodes,stop client
    def stop(self):
        """Detach the state listener and stop the client, if one exists."""
        logging.info('stopping zookeeper client')
        if self.zookeeper_client:
            self.zookeeper_client.remove_listener(self.state_listener)
            self.zookeeper_client.stop()
            logging.info('zookeeper stopped')

    @staticmethod
    def get_register_path():
        """Return ``<base_dir>/<local_name>:<local_ip>:<port>`` from settings.

        One trailing ``/`` is removed from ``base_dir``; an empty
        ``base_dir`` is accepted.
        """
        base_dir = settings.ZOOKEEPER_SETTING['base_dir']
        # endswith() is safe on an empty string, unlike base_dir[-1].
        if base_dir.endswith('/'):
            base_dir = base_dir[0:-1]
        register_name = "%s/%s:%s:%s" % (
            base_dir, settings.ZOOKEEPER_SETTING['local_name'],
            settings.ZOOKEEPER_SETTING['local_ip'],
            settings.HTTP_SERVER_SETTING['port'])
        return register_name
Example #19
0
class Pool(object):
    """
    A pool represents a set of resources and nodes that own/manage those resources.
    The pool class is responsible for tracking state of all nodes and resources
    within the entire pool.
    """
    def __init__(self, name, hosts='127.0.0.1:2181'):
        """Set up the pool and connect when ``hosts`` is truthy.

        :param name: pool name; the pool lives at ``/carousel/<name>``.
        :param hosts: host string for ZooKeeper; pass a falsy value to skip
            connecting (``zk`` then stays None).
        """
        self.name = name
        self.path = '/carousel/{}'.format(name)
        self.hosts = hosts

        # Generic metadata tracked for the entire pool
        self.nodes = set()
        self.resources = set()

        self.zk = None

        if hosts:
            self.connect(hosts)

    def _on_resources_change(self, res):
        """Children watch callback: replace the known resources."""
        self.resources = set(res)

    def _on_nodes_change(self, res):
        """Children watch callback: replace the known nodes."""
        self.nodes = set(res)

    @property
    def healthy(self):
        """True when every known resource has an entry under ``leaders``."""
        resources_with_leaders = set(
            self.zk.get_children(os.path.join(self.path, 'leaders')))
        resources_without_leaders = self.resources - resources_with_leaders
        return not resources_without_leaders

    def create(self, metadata=None):
        """Create the pool node plus its three child nodes, then load it.

        :param metadata: JSON-serialisable pool metadata; defaults to an
            empty dict.
        """
        if metadata is None:
            metadata = {}

        # Create the base pool path with metadata
        self.zk.create(self.path,
                       json.dumps(metadata).encode(),
                       makepath=True)

        for path in ['resources', 'nodes', 'leaders']:
            self.zk.create(os.path.join(self.path, path))

        self.load()

    def load(self):
        """Load the pool metadata and start watching resources and nodes.

        :raises PoolException: if the pool node does not exist.
        """
        # Check whether the pool exists
        if not self.zk.exists(self.path):
            raise PoolException("Pool with name {} does not exist!".format(
                self.name))

        # Next load the pool meta-data
        self.meta, self.meta_stat = self.zk.get(self.path)
        self.meta = json.loads(self.meta.decode())

        # Finally, we need to keep track of resources and nodes
        ChildrenWatch(self.zk, os.path.join(self.path, 'resources'),
                      self._on_resources_change)
        ChildrenWatch(self.zk, os.path.join(self.path, 'nodes'),
                      self._on_nodes_change)

    def connect(self, hosts, timeout=4):
        """Connect to ZooKeeper and load the pool, creating it if missing.

        :param hosts: host string handed to ``KazooClient``.
        :param timeout: timeout handed to ``KazooClient``.
        :raises Exception: if the client is not connected after 5 seconds.
        """
        self.zk = KazooClient(hosts,
                              timeout=timeout,
                              handler=SequentialGeventHandler())
        self.zk.start_async().wait(timeout=5)

        if not self.zk.connected:
            self.zk.stop()
            raise Exception('Failed to reach zookeeper')

        try:
            self.load()
        except PoolException:
            self.create()

    def disconnect(self):
        """Stop the ZooKeeper client."""
        self.zk.stop()

    def ensure_resources(self, *resources):
        """Call ``ensure_resource`` for every given resource name."""
        for resource in resources:
            self.ensure_resource(resource)

    def ensure_resource(self, name, metadata=None):
        """Create the resource node unless it already exists.

        :param name: resource name (child of ``resources``).
        :param metadata: JSON-serialisable metadata; defaults to an empty dict.
        """
        try:
            # Encode to bytes, matching what create() stores for the pool
            # node itself.
            self.zk.create(os.path.join(self.path, 'resources', name),
                           json.dumps(metadata or {}).encode())
        except NodeExistsError:
            pass

    def delete_resource(self, name):
        """Delete a known resource and, if present, its leader entry."""
        assert name in self.resources

        self.zk.delete(os.path.join(self.path, 'resources', name))

        try:
            self.zk.delete(os.path.join(self.path, 'leaders', name))
        except NoNodeError:
            # No leader was recorded for this resource.
            pass

    def create_node(self, metadata=None):
        """Return a new ``Node`` bound to this pool."""
        return Node(self, metadata or {})

    def get_leader(self, resource):
        """Return the data stored in the leader entry of ``resource``."""
        result, _ = self.zk.get(os.path.join(self.path, 'leaders', resource))
        return result
Example #20
0
class kazooMaster(object):
    """Helper around a ``KazooClient`` for device / user / product znodes."""

    def __init__(self,ip,type_="p",node="",userID="",pid="",operation="",remap=False):
        """Store the parameters, derive the znode paths and connect.

        For ``type_`` ``"e"``/``"E"`` the path is ``/<node>``; otherwise it is
        ``/<userID>/<pid>/<node>`` and ``path_rev`` is
        ``/<node>/<userID>/<pid>``.

        :param ip: ZooKeeper host; port 2181 is used.
        """
        self.ip = ip
        self.node = node
        self.type=type_
        self.userID = userID
        self.productID = pid
        self.operation = operation
        self.path_rev = ""
        if type_ == "e" or type_ == "E":
            self.path = "/"+self.node
        else:
            self.path_rev = "/"+self.node+"/"+self.userID+"/"+self.productID
            self.path = "/"+self.userID+"/"+self.productID+"/"+self.node
        self.version=""
        self.remap = remap
        self.start_client()

    def start_client(self):
        """Create the client and connect with ``start()``."""
        self.zk = KazooClient(hosts='{}:2181'.format(self.ip), read_only = False)
        self.zk.start()

    def start_client_async(self):
        """Create the client and connect with ``start_async()``."""
        self.zk = KazooClient(hosts='{}:2181'.format(self.ip), read_only = False)
        self.zk.start_async()

    def children_watch(self, path = None):
        """Install a children watch that prints the children of ``path``.

        :param path: znode to watch; defaults to ``self.path``.
        """
        if path is None:
            path = self.path
        print(path)
        @self.zk.ChildrenWatch(path)
        def watch_children(children):
            print("Children are now: %s" % children)
        # Above function called immediately, and from then on

    def stop_client(self):
        """Stop the client."""
        self.zk.stop()

    def get_children(self,path):
        """Return the children of ``path``, or ``""`` when it does not exist."""
        print(path, "in get children")
        if self.zk.exists(path) is None:
            return ""
        return self.zk.get_children(path)

    def exist(self,path):
        """Return True when ``path`` exists, else False."""
        return self.zk.exists(path) is not None

    def create(self, path, param = "p"):
        """Create ``path`` with value ``b"0"`` if it does not exist yet.

        :param param: ``"e"`` creates an ephemeral node; any other value a
            persistent one.
        :returns: True when the node was created, False otherwise.
        """
        ephemeral = param == "e"

        logging.basicConfig(filename='logs/connection.log', filemode='w', level=logging.DEBUG)

        if self.path == "":
            logging.error("PATH EMPTY")
            return False
        if self.zk.exists(path) is None:
            self.zk.create(path, value=b"0", makepath=True, ephemeral = ephemeral)
            # self.zk.create(self.path_rev,value=b"0",makepath=True)
            return True
        return False

    #stat is blocking ,control will return to called object after ephemeral node crashes
    def stat(self):
        """Watch ``self.path`` and block until the callback has run 4 times.

        The callback is registered both through the decorator and through an
        explicit ``DataWatch``. The client is stopped before returning,
        except on the early ``-1`` return.

        :returns: -1 when the explicit ``DataWatch`` is falsy, else None.
        """
        import time

        stop=4

        @self.zk.DataWatch("{}".format(self.path))
        def my_func(data,stat):
            nonlocal stop
            stop = stop -1
            if stat is None:
                return
            print("changed")
            print("Data is {} ".format(data))
            print("Version is {} ".format(stat.version))

        if self.zk.exists(self.path) is not None:
            val=DataWatch(self.zk,self.path,func=my_func)
            if not val:
                return -1
            while stop > 0:
                # Sleep between checks so the wait does not spin at full CPU.
                time.sleep(0.05)
        else:
            print("PATH INVALID")

        self.zk.stop()

    def delete(self, node_name):
        """Recursively delete ``node_name`` and stop the client.

        A failure of the delete itself is logged and swallowed.

        :raises Exception: if ``node_name`` does not exist.
        """
        if self.zk.exists(node_name):
            print("Node {} in ip {} exists".format(node_name, self.ip))
        else:
            print("Node {} in ip {} does not exists".format(node_name, self.ip))
            raise Exception("Node does not exists")
        try:
            self.zk.delete(node_name, recursive = True)
        except Exception:
            logging.info("Error whle updating Node " + node_name)

        self.zk.stop()

    def retrieve(self, custom_path = None):
        """Return the version of a znode as a string, or -1 if it is missing.

        :param custom_path: znode to read; defaults to ``self.path``.
        """
        if not custom_path:
            custom_path = self.path

        # Check the path that is actually read, not always self.path.
        if self.zk.exists(custom_path) is None:
            return -1
        _data, znode_stat = self.zk.get(custom_path)
        return str(znode_stat.version)

    #Give only user ID for finding mapping
    def getmap(self):
        """List key / device / version entries found under ``/<userID>``.

        :returns: list of dicts with ``key``, ``device`` and ``version``;
            empty when the user node does not exist.
        """
        parent = "/"+self.userID

        if not self.zk.exists(parent):
            return []

        to_return = []
        for keys in self.zk.get_children(parent):
            subChildren = parent+"/"+keys
            if not self.zk.exists(subChildren):
                continue

            for val in self.zk.get_children(subChildren):
                path = subChildren+"/"+val
                to_return.append({
                    "key": keys,
                    "device": val,
                    "version": self.retrieve(path)
                })

        return to_return

    #give only Device Id for remapping
    def reMap(self):
        """Collect ``(user, item)`` pairs found under ``/<node>``.

        Only runs when ``self.remap`` equals True; otherwise a message is
        printed and an empty list is returned.
        """
        remap_data=[]
        if self.remap == True:
            dev_down = "/"+self.node
            for users in self.zk.get_children(dev_down):
                user_name = dev_down+"/"+users
                for items in self.zk.get_children(user_name):
                    remap_data.append((users,items))
                    #TODO:delete user key pairs here
                    print(users,items)
        else:
            print("REMAP PARAMETER FALSE")

        return remap_data

    def setVersion(self,path,value):
        """Store ``str(value)`` as the data of ``path`` if the node exists.

        Failures of the write are printed, not raised.
        """
        if self.zk.exists(path) is None:
            print("Path does not exists setversion method")
        else:
            try:
                self.zk.set(path=path,value=str(value).encode())
            except Exception as e:
                print(e, "Exception in set versioning")
Example #21
0
class ZookeeperServiceRegistry(BaseServiceRegistry):
    """Service registry that keeps service instances as znodes in ZooKeeper.

    Each instance is a znode at
    ``<chroot>/services/<service_type>/<identity>`` holding a UTF-8 encoded
    JSON object. The Kazoo client uses the gevent handler.
    """

    def __init__(self, hosts=DEFAULT_HOSTS, chroot=DEFAULT_CHROOT):
        """Create the Kazoo client without connecting (see ``on_start``).

        :param hosts: host string handed to ``KazooClient``.
        :param chroot: path prefix under which all registry znodes live.
        """
        super(ZookeeperServiceRegistry, self).__init__()
        self.chroot = chroot
        self.client = KazooClient(
            hosts=hosts,
            handler=SequentialGeventHandler(),
        )
        self.client.add_listener(self.on_kazoo_state_change)
        # Reference count of on_start() calls: the first one connects, the
        # balancing last on_stop() disconnects.
        self.start_count = 0

    @classmethod
    def from_config(cls, config, **kwargs):
        """Build a registry from the ``hosts`` / ``chroot`` keys of ``config``."""
        return cls(hosts=config.get('hosts', DEFAULT_HOSTS),
                   chroot=config.get('chroot', DEFAULT_CHROOT),
                   **kwargs)

    def on_start(self, timeout=10):
        """Connect on the first call; subsequent calls only bump the count.

        :param timeout: seconds to wait for the connection.
        :raises RuntimeError: if the client is not connected after the wait.
        """
        self.start_count += 1
        if self.start_count > 1:
            return
        started = self.client.start_async()
        started.wait(timeout=timeout)
        if not self.client.connected:
            raise RuntimeError('could not connect to zookeeper')
        logger.debug('connected to zookeeper (version=%s)',
                     '.'.join(map(str, self.client.server_version())))

    def on_stop(self):
        """Stop the client when the start count drops back to zero."""
        self.start_count -= 1
        if self.start_count != 0:
            return
        self.client.stop()

    def on_kazoo_state_change(self, state):
        """Log connection state changes reported by Kazoo."""
        logger.info('kazoo connection state changed to %s', state)

    def on_service_type_watch(self, service, event):
        """Children watch callback: look the service up again on child events.

        Exceptions are logged and swallowed.
        """
        try:
            if event.type == EventType.CHILD:
                # FIXME: figure out proper retry strategy
                self.client.retry(self.lookup, service.container, service)
        except Exception:
            logger.exception('error in service type watcher')

    def on_service_watch(self, service, event):
        """Data watch callback: remove the instance when its znode is deleted.

        The identity is the last component of ``event.path``. Exceptions
        (including a path with fewer than three components) are logged and
        swallowed.
        """
        try:
            _prefix, _service_type, identity = event.path.rsplit('/', 2)
            if event.type == EventType.DELETED:
                service.remove(identity)
        except Exception:
            logger.exception('error in service watcher')

    def _get_service_znode(self, service, service_type, identity):
        """Return the JSON payload of one instance znode as ``str`` pairs.

        Reading the znode also installs ``on_service_watch`` as its watch.
        """
        path = self._get_zk_path(service_type, identity)
        result = self.client.get_async(path,
                                       watch=functools.partial(
                                           self.on_service_watch, service))
        value, _znode = result.get()
        items = six.iteritems(json.loads(value.decode('utf-8')))
        return {str(k): str(v) for k, v in items}

    def discover(self, container):
        """Return the child names of ``<chroot>/services`` as a list."""
        result = self.client.get_children_async(path='%s/services' %
                                                self.chroot, )
        return list(result.get())

    def lookup(self, container, service, watch=True, timeout=1):
        """Bring ``service`` in line with the instances registered right now.

        Listed instances are passed to ``service.update``; identities that the
        service knew before but that are no longer listed are removed.

        :param container: not used by this method.
        :param service: service object to refresh; also the return value.
        :param watch: accepted for interface compatibility but not consulted;
            a children watch is always installed.
        :param timeout: seconds to wait for the children listing.
        :raises LookupFailure: if the service type node does not exist.
        """
        service_type = service.service_type
        result = self.client.get_children_async(
            path='%s/services/%s' % (self.chroot, service_type),
            watch=functools.partial(self.on_service_type_watch, service),
        )
        try:
            names = result.get(timeout=timeout)
        except NoNodeError:
            raise LookupFailure(None,
                                "failed to resolve %s" % service.service_type)
        logger.info("lookup %s %r", service_type, names)
        # Everything known up front; entries still present afterwards are stale.
        stale_identities = set(service.identities())
        for name in names:
            kwargs = self._get_service_znode(service, service_type, name)
            identity = kwargs.pop('identity')
            service.update(identity, **kwargs)
            stale_identities.discard(identity)
        for identity in stale_identities:
            service.remove(identity)
        return service

    def _get_zk_path(self, service_type, identity):
        """Return the znode path for one instance of ``service_type``."""
        return '%s/services/%s/%s' % (self.chroot, service_type, identity)

    def register(self, container, service_type, timeout=1):
        """Create an ephemeral znode describing ``container``.

        The payload carries the container's ``endpoint``, ``identity`` and
        ``log_endpoint``; missing parent nodes are created.

        :param timeout: seconds to wait for the create to complete.
        """
        path = self._get_zk_path(service_type, container.identity)
        value = json.dumps({
            'endpoint': container.endpoint,
            'identity': container.identity,
            'log_endpoint': container.log_endpoint,
        })
        result = self.client.create_async(path,
                                          value.encode('utf-8'),
                                          ephemeral=True,
                                          makepath=True)
        # FIXME: result.set_exception(RegistrationFailure())
        result.get(timeout=timeout)

    def unregister(self, container, service_type, timeout=1):
        """Delete the znode created by ``register`` for ``container``.

        :param timeout: seconds to wait for the delete to complete.
        """
        path = self._get_zk_path(service_type, container.identity)
        result = self.client.delete_async(path)
        # NOTE(review): the exception is set before the delete has completed,
        # so get() below appears to raise RegistrationFailure regardless of
        # the outcome (register() has the same call commented out) - confirm
        # the intent before relying on this.
        result.set_exception(RegistrationFailure())
        result.get(timeout=timeout)
Example #22
0
class ZookeeperLocks(object):
    """
    Zookeeper lock class that finds and removes locks
    from a Hive object
    Adapted from here:
      https://etl.svbtle.com/removing-database-level-locks-in-hive
    """
    def __init__(self, hosts, database, table):
        """
        Build the Zookeeper client and connect immediately.
        :param hosts: host string handed to KazooClient
        :param database: Hive database name, used by path_setup()
        :param table: Hive table name (stored; not read inside this class)
        :raises Exception: if the connection cannot be established
        """
        self.database = database
        self.table = table

        self.zk_client = KazooClient(hosts=hosts)

        # TODO: Move this out of the init function
        # Currently, this only looks at the table level and
        # assumes that the locks will be under there -
        # partitioned tables will have nested dirs for the
        # partition (eg. domain=x, table=y)
        self.startup()

    def startup(self):
        """
        Start the client and wait for the connection.
        :raises Exception: if the client is not connected after the wait
        """
        # https://kazoo.readthedocs.io/en/latest/async_usage.html
        # start_async() returns immediately
        event = self.zk_client.start_async()
        # Wait up to 20 seconds and see if we're connected
        event.wait(timeout=20)

        if not self.zk_client.connected:
            # Not connected, stop trying to connect
            self.zk_client.stop()
            raise Exception("Unable to connect.")

    def shutdown(self):
        """
        Close the connection to Zookeeper
        """
        self.zk_client.stop()

    def show_all_children(self):
        """
        Get a list of child nodes of the root path.
        :return: List of child node names
        """
        return self.zk_client.get_children('/')

    def get_hive_namespace(self):
        """
        Find the Hive Zookeeper name space in all the
        children of the Zookeeper root
        :return: string, name of Hive Name Space, or None when no
                 child matches
        """
        for child in self.show_all_children():
            match = re.search(r'hive_zookeeper_namespace_hive.?', child)
            if match:
                return match.group()
        return None

    def path_setup(self):
        """
        Build the Zookeeper path of the database.
        :return: "/<hive namespace>/<database>"; the namespace part is
                 the text "None" when get_hive_namespace() finds nothing
        """
        return "/{0}/{1}".format(self.get_hive_namespace(), self.database)

    def my_rec(self, name_of_lock):
        """
        Recursively goes through and removes locks
        at all levels. Locks can be at the table
        or at a partition level. In the case of
        some tables, we are 2 partitions deep.
        It is also possible to have more than 1
        lock on a given partition.
        :param name_of_lock: Zookeeper path of a lock node or of a
                             node to search for locks
        :return: "Deleted <path>" when the node is a lock and was
                 deleted; an empty list when the lock was already
                 gone; "All done" when a non-lock node has no
                 children; otherwise a list with the result for
                 each child
        """
        locks_deleted = []
        # All locks start with LOCK-, so only the node's own name (the last
        # path component) is tested; a parent directory that merely contains
        # "LOCK-" in its name must still be descended into, not deleted.
        node_name = name_of_lock.rsplit('/', 1)[-1]
        # Every print below takes one pre-formatted string, so the output is
        # the same whether print is a statement or a function.
        if node_name.startswith('LOCK-'):
            try:
                self.zk_client.delete(name_of_lock)
            except NoNodeError:
                print("No node error - delete lock: {0}".format(name_of_lock))
            else:
                print('Deleted lock: {0}'.format(name_of_lock))
                return "Deleted " + name_of_lock
        else:
            new_locks = []
            print("Need to go deeper {0}".format(name_of_lock))
            try:
                # this will return a list, but finish
                # if the list is empty
                new_locks = self.zk_client.get_children(name_of_lock + "/")
            except NoNodeError:
                print("No node error - get_children: {0}".format(name_of_lock))
            if not new_locks:
                return "All done"
            for a_lock in new_locks:
                # for the next depth, go through
                # and remove anything that starts
                # with "LOCK-"
                deleted_lock = self.my_rec(name_of_lock + "/" + a_lock)
                locks_deleted.append(deleted_lock)
        return locks_deleted
Example #23
0
class ServiceWatcher:
    """Follow a data-service endpoint and a JSON configuration in ZooKeeper.

    The configuration is read from ``CONF_NODE`` and the endpoint is chosen
    among the children of ``DATA_PARENT_NODE``. Both reads register the
    reading method itself as the watch callback.
    """

    def __init__(self, hosts, timeout):
        """Connect to ``hosts``, waiting on the start event with ``timeout``.

        When the client does not end up connected it is stopped and an error
        is logged; no exception is raised, and ``endpoint`` / ``conf`` stay
        ``None``.
        """
        self._logger = logging.getLogger(self.__class__.__name__)
        self._endpoint = None
        self._conf = None
        self._zk = KazooClient(hosts=hosts)
        started = self._zk.start_async()
        started.wait(timeout=timeout)

        if not self._zk.connected:
            self._zk.stop()
            self._logger.error('Kazoo client failed to connect')
            return

        self._logger.info('Kazoo client successfully connected')
        self._init_conf_node()
        self._setup_conf()
        self._setup_data_endpoint()

    def _init_conf_node(self):
        """Create ``CONF_NODE`` with default settings unless it exists."""
        payload = json.dumps({'last_n': 100, 'repeat_seconds': 30}).encode('utf-8')
        try:
            self._zk.create(CONF_NODE, payload)
        except NodeExistsError:
            # A configuration is already stored; leave it untouched.
            return
        self._logger.warning(
            'No configuration found at path %s, setting default %s' %
            (CONF_NODE, payload))

    def _setup_data_endpoint(self, event=None):
        """Pick an endpoint from the children of ``DATA_PARENT_NODE``.

        The current endpoint is kept while its name is still listed;
        otherwise the first listed child is selected. ``event`` is accepted
        so the method can serve as its own watch callback and is not used.
        """
        self._zk.ensure_path(DATA_PARENT_NODE)
        available = self._zk.get_children(DATA_PARENT_NODE,
                                          watch=self._setup_data_endpoint)
        if not available:
            self._logger.error('No available endpoints found')
            return

        current_name = self._endpoint.split(':')[0] if self._endpoint else None
        if current_name is not None and current_name in available:
            return

        self._logger.info('Found %s data service endpoints: %s' %
                          (len(available), available))
        self._set_endpoint(available[0])

    def _set_endpoint(self, endpoint, event=None):
        """Read the znode of ``endpoint`` and store ``"<endpoint>:<port>"``.

        The znode data is decoded as UTF-8 JSON and must provide the keys
        ``port`` and ``started``. ``event`` is not used.
        """
        raw, _stat = self._zk.get('%s/%s' % (DATA_PARENT_NODE, endpoint))
        info = json.loads(raw.decode('utf-8'))
        self._endpoint = '%s:%s' % (endpoint, info['port'])
        self._logger.info('Endpoint set to %s, which is running since %s' %
                          (self._endpoint, info['started']))

    @property
    def endpoint(self):
        """The selected ``"<name>:<port>"`` string, or ``None`` if unset."""
        return self._endpoint

    def _setup_conf(self, event=None):
        """Load the JSON stored at ``CONF_NODE`` and watch it for changes.

        ``event`` is accepted so the method can serve as its own watch
        callback and is not used.
        """
        raw, _stat = self._zk.get(CONF_NODE, watch=self._setup_conf)
        self._conf = json.loads(raw.decode('utf-8'))
        self._logger.info('New configuration found: %s' % self._conf)

    @property
    def conf(self):
        """The most recently loaded configuration, or ``None`` if unset."""
        return self._conf
Example #24
0
class Node(object):
    """A pool member that claims resources via ephemeral ZooKeeper znodes.

    Ownership of a resource is represented by an ephemeral znode at
    ``<pool.path>/leaders/<resource>`` whose data is this node's id. The set
    of resources this node believes it owns is kept in ``self.resources``.
    """

    def __init__(self, pool, metadata=None, max_inflight_acquires=1, auto_acquire=True):
        """Connect to ZooKeeper and start the periodic balancing greenlet.

        :param pool: object providing ``hosts``, ``path``, ``resources`` and
            ``nodes`` (those are the attributes read in this class).
        :param metadata: stored as ``self.metadata`` (``{}`` when falsy); not
            read anywhere else in this class.
        :param max_inflight_acquires: initial value of the semaphore that
            guards ``_try_takeover``.
        :param auto_acquire: when false, ``_check_for_takeover`` returns
            without doing anything.
        :raises Exception: if the client is not connected after waiting on
            the start event with ``timeout=5``.
        """
        self.pool = pool
        self.zk = KazooClient(pool.hosts, timeout=5, handler=SequentialGeventHandler())
        event = self.zk.start_async()
        event.wait(timeout=5)
        if not self.zk.connected:
            self.zk.stop()
            raise Exception('Failed to reach zookeeper')

        self.metadata = metadata or {}

        # Both are filled in by join().
        self.id = None
        self.path = None
        self.auto_acquire = auto_acquire
        # 0 disables the ownership cap checked in _try_takeover().
        self.max_resources = 0

        # Set of resources we own
        self.resources = set()
        # resource name -> time.time() at which we gave it up; consulted by
        # _try_takeover() to avoid re-acquiring it right away.
        self._resource_backoff = {}
        self._resources_acquiring = gevent.lock.Semaphore(max_inflight_acquires)

        # Callbacks, invoked as callback(node, resource_name) when callable
        self.on_acquire_resource = None
        self.on_release_resource = None

        self._anti_entropy_greenlet = gevent.spawn(self._anti_entropy)

    def disconnect(self):
        """Call ``disconnect()`` on the ZooKeeper client."""
        # NOTE(review): confirm the client actually exposes `disconnect`;
        # __init__ uses `stop()` to shut the client down.
        self.zk.disconnect()

    def acquire(self, resource):
        """Forcefully try to take ownership of ``resource``.

        Asserts that ``resource`` is in ``pool.resources`` and returns the
        result of ``_try_takeover(resource, force=True)``.
        """
        assert resource in self.pool.resources
        return self._try_takeover(resource, force=True)

    def release(self, resource):
        """Delete the leader znode of a resource this node owns.

        Asserts that ``resource`` is in ``self.resources``. This method does
        not modify ``self.resources`` itself; the removal happens in
        ``_on_resource_leader_change`` when it receives a DELETED event.
        """
        assert resource in self.resources

        # TODO: transaction here
        self.zk.delete(os.path.join(self.pool.path, 'leaders', resource))

    def leave(self):
        """Release every resource currently in ``self.resources``."""
        # Iterate over a copy: self.resources may be mutated meanwhile.
        for resource in list(self.resources):
            self.release(resource)

    def join(self):
        """Register this node in the pool and start looking for resources.

        Creates an ephemeral, sequential znode under ``<pool.path>/nodes/``,
        stores its path in ``self.path`` and the last path component in
        ``self.id``.
        """
        # The trailing '' makes os.path.join end the path with a separator.
        path = self.zk.create(os.path.join(self.pool.path, 'nodes', ''), ephemeral=True, sequence=True)
        self.path = path
        self.id = path.rsplit('/', 1)[-1]

        # Watch for leadership changes so we can possibly take over
        ChildrenWatch(self.zk, os.path.join(self.pool.path, 'leaders'), self._on_leaders_change)

        # Now that we've joined, lets see if there are any dangling resources we
        #  can take ownership of
        gevent.spawn(self._check_for_takeover, delay=0)

    def _on_leaders_change(self, data):
        """ChildrenWatch callback: schedule a takeover check with a 5s delay.

        ``data`` is ignored.
        """
        # TODO: debounce this instead of just sleeping
        gevent.spawn(self._check_for_takeover, delay=5)

    def _on_resource_leader_change(self, data, stat, event):
        """DataWatch callback for a single leader znode.

        Does nothing when ``event`` is falsy or when the last component of
        ``event.path`` is not one of ``pool.resources``. If we own the
        resource and the znode was deleted or its data no longer equals our
        id, ownership is dropped locally (with a backoff timestamp and the
        ``on_release_resource`` callback) and ``False`` is returned. For any
        other DELETED event a non-forced takeover is attempted.
        """
        if not event:
            return

        resource_name = event.path.split('/')[-1]
        if resource_name not in self.pool.resources:
            return

        if resource_name in self.resources:
            if event.type == 'DELETED' or data != self.id:
                self._resource_backoff[resource_name] = time.time()
                self.resources.remove(resource_name)
                if callable(self.on_release_resource):
                    self.on_release_resource(self, resource_name)
                # NOTE(review): presumably returning False stops this
                # DataWatch — confirm against the kazoo watcher docs.
                return False

        if event.type == 'DELETED':
            self._try_takeover(resource_name)

    def _check_for_takeover(self, delay=5):
        """Try to claim every pool resource that has no leader znode.

        Returns immediately when ``auto_acquire`` is false; otherwise sleeps
        ``delay`` first.
        """
        if not self.auto_acquire:
            return
        # NOTE(review): time.sleep only yields to other greenlets if gevent
        # monkey-patching is active — confirm.
        time.sleep(delay)

        resources_with_leaders = set(self.zk.get_children(os.path.join(self.pool.path, 'leaders')))
        # assumes pool.resources is a set (it is used with `-`) — TODO confirm
        resources_without_leaders = self.pool.resources - resources_with_leaders

        for resource in resources_without_leaders:
            self._try_takeover(resource)

            # If we have more than the even-split number of resources, backoff a bit
            # NOTE(review): raises ZeroDivisionError when pool.nodes is empty.
            if len(self.resources) > len(self.pool.resources) / len(self.pool.nodes):
                time.sleep(1)

    def _try_takeover(self, resource, force=False):
        """Attempt to become the leader of ``resource``.

        :param resource: resource name; its leader znode lives at
            ``<pool.path>/leaders/<resource>``.
        :param force: when true, ignore the backoff and replace an existing
            leader znode in a transaction.
        :return: ``True`` when ownership was recorded, ``False`` otherwise.
        """
        # Ownership cap; skipped while max_resources is 0.
        if self.max_resources and len(self.resources) >= self.max_resources:
            return False

        # A resource we gave up less than 10 seconds ago is not re-acquired
        # unless forced.
        if not force and resource in self._resource_backoff:
            if time.time() - self._resource_backoff[resource] < 10:
                return False
            del self._resource_backoff[resource]

        # Give up instead of waiting when the semaphore has no free slot.
        if self._resources_acquiring.locked():
            return False

        with self._resources_acquiring:
            path = os.path.join(self.pool.path, 'leaders', resource)

            try:
                # NOTE(review): `unicode` is a Python 2 builtin.
                self.zk.create(path, unicode.encode(self.id), ephemeral=True)
            except NodeExistsError:
                if not force:
                    return False

                # Replace the current leader: delete the znode at the version
                # just read and recreate it with our id in one transaction.
                _, metadata = self.zk.get(path)
                transaction = self.zk.transaction()
                transaction.delete(path, version=metadata.version)
                transaction.create(path, unicode.encode(self.id), ephemeral=True)
                result = transaction.commit()
                # Expect True for the delete and the path for the create.
                if result[0] is not True or result[1] != path:
                    return False

            DataWatch(self.zk, path, self._on_resource_leader_change)
            self.resources.add(resource)
            if callable(self.on_acquire_resource):
                self.on_acquire_resource(self, resource)
            return True

    def balance(self):
        """Release one random resource when we own more than our share.

        The share is ``ceil(len(pool.resources) / len(pool.nodes))``; a
        resource is released only when we own more than that plus one. The
        released resource gets a backoff timestamp first.
        """
        # NOTE(review): raises ZeroDivisionError when pool.nodes is empty.
        threshold = math.ceil(len(self.pool.resources) / (len(self.pool.nodes) * 1.0))
        our_value = len(self.resources)

        if our_value > threshold + 1:
            resource = random.choice(list(self.resources))
            self._resource_backoff[resource] = time.time()
            self.release(resource)

    def _anti_entropy(self):
        """Loop forever, calling ``balance()`` after each 10 second sleep."""
        while True:
            time.sleep(10)
            self.balance()