Example no. 1
def _get_json_type(request, cluster_id, type):
	data = []
	error_brokers = 0
	try:
		cluster = get_cluster_or_404(id=cluster_id)

		zk = KazooClient(hosts=cluster['zk_host_ports'])
		zk.start()

		try:
			if type == "broker":
				brokers, error_brokers = _get_brokers(zk, cluster_id)
				for broker in brokers:
					data.append(broker['host'])
			if type == "topic":
				topics, error_zk_topics = _get_topics(cluster)
				for topic in topics:
					data.append(topic['id'])
			if type == "metric":
				data = _get_sections_ini()
		finally:
			# Stop the client even if a lookup fails; the original called
			# zk.stop() outside the try, which leaked the connection on errors.
			zk.stop()
	except KazooException:
		error_zk_brokers = 1

	return JsonResponse(data, safe=False)
Example no. 2
def main():
    parser = argparse.ArgumentParser(description = DESCRIPTION)

    parser.add_argument("hosts", metavar = "<zookeeper-endpoint>", type = str,
        nargs = "+", help = "Zookeeper node endpoints to connect to")
    parser.add_argument("--timeout", dest = "timeout", action = "store", type = int,
        default = 30, help = "Zookeeper connection timeout")

    option = parser.parse_args()

    logging.debug("Using %s as a Zookeeper connection string" % option.hosts)

    client = KazooClient(hosts = ",".join(option.hosts))

    try:
        client.start(timeout = option.timeout)
    except TimeoutError as e:
        logging.error("Timed out while connecting to Zookeeper")
        return 1

    status = bootstrap(client, str(uuid.uuid4()))

    # If the client is not stopped, it will hang forever maintaining the connection.
    client.stop()

    return status
Example no. 3
def main():
    if len(sys.argv) < 5:
        print(help_msg, "\n")
        print(sys.argv[0], 'zookeeper_server broker_uri_1 broker_uri_2 broker_uri_3')
        print('Example:', sys.argv[0], 'localhost:2181 socket://localhost:10001/broker1 '
              'socket://localhost:10002/broker2 socket://localhost:10003/broker3')
        exit()
    zk_server = sys.argv[1]
    broker_uris = sys.argv[2:5]
    shift_cmd = shift_cmd_template.format(*broker_uris)
    print('Deploying', shift_cmd)
    zk_client = KazooClient(zk_server, timeout=10 * 60)
    print('Connecting to Zookeeper at', zk_server)
    zk_client.start()
    for uri in broker_uris:
        broker_status[uri] = False
        bid = get_id(uri)
        # make sure broker is free
        data, stats = zk_client.get(ZK_BROKER_OPS_STATUS_STR.format(bid))
        op_status = OpStatus(data.decode('utf-8').upper())
        if op_status not in [OpStatus.Null, OpStatus.Finished]:
            raise RuntimeError('Cannot start {}, {} is in {} state'.format(shift_cmd, bid, op_status.name))
        # update broker's ops status
        zk_client.set(ZK_BROKER_OPS_STATUS_STR.format(bid), OpStatus.Null.value.encode('utf-8'))
        # write the cmd to the broker's ops
        zk_client.set(ZK_BROKER_OPS_STR.format(bid), shift_cmd.encode('utf-8'))
        # set watches for this broker's op status
        DataWatch(zk_client, ZK_BROKER_OPS_STATUS_STR.format(bid), func=get_broker_op_data_watcher(uri))
    print('Waiting for brokers ...')
    while not all_done():
        time.sleep(1)
Example no. 4
    def create_from_zookeeper(cls, zkconnect):
        log.info("Connecting to zookeeper {0}".format(zkconnect))
        try:
            zk = KazooClient(zkconnect)
            zk.start()
        except Exception as e:
            raise ZookeeperException("Cannot connect to Zookeeper: {0}".format(e))

        # Get broker list
        cluster = cls()
        add_brokers_from_zk(cluster, zk)

        # Get current partition state
        log.info("Getting partition list from Zookeeper")
        for topic in zk.get_children("/brokers/topics"):
            zdata, zstat = zk.get("/brokers/topics/{0}".format(topic))
            add_topic_with_replicas(cluster, topic, json.loads(zdata))

        if cluster.num_topics() == 0:
            raise ZookeeperException("The cluster specified does not have any topics")

        log.info("Closing connection to zookeeper")
        zk.stop()
        zk.close()

        return cluster
Example no. 5
 def readAMHostPort(self):
   amHost = ""
   amSecuredPort = ""
   zk = None
   try:
     zk = KazooClient(hosts=self.zk_quorum, read_only=True)
     zk.start()
     data, stat = zk.get(self.zk_reg_path)
     logger.debug("Registry Data: %s" % (data.decode("utf-8")))
     sliderRegistry = json.loads(data)
     amUrl = sliderRegistry["payload"]["internalView"]["endpoints"]["org.apache.slider.agents"]["address"]
     amHost = amUrl.split("/")[2].split(":")[0]
     amSecuredPort = amUrl.split(":")[2].split("/")[0]
     # the port needs to be utf-8 encoded 
     amSecuredPort = amSecuredPort.encode('utf8', 'ignore')
   except Exception:
     # log and let empty strings be returned
     logger.error("Could not connect to zk registry at %s in quorum %s" % 
                  (self.zk_reg_path, self.zk_quorum))
     pass
   finally:
     if zk is not None:
       zk.stop()
       zk.close()
   logger.info("AM Host = %s, AM Secured Port = %s" % (amHost, amSecuredPort))
   return amHost, amSecuredPort
Example no. 6
def get_alive_master_ip():
    zk_conn_str = get_os_env('ZOOKEEPER_CONN_STR')
    master_stack_name = get_os_env('MASTER_STACK_NAME')
    master_ip = ""
    global region
    if zk_conn_str != "":
        from kazoo.client import KazooClient
        zk = KazooClient(hosts=zk_conn_str)
        zk.start()
        try:
            master_ip = zk.get("/spark/leader_election/current_master")[0].decode('utf-8')
        except Exception:
            master_ip = ""
        finally:
            zk.stop()
        return master_ip
    elif master_stack_name != "" and region is not None:
        try:
            elb = boto3.client('elb', region_name=region)
            ec2 = boto3.client('ec2', region_name=region)
            master_ips = get_instance_ips(elb, ec2, master_stack_name)
            if len(master_ips) == 1:
                return master_ips[0]
            return ""  # shouldn't happen without zookeeper
        except Exception:
            return ""
    else:
        return ""
Example no. 7
def _get_zk_conn(hosts):
    global ZK_CONNECTION
    if ZK_CONNECTION is None:
        ZK_CONNECTION = KazooClient(hosts=hosts)
        ZK_CONNECTION.start()

    return ZK_CONNECTION
Example no. 8
def start_zoo(cport):
    '''
    Client uses this function to start an instance of zookeeper
    Arguments:
        cport : An unused TCP port for zookeeper to use as the client port
    '''
    basefile = "zookeeper-3.4.5"
    tarfile = os.path.dirname(os.path.abspath(__file__)) + "/" + basefile + ".tar.gz"
    cassbase = "/tmp/zoo." + str(cport) + "/"
    confdir = cassbase + basefile + "/conf/"
    output,_ = call_command_("mkdir " + cassbase)

    logging.info('Installing zookeeper in ' + cassbase + " conf " + confdir)
    os.system("cat " + tarfile + " | tar -xpzf - -C " + cassbase)

    output,_ = call_command_("cp " + confdir + "zoo_sample.cfg " + confdir + "zoo.cfg")

    logging.info('zookeeper Client Port %d' % cport)

    replace_string_(confdir + "zoo.cfg", \
        [("dataDir=/tmp/zookeeper", "dataDir="+cassbase)])

    replace_string_(confdir + "zoo.cfg", \
        [("clientPort=2181", "clientPort="+str(cport))])

    output,_ = call_command_(cassbase + basefile + "/bin/zkServer.sh start")

    zk = KazooClient(hosts='127.0.0.1:'+str(cport))
    zk.start()
    zk.stop()
Example no. 9
 def _open(self):
     conninfo = self.connection.client
     self.vhost = os.path.join('/', conninfo.virtual_host[0:-1])
     hosts = []
     if conninfo.alt:
         for host_port in conninfo.alt:
             if host_port.startswith('zookeeper://'):
                 host_port = host_port[len('zookeeper://'):]
             if not host_port:
                 continue
             try:
                 host, port = host_port.split(':', 1)
                 host_port = (host, int(port))
             except ValueError:
                 if host_port == conninfo.hostname:
                     host_port = (host_port, conninfo.port or DEFAULT_PORT)
                 else:
                     host_port = (host_port, DEFAULT_PORT)
             hosts.append(host_port)
     host_port = (conninfo.hostname, conninfo.port or DEFAULT_PORT)
     if host_port not in hosts:
         hosts.insert(0, host_port)
     conn_str = ','.join(['%s:%s' % (h, p) for h, p in hosts])
     conn = KazooClient(conn_str)
     conn.start()
     return conn
Example no. 10
 def kafka_save(key, content, kafka, pre=''):
     zookeeper = KazooClient()  # no hosts given: defaults to 127.0.0.1:2181
     zookeeper.start()
     cluster = Cluster(zookeeper)
     topic = cluster.topics['topicname']
     topic.publish(content)  # publish the payload instead of a literal 'msg'
     zookeeper.stop()
Example no. 11
 def setup(self):
     zk = KazooClient(hosts=self.addr)
     zk.start()
     self.zk = zk
     cfg = self.app.cfg
     log = cfg.logger_class(cfg)
     self.log = log
Example no. 12
def resolve_master(
      cluster_url, master_callback=lambda: True, termination_callback=lambda: True, zk_client=None):
  """
    Resolve the MySQL cluster master's endpoint from the given URL for this cluster.
    :param cluster_url: The ZooKeeper URL for this cluster.
    :param master_callback: A callback method with one argument: the ServiceInstance for the elected
                            master.
    :param termination_callback: A callback method with no argument. Invoked when the cluster
                                 terminates.
    :param zk_client: Use a custom ZK client instead of Kazoo if specified.
  """
  try:
    _, zk_servers, cluster_path = zookeeper.parse(cluster_url)
  except Exception as e:
    raise ValueError("Invalid cluster_url: %s" % e.message)

  if not zk_client:
    zk_client = KazooClient(zk_servers)
    zk_client.start()

  listener = ClusterListener(
      zk_client,
      cluster_path,
      None,
      master_callback=master_callback,
      termination_callback=termination_callback)
  listener.start()
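
A minimal sketch of driving resolve_master, with hypothetical callbacks and an
illustrative ZooKeeper URL (the docstring above defines the callback shapes):

def on_master(service_instance):
    print('New master elected: %s' % service_instance)

def on_termination():
    print('Cluster terminated')

resolve_master(
    'zk://zk1:2181,zk2:2181/mysos/cluster0',
    master_callback=on_master,
    termination_callback=on_termination)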
Example no. 13
class ZKTestBase(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        utdocker.pull_image(zk_tag)

    def setUp(self):

        utdocker.create_network()
        utdocker.start_container(
            zk_name,
            zk_tag,
            env={
                "ZOO_MY_ID": 1,
                "ZOO_SERVERS": "server.1=0.0.0.0:2888:3888",
            },
            port_bindings={2181: 21811}
        )

        self.zk = KazooClient('127.0.0.1:21811')
        self.zk.start()

        self.zkauthed, _ = zkutil.kazoo_client_ext(
            {'hosts': '127.0.0.1:21811', 'auth': ('digest', 'xp', '123'),
             'acl': (('xp', '123', 'cdrwa'), ('foo', 'bar', 'rw'))})

        dd('start zk-test in docker')

    def tearDown(self):

        self.zk.stop()
        self.zkauthed.stop()
        utdocker.remove_container(zk_name)
Example no. 14
File: actor.py Project: tlvu/mochi
class ActorAddressBook(object):
    def __init__(self, zk_hosts, timeout=60.0):
        self.retry = KazooRetry(max_tries=10)
        self.zk = KazooClient(hosts=zk_hosts, timeout=timeout)
        self.zk.start()

    def lookup(self, path):
        return self.retry(self._lookup, path)

    def _lookup(self, path):
        actor_url, stat = self.zk.get(path)
        return RemoteActor(actor_url.decode('utf-8'))

    def register(self, path, actor_url):
        return self.retry(self._register, path, actor_url)

    def _register(self, path, actor_url):
        self.zk.ensure_path(path)
        self.zk.set(path, actor_url.encode('utf-8'))

    def delete(self, path):
        self.zk.delete(path, recursive=True)

    def __del__(self):
        self.zk.stop()
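
A hedged usage sketch for the class above; the host string, paths and URL are
illustrative assumptions, not taken from the original project:

book = ActorAddressBook('127.0.0.1:2181')
book.register('/actors/worker-1', 'tcp://10.0.0.5:5555')
remote = book.lookup('/actors/worker-1')  # returns a RemoteActor
book.delete('/actors/worker-1')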
Example no. 15
    def init_codis_info(self):
        if self.has_init():
            return

        # start zookeeper client
        zk_client = KazooClient(hosts=self.zk_addr)
        zk_client.start()

        # get codis server information
        zk_servers_dir = "/zk/codis/db_%s/servers" % self.product_name
        for zk_server in zk_client.get_children(zk_servers_dir):
            zk_server_path = '/'.join((zk_servers_dir, zk_server))
            for server in zk_client.get_children(zk_server_path):
                server_path = '/'.join((zk_server_path, server))
                data, stat = zk_client.get(server_path)
                server_info = json.loads(data)
                group_id = server_info.get('group_id')
                server_type = server_info.get('type')
                server_addr = server_info.get('addr')
                self.add_codis_server(group_id, server_type, server_addr)

        # get codis proxy information
        zk_proxy_dir = "/zk/codis/db_%s/proxy" % self.product_name
        for zk_proxy in zk_client.get_children(zk_proxy_dir):
            zk_proxy_path = '/'.join((zk_proxy_dir, zk_proxy))
            data, stat = zk_client.get(zk_proxy_path)
            proxy_info = json.loads(data)
            self.add_proxy(proxy_info['id'], proxy_info['addr'], proxy_info['debug_var_addr'], proxy_info['state'])

        self.redis_client.init_connection(self.get_group_info(), self.get_proxy_info())
        self.init_done()
        return None
Example no. 16
def main():
  """ Starts the groomer. """
  logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

  parser = argparse.ArgumentParser()
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Output debug-level logging')
  args = parser.parse_args()

  if args.verbose:
    logger.setLevel(logging.DEBUG)

  zk_hosts = appscale_info.get_zk_node_ips()
  zk_client = KazooClient(hosts=','.join(zk_hosts),
                          connection_retry=ZK_PERSISTENT_RECONNECTS,
                          command_retry=KazooRetry(max_tries=-1))
  zk_client.start()

  db_access = DatastoreProxy()

  thread_pool = ThreadPoolExecutor(4)

  TransactionGroomer(zk_client, db_access, thread_pool)
  logger.info('Starting transaction groomer')

  IOLoop.current().start()
Example no. 17
    def expire_session(self, client_id=None):
        """Force ZK to expire a client session

        :param client_id: id of client to expire. If unspecified, the id of
                          self.client will be used.

        """
        client_id = client_id or self.client.client_id

        lost = threading.Event()
        safe = threading.Event()

        def watch_loss(state):
            if state == KazooState.LOST:
                lost.set()
            if lost.is_set() and state == KazooState.CONNECTED:
                safe.set()
                return True

        self.client.add_listener(watch_loss)

        # Sometimes we have to do this a few times
        attempts = 0
        while attempts < 5 and not lost.is_set():
            client = KazooClient(self.hosts, client_id=client_id, timeout=0.8)
            client.start()
            client.stop()
            lost.wait(5)
            attempts += 1
        # Wait for the reconnect now
        safe.wait(15)
        self.client.retry(self.client.get_async, '/')
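
The same session-expiry trick works standalone with plain Kazoo: connecting a
second, short-lived client that reuses an existing session's client_id makes
the server expire that session. A minimal sketch, assuming a local ZooKeeper
on 127.0.0.1:2181:

from kazoo.client import KazooClient

victim = KazooClient(hosts='127.0.0.1:2181')
victim.start()
session = victim.client_id  # (session_id, password) tuple

hijacker = KazooClient(hosts='127.0.0.1:2181', client_id=session, timeout=0.8)
hijacker.start()
hijacker.stop()  # dropping the hijacked session expires it server-side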
Example no. 18
    def __setstate__(self, state):
        hosts = state.pop('client')
        client = KazooClient(hosts)
        client.start()

        self.__dict__ = state
        self.client = client
Example no. 19
  def from_task(self, task, sandbox):
    data = json.loads(task.data)
    cluster_name, host, port, zk_url = data['cluster'], data['host'], data['port'], data['zk_url']
    _, servers, path = parse(zk_url)
    kazoo = KazooClient(servers)
    kazoo.start()
    self_instance = ServiceInstance(Endpoint(host, port))

    try:
      task_control = self._task_control_provider.from_task(task, sandbox)
      installer = self._installer_provider.from_task(task, sandbox)
      backup_store = self._backup_store_provider.from_task(task, sandbox)
    except (TaskControl.Error, PackageInstaller.Error) as e:
      kazoo.stop()  # Kazoo needs to be cleaned up. See kazoo/issues/217.
      raise TaskError(e.message)

    state_manager = StateManager(sandbox, backup_store)

    return MysosTaskRunner(
        self_instance,
        kazoo,
        get_cluster_path(path, cluster_name),
        installer,
        task_control,
        state_manager)
Example no. 20
def get_zoo_client(cluster_name="qconf"):
    """get zoo client by cluster_name
    """
    global ZOO_CLIENTS

    if cluster_name not in ZOO_CLIENTS:
        # get zookeeper hosts info
        zookeeper = ZdZookeeper.one(cluster_name=cluster_name, deleted="0")
        if not zookeeper:
            raise ZookeeperConfError("Zookeeper not configured for cluster: {}!".format(cluster_name))
        # connect to zookeeper
        try:
            client = KazooClient(hosts=zookeeper.hosts,
                                 connection_retry={"max_tries": 3, "backoff": 2})
            client.start(3)
            ZOO_CLIENTS[cluster_name] = client
        except KazooTimeoutError as exc:
            log.error('Failed to connect to zookeeper, %s', str(exc))
            return

    # check the connection's state; if not connected, reconnect
    zoo_client = ZOO_CLIENTS[cluster_name]
    if not zoo_client.connected:
        zoo_client.restart()
    return zoo_client
Example no. 21
File: zk.py Project: BITDM/pinot
class PinotZk(object):

  def __init__(self, config, logger, fabric):
    self.config = config
    self.fabric = fabric
    self.logger = logger
    self.zk = None

  def get_handle(self):
    host = self.config.get_zk_host(self.fabric)

    if not self.zk:
      try:
        self.zk = KazooClient(hosts=host)
        self.zk.start()
      except kazoo.exceptions.KazooException:
        error = 'Failed connecting to zk {0}'.format(host)
        self.logger.exception(error)
        raise PinotException(error)

    return self.zk

  def close(self):
    if self.zk:
      self.zk.stop()
      self.zk.close()
Example no. 22
def init_hierarchy(hosts, hierarchy, users, auth):

    zkcli = KazooClient(hosts)
    zkcli.start()

    scheme, name, passw = auth
    zkcli.add_auth(scheme, name + ':' + passw)

    def _init_hierarchy(hierarchy, parent_path):

        if len(hierarchy) == 0:
            return

        for node, attr_children in hierarchy.items():
            val = attr_children.get('__val__', {})
            val = utfjson.dump(val)
            acl = attr_children.get('__acl__')

            path = _init_node(zkcli, parent_path, node, val, acl, users)
            children = {k: v
                        for k, v in attr_children.items()
                        if k not in ('__val__', '__acl__')
                        }

            _init_hierarchy(children, path)

    _init_hierarchy(hierarchy, '/')
    close_zk(zkcli)
Example no. 23
def chunk(args=None):
    args = chunk_parser.parse_args(args)

    # Log verbosity
    verbosity = args.verbose - args.quiet
    if args.debug:
        log_level = logging.DEBUG - verbosity*10
    else:
        log_level = logging.WARN - verbosity*10

    logging.basicConfig(level=log_level)
    logging.getLogger('kazoo.client').setLevel(log_level + 20)

    # Zookeeper servers
    if len(args.servers):
        zk_hosts = ','.join(args.servers)
    else:
        zk_hosts = '127.0.0.1:2181'

    # Zookeeper client
    zk = KazooClient(hosts=zk_hosts)

    zk.start()

    # ChunkServer
    cs = HTTPChunkServer(zk=zk, addr=(args.host,args.port), cache_path=args.chunk_cache, hash_data=args.hash_data)
    cs.run()

    # Cleanup
    zk.stop()
Example no. 24
class Exhibitor:

    def __init__(self, exhibitor, chroot):
        self.chroot = chroot
        self.exhibitor = ExhibitorEnsembleProvider(exhibitor['hosts'], exhibitor['port'], poll_interval=30)
        self.client = KazooClient(hosts=self.exhibitor.zookeeper_hosts + self.chroot,
                                  command_retry={
                                      'deadline': 10,
                                      'max_delay': 1,
                                      'max_tries': -1},
                                  connection_retry={'max_delay': 1, 'max_tries': -1})
        self.client.add_listener(self.session_listener)
        self.client.start()

    def session_listener(self, state):
        pass

    def _poll_exhibitor(self):
        if self.exhibitor.poll():
            self.client.set_hosts(self.exhibitor.zookeeper_hosts + self.chroot)

    def get(self, *params):
        self._poll_exhibitor()
        return self.client.retry(self.client.get, *params)

    def get_children(self, *params):
        self._poll_exhibitor()
        try:
            return self.client.retry(self.client.get_children, *params)
        except NoNodeError:
            return []
Example no. 25
def main():
  global datastore_path
  global deployment_config

  logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

  parser = argparse.ArgumentParser()
  parser.add_argument('-p', '--port', type=int, default=DEFAULT_PORT,
                      help="The blobstore server's port")
  parser.add_argument('-d', '--datastore-path', required=True,
                      help='The location of the datastore server')
  args = parser.parse_args()

  datastore_path = args.datastore_path
  zk_ips = appscale_info.get_zk_node_ips()
  zk_client = KazooClient(hosts=','.join(zk_ips))
  zk_client.start()
  deployment_config = DeploymentConfig(zk_client)
  setup_env()

  http_server = tornado.httpserver.HTTPServer(
    Application(), max_buffer_size=MAX_REQUEST_BUFF_SIZE, xheaders=True)

  http_server.listen(args.port)

  # Make sure this server is accessible from each of the load balancers.
  secret = appscale_info.get_secret()
  for load_balancer in appscale_info.get_load_balancer_ips():
    acc = AppControllerClient(load_balancer, secret)
    acc.add_routing_for_blob_server()

  logger.info('Starting BlobServer on {}'.format(args.port))
  tornado.ioloop.IOLoop.instance().start()
Example no. 26
def main_loop():
    logging.basicConfig()

    zk = KazooClient(hosts=zk_connect_string)
    zk.start()

    # make sure the root folders for the sendgraph and the schedules exist
    zk.ensure_path(metrics_zk_path)
    zk.ensure_path(schedule_zk_path)

    for topology in zk.get_children(metrics_zk_path):
        topology_metrics_zk_path = metrics_zk_path + "/" + topology
        print("registering watcher schedule for " + topology_metrics_zk_path)

        # register a data watch for each
        def watchFunc(data, stat, event):
            #print("watch called")
            if event is not None and event.type == EventType.CHANGED:
                print("new sendgraph data for {0} at {1}".format(topology, byteArrayToInt(data)))
                schedule(zk, topology)
            return True  # returning false will disable the watch

        # install data watch
        #DataWatch(zk, topology_metrics_zk_path, func=watchFunc)
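        # NOTE: if the DataWatch above is re-enabled, bind the loop variable
        # explicitly (def watchFunc(data, stat, event, topology=topology)),
        # otherwise every watch closure sees only the last topology iterated.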

        # if there is some data already, schedule immediately
        if len(zk.get_children(topology_metrics_zk_path)):
            print("existing sendgraph data for {0}".format(topology))
            schedule(zk, topology)
Example no. 27
def get_children_data(ensemble, namespace, read_only=True):
  hdfs = cluster.get_hdfs()
  if hdfs is None:
    raise PopupException(_('No [hdfs] configured in hue.ini.'))

  if hdfs.security_enabled:
    sasl_server_principal = PRINCIPAL_NAME.get()
  else:
    sasl_server_principal = None

  zk = KazooClient(hosts=ensemble, read_only=read_only, sasl_server_principal=sasl_server_principal)

  zk.start()

  children_data = []

  children = zk.get_children(namespace)

  for node in children:
    data, stat = zk.get("%s/%s" % (namespace, node))
    children_data.append(data)

  zk.stop()

  return children_data
Example no. 28
def mkfs(args=None):
    args = mkfs_parser.parse_args(args)

    # Log verbosity
    verbosity = args.verbose - args.quiet
    log_level = logging.WARN - verbosity*10

    logging.basicConfig(level=log_level)
    logging.getLogger('kazoo.client').setLevel(log_level + 20)

    # ZK Path of filesystem root
    zk_root = posixpath.join(FILESYSTEMS, args.name)

    # Zookeeper
    if len(args.servers):
        zk_hosts = ','.join(args.servers)
    else:
        zk_hosts = '127.0.0.1:2181'
    zk = KazooClient(hosts=zk_hosts)

    zk.start()

    # Run
    ClowderFS.mkfs(zk=zk, fs_root=zk_root, chunk_size=args.chunk_size)

    # Cleanup
    zk.stop()
Example no. 29
    def run(self):
        zk = KazooClient(hosts='%s:%d' % (self.options.host, self.options.port),
                         read_only=True, timeout=3)

        try:
            zk.start()

            options = vars(self.options)
            options.update({
                'system.hostname': socket.gethostname()
            })

            if self.options.regex:
                content, stats = zk.get(self.options.file)

                options['stats'] = stats

                m = re.search(self.options.regex, content, re.MULTILINE | re.DOTALL)

                if m:
                    options.update(m.groupdict())

                    self.ok(self.options.message.format(**options))
                else:
                    self.critical(self.options.message.format(**options))
            elif zk.exists(self.options.file):
                self.ok(self.options.message.format(**options))
            else:
                self.critical(self.options.message.format(**options))
        except Exception as ex:
            self.critical(ex)
        finally:
            zk.stop()
Example no. 30
def processTransfer():
    try:
        conn = psycopg2.connect(dbConnectStr)
        cur = conn.cursor()
        zk = KazooClient(hosts=zkHost)
        zk.start()
        transferq = LockingQueue(zk, '/transfer/')
        while True:
            rawCode = transferq.get()
            proposal = rawCode.decode().strip()
            transferq.consume()

            # print(" proposal = {0} ".format(proposal))
            ints = datetime.now()
            inload = os.getloadavg()[0]
            pro1 = Popen(['/usr/bin/python36', './processproptran.py', proposal], stdin=None, stdout=None)
            pro1.wait()

            outts = datetime.now()
            outload = os.getloadavg()[0]
            # insert the runtime info into c*
            cluster = Cluster(cfg.cassCluster)
            session = cluster.connect(cfg.cassKeyspace)
            stmt = SimpleStatement("""insert into runstat(id,executable,ints,inload,outts,outload)
            values (%s, %s, %s, %s, %s, %s)""", consistency_level=ConsistencyLevel.ANY)
            session.execute(stmt, (uuid.uuid4(), executable, ints, inload, outts, outload))
    except psycopg2.Error as err:
        print("SQLError {0}".format(err))
    finally:
        zk.stop()
        zk.close()
        cur.close()
        conn.close()
Example no. 31
import logging
import os
import time
import uuid
import subprocess
import threading
import sqlite3 as sqlite3
import docker  # used below for docker.APIClient()
from kazoo.client import KazooClient
from kazoo.client import KazooState

logging.basicConfig()

new_master = 0

zk = KazooClient(hosts='zoo:2181', timeout=1.0)
zk.start(timeout=1)

# get cid, pid of container running this code
cmd = "cat /proc/self/cgroup | grep 'docker' | sed 's/^.*\///' | tail -n1"
cid = subprocess.check_output(cmd, shell=True)
cid = cid.decode("utf-8")
cid = cid[0:12]
client2 = docker.APIClient()
pid = client2.inspect_container(cid)['State']['Pid']
print("---PID---", pid)
print("---CID---", cid)

zk.ensure_path("/worker")
if zk.exists("/worker/slave"):
    print("Slave exists")
else:
Example no. 32
#!/usr/bin/env python
import pika
import sys
import json
import sqlite3
import datetime
from sqlalchemy import create_engine,and_,Column,Integer,String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
import logging
from kazoo.client import KazooClient
from random import random  # used below to name the ephemeral slave node

logging.basicConfig()
zk = KazooClient(hosts='zoo:2181')
zk.start()
zk.ensure_path("/slave")
zk.create("/slave/"+ str(random()),b"",ephemeral=True)

Base = declarative_base()

class User(Base):
    __tablename__ = "User"
    username = Column("username",String(50), primary_key=True)
    password = Column("password",String(50), primary_key=True)
class Rideshare(Base):
    __tablename__ = "Rideshare"
    rideid = Column("rideid",Integer, primary_key=True,autoincrement=True)
    created_by = Column("created_by",String(50))
    timestamp = Column("timestamp",String(50))
    source = Column("source",String(50))
    dest = Column("dest",String(50))
Example no. 33
    def check(self, instance):
        consumer_groups = self.read_config(instance, 'consumer_groups',
                                           cast=self._validate_consumer_groups)
        zk_connect_str = self.read_config(instance, 'zk_connect_str')
        kafka_host_ports = self.read_config(instance, 'kafka_connect_str')

        # Construct the Zookeeper path pattern
        zk_prefix = instance.get('zk_prefix', '')
        zk_path_tmpl = zk_prefix + '/consumers/%s/offsets/%s/%s'

        # Connect to Zookeeper
        zk_conn = KazooClient(zk_connect_str, timeout=self.zk_timeout)
        zk_conn.start()

        try:
            # Query Zookeeper for consumer offsets
            consumer_offsets = {}
            topics = defaultdict(set)
            for consumer_group, topic_partitions in consumer_groups.iteritems():
                for topic, partitions in topic_partitions.iteritems():
                    # Remember the topic partitions that we've seen so that we can
                    # look up their broker offsets later
                    topics[topic].update(set(partitions))
                    for partition in partitions:
                        zk_path = zk_path_tmpl % (consumer_group, topic, partition)
                        try:
                            consumer_offset = int(zk_conn.get(zk_path)[0])
                            key = (consumer_group, topic, partition)
                            consumer_offsets[key] = consumer_offset
                        except NoNodeError:
                            self.log.warn('No zookeeper node at %s' % zk_path)
                        except Exception:
                            self.log.exception('Could not read consumer offset from %s' % zk_path)
        finally:
            try:
                zk_conn.stop()
                zk_conn.close()
            except Exception:
                self.log.exception('Error cleaning up Zookeeper connection')

        # Connect to Kafka
        kafka_conn = KafkaClient(kafka_host_ports, timeout=self.kafka_timeout)

        try:
            # Query Kafka for the broker offsets
            broker_offsets = {}
            for topic, partitions in topics.items():
                offset_responses = kafka_conn.send_offset_request([
                    OffsetRequest(topic, p, -1, 1) for p in partitions])

                for resp in offset_responses:
                    broker_offsets[(resp.topic, resp.partition)] = resp.offsets[0]
        finally:
            try:
                kafka_conn.close()
            except Exception:
                self.log.exception('Error cleaning up Kafka connection')

        # Report the broker data
        for (topic, partition), broker_offset in broker_offsets.items():
            broker_tags = ['topic:%s' % topic, 'partition:%s' % partition]
            self.gauge('kafka.broker_offset', broker_offset, tags=broker_tags)

        # Report the consumer
        for (consumer_group, topic, partition), consumer_offset in consumer_offsets.items():

            # Get the broker offset
            broker_offset = broker_offsets.get((topic, partition))

            # Report the consumer offset and lag
            tags = ['topic:%s' % topic, 'partition:%s' % partition,
                    'consumer_group:%s' % consumer_group]
            self.gauge('kafka.consumer_offset', consumer_offset, tags=tags)
            self.gauge('kafka.consumer_lag', broker_offset - consumer_offset,
                       tags=tags)
Example no. 34
class ZChunkserver:
    def __init__(self, zoo_ip='localhost:2181'):
        self.chunktable = {}
        self.chunkloc = None
        self.master = zerorpc.Client()
        self.zookeeper = KazooClient(zoo_ip)

        # register with zookeeper, get IP of master
        # TODO:  need to add handling in case master is down here
        try:
            self.master_ip = self._register_with_zookeeper()
            print 'Chunkserver %d Connecting to master at %s' % (int(
                self.chunkloc), self.master_ip)
            self.master.connect(self.master_ip)
        except NoNodeError:
            print "No master record in zookeeper"
            raise  # TODO handle shadow master/waiting for master to reconnect later
        except Exception as e:
            print "Unexpected error connecting to master:"
            print e.__doc__, e.message

        # local directory where chunks are stored
        self.local_filesystem_root = "/tmp/gfs/chunks/"  #+ repr(int(self.chunkloc))
        if not os.access(self.local_filesystem_root, os.W_OK):
            os.makedirs(self.local_filesystem_root)

    def _register_with_zookeeper(self):
        def my_listener(state):
            if state == KazooState.LOST or state == KazooState.SUSPENDED:
                print "suspended|lost state"
                # TODO connect to zookeeper again

        try:
            self.zookeeper.start()
            self.zookeeper.add_listener(my_listener)
            self.zookeeper.ensure_path('chunkserver')
            master_ip = self.zookeeper.get('master')[0].split('@')[-1]

            path = self.zookeeper.create('chunkserver/',
                                         ephemeral=True,
                                         sequence=True)
            self.chunkloc = path.replace('/chunkserver/', '')

            data = '{username}@{tcpip}'.format(
                username=getpass.getuser(),
                tcpip=zutils.get_tcp(4400 + int(self.chunkloc)))

            # self.zookeeper.set(path, zutils.get_tcp(4400 + int(self.chunkloc)))
            self.zookeeper.set(path, data)

        except Exception as e:
            print "Exception while registering with zookeeper: %s, %s" % (
                type(e).__name__, e.args)

        return master_ip

    def print_name(self):
        """
        Prints name to test connectivity
        """
        print 'I am chunkserver #' + str(int(self.chunkloc))
        self.master.answer_server(int(self.chunkloc))

    def write(self, chunkuuid, chunk, forward=None):
        local_filename = self.chunk_filename(chunkuuid)
        try:
            with open(local_filename, "wb") as f:
                f.write(chunk)
                self.chunktable[chunkuuid] = local_filename
        except:
            return False

        #print "forward is ", forward
        if forward:
            print "Forwarding chunk to loc", forward
            self.send_chunk(chunkuuid, str([forward]), chunk)
        return xxhash.xxh64(chunk).digest()

    def close(self):
        self.master.close()

    @staticmethod
    def get_stats():

        results = []
        pattern = r' \d+[\.]?\d*'
        first = ['ifstat', '-q', '-i', 'enP0s3', '-S', '0.2',
                 '1']  # get network traffic
        second = ['df', '/']  # get free space
        p1 = subprocess.Popen(first, stdout=subprocess.PIPE)
        p2 = subprocess.Popen(second, stdout=subprocess.PIPE)

        # get transfer speed and parse results
        transfer_speed = p1.communicate()[0]
        transfer_speed = re.findall(pattern, transfer_speed)
        results.append(sum([float(num) for num in transfer_speed]))

        # get storage info and parse results
        storage = p2.communicate()[0]
        storage = re.findall(r'\d+%', storage)  # find entry with %
        results.append(int(storage[0][:-1]))  # append entry without %

        return results

    ##############################################################################

    def rwrite(self, chunkuuid, chunk):
        local_filename = self.chunk_filename(chunkuuid)
        try:
            with open(local_filename, "wb") as f:
                f.write(chunk)
            self.chunktable[chunkuuid] = local_filename
            return True
        except:
            return False

    def read(self, chunkuuid):
        data = None
        local_filename = self.chunk_filename(chunkuuid)
        with open(local_filename, "rb") as f:
            data = f.read()
        return data

    def _establish_connection(self, chunkloc):
        chunkservers = self.master.get('chunkservers')
        zclient = zerorpc.Client()
        print 'Server connecting to chunkserver at %s' % chunkloc
        zclient.connect(chunkservers[chunkloc])
        #zclient.print_name()
        return zclient

    def delete(self, chunkuuids):
        # Remove every chunk, not just the first one; the original returned
        # True inside the loop and silently swallowed all exceptions.
        for chunkid in chunkuuids:
            filename = self.chunk_filename(chunkid)
            try:
                if os.path.exists(filename):
                    print "Removing " + filename
                    os.remove(filename)
            except OSError:
                pass
        return True

    def disp(self, a):
        print str(a) + str(self.chunkloc)

    def chunk_filename(self, chunkuuid):
        local_filename = self.local_filesystem_root + "/" + str(
            chunkuuid) + '.gfs'
        return local_filename

    def copy_chunk(self, chunkid, chunklocs):
        chunklocs = ast.literal_eval(chunklocs)
        flag = False
        for chunkloc in chunklocs:
            try:
                chunkserver = self._establish_connection(chunkloc)
                # TODO md5 check
                data = chunkserver.read(chunkid)
                flag = self.rwrite(chunkid, data)
                if flag:
                    break
            except Exception as e:
                flag = False
                print "some error happend in copy_chunk", type(
                    e).__name__, e.args

        return flag

    def send_chunk(self, chunkid, chunklocs, data):
        chunklocs = ast.literal_eval(chunklocs)
        flag = False
        for chunkloc in chunklocs:
            try:
                chunkserver = self._establish_connection(chunkloc)
                flag = chunkserver.rwrite(chunkid, data)
                if flag:
                    break
            except Exception as e:
                flag = False
                self.master.print_exception('sending chunk', None,
                                            type(e).__name__)

        return flag

    def rename(self, chunkids, filename, newfilename):
        for chunkid in chunkids:
            local_filename = self.chunk_filename(chunkid)
            new_local_filename = local_filename.split('/')
            new_local_filename[-1] = new_local_filename[-1].replace(
                filename, newfilename)
            new_local_filename = '/'.join(new_local_filename)
            print "Changing %s to %s" % (local_filename, new_local_filename)
            try:
                os.rename(local_filename, new_local_filename)
            except:
                os.remove(new_local_filename)
                os.rename(local_filename, new_local_filename)
        return True

    def populate(self):
        #print "in populate, chunkloc=", self.chunkloc
        local_dir = self.chunk_filename("").replace(".gfs", "")
        #print "local dir is ", local_dir
        file_list = os.listdir(local_dir)
        if len(file_list) != 0:
            files = {}
            for items in file_list:
                # TODO
                # if master.exists
                # read all chunks (in parallel?)
                # if any xxhash is not the same, os.delete()
                # else add as regular
                items = items.replace(".gfs", "")
                filename = items.split("$%#")[0]
                self.chunktable[items] = self.chunk_filename(items)
                files.setdefault(filename, []).append(items)

            #print "files=%s, chunkloc=%s" % (files, self.chunkloc)
            # self.master.populate(files, str(self.chunkloc))
            return files, self.chunkloc
        else:
            print "nothing to populate"
            return None, None
Example no. 35
def zk_client():
    zk_client = KazooClient(hosts=os.environ.get('ZK_HOST'))
    zk_client.start()
    #rm_files_from(UPLOAD_DIR)
    yield zk_client
    zk_client.stop()  # teardown: runs after the test that used the fixture
Example no. 36
class ZookeeperDatabase(Database):
    # used as prefix for key, to namespace all queries
    hosts: List[str]
    namespace: str
    timeout: Numeric(
        1, 60) = 5.  # request timeout in seconds (tries another host) [s]
    ssl: Optional[SSL] = None

    def __post_init__(self):
        from kazoo.client import KazooClient

        if self.ssl:
            if isinstance(self.ssl.server_verify, str):
                self._client = KazooClient(
                    hosts=self.hosts,
                    timeout=self.timeout,
                    use_ssl=True,
                    verify_certs=True,
                    ca=self.ssl.server_verify,
                    certfile=self.ssl.client_cert_path,
                    keyfile=self.ssl.client_key_path,
                )
            elif isinstance(self.ssl.server_verify, bool):
                self._client = KazooClient(
                    hosts=self.hosts,
                    timeout=self.timeout,
                    use_ssl=True,
                    verify_certs=self.ssl.server_verify,
                    certfile=self.ssl.client_cert_path,
                    keyfile=self.ssl.client_key_path,
                )

            else:
                raise ValidationError(
                    'SSL server verify must be type of Path or boolean!')
        else:
            self._client = KazooClient(hosts=self.hosts, timeout=self.timeout)

        self._client.start()

    def set(self, key: bytes, value: bytes):
        _validate_key(key)
        _validate_value(value)

        formatted_key = key.decode('ascii')

        full_path = os.path.join(self.namespace, formatted_key)

        self._client.ensure_path(full_path)

        self._client.set(full_path, value)

    def get(self, key: bytes) -> bytes:
        from kazoo.exceptions import NoNodeError
        _validate_key(key)

        formatted_key = key.decode('ascii')

        full_path = os.path.join(self.namespace, formatted_key)

        try:
            data = self._client.get(full_path)
            return bytes(data[0])
        except NoNodeError:
            return None
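
A hypothetical instantiation of the class above, assuming its Database base
class accepts dataclass-style keyword fields (names below are illustrative):

db = ZookeeperDatabase(hosts=['127.0.0.1:2181'], namespace='/config')
db.set(b'feature_flag', b'on')
assert db.get(b'feature_flag') == b'on'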
Example no. 37
#! /usr/bin/python
# -*- coding:utf-8 -*-
# @zhuchen    : 2020/4/8 20:44

from kazoo.client import KazooClient

client = KazooClient()
client.start()


class ZkHosts:

    go_host = []
    python_host = []


zk_host = ZkHosts()


@client.ChildrenWatch('/zhuchen/golang')
def golang_watch(*args):
    print('golang update')
    hosts = args[0] if args else []
    new_hosts = []
    for host_name in hosts:
        d, _ = client.get(f'/zhuchen/golang/{host_name}')
        new_hosts.append(d.decode())
    zk_host.go_host = new_hosts


@client.DataWatch('/zhuchen/python')
Example no. 38
class ZooKeeperJobStore(BaseJobStore):
    """
    Stores jobs in a ZooKeeper tree. Any leftover keyword arguments are directly passed to
    kazoo's `KazooClient
    <http://kazoo.readthedocs.io/en/latest/api/client.html>`_.

    Plugin alias: ``zookeeper``

    :param str path: path to store jobs in
    :param client: a :class:`~kazoo.client.KazooClient` instance to use instead of
        providing connection arguments
    :param int pickle_protocol: pickle protocol level to use (for serialization), defaults to the
        highest available
    """
    def __init__(self,
                 path='/apscheduler',
                 client=None,
                 close_connection_on_exit=False,
                 pickle_protocol=pickle.HIGHEST_PROTOCOL,
                 **connect_args):
        super().__init__()
        self.pickle_protocol = pickle_protocol
        self.close_connection_on_exit = close_connection_on_exit

        if not path:
            raise ValueError('The "path" parameter must not be empty')

        self.path = path

        if client:
            self.client = maybe_ref(client)
        else:
            self.client = KazooClient(**connect_args)
        self._ensured_path = False

    def _ensure_paths(self):
        if not self._ensured_path:
            self.client.ensure_path(self.path)
        self._ensured_path = True

    def start(self, scheduler, alias):
        super().start(scheduler, alias)
        if not self.client.connected:
            self.client.start()

    def lookup_job(self, job_id):
        self._ensure_paths()
        node_path = os.path.join(self.path, job_id)
        try:
            content, _ = self.client.get(node_path)
            doc = pickle.loads(content)
            job = self._reconstitute_job(doc['job_state'])
            return job
        except BaseException:
            return None

    def get_due_jobs(self, now):
        timestamp = datetime_to_utc_timestamp(now)
        jobs = [
            job_def['job'] for job_def in self._get_jobs()
            if job_def['next_run_time'] is not None
            and job_def['next_run_time'] <= timestamp
        ]
        return jobs

    def get_next_run_time(self):
        next_runs = [
            job_def['next_run_time'] for job_def in self._get_jobs()
            if job_def['next_run_time'] is not None
        ]
        return utc_timestamp_to_datetime(
            min(next_runs)) if len(next_runs) > 0 else None

    def get_all_jobs(self):
        jobs = [job_def['job'] for job_def in self._get_jobs()]
        self._fix_paused_jobs_sorting(jobs)
        return jobs

    def add_job(self, job):
        self._ensure_paths()
        node_path = os.path.join(self.path, str(job.id))
        value = {
            'next_run_time': datetime_to_utc_timestamp(job.next_run_time),
            'job_state': job.__getstate__()
        }
        data = pickle.dumps(value, self.pickle_protocol)
        try:
            self.client.create(node_path, value=data)
        except NodeExistsError:
            raise ConflictingIdError(job.id)

    def update_job(self, job):
        self._ensure_paths()
        node_path = os.path.join(self.path, str(job.id))
        changes = {
            'next_run_time': datetime_to_utc_timestamp(job.next_run_time),
            'job_state': job.__getstate__()
        }
        data = pickle.dumps(changes, self.pickle_protocol)
        try:
            self.client.set(node_path, value=data)
        except NoNodeError:
            raise JobLookupError(job.id)

    def remove_job(self, job_id):
        self._ensure_paths()
        node_path = os.path.join(self.path, str(job_id))
        try:
            self.client.delete(node_path)
        except NoNodeError:
            raise JobLookupError(job_id)

    def remove_all_jobs(self):
        try:
            self.client.delete(self.path, recursive=True)
        except NoNodeError:
            pass
        self._ensured_path = False

    def shutdown(self):
        if self.close_connection_on_exit:
            self.client.stop()
            self.client.close()

    def _reconstitute_job(self, job_state):
        job = Job.__new__(Job)
        job.__setstate__(job_state)
        job._scheduler = self._scheduler
        job._jobstore_alias = self._alias
        return job

    def _get_jobs(self):
        self._ensure_paths()
        jobs = []
        failed_job_ids = []
        all_ids = self.client.get_children(self.path)
        for node_name in all_ids:
            try:
                node_path = os.path.join(self.path, node_name)
                content, _ = self.client.get(node_path)
                doc = pickle.loads(content)
                job_def = {
                    'job_id': node_name,
                    'next_run_time': doc['next_run_time'] if doc['next_run_time'] else None,
                    'job_state': doc['job_state'],
                    'job': self._reconstitute_job(doc['job_state']),
                    'creation_time': _.ctime
                }
                jobs.append(job_def)
            except BaseException:
                self._logger.exception(
                    'Unable to restore job "%s" -- removing it' % node_name)
                failed_job_ids.append(node_name)

        # Remove all the jobs we failed to restore
        if failed_job_ids:
            for failed_id in failed_job_ids:
                self.remove_job(failed_id)
        paused_sort_key = datetime(9999, 12, 31, tzinfo=utc)
        return sorted(jobs,
                      key=lambda job_def:
                      (job_def['job'].next_run_time or paused_sort_key,
                       job_def['creation_time']))

    def __repr__(self):
        return '<%s (client=%s)>' % (self.__class__.__name__, self.client)
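
Because the job store registers under the plugin alias "zookeeper", a
scheduler can use it without constructing the class by hand. A minimal
sketch (the host string is an assumption):

from apscheduler.schedulers.background import BackgroundScheduler

scheduler = BackgroundScheduler()
scheduler.add_jobstore('zookeeper', path='/apscheduler', hosts='127.0.0.1:2181')
scheduler.start()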
Example no. 39
class ShellTestCase(unittest.TestCase):
    """ base class for all tests """
    @classmethod
    def setUpClass(cls):
        get_global_cluster().start()

    def setUp(self):
        """
        make sure that the prefix dir is empty
        """
        self.tests_path = os.getenv("ZKSHELL_PREFIX_DIR", "/tests")
        self.zk_hosts = ",".join(server.address
                                 for server in get_global_cluster())
        self.username = os.getenv("ZKSHELL_USER", "user")
        self.password = os.getenv("ZKSHELL_PASSWD", "user")
        self.digested_password = os.getenv("ZKSHELL_DIGESTED_PASSWD",
                                           "F46PeTVYeItL6aAyygIVQ9OaaeY=")
        self.super_password = os.getenv("ZKSHELL_SUPER_PASSWD", "secret")
        self.scheme = os.getenv("ZKSHELL_AUTH_SCHEME", "digest")

        self.client = KazooClient(self.zk_hosts, 5)
        self.client.start()
        self.client.add_auth(self.scheme, self.auth_id)
        if self.client.exists(self.tests_path):
            self.client.delete(self.tests_path, recursive=True)
        self.client.create(self.tests_path, str.encode(""))

        self.output = XStringIO()
        self.shell = Shell([self.zk_hosts],
                           5,
                           self.output,
                           setup_readline=False,
                           async=False)
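        # NOTE: 'async' is a reserved word in Python 3.7+, so the keyword
        # argument above only parses on older interpreters.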

        # Create an empty test dir (needed for some tests)
        self.temp_dir = tempfile.mkdtemp()

    @property
    def auth_id(self):
        return "%s:%s" % (self.username, self.password)

    @property
    def auth_digest(self):
        return "%s:%s" % (self.username, self.digested_password)

    def tearDown(self):
        if self.output is not None:
            self.output.close()
            self.output = None

        if self.shell is not None:
            self.shell._disconnect()
            self.shell = None

        if os.path.isdir(self.temp_dir):
            shutil.rmtree(self.temp_dir)

        if self.client is not None:
            if self.client.exists(self.tests_path):
                self.client.delete(self.tests_path, recursive=True)

            self.client.stop()
            self.client.close()
            self.client = None

    ###
    # Helpers.
    ##

    def create_compressed(self, path, value):
        """
        ZK Shell doesn't support creating directly from a bytes array so we use a Kazoo client
        to create a znode with zlib compressed content.
        """
        compressed = zlib.compress(bytes(value, "utf-8") if PYTHON3 else value)
        self.client.create(path, compressed, makepath=True)
Example no. 40
    def check(self, instance):
        """
        Check offset in kafka for consumer_groups,topics and partitions.


        Alt 1;
        You can ether specify consumer_groups, topics and partitions in
        config file like

        consumer_groups:
            my_consumer:
              my_topic: [0, 1, 4, 12]

        Alt 2;
        Ask zookeeper for the current configuration and use that, it will
        do this if no consumer_groups is specifyed in configuration.

        """

        zk_connect_str = self.read_config(instance, 'zk_connect_str')
        kafka_host_ports = self.read_config(instance, 'kafka_connect_str')

        # Construct the Zookeeper path pattern
        zk_prefix = instance.get('zk_prefix', '')
        # Connect to Zookeeper
        zk_conn = KazooClient(zk_connect_str)
        zk_conn.start()

        try:
            if 'consumer_groups' in instance:
                # Alt 1: only check the given consumer groups, topics and partitions.
                consumer_groups = self.read_config(
                    instance,
                    'consumer_groups',
                    cast=self._validate_consumer_groups)

                (consumer_offsets, topics) = \
                    self._get_offsets_based_on_config(zk_conn, zk_prefix, consumer_groups)
            else:
                # Alt 2: none given, so ask Zookeeper for the full set.
                (consumer_offsets, topics) = \
                    self._get_offsets_from_zk(zk_conn, zk_prefix)

        finally:
            try:
                zk_conn.stop()
                zk_conn.close()
            except Exception:
                self.log.exception('Error cleaning up Zookeeper connection')

        # Connect to Kafka
        kafka_conn = KafkaClient(kafka_host_ports)

        try:
            # Query Kafka for the broker offsets
            broker_offsets = {}
            for topic, partitions in topics.items():
                offset_responses = kafka_conn.send_offset_request(
                    [OffsetRequest(topic, p, -1, 1) for p in partitions])

                for resp in offset_responses:
                    broker_offsets[(resp.topic,
                                    resp.partition)] = resp.offsets[0]
        finally:
            try:
                kafka_conn.close()
            except Exception:
                self.log.exception('Error cleaning up Kafka connection')

        # Report the broker data
        for (topic, partition), broker_offset in broker_offsets.items():
            broker_tags = ['topic:%s' % topic, 'partition:%s' % partition]
            self.gauge('kafka.broker_offset', broker_offset, tags=broker_tags)

        # Report the consumer
        for (consumer_group, topic,
             partition), consumer_offset in consumer_offsets.items():

            # Get the broker offset
            broker_offset = broker_offsets.get((topic, partition))

            # Report the consumer offset and lag
            tags = [
                'topic:%s' % topic,
                'partition:%s' % partition,
                'consumer_group:%s' % consumer_group
            ]
            self.gauge('kafka.consumer_offset', consumer_offset, tags=tags)
            self.gauge('kafka.consumer_lag',
                       broker_offset - consumer_offset,
                       tags=tags)
Example no. 41
 def get_kazoo_client(self, zoo_instance_name):
     zk = KazooClient(hosts=self.get_instance_ip(zoo_instance_name))
     zk.start()
     return zk
Example no. 42
class PartitionClient(object):
    """ Client Class for the Partition Library
    Example usage:
    ---------------------
    import libpartition
    from libpartition.libpartition import PartitionClient

    def own_change_cb(l):
            print "ownership change:" + str(l)

    c = PartitionClient("test", "s1", ["s1", "s2", "s3"], 32, 
            own_change_cb, "zookeeper_s1")

    ## do some real work now
    if (c.own_partition(1)):
        ...... do something with partition #1 .....
        .........
    ...
    c.update_cluster_list(["s1", "s2"])
    ...
    ----------------------
    You should not call any partition library routine from within the 
    callback function

    Args:
        app_name(str): Name of the app for which partition cluster is used
        self_name(str): Name of the local cluster node (can be ip address)
        cluster_list(list): List of all the nodes in the cluster including 
            local node
        max_partition(int): Partition space always goes from 0..max_partition-1
        partition_update_cb: Callback function invoked when the partition
            ownership list is updated.
        zk_server(str): <zookeeper server>:<zookeeper server port>
    """
    def __init__(
            self, app_name, self_name, cluster_list, max_partition,
            partition_update_cb, zk_server, logger = None):
       
        # Initialize local variables
        self._zk_server = zk_server
        self._cluster_list = set(cluster_list)
        self._max_partition = max_partition
        self._update_cb = partition_update_cb
        self._curr_part_ownership_list = []
        self._target_part_ownership_list = []
        self._con_hash = ConsistentHash(cluster_list)
        self._name = self_name

        # some sanity check
        if self._name not in cluster_list:
            raise ValueError('cluster list is missing local server name')

        # initialize logging and other stuff
        if logger is None:
            logging.basicConfig()
            self._logger = logging
        else:
            self._logger = logger
        self._conn_state = None
        self._sandesh_connection_info_update(status='INIT', message='')

        # connect to zookeeper
        self._zk = KazooClient(zk_server)
        while True:
            try:
                self._zk.start()
                break
            except gevent.event.Timeout as e:
                # Update connection info
                self._sandesh_connection_info_update(status='DOWN',
                                                     message=str(e))
                gevent.sleep(1)
            # Zookeeper is also throwing exception due to delay in master election
            except Exception as e:
                # Update connection info
                self._sandesh_connection_info_update(status='DOWN',
                                                     message=str(e))
                gevent.sleep(1)
        # Update connection info
        self._sandesh_connection_info_update(status='UP', message='')
        # Done connecting to ZooKeeper

        # create a lock array to contain locks for each partition
        self._part_locks = []
        for part in range(0, self._max_partition):
            lockpath = "/lockpath/"+ app_name + "/" + str(part)
            l = self._zk.Lock(lockpath, self._name)
            self._part_locks.append(l)

        # initialize partition # to lock acquire greenlet dictionary
        self._part_lock_task_dict = {}
       
        self._logger.error("initial servers:" + str(self._cluster_list))

        # update target partition ownership list
        for part in range(0, self._max_partition):
            if (self._con_hash.get_node(str(part)) == self._name):
                self._target_part_ownership_list.append(part)

        # update current ownership list
        self._acquire_partition_ownership()

    #end __init__

    def _sandesh_connection_info_update(self, status, message):
        from pysandesh.connection_info import ConnectionState
        from pysandesh.gen_py.process_info.ttypes import ConnectionStatus, \
            ConnectionType
        from pysandesh.gen_py.sandesh.ttypes import SandeshLevel

        new_conn_state = getattr(ConnectionStatus, status)
        ConnectionState.update(conn_type = ConnectionType.ZOOKEEPER,
                name = 'Zookeeper', status = new_conn_state,
                message = message,
                server_addrs = self._zk_server.split(','))

        if (self._conn_state and self._conn_state != ConnectionStatus.DOWN and
                new_conn_state == ConnectionStatus.DOWN):
            msg = 'Connection to Zookeeper down: %s' %(message)
            self._logger.error(msg)
        if (self._conn_state and self._conn_state != new_conn_state and
                new_conn_state == ConnectionStatus.UP):
            msg = 'Connection to Zookeeper ESTABLISHED'
            self._logger.error(msg)

        self._conn_state = new_conn_state
    # end _sandesh_connection_info_update


    # following routine is the greenlet task function to acquire the lock
    # for a partition
    def _acquire_lock(self, part):
        # lock for the partition
        l = self._part_locks[part]

        # go in an infinite loop waiting to acquire the lock
        try:
            while True:
                ret = l.acquire(blocking=False)
                if ret:
                    self._logger.error("Acquired lock for:" + str(part))
                    self._curr_part_ownership_list.append(part)
                    self._update_cb(self._curr_part_ownership_list)
                    return True
                else:
                    gevent.sleep(1)
        except CancelledError:
            self._logger.error("Lock acquire cancelled for:" + str(part))
            return False
        except Exception as ex:
            # TODO: If we have a non-KazooException, the lock object
            #       may get stuck in the "cancelled" state
            self._logger.error("Lock acquire unexpected error!: " + str(ex))
            assert False
            return False
    #end _acquire_lock

    # get rid of finished spawned tasks from datastructures
    def _cleanup_greenlets(self):
        # copy the keys so entries can be deleted while iterating
        for part in list(self._part_lock_task_dict.keys()):
            if self._part_lock_task_dict[part].ready():
                del self._part_lock_task_dict[part]
    #end _cleanup_greenlets 

    # following routine launches tasks to acquire partition locks
    def _acquire_partition_ownership(self):
        # cleanup any finished greenlets
        self._cleanup_greenlets()

        # this variable will help us decide if we need to call callback
        updated_curr_ownership = False 

        # list of partitions for which locks have to be released
        release_lock_list = []

        self._logger.error("known servers: %s" % self._con_hash.get_all_nodes())

        for part in range(0, self._max_partition):
            if (part in self._target_part_ownership_list):
                if (part in self._curr_part_ownership_list):
                    # do nothing, I already have ownership of this partition
                    self._logger.error("No need to acquire ownership of:" +
                            str(part))
                else:
                    # I need to acquire lock for this partition before I own
                    if part in self._part_lock_task_dict:
                        try:
                            self._part_lock_task_dict[part].get(block=False)
                        except:
                            # do nothing there is already a greenlet running to
                            # acquire the lock
                            self._logger.error("Already a greenlet running to" 
                                    " acquire:" + str(part))
                            continue

                        # Greenlet died without getting ownership. Cleanup
                        self._logger.error("Cleanup stale greenlet running to" 
                                " acquire:" + str(part))
                        del self._part_lock_task_dict[part]

                    self._logger.error("Starting greenlet running to" 
                            " acquire:" + str(part))
                    # launch the greenlet to acquire the lock
                    g = Greenlet.spawn(self._acquire_lock, part)
                    self._part_lock_task_dict[part] = g

            else:
                # give up ownership of the partition

                # cancel any lock acquisition which is ongoing
                if part in self._part_lock_task_dict:
                    try:
                        self._part_lock_task_dict[part].get(block=False)
                    except:
                        self._logger.error("canceling lock acquisition going"
                                " on for:" + str(part))
                        # cancelling the lock should kill the acquiring greenlet
                        self._part_locks[part].cancel()
                        self._part_lock_task_dict[part].get(block=True)

                    del self._part_lock_task_dict[part]
                        
                if (part in self._curr_part_ownership_list):
                    release_lock_list.append(part)
                    self._curr_part_ownership_list.remove(part)
                    updated_curr_ownership = True
                    self._logger.error("giving up ownership of:" + str(part))

        if updated_curr_ownership:
            # current partition membership was updated; call the callback
            self._update_cb(self._curr_part_ownership_list)

        if release_lock_list:
            # release locks which were acquired
            for part in release_lock_list:
                self._logger.error("release the lock which was acquired:" + \
                        str(part))
                try:
                    self._part_locks[part].release()
                    self._logger.error("fully gave up ownership of:" + str(part))
                except:
                    pass
    #end _acquire_partition_ownership

    def update_cluster_list(self, cluster_list):
        """ Updates the cluster node list
        Args:
            cluster_list(list): New list of names of the nodes in 
                the cluster
        Returns:
            None
        """
        # some sanity check
        if self._name not in cluster_list:
            raise ValueError('cluster list is missing local server name')

        new_cluster_list = set(cluster_list)
        new_servers = list(new_cluster_list.difference(
            self._cluster_list))
        deleted_servers = list(set(self._cluster_list).difference(
            new_cluster_list)) 
        self._cluster_list = set(cluster_list)
        self._logger.error("deleted servers:" + str(deleted_servers))
        self._logger.error("new servers:" + str(new_servers))

        # update the hash structure
        if new_servers:
            self._con_hash.add_nodes(new_servers)
        if deleted_servers:
            self._con_hash.del_nodes(deleted_servers)

        # update target partition ownership list
        self._target_part_ownership_list = []
        for part in range(0, self._max_partition):
            if (self._con_hash.get_node(str(part)) == self._name):
                if not (part in self._target_part_ownership_list):
                    self._target_part_ownership_list.append(part)

        # update current ownership list
        self._acquire_partition_ownership()

    #end update_cluster_list

    def own_partition(self, part_no):
        """ Returns ownership information of a partition
        Args:
            part_no(int) : Partition no 
        Returns:
            True if partition is owned by the local node
            False if partition is not owned by the local node
        """
        return part_no in self._curr_part_ownership_list 
    #end own_partition

    def close(self):
        """ Closes any connections and frees up any data structures
        Args:
        Returns:
            None
        """
        # clean up greenlets
        for part in self._part_lock_task_dict.keys():
            try:
                self._part_lock_task_dict[part].kill()
            except:
                pass

        # close zookeeper
        try:
            self._zk.stop()
        except:
            pass
        try:
            self._zk.close()
        except:
            pass
Esempio n. 43
0
    def spoorer(self):  # connect to Kafka and fetch the topic list
        try:
            kafka_client = SimpleClient(self.kafka_hosts, timeout=self.timeout)
        except Exception:
            print("Error, cannot connect kafka broker.")
            sys.exit(1)
        else:
            kafka_topics = kafka_client.topics
            kafka_client.close()

        # connect to ZooKeeper to fetch the current consumer offsets
        try:
            zookeeper_client = KazooClient(hosts=self.zookeeper_hosts, read_only=True, timeout=self.timeout)
            zookeeper_client.start()
        except Exception:
            print("Error, cannot connect zookeeper server.")
            sys.exit(1)

        try:
            groups = list(zookeeper_client.get_children(self.zookeeper_url + 'consumers'))
        except NoNodeError:
            print("Error, invalid zookeeper url.")
            zookeeper_client.stop()
            sys.exit(2)
        else:
            for group in groups:
                if 'offsets' not in zookeeper_client.get_children(self.zookeeper_url + 'consumers/%s' % group):
                    continue
                topic_path = 'consumers/%s/offsets' % group
                topics = list(zookeeper_client.get_children(self.zookeeper_url + topic_path))
                if len(topics) == 0:
                    continue

                for topic in topics:
                    if topic not in self.white_topic_group.keys():
                        continue
                    elif group not in self.white_topic_group[topic].replace(' ', '').split(','):
                        continue
                    partition_path = 'consumers/%s/offsets/%s' % (group, topic)
                    partitions = list(map(int, zookeeper_client.get_children(self.zookeeper_url + partition_path)))
                    for partition in partitions:
                        base_path = 'consumers/%s/%s/%s/%s' % (group, '%s', topic, partition)
                        owner_path, offset_path = base_path % 'owners', base_path % 'offsets'
                        offset = zookeeper_client.get(self.zookeeper_url + offset_path)[0]

                        try:
                            owner = zookeeper_client.get(self.zookeeper_url + owner_path)[0].decode('utf-8')
                        except NoNodeError:
                            owner = 'null'
                        # collect the consumer progress into the metric dict
                        metric = {'datetime': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), 'topic': topic, 'group': group, 'partition': int(partition), 'logsize': None, 'offset': int(offset), 'lag': None, 'owner': owner}
                        self.result.append(metric)
        finally:
            zookeeper_client.stop()

        # fetch the logsize of every partition (modified from the original)
        try:
            client = SimpleClient(self.kafka_hosts)
        except Exception:
            print("Error, cannot connect kafka broker.")
            sys.exit(1)
        else:
            for kafka_topic in kafka_topics:
                self.kafka_logsize[kafka_topic] = {}
                partitions = client.topic_partitions[kafka_topic]
                offset_requests = [OffsetRequestPayload(kafka_topic, p, -1, 1) for p in partitions.keys()]
                offsets_responses = client.send_offset_request(offset_requests)
                for r in offsets_responses:
                    self.kafka_logsize[kafka_topic][r.partition] = r.offsets[0]

        # logsize minus the current offset equals the lag
        f1 = open(self.log_file, 'a+')
        f2 = open(self.log_day_file, 'a+')
        for metric in self.result:
            logsize = self.kafka_logsize[metric['topic']][metric['partition']]
            metric['logsize'] = int(logsize)
            metric['lag'] = int(logsize) - int(metric['offset'])
            f1.write(json.dumps(metric, sort_keys=True) + '\n')
            f1.flush()
            f2.write(json.dumps(metric, sort_keys=True) + '\n')
            f2.flush()
        f1.close()
        f2.close()
        client.close()
        return ''
Esempio n. 44
0
class Publisher:
    # instantiate variables and connect to broker
    def __init__(self, ip_add, name=""):
        if name == "":
            self.name = ''.join(random.choices(string.ascii_uppercase + string.digits, k = 8))
        else:
            self.name = name
        self.history = ""
        self.kill = True
        self.topic = "Default"
        self.ip_add = ip_add  # kept so the broker watch can reconnect later
        self.context = zmq.Context()
        self.full_add = ""
        self.sock_pub = self.context.socket(zmq.PUB)
        #PRESIDENT ZNODE ADDRESS
        self.home = "/president/pres"

        self.zk_driver = KazooClient(hosts='127.0.0.1:2181')
        self.zk_driver.start()

        data, stat = self.zk_driver.get(self.home)
        ports = data.decode('ASCII').split(":")
        self.full_add = "tcp://" + str(ip_add) + ":" + ports[0]
        self.sock_pub.connect(self.full_add)


    # register a topic for this publisher
    def register_pub(self, topic):
        self.topic = topic
        msg = "REGISTER||" + str(self.topic) + "||" + str(self.name)
        ### CREATE ZNODE
        node_path = '/' + str(self.topic) + '/' + str(self.name)
        self.zk_driver.ensure_path('/' + str(self.topic) + '/')
        if not self.zk_driver.exists(node_path):
            self.zk_driver.create(node_path, b'0')
        time.sleep(1)
        print("Pub ID = ", self.name)
        self.sock_pub.send_string(msg)
        return True

    # publish the given information for pre-registered topic
    def publish(self, info):
        self.history = self.history + str(info) + "..."
        # format for published string is "topic||info"
        msg = str(self.topic) + "||" + self.history + "||" + str(self.name)
        #print("Time published: %.20f" % time.time())  # uncomment for measurements purposes
        self.sock_pub.send_string(msg)

        @self.zk_driver.DataWatch(self.home)
        def watch_node(data, stat, event):
            if event is not None and event.type == "CREATED" and self.kill:
                # DISCONNECT
                self.sock_pub.close()
                self.context.term()
                self.context = zmq.Context()
                self.sock_pub = self.context.socket(zmq.PUB)

                # RECONNECT WITH NEW PORT
                data, stat = self.zk_driver.get(self.home)
                ports = data.decode('ASCII').split(":")
                self.full_add = "tcp://" + str(ip_add) + ":" + ports[0]
                self.sock_pub.connect(self.full_add)
                self.kill = False
                print("Updated Broker! Input information about your topic and press enter to publish!")
        return True
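
# A minimal usage sketch for the Publisher above (hypothetical values): it
# assumes a ZooKeeper server on 127.0.0.1:2181 whose /president/pres znode
# holds the current broker port, and a broker reachable at the given IP.
if __name__ == '__main__':
    pub = Publisher('127.0.0.1', name='demo_pub')
    pub.register_pub('weather')
    pub.publish('sunny, 25C')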
Esempio n. 45
0
class Publish(object):

    _to_zip_node = dict()
    _to_syc_node = dict()
    _to_pub_node = dict()
    _server_list = dict()
    _root_node   = ''
    _zookeeper   = None

    def __init__(self, host = '127.0.0.1', port = 2181, root_node = '/jzqps'):
        self._root_node = root_node if root_node[0] == '/' else '/jzqps'
        self._zookeeper = KazooClient('%s:%s' % (host, port,))
        self._zookeeper.start()

        default_node = [
            self._root_node,
            self._root_node + '/server_list',
            self._root_node + '/to_zip_notice',
            self._root_node + '/to_zip_result',
            self._root_node + '/to_syc_notice',
            self._root_node + '/to_syc_result',
            self._root_node + '/to_pub_notice',
            self._root_node + '/to_pub_result',
            self._root_node + '/to_rol_notice',
            self._root_node + '/to_rol_result',
        ]

        default_node_value = json.dumps({'update_time' : Tools.g_time()})

        try:
            for node in default_node:
                if self._zookeeper.exists(node) is None:
                    self._zookeeper.create(node, default_node_value, makepath = True)
        except kazoo.exceptions.NodeExistsError:
            pass

    def server(self, server_node, now_timestamp):
        server_detail = self._zookeeper.get('%s/server_list/%s' % (self._root_node, server_node, ))
        if 0 != len(server_detail[0]):
            tmp_server_detail = json.loads(server_detail[0])
            if tmp_server_detail['update_time'] + 10 > now_timestamp:
                self._server_list[server_node] = tmp_server_detail
            elif self._server_list.get(server_node, None) is not None:
                del self._server_list[server_node]

        return self._server_list.get(server_node, None)

    def get_pub_node_id(self, pub_id):
        return 'v%s' % pub_id

    def get_server_list(self):
        server_list = []
        server_node = self._zookeeper.get_children('%s/server_list' % self._root_node)
        if len(server_node):
            now_timestamp = time.time()
            for s in sorted(server_node):
                if self.server(s, now_timestamp) is not None:
                    server_list.append(self._server_list[s])

        return server_list

    def to_zip(self, pub_id, zip_callback = None, **ext_data):
        pub_node_id = self.get_pub_node_id(pub_id)

        ext_data['pub_id']      = pub_id
        ext_data['pub_node_id'] = pub_node_id
        ext_data['update_time'] = Tools.g_time()

        try:
            if self._zookeeper.exists(self._root_node + '/to_zip_notice/' + pub_node_id) is None:
                self._zookeeper.create(self._root_node + '/to_zip_notice/' + pub_node_id, json.dumps(ext_data), makepath = True)
            else:
                self._zookeeper.set(self._root_node + '/to_zip_notice/' + pub_node_id, json.dumps(ext_data))

            if self._zookeeper.exists(self._root_node + '/to_zip_result/' + pub_node_id) is None:
                self._zookeeper.create(self._root_node + '/to_zip_result/' + pub_node_id, '', makepath = True)
            else:
                self._zookeeper.set(self._root_node + '/to_zip_result/' + pub_node_id, '')

        except kazoo.exceptions.NodeExistsError:
            pass

        if self._to_zip_node.get(pub_node_id, None) is None:
            self._to_zip_node[pub_node_id] = [zip_callback]
            self.zip_notice(pub_id, pub_node_id)
        else:
            self._to_zip_node[pub_node_id].append(zip_callback)

        return self

    def zip_notice(self, pub_id, pub_node_id):

        @self._zookeeper.DataWatch('%s/to_zip_result/%s' % (self._root_node, pub_node_id, ))
        def to_zip_notice(data, stat, event):
            if data is None or 0 == len(data) \
                    or event is None \
                    or event.type == 'CREATED' \
                    or event.type == 'DELETED':
                return

            LOG.info('%s/to_zip_result/%s changed %s' % (self._root_node, pub_node_id, data, ))

            for zip_callback in self._to_zip_node[pub_node_id]:
                zip_callback(data)
            self._to_zip_node[pub_node_id] = []

        return self

    def to_syc(self, pub_id, target_servers, syc_process_callback=None, syc_success_callback = None, **ext_data):
        pub_node_id = self.get_pub_node_id(pub_id)

        ext_data['pub_id']      = pub_id
        ext_data['pub_node_id'] = pub_node_id
        ext_data['update_time'] = Tools.g_time()
        ext_data['servers']     = target_servers

        try:
            if self._zookeeper.exists(self._root_node + '/to_syc_notice/' + pub_node_id) is None:
                self._zookeeper.create(self._root_node + '/to_syc_notice/' + pub_node_id, json.dumps(ext_data), makepath = True)
            else:
                self._zookeeper.set(self._root_node + '/to_syc_notice/' + pub_node_id, json.dumps(ext_data))

            if self._zookeeper.exists(self._root_node + '/to_syc_result/' + pub_node_id) is None:
                self._zookeeper.create(self._root_node + '/to_syc_result/' + pub_node_id, '', makepath = True)
            else:
                self._zookeeper.set(self._root_node + '/to_syc_result/' + pub_node_id, '')

            for target_server_id in target_servers:
                target_node = self._root_node + '/to_syc_result/' + pub_node_id + '/s' + str(target_server_id)
                if self._zookeeper.exists(target_node) is not None:
                    self._zookeeper.delete(target_node)

        except kazoo.exceptions.NodeExistsError:
            pass

        if self._to_syc_node.get(pub_node_id, None) is None:
            self._to_syc_node[pub_node_id] = {
                'callback'    : [syc_process_callback, syc_success_callback],
                'servers'     : target_servers,
                'notices'     : [],
                'results'     : {},
                'update_time' : Tools.g_time()
            }
            self.syc_children_notice(pub_id, pub_node_id)
        else:
            self._to_syc_node[pub_node_id]['callback'] = [syc_process_callback, syc_success_callback]
            self._to_syc_node[pub_node_id]['servers']  = target_servers
            self._to_syc_node[pub_node_id]['results']  = {}
            self._to_syc_node[pub_node_id]['time']     = Tools.g_time()

        return self

    def syc_children_notice(self, pub_id, pub_node_id):

        @self._zookeeper.ChildrenWatch('%s/to_syc_result/%s' % (self._root_node, pub_node_id, ))
        def to_syc_process(server_list):
            for server_node in server_list:
                if server_node not in self._to_syc_node[pub_node_id]['notices']:
                    self._to_syc_node[pub_node_id]['notices'].append(server_node)
                    self.syc_process_notice(pub_id, pub_node_id, server_node)

        return self

    def syc_process_notice(self, pub_id, pub_node_id, server_node):
        syc_server_node = '%s/to_syc_result/%s/%s' % (self._root_node, pub_node_id, server_node, )

        @self._zookeeper.DataWatch(syc_server_node)
        def to_syc_process(data, stat, event):
            if event is not None and event.type == 'DELETED':
                return

            if data is None or 0 == len(data):
                return

            LOG.info('syc children %s %s' % (syc_server_node, data, ))

            syc_detail = json.loads(data)
            if not isinstance(syc_detail, dict) or \
                            syc_detail.get('update_time', None) is None or \
                            syc_detail.get('status', None) is None:
                return

            if syc_detail['status'] == 'ok':
                self._to_syc_node[pub_node_id]['results'][server_node] = True
            else:
                self._to_syc_node[pub_node_id]['results'][server_node] = False

            self._to_syc_node[pub_node_id]['callback'][0](server_node, data)

            all_syc_finished = len(self._to_syc_node[pub_node_id]['servers']) > 0
            for server_id in self._to_syc_node[pub_node_id]['servers']:
                target_server_node = 's%s' % server_id
                if self._to_syc_node[pub_node_id]['results'].get(target_server_node, False) is False:
                    all_syc_finished = False
                    break

            if all_syc_finished:
                self._to_syc_node[pub_node_id]['callback'][1]()

                self._to_syc_node[pub_node_id]['callback'] = []
                self._to_syc_node[pub_node_id]['results']  = {}

                self._zookeeper.set('%s/to_syc_notice/%s' % (self._root_node, pub_node_id, ), json.dumps({
                    'pub_id'      : pub_id,
                    'pub_node_id' : pub_node_id,
                    'update_time' : self._to_syc_node[pub_node_id]['update_time'],
                    'servers'     : self._to_syc_node[pub_node_id]['servers'],
                    'finish_time' : Tools.g_time(),
                    'status'      : 'ok'
                }))

                self._zookeeper.set('%s/to_syc_result/%s' % (self._root_node, pub_node_id, ), json.dumps({
                    'update_time' : Tools.g_time(),
                    'status'      : 'ok'
                }))

        return self

    def to_pub(self, pub_id, target_servers, pub_process_callback=None, pub_success_callback = None, **ext_data):
        pub_node_id = self.get_pub_node_id(pub_id)

        ext_data['pub_id']      = pub_id
        ext_data['pub_node_id'] = pub_node_id
        ext_data['update_time'] = Tools.g_time()
        ext_data['servers']     = target_servers

        try:
            if self._zookeeper.exists(self._root_node + '/to_pub_notice/' + pub_node_id) is None:
                self._zookeeper.create(self._root_node + '/to_pub_notice/' + pub_node_id, json.dumps(ext_data), makepath = True)
            else:
                self._zookeeper.set(self._root_node + '/to_pub_notice/' + pub_node_id, json.dumps(ext_data))

            if self._zookeeper.exists(self._root_node + '/to_pub_result/' + pub_node_id) is None:
                self._zookeeper.create(self._root_node + '/to_pub_result/' + pub_node_id, '', makepath = True)
            else:
                self._zookeeper.set(self._root_node + '/to_pub_result/' + pub_node_id, '')

            for target_server_id in target_servers:
                target_node = self._root_node + '/to_pub_result/' + pub_node_id + '/s' + str(target_server_id)
                if self._zookeeper.exists(target_node) is not None:
                    self._zookeeper.delete(target_node)

        except kazoo.exceptions.NodeExistsError:
            pass

        if self._to_pub_node.get(pub_node_id, None) is None:
            self._to_pub_node[pub_node_id] = {
                'callback'    : [pub_process_callback, pub_success_callback],
                'servers'     : target_servers,
                'notices'     : [],
                'results'     : {},
                'update_time' : Tools.g_time()
            }
            self.pub_children_notice(pub_id, pub_node_id)
        else:
            self._to_pub_node[pub_node_id]['callback'] = [pub_process_callback, pub_success_callback]
            self._to_pub_node[pub_node_id]['servers']  = target_servers
            self._to_pub_node[pub_node_id]['results']  = {}
            self._to_pub_node[pub_node_id]['time']     = Tools.g_time()

        return self

    def pub_children_notice(self, pub_id, pub_node_id):

        @self._zookeeper.ChildrenWatch('%s/to_pub_result/%s' % (self._root_node, pub_node_id, ))
        def to_pub_process(server_list):
            for server_node in server_list:
                if server_node not in self._to_pub_node[pub_node_id]['notices']:
                    self._to_pub_node[pub_node_id]['notices'].append(server_node)
                    self.pub_process_notice(pub_id, pub_node_id, server_node)

        return self

    def pub_process_notice(self, pub_id, pub_node_id, server_node):
        pub_server_node = '%s/to_pub_result/%s/%s' % (self._root_node, pub_node_id, server_node, )

        @self._zookeeper.DataWatch(pub_server_node)
        def to_pub_process(data, stat, event):
            if event is not None and event.type == 'DELETED':
                return

            if data is None or 0 == len(data):
                return

            LOG.info('pub children %s %s' % (pub_server_node, data, ))

            pub_detail = json.loads(data)
            if not isinstance(pub_detail, dict) or \
                            pub_detail.get('update_time', None) is None or \
                            pub_detail.get('status', None) is None:
                return

            if pub_detail['status'] == 'ok':
                self._to_pub_node[pub_node_id]['results'][server_node] = True
            else:
                self._to_pub_node[pub_node_id]['results'][server_node] = False

            self._to_pub_node[pub_node_id]['callback'][0](server_node, data)

            all_pub_finished = len(self._to_pub_node[pub_node_id]['servers']) > 0
            for server_id in self._to_pub_node[pub_node_id]['servers']:
                target_server_node = 's%s' % server_id
                if self._to_pub_node[pub_node_id]['results'].get(target_server_node, False) is False:
                    all_pub_finished = False
                    break

            if all_pub_finished:
                self._to_pub_node[pub_node_id]['callback'][1]()

                self._to_pub_node[pub_node_id]['callback'] = []
                self._to_pub_node[pub_node_id]['results']  = {}

                self._zookeeper.set('%s/to_pub_notice/%s' % (self._root_node, pub_node_id, ), json.dumps({
                    'pub_id'      : pub_id,
                    'pub_node_id' : pub_node_id,
                    'update_time' : self._to_pub_node[pub_node_id]['update_time'],
                    'servers'     : self._to_pub_node[pub_node_id]['servers'],
                    'finish_time' : Tools.g_time(),
                    'status'      : 'ok'
                }))

                self._zookeeper.set('%s/to_pub_result/%s' % (self._root_node, pub_node_id, ), json.dumps({
                    'update_time' : Tools.g_time(),
                    'status'      : 'ok'
                }))

        return self

    def deprecated(self, pub_id):
        pub_node_id = self.get_pub_node_id(pub_id)

        if self._zookeeper.exists(self._root_node + '/to_syc_notice/' + pub_node_id):
            self._zookeeper.set('%s/to_syc_notice/%s' % (self._root_node, pub_node_id, ), json.dumps({
                'pub_id'      : pub_id,
                'pub_node_id' : pub_node_id,
                'update_time' : Tools.g_time(),
                'servers'     : [],
                'finish_time' : Tools.g_time(),
                'status'      : 'deprecated'
            }))

        if self._zookeeper.exists(self._root_node + '/to_pub_notice/' + pub_node_id):
            self._zookeeper.set('%s/to_pub_notice/%s' % (self._root_node, pub_node_id, ), json.dumps({
                'pub_id'      : pub_id,
                'pub_node_id' : pub_node_id,
                'update_time' : Tools.g_time(),
                'servers'     : [],
                'finish_time' : Tools.g_time(),
                'status'      : 'deprecated'
            }))
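
# A minimal sketch of driving the Publish workflow above; the pub_id and
# callback are hypothetical, a ZooKeeper server is assumed on 127.0.0.1:2181,
# and worker processes are assumed to answer on the to_zip_result znodes
# (the class itself is Python 2 era code).
if __name__ == '__main__':
    def on_zip_done(data):
        print('zip finished: %s' % data)

    publisher = Publish(host='127.0.0.1', port=2181)
    publisher.to_zip(1001, zip_callback=on_zip_done, config_version='v1')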
Esempio n. 46
0
class ZooKeeper(object):
    # Constants used by the REST API:
    LIVE_NODES_ZKNODE = '/live_nodes'
    ALIASES = '/aliases.json'
    CLUSTER_STATE = '/clusterstate.json'
    SHARDS = 'shards'
    REPLICAS = 'replicas'
    STATE = 'state'
    ACTIVE = 'active'
    LEADER = 'leader'
    BASE_URL = 'base_url'
    TRUE = 'true'
    FALSE = 'false'
    COLLECTION = 'collection'
    COLLECTION_STATE = '/collections/{}/state.json'
    NODE_NAME = 'node_name'

    def __init__(self,
                 zkServerAddress,
                 zkClientTimeout=15,
                 zkClientConnectTimeout=15):
        if KazooClient is None:
            logging.error(
                'ZooKeeper requires the `kazoo` library to be installed')
            raise RuntimeError

        self.watchedCollections = []
        self.collections = {}
        self.liveNodes = {}
        self.aliases = {}
        self.state = None
        self.hasClusterState = False

        self.zk = KazooClient(zkServerAddress, read_only=True)

        self.zk.start()
        random.seed()

        def connectionListener(state):
            if state == KazooState.LOST:
                self.state = state
            elif state == KazooState.SUSPENDED:
                self.state = state

        self.zk.add_listener(connectionListener)

        @self.zk.DataWatch(ZooKeeper.CLUSTER_STATE)
        def watchClusterState(data, *args, **kwargs):
            if not data:
                LOG.warning(
                    "No cluster state available: no collections defined?")
            else:
                self.collections = json.loads(data.decode('utf-8'))
                self.hasClusterState = True
                LOG.info('Updated collections: %s', self.collections)

        @self.zk.ChildrenWatch(ZooKeeper.LIVE_NODES_ZKNODE)
        def watchLiveNodes(children):
            self.liveNodes = children
            LOG.info("Updated live nodes: %s", children)

        @self.zk.DataWatch(ZooKeeper.ALIASES)
        def watchAliases(data, stat):
            if data:
                json_data = json.loads(data.decode('utf-8'))
                if ZooKeeper.COLLECTION in json_data and json_data[
                        ZooKeeper.COLLECTION]:
                    self.aliases = json_data[ZooKeeper.COLLECTION]
                else:
                    LOG.warning('Expected to find %s in alias update %s',
                                ZooKeeper.COLLECTION, json_data.keys())
            else:
                self.aliases = {}
            LOG.info("Updated aliases: %s", self.aliases)

    def watchCollection(self, collection):
        path = ZooKeeper.COLLECTION_STATE.format(collection)

        def watch(event=None):
            data = self.zk.get(path, watch=watch)
            self.collections[collection] = json.loads(
                data[0].decode("utf8"))[collection]

        try:
            watch()
        except NoNodeError as e:
            if (self.hasClusterState and collection
                    not in self.collections) or not self.hasClusterState:
                raise SolrError("No collection %s" % collection)

    def __del__(self):
        # Avoid leaking connection handles in Kazoo's atexit handler:
        self.zk.stop()
        self.zk.close()

    def getHosts(self, collname, only_leader=False, seen_aliases=None):
        if self.aliases and collname in self.aliases:
            return self.getAliasHosts(collname, only_leader, seen_aliases)

        hosts = []
        if collname not in self.collections:
            raise SolrError("Unknown collection: %s", collname)
        collection = self.collections[collname]
        shards = collection[ZooKeeper.SHARDS]
        for shardname in shards.keys():
            shard = shards[shardname]
            if shard[ZooKeeper.STATE] == ZooKeeper.ACTIVE:
                replicas = shard[ZooKeeper.REPLICAS]
                for replicaname in replicas.keys():
                    replica = replicas[replicaname]

                    if replica[ZooKeeper.STATE] == ZooKeeper.ACTIVE:
                        if not only_leader or (replica.get(
                                ZooKeeper.LEADER, None) == ZooKeeper.TRUE):
                            base_url = replica[ZooKeeper.BASE_URL]
                            if base_url not in hosts:
                                if replica[
                                        ZooKeeper.NODE_NAME] in self.liveNodes:
                                    hosts.append(base_url)
        return hosts

    def getAliasHosts(self, collname, only_leader, seen_aliases):
        if seen_aliases:
            if collname in seen_aliases:
            LOG.warning("%s in circular alias definition - ignored", collname)
                return []
        else:
            seen_aliases = []
        seen_aliases.append(collname)
        collections = self.aliases[collname].split(",")
        hosts = []
        for collection in collections:
            for host in self.getHosts(collection, only_leader, seen_aliases):
                if host not in hosts:
                    hosts.append(host)
        return hosts

    def getRandomURL(self, collname):
        hosts = self.getHosts(collname, only_leader=False)
        if len(hosts) == 0:
            raise SolrError("No hosts available for %s" % collname)
        return random.choice(hosts) + "/" + collname

    def getLeaderURL(self, collname):
        hosts = self.getHosts(collname, only_leader=True)
        if len(hosts) == 0:
            raise SolrError("No leaders available for %s" % collname)
        return random.choice(hosts) + "/" + collname
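
# A minimal usage sketch for the SolrCloud helper above; the ZooKeeper
# address and collection name are hypothetical, and the collection is
# assumed to exist in the cluster state.
if __name__ == '__main__':
    zk = ZooKeeper('localhost:9983')
    zk.watchCollection('my_collection')
    print(zk.getRandomURL('my_collection'))   # any active replica
    print(zk.getLeaderURL('my_collection'))   # an active shard leader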
Esempio n. 47
0
def get_fake_zk(nodename, timeout=30.0):
    _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) +
                                    ":9181",
                                    timeout=timeout)
    _fake_zk_instance.start()
    return _fake_zk_instance
Esempio n. 48
0
class mock_syc(object):

    _root_node = ''
    _server_list = {}
    _zookeeper = None
    _shell_path = ''

    def __init__(self,
                 host='127.0.0.1',
                 port=2181,
                 root_node='/test',
                 shell_path='./'):
        self._zookeeper = KazooClient('%s:%s' % (
            host,
            port,
        ))
        self._root_node = root_node
        self._shell_path = shell_path

    def run(self):
        self._zookeeper.start()
        self.init()

    def init(self):
        syc_node = '%s/to_syc_notice' % self._root_node
        default_node_value = json.dumps({'update_time': time.time()})

        try:
            if self._zookeeper.exists(syc_node) is None:
                self._zookeeper.create(syc_node,
                                       default_node_value,
                                       makepath=True)
        except kazoo.exceptions.NodeExistsError:
            pass

        @self._zookeeper.ChildrenWatch('%s/server_list' % (self._root_node, ))
        def server(server_list):
            for server_node in server_list:
                result = self.init_server(server_node)
                LOG.info('refresh server list %s' % json.dumps(result))

        @self._zookeeper.ChildrenWatch('%s/to_syc_notice' % (self._root_node, ))
        def to_syc_node(syc_node_list):
            for syc_node_id in syc_node_list:
                LOG.info('watch_syc children %s/to_syc_notice/%s' % (
                    self._root_node,
                    syc_node_id,
                ))
                self.to_syc(syc_node_id)

        return self

    def init_server(self, server_node):
        server_detail = self._zookeeper.get('%s/server_list/%s' % (
            self._root_node,
            server_node,
        ))
        if 0 == len(server_detail[0]):
            self._server_list[server_node] = {
                'server_id': 0,
                'server_name': '',
                'update_time': 0
            }
        else:
            self._server_list[server_node] = json.loads(server_detail[0])

        return self._server_list[server_node]

    def to_syc(self, syc_node_id):
        @self._zookeeper.DataWatch('%s/to_syc_notice/%s' % (
            self._root_node,
            syc_node_id,
        ))
        def to_zip_execute(data, stat, event):
            if event is not None and event.type == 'DELETED':
                return

            if data is None or 0 == len(data):
                return

            LOG.info('watch_syc execute %s/to_syc_notice/%s %s' % (
                self._root_node,
                syc_node_id,
                data,
            ))

            node_detail = json.loads(data)
            if node_detail.get('status', None) == 'ok' or \
                            node_detail.get('status', None) == 'failed' or \
                            node_detail.get('servers', None) is None:
                return

            all_syc_finished = True

            for server_index in self._server_list:
                if 0 == self._server_list[server_index]['server_id'] or \
                            str(self._server_list[server_index]['server_id']) not in node_detail['servers']:
                    continue

                node_value = {'update_time': time.time()}

                if self.syc_execute(
                        node_detail['config_version'],
                        node_detail['game_version'],
                        self._server_list[server_index]['server_id']) is True:
                    LOG.info('syc node %s/to_syc_result/%s/s%s syc success' %
                             (self._root_node, syc_node_id,
                              self._server_list[server_index]['server_id']))
                    node_value['status'] = 'ok'
                else:
                    LOG.info('syc node %s/to_syc_result/%s/s%s syc failed' %
                             (self._root_node, syc_node_id,
                              self._server_list[server_index]['server_id']))
                    node_value['status'] = 'failed'
                    all_syc_finished = False

                syc_server_node = '%s/to_syc_result/%s/s%s' % (
                    self._root_node,
                    syc_node_id,
                    self._server_list[server_index]['server_id'],
                )

                try:
                    if self._zookeeper.exists(syc_server_node) is None:
                        self._zookeeper.create(syc_server_node,
                                               json.dumps(node_value),
                                               makepath=True)
                    else:
                        self._zookeeper.set(syc_server_node,
                                            json.dumps(node_value))
                except kazoo.exceptions.NodeExistsError:
                    pass

            if all_syc_finished:
                node_detail['status'] = 'ok'
                node_detail['finish_time'] = time.time()
                self._zookeeper.set(
                    '%s/to_syc_notice/%s' % (
                        self._root_node,
                        syc_node_id,
                    ), json.dumps(node_detail))

    def syc_execute(self, config_version, game_version, server_id):
        '''
        Execute the sync shell script (syc.sh) for the given server.
        '''

        LOG.info('start to execute shell %s/syc.sh %s %s %s' % (
            self._shell_path,
            config_version,
            game_version,
            server_id,
        ))
        result = subprocess.call('%s/syc.sh %s %s %s > /dev/null 2>&1' % (
            self._shell_path,
            config_version,
            game_version,
            server_id,
        ),
                                 shell=True)

        return result == 0
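
# A minimal sketch of running the mock above; the root node and shell path
# are hypothetical, and a syc.sh script is assumed to exist under shell_path.
if __name__ == '__main__':
    syncer = mock_syc(host='127.0.0.1', port=2181, root_node='/test',
                      shell_path='/opt/syc')
    syncer.run()
    while True:  # keep the process alive so the registered watches keep firing
        time.sleep(60)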
Esempio n. 49
0
class USSMetadataManager(object):
    """Interfaces with the locking system to get, put, and delete USS metadata.

  Metadata gets/stores/deletes the USS information for a particular grid,
  including current version number, a list of USSs with active operations,
  and the endpoints to get that information. Locking is assured through a
  snapshot token received when getting, and used when putting.
  """
    def __init__(self, connectionstring=DEFAULT_CONNECTION, testgroupid=None):
        """Initializes the class.

    Args:
      connectionstring:
        Zookeeper connection string - server:port,server:port,...
      testgroupid:
        ID to use if in test mode, none for normal mode
    """
        if testgroupid:
            self.set_testmode(testgroupid)
        if not connectionstring:
            connectionstring = DEFAULT_CONNECTION
        log.debug(
            'Creating metadata manager object and connecting to zookeeper...')
        try:
            if set(BAD_CHARACTER_CHECK) & set(connectionstring):
                raise ValueError
            self.zk = KazooClient(hosts=connectionstring,
                                  timeout=CONNECTION_TIMEOUT)
            self.zk.add_listener(self.zookeeper_connection_listener)
            self.zk.start()
            if testgroupid:
                self.delete_testdata(testgroupid)
        except KazooTimeoutError:
            log.error(
                'Unable to connect to zookeeper using %s connection string...',
                connectionstring)
            raise
        except ValueError:
            log.error('Connection string %s seems invalid...',
                      connectionstring)
            raise

    def __del__(self):
        log.debug(
            'Destroying metadata manager object and disconnecting from zk...')
        self.zk.stop()

    def set_verbose(self):
        log.setLevel(logging.DEBUG)

    def set_testmode(self, testgroupid='UNDEFINED_TESTER'):
        """Sets the mode to testing with the specific test ID, cannot be undone.

    Args:
      testgroupid: ID to use if in test mode, none for normal mode
    """
        global GRID_PATH
        global CONNECTION_TIMEOUT
        # Adjust parameters specifically for the test
        GRID_PATH = TEST_BASE_PREFIX + testgroupid + USS_BASE_PREFIX
        log.debug('Setting test path to %s...', GRID_PATH)
        CONNECTION_TIMEOUT = 1.0

    def zookeeper_connection_listener(self, state):
        if state == KazooState.LOST:
            # Register somewhere that the session was lost
            log.error('Lost connection with the zookeeper servers...')
        elif state == KazooState.SUSPENDED:
            # Handle being disconnected from Zookeeper
            log.error('Suspended connection with the zookeeper servers...')
        elif state == KazooState.CONNECTED:
            # Handle being connected/reconnected to Zookeeper
            log.info('Connection restored with the zookeeper servers...')

    def delete_testdata(self, testgroupid=None):
        """Removes the test data from the servers.

    Be careful when using this in parallel as it removes everything under
    the testgroupid, or everything if no testgroupid is provided.

    Args:
      testgroupid: ID to use if in test mode, none will remove all test data
    """
        if testgroupid:
            path = TEST_BASE_PREFIX + testgroupid
        else:
            path = TEST_BASE_PREFIX
        self.zk.delete(path, recursive=True)

    def get(self, z, x, y):
        """Gets the metadata and snapshot token for a GridCell.

    Reads data from zookeeper, including a snapshot token. The
    snapshot token is used as a reference when writing to ensure
    the data has not been updated between read and write.

    Args:
      z: zoom level in slippy tile format
      x: x tile number in slippy tile format
      y: y tile number in slippy tile format
    Returns:
      JSend formatted response (https://labs.omniti.com/labs/jsend)
    """
        # TODO(hikevin): Change to use our own error codes and let the server
        #                   convert them to http error codes. For now, this is
        #                   at least in a standard JSend format.
        status = 500
        if self._validate_slippy(z, x, y):
            (content, metadata) = self._get_raw(z, x, y)
            if metadata:
                try:
                    m = uss_metadata.USSMetadata(content)
                    status = 200
                    result = {
                        'status': 'success',
                        'sync_token': metadata.last_modified_transaction_id,
                        'data': m.to_json()
                    }
                except ValueError:
                    status = 424
            else:
                status = 404
        else:
            status = 400
        if status != 200:
            result = self._format_status_code_to_jsend(status)
        return result

    def set(self, z, x, y, sync_token, uss_id, ws_scope, operation_format,
            operation_ws, earliest_operation, latest_operation):
        """Sets the metadata for a GridCell.

    Writes data, using the snapshot token for confirming data
    has not been updated since it was last read.

    Args:
      z: zoom level in slippy tile format
      x: x tile number in slippy tile format
      y: y tile number in slippy tile format
      sync_token: token retrieved in the original GET GridCellMetadata,
      uss_id: plain text identifier for the USS,
      ws_scope: scope to use to obtain OAuth token,
      operation_format: output format for operation ws (i.e. NASA, GUTMA),
      operation_ws: submitting USS endpoint where all flights in
        this cell can be retrieved from,
      earliest_operation: lower bound of active or planned flight timestamp,
        used for quick filtering conflicts.
      latest_operation: upper bound of active or planned flight timestamp,
        used for quick filtering conflicts.
    Returns:
      JSend formatted response (https://labs.omniti.com/labs/jsend)
    """
        status = 500
        if self._validate_slippy(z, x, y):
            # first we have to get the cell
            status = 0
            (content, metadata) = self._get_raw(z, x, y)
            if metadata:
                # Quick check of the token, another is done on the actual set to be sure
                #    but this check fails early and fast
                if str(metadata.last_modified_transaction_id) == str(
                        sync_token):
                    try:
                        m = uss_metadata.USSMetadata(content)
                        log.debug('Setting metadata for %s...', uss_id)
                        if not m.upsert_operator(
                                uss_id, ws_scope, operation_format,
                                operation_ws, earliest_operation,
                                latest_operation):
                            log.error(
                                'Failed setting operator for %s with token %s...',
                                uss_id, str(sync_token))
                            raise ValueError
                        status = self._set_raw(z, x, y, m, uss_id, sync_token)
                    except ValueError:
                        status = 424
                else:
                    status = 409
            else:
                status = 404
        else:
            status = 400
        if status == 200:
            # Success, now get the metadata back to send back
            result = self.get(z, x, y)
        else:
            result = self._format_status_code_to_jsend(status)
        return result

    def delete(self, z, x, y, uss_id):
        """Sets the metadata for a GridCell by removing the entry for the USS.

    Args:
      z: zoom level in slippy tile format
      x: x tile number in slippy tile format
      y: y tile number in slippy tile format
      uss_id: is the plain text identifier for the USS
    Returns:
      JSend formatted response (https://labs.omniti.com/labs/jsend)
    """
        status = 500
        if self._validate_slippy(z, x, y):
            # first we have to get the cell
            (content, metadata) = self._get_raw(z, x, y)
            if metadata:
                try:
                    m = uss_metadata.USSMetadata(content)
                    m.remove_operator(uss_id)
                    # TODO(pelletierb): Automatically retry on delete
                    status = self._set_raw(
                        z, x, y, m, uss_id,
                        metadata.last_modified_transaction_id)
                except ValueError:
                    status = 424
            else:
                status = 404
        else:
            status = 400
        if status == 200:
            # Success, now get the metadata back to send back
            (content, metadata) = self._get_raw(z, x, y)
            result = {
                'status': 'success',
                'sync_token': metadata.last_modified_transaction_id,
                'data': m.to_json()
            }
        else:
            result = self._format_status_code_to_jsend(status)
        return result

    ######################################################################
    ################       INTERNAL FUNCTIONS    #########################
    ######################################################################
    def _get_raw(self, z, x, y):
        """Gets the raw content and metadata for a GridCell from zookeeper.

    Args:
      z: zoom level in slippy tile format
      x: x tile number in slippy tile format
      y: y tile number in slippy tile format
    Returns:
      content: USS metadata
      metadata: straight from zookeeper
    """
        path = GRID_PATH + '/'.join(
            (str(z), str(x), str(y))) + USS_METADATA_FILE
        log.debug('Getting metadata from zookeeper@%s...', path)
        self.zk.ensure_path(path)
        c, m = self.zk.get(path)
        if c:
            log.debug('Received raw content and metadata from zookeeper: %s',
                      c)
        if m:
            log.debug('Received raw metadata from zookeeper: %s', m)
        return c, m

    def _set_raw(self, z, x, y, m, uss_id, sync_token):
        """Grabs the lock and updates the raw content for a GridCell in zookeeper.

    Args:
      z: zoom level in slippy tile format
      x: x tile number in slippy tile format
      y: y tile number in slippy tile format
      m: metadata object to write
      uss_id: the plain text identifier for the USS
      sync_token: the sync token received during get operation
    Returns:
      200 for success, 409 for conflict, 408 for unable to get the lock
    """
        status = 500
        path = GRID_PATH + '/'.join(
            (str(z), str(x), str(y))) + USS_METADATA_FILE
        # TODO(hikevin): Remove Lock and use built in set with version
        lock = self.zk.WriteLock(path, uss_id)
        try:
            log.debug('Getting metadata lock from zookeeper@%s...', path)
            lock.acquire(timeout=LOCK_TIMEOUT)
            (content, metadata) = self._get_raw(z, x, y)
            del content
            if str(metadata.last_modified_transaction_id) == str(sync_token):
                log.debug('Setting metadata to %s...', str(m))
                self.zk.set(path, json.dumps(m.to_json()))
                status = 200
            else:
                log.error(
                    'Sync token from USS (%s) does not match token from zk (%s)...',
                    str(sync_token),
                    str(metadata.last_modified_transaction_id))
                status = 409
            log.debug('Releasing the lock...')
            lock.release()
        except LockTimeout:
            log.error('Unable to acquire the lock for %s...', path)
            status = 408
        return status
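    # A version-based alternative to the WriteLock above (per the TODO in
    # _set_raw) would use ZooKeeper's built-in optimistic concurrency: pass
    # the Stat.version obtained from get() back into set(), and map kazoo's
    # BadVersionError onto the 409 conflict path. Hypothetical sketch only,
    # not part of the original class:
    #
    #     from kazoo.exceptions import BadVersionError
    #     try:
    #         self.zk.set(path, json.dumps(m.to_json()),
    #                     version=metadata.version)
    #         status = 200
    #     except BadVersionError:
    #         status = 409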

    def _format_status_code_to_jsend(self, status):
        """Formats a response based on HTTP status code.

    Args:
      status: HTTP status code
    Returns:
      JSend formatted response (https://labs.omniti.com/labs/jsend)
    """

        if status == 200 or status == 204:
            result = {
                'status': 'success',
                'code': 204,
                'message': 'Empty data set.'
            }
        elif status == 400:
            result = {
                'status': 'fail',
                'code': status,
                'message': 'Parameters are not following the correct format.'
            }
        elif status == 404:
            result = {
                'status': 'fail',
                'code': status,
                'message': 'Unable to pull metadata from lock system.'
            }
        elif status == 408:
            result = {
                'status': 'fail',
                'code': status,
                'message': 'Timeout trying to get lock.'
            }
        elif status == 409:
            result = {
                'status': 'fail',
                'code': status,
                'message':
                    'Content in metadata has been updated since provided sync token.'
            }
        elif status == 424:
            result = {
                'status': 'fail',
                'code': status,
                'message':
                    'Content in metadata is not following JSON format guidelines.'
            }
        else:
            result = {
                'status': 'fail',
                'code': status,
                'message': 'Unknown error code occurred.'
            }
        return result

    def _validate_slippy(self, z, x, y):
        """Validates slippy tile ranges.

    https://en.wikipedia.org/wiki/Tiled_web_map
    https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames

    Args:
      z: zoom level in slippy tile format
      x: x tile number in slippy tile format
      y: y tile number in slippy tile format
    Returns:
      true if valid, false if not
    """
        try:
            z = int(z)
            x = int(x)
            y = int(y)
            if not 0 <= z <= 20:
                raise ValueError
            if not 0 <= x < 2**z:
                raise ValueError
            if not 0 <= y < 2**z:
                raise ValueError
            return True
        except (ValueError, TypeError):
            log.error('Invalid slippy format for tiles %sz, %s,%s!', z, x, y)
            return False

#!/bin/blue-python3.8
# This script runs as a cron job many times.
# It triggers the puppet SSL certificate regeneration, waits for certificate revocation
# on puppet server side and removes the local certificate.
#
# The second script (puppetserver-ensemble.py) runs on puppet servers
# and removes the node certificate from Puppet by request in Zookeeper.
import os
import sys
import re
import socket
import argparse
import logging
from subprocess import check_output, check_call

from kazoo.client import KazooClient

ZK_PATH_BASE = "/puppetserver/ensemble"
ZK_PATH_REMOVE_REQUESTS = f"{ZK_PATH_BASE}/remove_requests"
ZK_PATH_REMOVE_REQUESTS_PROCESSED = f"{ZK_PATH_BASE}/remove_requests_processed"
HOSTNAME = socket.gethostname()
CERT_PATH = f"/etc/puppetlabs/puppet/ssl/certs/{HOSTNAME}.pem"
logger = logging.getLogger()


def read_bookings_env_var(var):
    with open("/etc/sysconfig/bookings.puppet") as f:
        lines = f.readlines()
    for line in lines:
        if line.startswith(f"{var}="):
            return line[len(f"{var}="):]
    return None


def get_certificate_alt_names(certificate_path):
    # The function returns the list of alternative names in the puppet certificate
    # /bin/openssl x509 -in /etc/puppetlabs/puppet/ssl/certs/$(uname -n).pem -text | grep -A 1 'Subject Alternative Name'
    command = f"/bin/openssl x509 -in {certificate_path} -text".split()
    certificate_output = check_output(command, universal_newlines=True)
    if not certificate_output:
        logger.info(f"Cannot get the certificate details from {certificate_path}.")
        return []
    lines_with_dns = [line for line in certificate_output.split('\n') if 'DNS:' in line]
    if not lines_with_dns:
        return []
    alt_names = re.findall(r"[\w.-]+\.prod\.booking\.com", lines_with_dns[0])
    if HOSTNAME in alt_names:
        alt_names.remove(HOSTNAME)
    return alt_names


def remove_file(file):
    if os.path.exists(file):
        os.remove(file)
        logging.debug(f"Removed file {file}")


def main():
    parser = argparse.ArgumentParser(description="Regenerate puppet certificate")
    parser.add_argument(
        "--debug",
        action="store_const",
        const=True,
        default=False,
        help="More verbose logging",
    )
    args = parser.parse_args()
    level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(
        level=level,
        format="[{levelname:7s}| {asctime} | {module:10s}] {message}",
        style="{",
        stream=sys.stdout,
    )
    if not os.path.exists(CERT_PATH):
        logging.info(f"Certificate does not exist at {CERT_PATH}. Exiting.")
        return
    zk = KazooClient(hosts=read_bookings_env_var("ZOOKEEPER_CLUSTER"))
    zk.start()
    alt_names = get_certificate_alt_names(CERT_PATH)
    if alt_names:
        logging.info(f"Certificate {CERT_PATH} has alt names {alt_names}. Restarting nginx service.")
        # Cleanup request processing
        if zk.exists(f"{ZK_PATH_REMOVE_REQUESTS_PROCESSED}/{HOSTNAME}"):
            zk.delete(f"{ZK_PATH_REMOVE_REQUESTS_PROCESSED}/{HOSTNAME}")
        check_call("/bin/systemctl restart nginx".split())
        return
    if zk.exists(f"{ZK_PATH_REMOVE_REQUESTS_PROCESSED}/{HOSTNAME}"):
        logging.info(f"Request to remove certificate is processed: {ZK_PATH_REMOVE_REQUESTS_PROCESSED}/{HOSTNAME} exists. Clean up the local certificate.")
        # cleanup existing certificate
        remove_file(f"/etc/puppetlabs/puppet/ssl/certs/{HOSTNAME}.pem")
        remove_file(f"/etc/puppetlabs/puppet/ssl/private_keys/{HOSTNAME}.pem")
        remove_file(f"/etc/puppetlabs/puppet/ssl/public_keys/{HOSTNAME}.pem")
        logging.info("Restarting puppet daemon")
        check_call("/bin/systemctl restart puppet".split())
    else:
        # if the request to clean the certificate on the puppet server side
        # is not processed yet, make a cleanup request
        logging.info("Request the certificate cleanup.")
        zk.ensure_path(f"{ZK_PATH_REMOVE_REQUESTS}/{HOSTNAME}")


if __name__ == '__main__':
    main()
Esempio n. 51
def get_query_server_config(name='beeswax', connector=None):
  if connector and has_connectors(): # TODO: Give empty connector when no connector in use
    query_server = get_query_server_config_via_connector(connector)
  else:
    LOG.debug("Query cluster %s" % name)
    if name == "llap":
      activeEndpoint = cache.get('llap')
      if activeEndpoint is None:
        if HIVE_DISCOVERY_LLAP.get():
          LOG.debug("Checking zookeeper for Hive Server Interactive endpoint")
          zk = KazooClient(hosts=libzookeeper_conf.ENSEMBLE.get(), read_only=True)
          zk.start()
          if HIVE_DISCOVERY_LLAP_HA.get():
            znode = "{0}/instances".format(HIVE_DISCOVERY_LLAP_ZNODE.get())
            LOG.debug("Setting up LLAP with the following node {0}".format(znode))
            if zk.exists(znode):
              hiveservers = zk.get_children(znode)
              for server in hiveservers:
                llap_servers = json.loads(zk.get("{0}/{1}".format(znode, server))[0])["internal"][0]
                if llap_servers["api"] == "activeEndpoint":
                  cache.set("llap", json.dumps({"host": llap_servers["addresses"][0]["host"], "port": llap_servers["addresses"][0]["port"]}), CACHE_TIMEOUT.get())
            else:
              LOG.error("LLAP Endpoint not found, reverting to HiveServer2")
              cache.set("llap", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}), CACHE_TIMEOUT.get())
          else:
            znode = "{0}".format(HIVE_DISCOVERY_LLAP_ZNODE.get())
            LOG.debug("Setting up LLAP with the following node {0}".format(znode))
            if zk.exists(znode):
              hiveservers = zk.get_children(znode)
              for server in hiveservers:
                cache.set("llap", json.dumps({"host": server.split(';')[0].split('=')[1].split(":")[0], "port": server.split(';')[0].split('=')[1].split(":")[1]}))
          zk.stop()
        else:
          LOG.debug("Zookeeper Discovery not enabled, reverting to config values")
          cache.set("llap", json.dumps({"host": LLAP_SERVER_HOST.get(), "port": LLAP_SERVER_THRIFT_PORT.get()}), CACHE_TIMEOUT.get())
      activeEndpoint = json.loads(cache.get("llap"))
    elif name != 'hms' and name != 'impala':
      activeEndpoint = cache.get("hiveserver2")
      if activeEndpoint is None:
        if HIVE_DISCOVERY_HS2.get():
          zk = KazooClient(hosts=libzookeeper_conf.ENSEMBLE.get(), read_only=True)
          zk.start()
          znode = HIVE_DISCOVERY_HIVESERVER2_ZNODE.get()
          LOG.info("Setting up Hive with the following node {0}".format(znode))
          if zk.exists(znode):
            hiveservers = zk.get_children(znode)
            server_to_use = 0 # if CONF.HIVE_SPREAD.get() randint(0, len(hiveservers)-1) else 0
            cache.set("hiveserver2", json.dumps({"host": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[0], "port": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[1]}))
          else:
            cache.set("hiveserver2", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}))
          zk.stop()
        else:
          cache.set("hiveserver2", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}))
      activeEndpoint = json.loads(cache.get("hiveserver2"))

    if name == 'impala':
      from impala.dbms import get_query_server_config as impala_query_server_config
      query_server = impala_query_server_config()
    elif name == 'hms':
      kerberos_principal = hive_site.get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())
      query_server = {
          'server_name': 'hms',
          'server_host': HIVE_METASTORE_HOST.get() if not cluster_config else cluster_config.get('server_host'),
          'server_port': HIVE_METASTORE_PORT.get(),
          'principal': kerberos_principal,
          'transport_mode': 'http' if hive_site.hiveserver2_transport_mode() == 'HTTP' else 'socket',
          'auth_username': AUTH_USERNAME.get(),
          'auth_password': AUTH_PASSWORD.get(),
          'use_sasl': HIVE_USE_SASL.get()
      }
    else:
      kerberos_principal = hive_site.get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())
      query_server = {
          'server_name': 'beeswax',
          'server_host': activeEndpoint["host"],
          'server_port': LLAP_SERVER_PORT.get() if name == 'llap' else HIVE_SERVER_PORT.get(),
          'principal': kerberos_principal,
          'http_url': '%(protocol)s://%(host)s:%(port)s/%(end_point)s' % {
              'protocol': 'https' if hiveserver2_use_ssl() else 'http',
              'host': activeEndpoint["host"],
              'port': activeEndpoint["port"],
              'end_point': hive_site.hiveserver2_thrift_http_path()
            },
          'transport_mode': 'http' if hive_site.hiveserver2_transport_mode() == 'HTTP' else 'socket',
          'auth_username': AUTH_USERNAME.get(),
          'auth_password': AUTH_PASSWORD.get(),
          'use_sasl': HIVE_USE_SASL.get(),
          'close_sessions': CLOSE_SESSIONS.get(),
          'has_session_pool': has_session_pool(),
          'max_number_of_sessions': MAX_NUMBER_OF_SESSIONS.get()
        }

    if name == 'sparksql': # Extends the Hive config, as Spark SQL is very similar
      from spark.conf import SQL_SERVER_HOST as SPARK_SERVER_HOST, SQL_SERVER_PORT as SPARK_SERVER_PORT, USE_SASL as SPARK_USE_SASL

      query_server.update({
          'server_name': 'sparksql',
          'server_host': SPARK_SERVER_HOST.get(),
          'server_port': SPARK_SERVER_PORT.get(),
          'use_sasl': SPARK_USE_SASL.get()
      })

  if not query_server.get('dialect'):
    query_server['dialect'] = query_server['server_name']

  debug_query_server = query_server.copy()
  debug_query_server['auth_password_used'] = bool(debug_query_server.pop('auth_password', None))
  LOG.debug("Query Server: %s" % debug_query_server)

  return query_server
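
The repeated split(';')[0].split('=')[1].split(':') chains above unpack HiveServer2
dynamic service discovery znode names, which follow a
serverUri=host:port;version=...;sequence=... layout. A small helper (hypothetical,
not part of Hue) makes the intent clearer:

def parse_hs2_znode_name(znode_name):
    # Extract (host, port) from a discovery znode name such as
    # 'serverUri=hs2-1.example.com:10000;version=3.1.0;sequence=0000000000'.
    # Returns strings, matching what the cache entries above store.
    server_uri = znode_name.split(';')[0].split('=')[1]
    host, port = server_uri.split(':')
    return host, port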
Esempio n. 52
class ZooAnimal:
    def __init__(self):
        self.zk = KazooClient(hosts=ZOOKEEPER_LOCATION)
        self.zk.start()
        # Use util function to get IP address
        self.ipaddress = [
            ip for ip in list(local_ip4_addr_list())
            if ip.startswith(NETWORK_PREFIX)
        ][0]
        # Inheriting children should assign values to fit the scheme
        # /role/topic
        self.role = None
        self.topic = None
        # Will only be set by pub and sub
        self.broker = None
        # Zookeeper
        # self.election = None
        self.election = self.zk.Election('/broker', self.ipaddress)
        self.zk_seq_id = None
        self.zk_is_a_master = False

    def zookeeper_watcher(self, watch_path):
        @self.zk.DataWatch(watch_path)
        def zookeeper_election(data, stat, event):
            print("Setting election watch.")
            print("Watching node -> ", data)
            if data is None:
                print("Data is none.")
                self.election.run(self.zookeeper_register)
                #self.election.cancel()

    def zookeeper_master(self):
        if not self.zk_is_a_master:
            print("ZOOANIMAL -> Becoming a master.")
            role_topic = "/broker/master"
            data = {'ip': self.ipaddress}
            data_string = json.dumps(data)
            encoded_ip = codecs.encode(data_string, "utf-8")
            self.zk.create(role_topic,
                           ephemeral=True,
                           makepath=True,
                           sequence=True,
                           value=encoded_ip)
            self.zk_is_a_master = True
        return self.zk_is_a_master

    def zookeeper_register(self):
        pass

    # This is a function stub for the get_broker watch callback
    # The child is expected to implement their own logic
    # Pub and Sub need to register_sub()
    def broker_update(self, data):
        print("Broker updated.")
        print("Data -> {}".format(data))
        pass

    def get_broker(self):
        for i in range(10):
            if self.zk.exists(PATH_TO_MASTER_BROKER):
                node_data = self.zk.get(PATH_TO_MASTER_BROKER,
                                        watch=self.broker_update)
                broker_data = node_data[0]
                master_broker = codecs.decode(broker_data, 'utf-8')
                if master_broker != '':
                    self.broker = master_broker
                    return self.broker
                else:
                    raise Exception("No master broker.")
            time.sleep(0.2)
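
ZooAnimal builds on kazoo's Election recipe for master selection. A minimal sketch
of that pattern on its own (the connection string, path, and identifier here are
placeholders, not the example's constants):

from kazoo.client import KazooClient

zk = KazooClient(hosts='127.0.0.1:2181')
zk.start()
election = zk.Election('/broker', 'candidate-1')

def become_master():
    # Runs only once this client wins the election; leadership is held
    # until the function returns.
    print('Elected as master broker.')

election.run(become_master)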
Esempio n. 53
class KazooCommandProxy():
    def __init__(self, module):
        self.module = module
        self.zk = KazooClient(module.params['hosts'])

    def absent(self):
        return self._absent(self.module.params['name'])

    def exists(self, znode):
        return self.zk.exists(znode)

    def list(self):
        children = self.zk.get_children(self.module.params['name'])
        return True, {
            'count': len(children),
            'items': children,
            'msg': 'Retrieved znodes in path.',
            'znode': self.module.params['name']
        }

    def present(self):
        return self._present(self.module.params['name'],
                             self.module.params['value'])

    def get(self):
        return self._get(self.module.params['name'])

    def shutdown(self):
        self.zk.stop()
        self.zk.close()

    def start(self):
        self.zk.start()

    def wait(self):
        return self._wait(self.module.params['name'],
                          self.module.params['timeout'])

    def _absent(self, znode):
        if self.exists(znode):
            self.zk.delete(znode, recursive=self.module.params['recursive'])
            return True, {'changed': True, 'msg': 'The znode was deleted.'}
        else:
            return True, {'changed': False, 'msg': 'The znode does not exist.'}

    def _get(self, path):
        if self.exists(path):
            value, zstat = self.zk.get(path)
            stat_dict = {}
            for i in dir(zstat):
                if not i.startswith('_'):
                    attr = getattr(zstat, i)
                    if isinstance(attr, (int, str)):
                        stat_dict[i] = attr
            result = True, {
                'msg': 'The node was retrieved.',
                'znode': path,
                'value': value,
                'stat': stat_dict
            }
        else:
            result = False, {'msg': 'The requested node does not exist.'}

        return result

    def _present(self, path, value):
        if self.exists(path):
            (current_value, zstat) = self.zk.get(path)
            if value != current_value:
                self.zk.set(path, to_bytes(value))
                return True, {
                    'changed': True,
                    'msg': 'Updated the znode value.',
                    'znode': path,
                    'value': value
                }
            else:
                return True, {
                    'changed': False,
                    'msg': 'No changes were necessary.',
                    'znode': path,
                    'value': value
                }
        else:
            self.zk.create(path, to_bytes(value), makepath=True)
            return True, {
                'changed': True,
                'msg': 'Created a new znode.',
                'znode': path,
                'value': value
            }

    def _wait(self, path, timeout, interval=5):
        lim = time.time() + timeout

        while time.time() < lim:
            if self.exists(path):
                return True, {
                    'msg': 'The node appeared before the configured timeout.',
                    'znode': path,
                    'timeout': timeout
                }
            else:
                time.sleep(interval)

        return False, {
            'msg': 'The node did not appear before the operation timed out.',
            'timeout': timeout,
            'znode': path
        }
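
A rough usage sketch for the proxy, assuming an Ansible-style module object whose
params dict carries the connection and znode settings (the stub below stands in
for AnsibleModule and is purely illustrative):

class FakeModule(object):
    params = {
        'hosts': '127.0.0.1:2181',
        'name': '/app/config',
        'value': 'v1',
        'recursive': False,
        'timeout': 30,
    }

proxy = KazooCommandProxy(FakeModule())
proxy.start()
ok, result = proxy.present()   # creates /app/config or updates its value
proxy.shutdown()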
Esempio n. 54
def zookeeper_resolve_leader(addresses, path):
    """
    Resolve the leader using a znode path. ZooKeeper imposes a total
    order on the elements of the queue, guaranteeing that the
    oldest element of the queue is the first one. We can
    thus return the first address we get from ZooKeeper.
    """
    hosts = ",".join(addresses)

    try:
        zk = KazooClient(hosts=hosts)
        zk.start()
    except Exception as exception:
        raise CLIException(
            "Unable to initialize Zookeeper Client: {error}".format(
                error=exception))

    try:
        children = zk.get_children(path)
    except Exception as exception:
        raise CLIException(
            "Unable to get children of {zk_path}: {error}".format(
                zk_path=path, error=exception))

    masters = sorted(
        # 'json.info' is the prefix for master nodes.
        child for child in children if child.startswith("json.info"))

    address = ""
    for master in masters:
        try:
            node_path = "{path}/{node}".format(path=path, node=master)
            json_data, _ = zk.get(node_path)
        except Exception as exception:
            raise CLIException(
                "Unable to get the value of '{node}': {error}".format(
                    node=node_path, error=exception))

        try:
            data = json.loads(json_data)
        except Exception as exception:
            raise CLIException(
                "Could not load JSON from '{data}': {error}".format(
                    data=json_data, error=str(exception)))

        if ("address" in data and "ip" in data["address"]
                and "port" in data["address"]):
            address = "{ip}:{port}".format(ip=data["address"]["ip"],
                                           port=data["address"]["port"])
            break

    try:
        zk.stop()
    except Exception as exception:
        raise CLIException(
            "Unable to stop Zookeeper Client: {error}".format(error=exception))

    if not address:
        raise CLIException("Unable to resolve the leading"
                           " master using ZooKeeper")
    return address
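
A hedged usage sketch: a typical Mesos-style invocation, where masters register
under a znode path such as /mesos with children named json.info_0000000001,
json.info_0000000002, ... (the endpoints below are placeholders):

leader = zookeeper_resolve_leader(
    ["zk1.example.com:2181", "zk2.example.com:2181"], "/mesos")
print("Leading master at", leader)   # e.g. "10.0.0.5:5050"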
Esempio n. 55
    def read_config_file(self, config_file=None):
        """
        Read the configuration file and initialize the object. If config_file is None, default values are used.
        :param config_file: path to configuration file
        :return:
        """
        # Stop storage service
        self.stop_storage_service()

        config = configparser.ConfigParser()
        if config_file is not None:
            config.read(os.path.realpath(config_file))

        # Main configuration
        self.__id = config.get("OPV", "id", fallback="ID")
        self.__path = config.get("OPV",
                                 "path",
                                 fallback="directory_manager_storage")
        self.__path = os.path.realpath(os.path.expanduser(self.__path))
        self.__host = config.get("OPV",
                                 "host",
                                 fallback=socket.gethostbyname(
                                     socket.gethostname()))
        uid_generator_type = config.get("OPV", "uid_type",
                                        fallback="basic").upper()

        # FTP configuration
        ftp_host = config.get("FTP", "host", fallback="0.0.0.0")
        ftp_port = config.getint("FTP", "port", fallback=2121)
        ftp_logfile = config.get("FTP",
                                 "logfile",
                                 fallback="opv_directory_manager_ftp.log")

        # HTTP configuration
        http_host = config.get("HTTP", "host", fallback="0.0.0.0")
        http_port = config.getint("HTTP", "port", fallback=5050)
        http_logfile = config.get("HTTP",
                                  "logfile",
                                  fallback="opv_directory_manager_http.log")

        # Id
        if uid_generator_type in ["ZOOKEEPER", "ZK"]:
            zk_hosts = config.get("ZOOKEEPER",
                                  "hosts",
                                  fallback="127.0.0.1:2181")
            zk_path = config.get("ZOOKEEPER",
                                 "path",
                                 fallback="/DirectoryManager/increment")
            print(zk_hosts)
            zk = KazooClient(zk_hosts)
            zk.start()
            self.__uid_generator = ZkIDGenerator(zk,
                                                 path=zk_path,
                                                 prefix=self.__id)
        else:
            self.__uid_generator = BasicIDGenerator(prefix=self.__id)

        # Storage
        self.__storage = LocalStorage(self.__path)

        # FTP
        ftp_storage_service = FTP(self.__path,
                                  host=self.__host,
                                  listen_host=ftp_host,
                                  listen_port=ftp_port,
                                  logfile=ftp_logfile)

        # HTTP
        http_storage_service = HTTP(self.__path,
                                    host=self.__host,
                                    listen_host=http_host,
                                    listen_port=http_port,
                                    logfile=http_logfile)

        # Local
        local_storage_service = LocalStorageService(self.__path)

        # Storage service
        self.__storage_service_manager = StorageServiceManager(
            "ftp", ftp_storage_service)
        self.__storage_service_manager.addURI("file", local_storage_service)
        self.__storage_service_manager.addURI("http", http_storage_service)
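
For reference, a minimal configuration file this reader would accept, using only
the sections and keys implied by the fallbacks above (all values illustrative):

[OPV]
id = node-01
path = directory_manager_storage
uid_type = zookeeper

[FTP]
host = 0.0.0.0
port = 2121

[HTTP]
host = 0.0.0.0
port = 5050

[ZOOKEEPER]
hosts = 127.0.0.1:2181
path = /DirectoryManager/increment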
Esempio n. 56
class StaggerLock(object):
    def __init__(self, temp_path, timeout,
                 parent='None', acquire_lock=None, app_state=None):
        """
        :type temp_path: str
        :type timeout: int
        :type parent: str
        :type acquire_lock: zoom.agent.entities.thread_safe_object.ThreadSafeObject or None
        :type app_state: zoom.agent.entities.thread_safe_object.ThreadSafeObject or None
        """
        self._path = temp_path
        self._timeout = timeout
        self._parent = parent
        self._thread = None
        self._prev_state = None
        self._zk = KazooClient(hosts=get_zk_conn_string(),
                               timeout=60.0)
        self._zk.add_listener(self._zk_listener)
        self._log = logging.getLogger('sent.{0}.sl'.format(parent))
        self._counter = 0
        self._acquire_lock = acquire_lock
        self._app_state = app_state

    def join(self):
        if self._thread is not None and self._zk.connected:
            self._thread.join()
            self._close()
        else:
            return

    def start(self):
        """
        This method is to implement a staggered startup.
        A new KazooClient is instantiated b/c of thread-safety issues with the
        election.
        """
        self._zk.start()
        self._acquire_lock.set_value(True)
        self._app_state.set_value(ApplicationState.STAGGERED)
        self._acquire()

    def _acquire(self):
        try:
            while self._acquire_lock.value:
                if self._zk.connected:
                    lock = self._zk.Lock(self._path, identifier=platform.node())
                    if lock.acquire(blocking=True, timeout=5):
                        self._thread = Thread(target=self._sleep_and_unlock,
                                              args=(lock,),
                                              name=str(self))
                        self._thread.daemon = True
                        self._thread.start()
                        break
                    else:
                        pass
                else:
                    self._log.info('No connection to ZK. Will not try to '
                                   'acquire stagger lock.')

        except LockTimeout:
            self._log.debug('Lock timed out. Trying to acquire lock again.')
            self._acquire()

        except Exception as e:
            self._log.error('Unhandled exception: {0}'.format(e))

    def _close(self):
        try:
            self._thread = None
            self._zk.stop()
            self._zk.close()

        # TypeError happens when stop() is called when already stopping
        except TypeError:
            pass
        except Exception as e:
            self._log.debug('Unhandled exception: {0}'.format(e))

    @catch_exception(ConnectionClosedError)
    def _sleep_and_unlock(self, lck):
        self._log.info('Got stagger lock. Sleeping for {0} seconds.'
                       .format(self._timeout))
        time.sleep(self._timeout)
        lck.release()
        self._log.info('Released stagger lock.')

    def _close_connection(self):
        self._close()
        self._acquire_lock.set_value(False)

    def _zk_listener(self, state):
        """
        The callback function that runs when the connection state to Zookeeper
        changes.
        Either passes or immediately spawns a new thread that resets any
        watches, etc., so that it can listen to future connection state changes.
        """
        try:
            self._log.info('Zookeeper Connection went from {0} to {1}'
                           .format(self._prev_state, state))
            if self._prev_state is None and state == KazooState.CONNECTED:
                pass
            elif self._prev_state == KazooState.LOST and state == KazooState.CONNECTED:
                pass
            elif self._prev_state == KazooState.CONNECTED and state == KazooState.SUSPENDED:
                self._zk.handler.spawn(self._close_connection)
            elif self._prev_state == KazooState.CONNECTED and state == KazooState.LOST:
                self._zk.handler.spawn(self._close_connection)
            elif self._prev_state == KazooState.SUSPENDED and state == KazooState.LOST:
                self._zk.handler.spawn(self._close_connection)
            elif self._prev_state == KazooState.SUSPENDED and state == KazooState.CONNECTED:
                pass
            elif state == KazooState.CONNECTED:
                self._zk.handler.spawn(self._close_connection)
            else:
                self._log.info('Zookeeper Connection in unknown state: {0}'
                               .format(state))
                return
            self._prev_state = state

        except Exception:
            self._log.exception('An uncaught exception has occurred')

    def __repr__(self):
        return 'StaggerLock(path={0}, timeout={1})'.format(self._path,
                                                           self._timeout)

    def __str__(self):
        return self.__repr__()
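
A rough construction sketch for StaggerLock, assuming the ThreadSafeObject wrapper
from zoom.agent.entities.thread_safe_object (path, timeout, and initial values are
placeholders):

acquire_flag = ThreadSafeObject(True)
app_state = ThreadSafeObject(None)   # start() will set it to STAGGERED
stagger = StaggerLock('/tmp/stagger/myapp', timeout=30, parent='myapp',
                      acquire_lock=acquire_flag, app_state=app_state)
stagger.start()   # acquires the lock; a daemon thread releases it after 30 s
stagger.join()

Esempio n. 57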
class AnalyticsDiscovery(gevent.Greenlet):

    def _sandesh_connection_info_update(self, status, message):

        new_conn_state = getattr(ConnectionStatus, status)
        ConnectionState.update(conn_type = ConnectionType.ZOOKEEPER,
                name = self._svc_name, status = new_conn_state,
                message = message,
                server_addrs = self._zk_server.split(','))

        if (self._conn_state and self._conn_state != ConnectionStatus.DOWN and
                new_conn_state == ConnectionStatus.DOWN):
            msg = 'Connection to Zookeeper down: %s' %(message)
            self._logger.error(msg)
        if (self._conn_state and self._conn_state != new_conn_state and
                new_conn_state == ConnectionStatus.UP):
            msg = 'Connection to Zookeeper ESTABLISHED'
            self._logger.error(msg)

        self._conn_state = new_conn_state
        #import pdb; pdb.set_trace()
    # end _sandesh_connection_info_update

    def _zk_listen(self, state):
        self._logger.error("Analytics Discovery listen %s" % str(state))
        if state == KazooState.CONNECTED:
            if self._conn_state != ConnectionStatus.UP:
                self._sandesh_connection_info_update(status='UP', message='')
                self._logger.error("Analytics Discovery to publish %s" % str(self._pubinfo))
                self._reconnect = True
            else:
                self._logger.error("Analytics Discovery already connected")
        else:
            self._logger.error("Analytics Discovery NOT connected")
            if self._conn_state == ConnectionStatus.UP:
                self._sandesh_connection_info_update(status='DOWN', message='')

    def _zk_datawatch(self, watcher, child, data, stat, event):
        self._logger.error(\
                "Analytics Discovery %s ChildData : child %s, data %s, event %s" % \
                (watcher, child, data, event))
        self._wchildren[watcher][child] = data
        if self._watchers[watcher]:
            self._watchers[watcher](self._wchildren[watcher])

    def _zk_watcher(self, watcher, children):
        self._logger.error("Analytics Discovery Children %s" % children)
        self._reconnect = True

    def __init__(self, logger, zkservers, svc_name, inst,
                watchers={}, zpostfix=""):
        gevent.Greenlet.__init__(self)
        self._svc_name = svc_name
        self._inst = inst
        self._zk_server = zkservers
        # initialize logging and other stuff
        if logger is None:
            logging.basicConfig()
            self._logger = logging
        else:
            self._logger = logger
        self._conn_state = None
        self._sandesh_connection_info_update(status='INIT', message='')
        self._zk = KazooClient(hosts=zkservers)
        self._pubinfo = None
        self._watchers = watchers
        self._wchildren = {}
        self._zpostfix = zpostfix
        self._basepath = "/analytics-discovery-" + self._zpostfix
        self._reconnect = None

    def publish(self, pubinfo):
        self._pubinfo = pubinfo
        #import pdb; pdb.set_trace()
        if self._conn_state == ConnectionStatus.UP:
            try:
                self._logger.error("ensure %s" % (self._basepath + "/" + self._svc_name))
                self._logger.error("zk state %s (%s)" % (self._zk.state, self._zk.client_state))
                self._zk.ensure_path(self._basepath + "/" + self._svc_name)
                self._logger.error("check for %s/%s/%s" % \
                                (self._basepath, self._svc_name, self._inst))
                if pubinfo is not None:
                    if self._zk.exists("%s/%s/%s" % \
                            (self._basepath, self._svc_name, self._inst)):
                        self._zk.set("%s/%s/%s" % \
                                (self._basepath, self._svc_name, self._inst),
                                self._pubinfo)
                    else:
                        self._zk.create("%s/%s/%s" % \
                                (self._basepath, self._svc_name, self._inst),
                                self._pubinfo, ephemeral=True)
                else:
                    self._logger.error("cannot publish empty info")
            except Exception as ex:
                template = "Exception {0} in AnalyticsDiscovery publish. Args:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.error("%s : traceback %s for %s info %s" % \
                        (messag, traceback.format_exc(), self._svc_name, str(self._pubinfo)))
                self._sandesh_connection_info_update(status='DOWN', message='')
                self._reconnect = True
        else:
            self._logger.error("Analytics Discovery cannot publish while down")

    def _run(self):
        while True:
            try:
                self._zk.start()
                break
            except gevent.event.Timeout as e:
                # Update connection info
                self._sandesh_connection_info_update(status='DOWN',
                                                         message=str(e))
                gevent.sleep(1)
                # Zookeeper is also throwing exception due to delay in master election
            except Exception as e:
                # Update connection info
                self._sandesh_connection_info_update(status='DOWN',
                                                     message=str(e))
                gevent.sleep(1)

        try:
            # Update connection info
            self._sandesh_connection_info_update(status='UP', message='')
            self._reconnect = False
            # Done connecting to ZooKeeper

            self._zk.add_listener(self._zk_listen)
            for wk in self._watchers.keys():
                self._zk.ensure_path(self._basepath + "/" + wk)
                self._wchildren[wk] = {}
                self._zk.ChildrenWatch(self._basepath + "/" + wk,
                        partial(self._zk_watcher, wk))

            # Trigger the initial publish
            self._reconnect = True

            while True:
                try:
                    gevent.sleep(10)

                    # If a reconnect happens during processing, don't lose it
                    while self._reconnect:
                        self._reconnect = False
                        if self._pubinfo:
                            self.publish(self._pubinfo)

                        for wk in self._watchers.keys():
                            self._zk.ensure_path(self._basepath + "/" + wk)
                            children = self._zk.get_children(self._basepath + "/" + wk)

                            old_children = set(self._wchildren[wk].keys())
                            new_children = set(children)

                            # Remove contents for the children who are gone
                            # (DO NOT remove the watch)
                            for elem in old_children - new_children:
                                 self._wchildren[wk][elem] = None

                            # Overwrite existing children, or create new ones
                            for elem in new_children:
                                # Create a watch for new children
                                if elem not in self._wchildren[wk]:
                                    self._zk.DataWatch(self._basepath + "/" + \
                                            wk + "/" + elem,
                                            partial(self._zk_datawatch, wk, elem))
                                self._wchildren[wk][elem], _ = \
                                        self._zk.get(self._basepath + "/" + wk + "/" + elem)
                                self._logger.error(\
                                    "Analytics Discovery %s ChildData : child %s, data %s, event %s" % \
                                    (wk, elem, self._wchildren[wk][elem], "GET"))
                            if self._watchers[wk]:
                                self._watchers[wk](self._wchildren[wk])

                except gevent.GreenletExit:
                    self._logger.error("Exiting AnalyticsDiscovery for %s" % \
                            self._svc_name)
                    self._zk.stop()
                    break

                except Exception as ex:
                    template = "Exception {0} in AnalyticsDiscovery reconnect. Args:\n{1!r}"
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.error("%s : traceback %s for %s info %s" % \
                        (messag, traceback.format_exc(), self._svc_name, str(self._pubinfo)))
                    self._reconnect = True

        except Exception as ex:
            template = "Exception {0} in AnalyticsDiscovery run. Args:\n{1!r}"
            messag = template.format(type(ex).__name__, ex.args)
            self._logger.error("%s : traceback %s for %s info %s" % \
                    (messag, traceback.format_exc(), self._svc_name, str(self._pubinfo)))
            raise SystemExit
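Esempio n. 58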
class ZooKeeper(AbstractDCS):
    def __init__(self, config):
        super(ZooKeeper, self).__init__(config)

        hosts = config.get('hosts', [])
        if isinstance(hosts, list):
            hosts = ','.join(hosts)

        self._client = KazooClient(
            hosts,
            handler=PatroniSequentialThreadingHandler(config['retry_timeout']),
            timeout=config['ttl'],
            connection_retry=KazooRetry(max_delay=1,
                                        max_tries=-1,
                                        sleep_func=time.sleep),
            command_retry=KazooRetry(deadline=config['retry_timeout'],
                                     max_delay=1,
                                     max_tries=-1,
                                     sleep_func=time.sleep))
        self._client.add_listener(self.session_listener)

        self._fetch_cluster = True

        self._orig_kazoo_connect = self._client._connection._connect
        self._client._connection._connect = self._kazoo_connect

        self._client.start()

    def _kazoo_connect(self, *args):
        """Kazoo is using Ping's to determine health of connection to zookeeper. If there is no
        response on Ping after Ping interval (1/2 from read_timeout) it will consider current
        connection dead and try to connect to another node. Without this "magic" it was taking
        up to 2/3 from session timeout (ttl) to figure out that connection was dead and we had
        only small time for reconnect and retry.

        This method is needed to return different value of read_timeout, which is not calculated
        from negotiated session timeout but from value of `loop_wait`. And it is 2 sec smaller
        than loop_wait, because we can spend up to 2 seconds when calling `touch_member()` and
        `write_leader_optime()` methods, which also may hang..."""

        ret = self._orig_kazoo_connect(*args)
        return max(self.loop_wait - 2, 2) * 1000, ret[1]
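        # Worked example (illustrative): with loop_wait = 10 the line above
        # yields read_timeout = max(10 - 2, 2) * 1000 = 8000 ms, so Kazoo
        # starts pinging after ~4 s of silence (1/2 of read_timeout) instead
        # of waiting on the negotiated session timeout to notice a dead
        # connection.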

    def session_listener(self, state):
        if state in [KazooState.SUSPENDED, KazooState.LOST]:
            self.cluster_watcher(None)

    def cluster_watcher(self, event):
        self._fetch_cluster = True
        self.event.set()

    def reload_config(self, config):
        self.set_retry_timeout(config['retry_timeout'])

        loop_wait = config['loop_wait']

        loop_wait_changed = self._loop_wait != loop_wait
        self._loop_wait = loop_wait
        self._client.handler.set_connect_timeout(loop_wait)

        # We need to reestablish connection to zookeeper if we want to change
        # read_timeout (and Ping interval respectively), because read_timeout
        # is calculated in `_kazoo_connect` method. If we are changing ttl at
        # the same time, set_ttl method will reestablish connection and return
        # `!True`, otherwise we will close existing connection and let kazoo
        # open the new one.
        if not self.set_ttl(int(config['ttl'] * 1000)) and loop_wait_changed:
            self._client._connection._socket.close()

    def set_ttl(self, ttl):
        """It is not possible to change ttl (session_timeout) in zookeeper without
        destroying old session and creating the new one. This method returns `!True`
        if session_timeout has been changed (`restart()` has been called)."""
        if self._client._session_timeout != ttl:
            self._client._session_timeout = ttl
            self._client.restart()
            return True

    @property
    def ttl(self):
        return self._client._session_timeout

    def set_retry_timeout(self, retry_timeout):
        retry = self._client.retry if isinstance(
            self._client.retry, KazooRetry) else self._client._retry
        retry.deadline = retry_timeout

    def get_node(self, key, watch=None):
        try:
            ret = self._client.get(key, watch)
            return (ret[0].decode('utf-8'), ret[1])
        except NoNodeError:
            return None

    @staticmethod
    def member(name, value, znode):
        return Member.from_node(znode.version, name, znode.ephemeralOwner,
                                value)

    def get_children(self, key, watch=None):
        try:
            return self._client.get_children(key, watch)
        except NoNodeError:
            return []

    def load_members(self, sync_standby):
        members = []
        for member in self.get_children(self.members_path,
                                        self.cluster_watcher):
            watch = member == sync_standby and self.cluster_watcher or None
            data = self.get_node(self.members_path + member, watch)
            if data is not None:
                members.append(self.member(member, *data))
        return members

    def _inner_load_cluster(self):
        self._fetch_cluster = False
        self.event.clear()
        nodes = set(
            self.get_children(self.client_path(''), self.cluster_watcher))
        if not nodes:
            self._fetch_cluster = True

        # get initialize flag
        initialize = (self.get_node(self.initialize_path)
                      or [None])[0] if self._INITIALIZE in nodes else None

        # get global dynamic configuration
        config = self.get_node(
            self.config_path,
            watch=self.cluster_watcher) if self._CONFIG in nodes else None
        config = config and ClusterConfig.from_node(config[1].version,
                                                    config[0], config[1].mzxid)

        # get timeline history
        history = self.get_node(
            self.history_path,
            watch=self.cluster_watcher) if self._HISTORY in nodes else None
        history = history and TimelineHistory.from_node(
            history[1].mzxid, history[0])

        # get last leader operation
        last_leader_operation = self._OPTIME in nodes and self._fetch_cluster and self.get_node(
            self.leader_optime_path)
        last_leader_operation = last_leader_operation and int(
            last_leader_operation[0]) or 0

        # get synchronization state
        sync = self.get_node(
            self.sync_path,
            watch=self.cluster_watcher) if self._SYNC in nodes else None
        sync = SyncState.from_node(sync and sync[1].version, sync and sync[0])

        # get list of members
        sync_standby = sync.leader == self._name and sync.sync_standby or None
        members = self.load_members(
            sync_standby) if self._MEMBERS[:-1] in nodes else []

        # get leader
        leader = self.get_node(
            self.leader_path) if self._LEADER in nodes else None
        if leader:
            client_id = self._client.client_id
            if not self._ctl and leader[0] == self._name and client_id is not None \
                    and client_id[0] != leader[1].ephemeralOwner:
                logger.info(
                    'I am leader but not owner of the session. Removing leader node'
                )
                self._client.delete(self.leader_path)
                leader = None

            if leader:
                member = Member(-1, leader[0], None, {})
                member = ([m for m in members if m.name == leader[0]]
                          or [member])[0]
                leader = Leader(leader[1].version, leader[1].ephemeralOwner,
                                member)
                self._fetch_cluster = member.index == -1

        # failover key
        failover = self.get_node(
            self.failover_path,
            watch=self.cluster_watcher) if self._FAILOVER in nodes else None
        failover = failover and Failover.from_node(failover[1].version,
                                                   failover[0])

        return Cluster(initialize, config, leader, last_leader_operation,
                       members, failover, sync, history)

    def _load_cluster(self):
        cluster = self.cluster
        if self._fetch_cluster or cluster is None:
            try:
                cluster = self._client.retry(self._inner_load_cluster)
            except Exception:
                logger.exception('get_cluster')
                self.cluster_watcher(None)
                raise ZooKeeperError('ZooKeeper is not responding properly')
        return cluster

    def _create(self, path, value, retry=False, ephemeral=False):
        try:
            if retry:
                self._client.retry(self._client.create,
                                   path,
                                   value,
                                   makepath=True,
                                   ephemeral=ephemeral)
            else:
                self._client.create_async(path,
                                          value,
                                          makepath=True,
                                          ephemeral=ephemeral).get(timeout=1)
            return True
        except Exception:
            logger.exception('Failed to create %s', path)
        return False

    def attempt_to_acquire_leader(self, permanent=False):
        ret = self._create(self.leader_path,
                           self._name.encode('utf-8'),
                           retry=True,
                           ephemeral=not permanent)
        if not ret:
            logger.info('Could not take out TTL lock')
        return ret

    def _set_or_create(self,
                       key,
                       value,
                       index=None,
                       retry=False,
                       do_not_create_empty=False):
        value = value.encode('utf-8')
        try:
            if retry:
                self._client.retry(self._client.set,
                                   key,
                                   value,
                                   version=index or -1)
            else:
                self._client.set_async(key, value, version=index
                                       or -1).get(timeout=1)
            return True
        except NoNodeError:
            if do_not_create_empty and not value:
                return True
            elif index is None:
                return self._create(key, value, retry)
            else:
                return False
        except Exception:
            logger.exception('Failed to update %s', key)
        return False

    def set_failover_value(self, value, index=None):
        return self._set_or_create(self.failover_path, value, index)

    def set_config_value(self, value, index=None):
        return self._set_or_create(self.config_path, value, index, retry=True)

    def initialize(self, create_new=True, sysid=""):
        sysid = sysid.encode('utf-8')
        return self._create(self.initialize_path, sysid, retry=True) if create_new \
            else self._client.retry(self._client.set, self.initialize_path, sysid)

    def touch_member(self, data, permanent=False):
        cluster = self.cluster
        member = cluster and cluster.get_member(self._name,
                                                fallback_to_leader=False)
        encoded_data = json.dumps(data, separators=(',', ':')).encode('utf-8')
        if member and (self._client.client_id is not None
                       and member.session != self._client.client_id[0] or
                       not (deep_compare(member.data.get('tags', {}),
                                         data.get('tags', {})) and
                            member.data.get('version') == data.get('version')
                            and member.data.get('checkpoint_after_promote')
                            == data.get('checkpoint_after_promote'))):
            try:
                self._client.delete_async(self.member_path).get(timeout=1)
            except NoNodeError:
                pass
            except Exception:
                return False
            member = None

        if member:
            if deep_compare(data, member.data):
                return True
        else:
            try:
                self._client.create_async(
                    self.member_path,
                    encoded_data,
                    makepath=True,
                    ephemeral=not permanent).get(timeout=1)
                return True
            except Exception as e:
                if not isinstance(e, NodeExistsError):
                    logger.exception('touch_member')
                    return False
        try:
            self._client.set_async(self.member_path,
                                   encoded_data).get(timeout=1)
            return True
        except Exception:
            logger.exception('touch_member')

        return False

    def take_leader(self):
        return self.attempt_to_acquire_leader()

    def _write_leader_optime(self, last_operation):
        return self._set_or_create(self.leader_optime_path, last_operation)

    def _update_leader(self):
        return True

    def delete_leader(self):
        self._client.restart()
        return True

    def _cancel_initialization(self):
        node = self.get_node(self.initialize_path)
        if node:
            self._client.delete(self.initialize_path, version=node[1].version)

    def cancel_initialization(self):
        try:
            self._client.retry(self._cancel_initialization)
        except Exception:
            logger.exception("Unable to delete initialize key")

    def delete_cluster(self):
        try:
            return self._client.retry(self._client.delete,
                                      self.client_path(''),
                                      recursive=True)
        except NoNodeError:
            return True

    def set_history_value(self, value):
        return self._set_or_create(self.history_path, value)

    def set_sync_state_value(self, value, index=None):
        return self._set_or_create(self.sync_path,
                                   value,
                                   index,
                                   retry=True,
                                   do_not_create_empty=True)

    def delete_sync_state(self, index=None):
        return self.set_sync_state_value("{}", index)

    def watch(self, leader_index, timeout):
        if super(ZooKeeper, self).watch(leader_index, timeout):
            self._fetch_cluster = True
        return self._fetch_cluster
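
The config mapping consumed by this class needs at least hosts, ttl, retry_timeout
and loop_wait. A minimal construction sketch (values illustrative; in a real
Patroni deployment the AbstractDCS base class expects further keys such as scope
and name):

config = {
    'hosts': ['zk1.example.com:2181', 'zk2.example.com:2181'],
    'ttl': 30,
    'retry_timeout': 10,
    'loop_wait': 10,
}
dcs = ZooKeeper(config)   # connects immediately; __init__ calls self._client.start()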
Esempio n. 59
class ZKHandler(object):
    def __init__(self, config, logger=None):
        """
        Initialize an instance of the ZKHandler class with config

        A zk_conn object will be created but not started

        A ZKSchema instance will be created
        """
        self.encoding = "utf8"
        self.coordinators = config["coordinators"]
        self.logger = logger
        self.zk_conn = KazooClient(hosts=self.coordinators)
        self._schema = ZKSchema()

    #
    # Class meta-functions
    #
    def coordinators(self):
        return str(self.coordinators)

    def log(self, message, state=""):
        if self.logger is not None:
            self.logger.out(message, state)
        else:
            print(message)

    #
    # Properties
    #
    @property
    def schema(self):
        return self._schema

    #
    # State/connection management
    #
    def listener(self, state):
        """
        Listen for KazooState changes and log accordingly.

        This function does nothing except log the state; Kazoo handles the rest.
        """
        if state == KazooState.CONNECTED:
            self.log("Connection to Zookeeper resumed", state="o")
        else:
            self.log(
                "Connection to Zookeeper lost with state {}".format(state),
                state="w")

    def connect(self, persistent=False):
        """
        Start the zk_conn object and connect to the cluster
        """
        try:
            self.zk_conn.start()
            if persistent:
                self.log("Connection to Zookeeper started", state="o")
                self.zk_conn.add_listener(self.listener)
        except Exception as e:
            raise ZKConnectionException(self, e)

    def disconnect(self, persistent=False):
        """
        Stop and close the zk_conn object and disconnect from the cluster

        The class instance may be reused later (avoids persistent connections)
        """
        self.zk_conn.stop()
        self.zk_conn.close()
        if persistent:
            self.log("Connection to Zookeeper terminated", state="o")

    #
    # Schema helper actions
    #
    def get_schema_path(self, key):
        """
        Get the Zookeeper path for {key} from the current schema based on its format.

        If {key} is a tuple of length 2, it's treated as a path plus an item instance of that path (e.g. a node, a VM, etc.).

        If {key} is a tuple of length 4, it is treated as a path plus an item instance, as well as another item instance of the subpath.

        If {key} is just a string, it's treated as a lone path (mostly used for the 'base' schema group).

        Otherwise, returns None since this is not a valid key.

        This function also handles the special case where a string that already looks like a path (i.e. starts with '/') is passed;
        in that case it will silently return the same path back. This existed mostly to support migration and is deprecated.
        """
        if isinstance(key, tuple):
            # This is a key tuple with both an ipath and an item
            if len(key) == 2:
                # 2-length normal tuple
                ipath, item = key
            elif len(key) == 4:
                # 4-length sub-level tuple
                ipath, item, sub_ipath, sub_item = key
                return self.schema.path(ipath, item=item) + self.schema.path(
                    sub_ipath, item=sub_item)
            else:
                # This is an invalid key
                return None
        elif isinstance(key, str):
            # This is a key string with just an ipath
            ipath = key
            item = None

            # This is a raw key path, used by backup/restore functionality
            if re.match(r"^/", ipath):
                return ipath
        else:
            # This is an invalid key
            return None

        return self.schema.path(ipath, item=item)

    #
    # Key Actions
    #
    def exists(self, key):
        """
        Check if a key exists
        """
        path = self.get_schema_path(key)
        if path is None:
            # This path is invalid, this is likely due to missing schema entries, so return False
            return False

        stat = self.zk_conn.exists(path)
        if stat:
            return True
        else:
            return False

    def read(self, key):
        """
        Read data from a key
        """
        try:
            path = self.get_schema_path(key)
            if path is None:
                # This path is invalid; this is likely due to missing schema entries, so return None
                return None

            return self.zk_conn.get(path)[0].decode(self.encoding)
        except NoNodeError:
            return None

    def write(self, kvpairs):
        """
        Create or update one or more keys' data
        """
        if type(kvpairs) is not list:
            self.log("ZKHandler error: Key-value sequence is not a list",
                     state="e")
            return False

        transaction = self.zk_conn.transaction()
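        # Every create/set/check queued below is sent to ZooKeeper as a
        # single multi() request when commit() is called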

        for kvpair in kvpairs:
            if type(kvpair) is not tuple:
                self.log(
                    "ZKHandler error: Key-value pair '{}' is not a tuple".
                    format(kvpair),
                    state="e",
                )
                return False

            key = kvpair[0]
            value = kvpair[1]

            path = self.get_schema_path(key)
            if path is None:
                # This path is invalid; this is likely due to missing schema entries, so continue
                continue

            if not self.exists(key):
                # Creating a new key
                transaction.create(path, str(value).encode(self.encoding))

            else:
                # Updating an existing key
                data = self.zk_conn.get(path)
                version = data[1].version

                # After set_data executes inside the transaction, the node's
                # version will have been bumped by exactly one
                new_version = version + 1

                # Update the data
                transaction.set_data(path, str(value).encode(self.encoding))

                # Verify the node reached the expected version at commit time
                try:
                    transaction.check(path, new_version)
                except TypeError:
                    self.log(
                        "ZKHandler error: Key '{}' does not match expected version"
                        .format(path),
                        state="e",
                    )
                    return False

        try:
            transaction.commit()
            return True
        except Exception as e:
            self.log(
                "ZKHandler error: Failed to commit transaction: {}".format(e),
                state="e")
            return False

    def delete(self, keys, recursive=True):
        """
        Delete a key or list of keys (defaults to recursive)
        """
        if type(keys) is not list:
            keys = [keys]

        for key in keys:
            if self.exists(key):
                try:
                    path = self.get_schema_path(key)
                    self.zk_conn.delete(path, recursive=recursive)
                except Exception as e:
                    self.log(
                        "ZKHandler error: Failed to delete key {}: {}".format(
                            path, e),
                        state="e",
                    )
                    return False

        return True

    def children(self, key):
        """
        List all children of a key
        """
        try:
            path = self.get_schema_path(key)
            if path is None:
                # This path is invalid; this is likely due to missing schema entries, so return None
                return None

            return self.zk_conn.get_children(path)
        except NoNodeError:
            return None

    def rename(self, kkpairs):
        """
        Rename one or more keys to a new value
        """
        if type(kkpairs) is not list:
            self.log("ZKHandler error: Key-key sequence is not a list",
                     state="e")
            return False

        transaction = self.zk_conn.transaction()

        def rename_element(transaction, source_path, destination_path):
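            # Recursively copy the node and all of its children to the
            # destination, then delete the source within the same transaction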
            data = self.zk_conn.get(source_path)[0]
            transaction.create(destination_path, data)

            if self.children(source_path):
                for child_path in self.children(source_path):
                    child_source_path = "{}/{}".format(source_path, child_path)
                    child_destination_path = "{}/{}".format(
                        destination_path, child_path)
                    rename_element(transaction, child_source_path,
                                   child_destination_path)

            transaction.delete(source_path)

        for kkpair in kkpairs:
            if type(kkpair) is not tuple:
                self.log(
                    "ZKHandler error: Key-key pair '{}' is not a tuple".format(
                        kkpair),
                    state="e",
                )
                return False

            source_key = kkpair[0]
            source_path = self.get_schema_path(source_key)
            if source_path is None:
                # This path is invalid; this is likely due to missing schema entries, so continue
                continue

            destination_key = kkpair[1]
            destination_path = self.get_schema_path(destination_key)
            if destination_path is None:
                # This path is invalid; this is likely due to missing schema entries, so continue
                continue

            if not self.exists(source_key):
                self.log(
                    "ZKHander error: Source key '{}' does not exist".format(
                        source_path),
                    state="e",
                )
                return False

            if self.exists(destination_key):
                self.log(
                    "ZKHander error: Destination key '{}' already exists".
                    format(destination_path),
                    state="e",
                )
                return False

            rename_element(transaction, source_path, destination_path)

        try:
            transaction.commit()
            return True
        except Exception as e:
            self.log(
                "ZKHandler error: Failed to commit transaction: {}".format(e),
                state="e")
            return False

    #
    # Lock actions
    #
    def readlock(self, key):
        """
        Create a read lock on a key; the caller is responsible for acquiring and releasing it
        """
        count = 1
        lock = None

        path = self.get_schema_path(key)

        while True:
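            # Retry lock construction up to five times, backing off 0.5 s
            # between attempts, before giving up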
            try:
                lock_id = str(uuid.uuid1())
                lock = self.zk_conn.ReadLock(path, lock_id)
                break
            except NoNodeError:
                self.log(
                    "ZKHandler warning: Failed to acquire read lock on nonexistent path {}"
                    .format(path),
                    state="e",
                )
                return None
            except Exception as e:
                if count > 5:
                    self.log(
                        "ZKHandler warning: Failed to acquire read lock after 5 tries: {}"
                        .format(e),
                        state="e",
                    )
                    break
                else:
                    time.sleep(0.5)
                    count += 1
                    continue

        return lock

    def writelock(self, key):
        """
        Create a write lock on a key; the caller is responsible for acquiring and releasing it
        """
        count = 1
        lock = None

        path = self.get_schema_path(key)

        while True:
            try:
                lock_id = str(uuid.uuid1())
                lock = self.zk_conn.WriteLock(path, lock_id)
                break
            except NoNodeError:
                self.log(
                    "ZKHandler warning: Failed to acquire write lock on nonexistent path {}"
                    .format(path),
                    state="e",
                )
                return None
            except Exception as e:
                if count > 5:
                    self.log(
                        "ZKHandler warning: Failed to acquire write lock after 5 tries: {}"
                        .format(e),
                        state="e",
                    )
                    break
                else:
                    time.sleep(0.5)
                    count += 1
                    continue

        return lock

    def exclusivelock(self, key):
        """
        Create an exclusive lock on a key; the caller is responsible for acquiring and releasing it
        """
        count = 1
        lock = None

        path = self.get_schema_path(key)

        while True:
            try:
                lock_id = str(uuid.uuid1())
                lock = self.zk_conn.Lock(path, lock_id)
                break
            except NoNodeError:
                self.log(
                    "ZKHandler warning: Failed to acquire exclusive lock on nonexistent path {}"
                    .format(path),
                    state="e",
                )
                return None
            except Exception as e:
                if count > 5:
                    self.log(
                        "ZKHandler warning: Failed to acquire exclusive lock after 5 tries: {}"
                        .format(e),
                        state="e",
                    )
                    break
                else:
                    time.sleep(0.5)
                    count += 1
                    continue

        return lock
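
A hypothetical usage sketch for ZKHandler (not part of the original source): the config layout and the 'base.config' schema key are assumptions made for illustration.

# Sketch only: a transactional write/read round trip with ZKHandler.
config = {"coordinators": "coord1:2181,coord2:2181,coord3:2181"}
zkhandler = ZKHandler(config)
zkhandler.connect(persistent=True)

# write() takes a list of (key, value) tuples and applies them in a
# single ZooKeeper transaction
if zkhandler.write([("base.config", "example-value")]):
    print(zkhandler.read("base.config"))

zkhandler.disconnect(persistent=True)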
Example no. 60
class Applier:
    def __init__(self):
        self._zk = KazooClient(hosts=f'{os.getenv("ZOOKEEPER_HOST")}:2181')
        self._logger = logging.getLogger(__name__)
        self._logger.setLevel(
            logging.getLevelName(os.getenv("LOG_LEVEL", "INFO")))
        ch = logging.StreamHandler()
        ch.setLevel(logging.getLevelName(os.getenv("LOG_LEVEL", "INFO")))
        self._logger.addHandler(ch)

    def start(self):
        self._logger.debug("Applier started")
        self._zk.start()
        self._attempt_to_apply_next_target()

        scheduler = BlockingScheduler(timezone="UTC")
        scheduler.add_job(self._attempt_to_apply_next_target,
                          'interval',
                          minutes=1)
        scheduler.start()

    def stop(self):
        self._zk.stop()

    def _attempt_to_apply_next_target(self):
        if self._is_next_target_ready():
            self._apply_next_target()

    def _apply_next_target(self):
        self._logger.info("Applying next target")
        self._zk.ensure_path(ZK_CURRENT_TARGET)
        next_target_id = self._zk.get(ZK_NEXT_TARGET)[0]

        tx = self._zk.transaction()
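        # Clear NEXT and promote its id to CURRENT in one transaction, so
        # readers never observe a half-applied target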
        tx.set_data(ZK_NEXT_TARGET, b'')
        tx.set_data(ZK_CURRENT_TARGET, next_target_id)
        tx.commit()

    def _is_next_target_ready(self):
        if self._zk.exists(ZK_NEXT_TARGET) is None:
            return False

        next_target_id = self._zk.get(ZK_NEXT_TARGET)[0].decode()
        if (not next_target_id or
                self._zk.exists(f'/phrases/distributor/{next_target_id}') is None):
            return False

        partitions = self._zk.get_children(
            f'/phrases/distributor/{next_target_id}/partitions')
        if not partitions:
            return False

        for partition in partitions:
            nodes_path = f'/phrases/distributor/{next_target_id}/partitions/{partition}/nodes'
            nodes = self._zk.get_children(nodes_path)

            if len(nodes) < NUMBER_NODES_PER_PARTITION:
                return False

            for node in nodes:
                hostname = self._zk.get(f'{nodes_path}/{node}')[0].decode()
                if not hostname:
                    return False

        return True
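
A hypothetical helper that seeds the znode layout _is_next_target_ready polls for, useful in local testing; the two constants mirror names used in the excerpt, but their values here are assumptions.

# Sketch only: populate the target tree the Applier waits on.
ZK_NEXT_TARGET = '/phrases/distributor/next_target'
NUMBER_NODES_PER_PARTITION = 2

def seed_target(zk, target_id, num_partitions, hostnames):
    zk.ensure_path(ZK_NEXT_TARGET)
    zk.set(ZK_NEXT_TARGET, target_id.encode())
    for partition in range(num_partitions):
        for index, hostname in enumerate(hostnames):
            node_path = (f'/phrases/distributor/{target_id}'
                         f'/partitions/{partition}/nodes/{index}')
            zk.ensure_path(node_path)
            zk.set(node_path, hostname.encode())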