Exemple #1
0
  def start_vttablet(self, port=None, auth=False, memcache=False, wait_for_state="SERVING", customrules=None, schema_override=None, cert=None, key=None, ca_cert=None, repl_extra_flags={}):
    """
    Starts a vttablet process, and returns it.
    The process is also saved in self.proc, so it's easy to kill as well.

    Args:
      port: value for -port; self.port is used when it is falsy. The value
        as given (not the fallback) is forwarded to wait_for_vttablet_state.
      auth: when true, pass -auth-credentials pointing at
        test/test_data/authcredentials_test.json under utils.vttop.
      memcache: when true, pass -rowcache-bin / -rowcache-s using a
        memcache.sock path inside self.tablet_dir.
      wait_for_state: state handed to wait_for_vttablet_state.
      customrules: value for -customrules; flag omitted when falsy.
      schema_override: value for -schema-override; flag omitted when falsy.
      cert: value for -cert. When truthy, a port is reserved, stored in
        self.secure_port and passed as -secure-port together with -key.
      key: value for -key; only used when cert is set.
      ca_cert: value for -ca-cert; only used when cert is set.
      repl_extra_flags: forwarded to self._get_db_configs_file. The default
        dict is a single object shared by all calls, so it must not be
        mutated.

    Returns:
      The process returned by utils.run_bg (also stored in self.proc).
    """
    utils.prog_compile(['vtaction',
                        'vttablet',
                        ])

    args = [os.path.join(utils.vtroot, 'bin', 'vttablet'),
            '-port', '%s' % (port or self.port),
            '-tablet-path', self.tablet_alias,
            '-log_dir', self.tablet_dir]

    # every db config entry becomes a -db-config-<section>-<name> flag
    dbconfigs = self._get_db_configs_file(repl_extra_flags)
    for section in dbconfigs:
      for name in dbconfigs[section]:
        args.extend(["-db-config-"+section+"-"+name, dbconfigs[section][name]])

    if memcache:
      memcache_sock = os.path.join(self.tablet_dir, "memcache.sock")
      args.extend(["-rowcache-bin", "memcached"])
      args.extend(["-rowcache-s", memcache_sock])

    if auth:
      args.extend(['-auth-credentials', os.path.join(utils.vttop, 'test', 'test_data', 'authcredentials_test.json')])

    if customrules:
      args.extend(['-customrules', customrules])

    if schema_override:
      args.extend(['-schema-override', schema_override])

    if cert:
      self.secure_port = utils.reserve_ports(1)
      args.extend(['-secure-port', '%s' % self.secure_port,
                   '-cert', cert,
                   '-key', key])
      if ca_cert:
        args.extend(['-ca-cert', ca_cert])

    # the with-block closes our copy of the stderr file even if the launch
    # fails; the handle is only needed while run_bg starts the process
    with open(os.path.join(self.tablet_dir, "vttablet.stderr"), "w") as stderr_fd:
      # increment count only the first time
      if not self.proc:
        Tablet.tablets_running += 1
      self.proc = utils.run_bg(args, stderr=stderr_fd)

    # wait for zookeeper PID just to be sure we have it
    utils.run(utils.vtroot+'/bin/zk wait -e ' + self.zk_pid, stdout=utils.devnull)

    # wait for query service to be in the right state
    self.wait_for_vttablet_state(wait_for_state, port=port)

    return self.proc
Exemple #2
0
  def __init__(self, tablet_uid=None, port=None, mysql_port=None, cell=None):
    """
    Records the identity and ports of one test tablet.

    Any falsy argument is replaced by a generated value: the uid comes from
    Tablet.default_uid + Tablet.seq, each port from utils.reserve_ports(1),
    and the cell from tablet_cell_map (defaulting to 'nj').
    Tablet.seq is bumped on every construction.
    """
    if tablet_uid:
      self.tablet_uid = tablet_uid
    else:
      self.tablet_uid = Tablet.default_uid + Tablet.seq

    # ports are only reserved when the caller did not supply one
    if port:
      self.port = port
    else:
      self.port = utils.reserve_ports(1)
    if mysql_port:
      self.mysql_port = mysql_port
    else:
      self.mysql_port = utils.reserve_ports(1)
    Tablet.seq += 1

    # NOTE(review): the lookup uses the tablet_uid argument as passed in, not
    # the generated self.tablet_uid -- confirm this is intended.
    self.cell = cell or tablet_cell_map.get(tablet_uid, 'nj')
    self.proc = None

    # filled in during init_tablet
    self.keyspace = None
    self.shard = None

    # utility variables
    cell_and_uid = (self.cell, self.tablet_uid)
    self.tablet_alias = 'test_%s-%010d' % cell_and_uid
    self.zk_tablet_path = '/zk/test_%s/vt/tablets/%010d' % cell_and_uid
    self.zk_pid = self.zk_tablet_path + '/pid'
Exemple #3
0
  def start_vttablet(self, port=None, auth=False, memcache=False, wait_for_state="OPEN", customrules=None, schema_override=None, cert=None, key=None, ca_cert=None, repl_extra_flags={}):
    """
    Starts a vttablet process, and returns it.
    The process is also saved in self.proc, so it's easy to kill as well.

    Args:
      port: value for -port; self.port is used when it is falsy. The value
        as given (not the fallback) is forwarded to wait_for_vttablet_state.
      auth: when true, pass -auth-credentials pointing at
        test/test_data/authcredentials_test.json under utils.vttop.
      memcache: when true, write a config.json with a RowCache entry into
        self.tablet_dir and pass it as -queryserver-config-file.
      wait_for_state: state handed to wait_for_vttablet_state.
      customrules: value for -customrules; flag omitted when falsy.
      schema_override: value for -schema-override; flag omitted when falsy.
      cert: value for -cert. When truthy, a port is reserved, stored in
        self.secure_port and passed as -secure-port together with -key.
      key: value for -key; only used when cert is set.
      ca_cert: value for -ca-cert; only used when cert is set.
      repl_extra_flags: forwarded to self._write_db_configs_file. The
        default dict is a single object shared by all calls, so it must not
        be mutated.

    Returns:
      The process returned by utils.run_bg (also stored in self.proc).
    """
    utils.prog_compile(['vtaction',
                        'vttablet',
                        ])

    args = [os.path.join(utils.vtroot, 'bin', 'vttablet'),
            '-port', '%s' % (port or self.port),
            '-tablet-path', self.tablet_alias,
            '-logfile', self.logfile,
            '-log.level', 'INFO',
            '-db-configs-file', self._write_db_configs_file(repl_extra_flags),
            '-debug-querylog-file', self.querylog_file]

    if memcache:
      memcache_sock = os.path.join(self.tablet_dir, "memcache.sock")
      config = os.path.join(self.tablet_dir, "config.json")
      with open(config, 'w') as f:
        json.dump({"RowCache": ["memcached", "-s", memcache_sock]}, f)
      args.extend(["-queryserver-config-file", config])

    if auth:
      args.extend(['-auth-credentials', os.path.join(utils.vttop, 'test', 'test_data', 'authcredentials_test.json')])

    if customrules:
      args.extend(['-customrules', customrules])

    if schema_override:
      args.extend(['-schema-override', schema_override])

    if cert:
      self.secure_port = utils.reserve_ports(1)
      args.extend(['-secure-port', '%s' % self.secure_port,
                   '-cert', cert,
                   '-key', key])
      if ca_cert:
        args.extend(['-ca-cert', ca_cert])

    # the with-block closes our copy of the stderr file even if the launch
    # fails; the handle is only needed while run_bg starts the process
    with open(os.path.join(self.tablet_dir, "vttablet.stderr"), "w") as stderr_fd:
      self.proc = utils.run_bg(args, stderr=stderr_fd)

    # wait for zookeeper PID just to be sure we have it
    utils.run(utils.vtroot+'/bin/zk wait -e ' + self.zk_pid, stdout=utils.devnull)

    # wait for query service to be in the right state
    self.wait_for_vttablet_state(wait_for_state, port=port)

    # only counted once the tablet reached the requested state
    Tablet.tablets_running += 1
    return self.proc
Exemple #4
0
  def test_zkocc(self):
    # Exercises a zkocc server end to end: python client retry and failure
    # handling, the zkclient2 get / getv / children modes, the zk command
    # checks, and the Stale flag reported while the zk server is bounced.
    # (Comments rather than a docstring, so unittest's reporting of the test
    # name is unchanged.)
    root_logger = logging.getLogger()

    # preload the test_nj cell
    zkocc_14850 = utils.zkocc_start(extra_params=['-connect-timeout=2s', '-cache-refresh-interval=1s'])
    time.sleep(1)

    # create a python client. The first address is bad, will test the retry logic
    bad_port = utils.reserve_ports(3)
    zkocc_client = zkocc.ZkOccConnection("localhost:%u,localhost:%u,localhost:%u" % (bad_port, utils.zkocc_port_base, bad_port+1), "test_nj", 30)
    zkocc_client.dial()

    # test failure for a python client that cannot connect
    bad_zkocc_client = zkocc.ZkOccConnection("localhost:%u,localhost:%u" % (bad_port+2, bad_port), "test_nj", 30)
    try:
      bad_zkocc_client.dial()
      raise utils.TestError('exception expected')
    except zkocc.ZkOccError as e:
      if str(e) != "Cannot dial to any server":
        raise

    # quiet the logs for the expected failure; the finally clause restores
    # the previous level even when the check itself fails
    level = root_logger.getEffectiveLevel()
    root_logger.setLevel(logging.ERROR)

    # FIXME(ryszard): This can be changed into a self.assertRaises.
    try:
      bad_zkocc_client.get("/zk/test_nj/vt/zkocc1/data1")
      self.fail('exception expected')
    except zkocc.ZkOccError as e:
      if str(e) != "Cannot dial to any server":
        raise
    finally:
      root_logger.setLevel(level)

    # get test
    utils.prog_compile(['zkclient2'])
    _, err = utils.run(utils.vtroot+'/bin/zkclient2 -server localhost:%u /zk/test_nj/vt/zkocc1/data1' % utils.zkocc_port_base, trap_output=True)
    self.assertEqual(err, "/zk/test_nj/vt/zkocc1/data1 = Test data 1 (NumChildren=0, Version=0, Cached=false, Stale=false)\n")

    zk_data = zkocc_client.get("/zk/test_nj/vt/zkocc1/data1")
    self.assertDictContainsSubset({'Data': "Test data 1",
                                   'Cached': True,
                                   'Stale': False,},
                                  zk_data)
    self.assertDictContainsSubset({'NumChildren': 0, 'Version': 0}, zk_data['Stat'])

    # getv test
    _, err = utils.run(utils.vtroot+'/bin/zkclient2 -server localhost:%u /zk/test_nj/vt/zkocc1/data1 /zk/test_nj/vt/zkocc1/data2 /zk/test_nj/vt/zkocc1/data3' % utils.zkocc_port_base, trap_output=True)
    self.assertEqualNormalized(err, """[0] /zk/test_nj/vt/zkocc1/data1 = Test data 1 (NumChildren=0, Version=0, Cached=true, Stale=false)
  [1] /zk/test_nj/vt/zkocc1/data2 = Test data 2 (NumChildren=0, Version=0, Cached=false, Stale=false)
  [2] /zk/test_nj/vt/zkocc1/data3 = Test data 3 (NumChildren=0, Version=0, Cached=false, Stale=false)
  """)
    zk_data = zkocc_client.getv(["/zk/test_nj/vt/zkocc1/data1", "/zk/test_nj/vt/zkocc1/data2", "/zk/test_nj/vt/zkocc1/data3"])['Nodes']
    self.assertEqual(len(zk_data), 3)
    for i, d in enumerate(zk_data):
      self.assertEqual(d['Data'], 'Test data %s' % (i + 1))
      self.assertTrue(d['Cached'])
      self.assertFalse(d['Stale'])
      self.assertDictContainsSubset({'NumChildren': 0, 'Version': 0}, d['Stat'])

    # children test
    _, err = utils.run(utils.vtroot+'/bin/zkclient2 -server localhost:%u -mode children /zk/test_nj/vt' % utils.zkocc_port_base, trap_output=True)
    self.assertEqualNormalized(err, """Path = /zk/test_nj/vt
  Child[0] = zkocc1
  Child[1] = zkocc2
  NumChildren = 2
  CVersion = 2
  Cached = false
  Stale = false
  """)

    # zk command tests
    self._check_zk_output("cat /zk/test_nj/vt/zkocc1/data1", "Test data 1")
    self._check_zk_output("ls -l /zk/test_nj/vt/zkocc1", """total: 3
  -rw-rw-rw- zk zk       11  %s data1
  -rw-rw-rw- zk zk       11  %s data2
  -rw-rw-rw- zk zk       11  %s data3
  """ % (_format_time(zk_data[0]['Stat']['MTime']),
         _format_time(zk_data[1]['Stat']['MTime']),
         _format_time(zk_data[2]['Stat']['MTime'])))

    # test /zk/local is not resolved and rejected
    _, err = utils.run(utils.vtroot+'/bin/zkclient2 -server localhost:%u /zk/local/vt/zkocc1/data1' % utils.zkocc_port_base, trap_output=True, raise_on_error=False)
    self.assertIn("zkocc: cannot resolve local cell", err)

    # start a background process to query the same value over and over again
    # while we kill the zk server and restart it
    outfd = tempfile.NamedTemporaryFile(dir=utils.tmp_root, delete=False)
    filename = outfd.name
    try:
      querier = utils.run_bg('/bin/bash -c "while true ; do '+utils.vtroot+'/bin/zkclient2 -server localhost:%u /zk/test_nj/vt/zkocc1/data1 ; sleep 0.1 ; done"' % utils.zkocc_port_base, stderr=outfd.file)
    finally:
      # our handle is not needed once the child has (or has not) started
      outfd.close()

    # the querier loops forever, so make sure it is killed even when one of
    # the zkctl commands fails
    try:
      time.sleep(1)

      # kill zk server, sleep a bit, restart zk server, sleep a bit
      utils.run(utils.vtroot+'/bin/zkctl -zk.cfg 1@'+utils.hostname+':%u:%u:%u shutdown' % (utils.zk_port_base, utils.zk_port_base+1, utils.zk_port_base+2))
      time.sleep(3)
      utils.run(utils.vtroot+'/bin/zkctl -zk.cfg 1@'+utils.hostname+':%u:%u:%u start' % (utils.zk_port_base, utils.zk_port_base+1, utils.zk_port_base+2))
      time.sleep(3)
    finally:
      utils.kill_sub_process(querier)

    # The querier output must go fresh -> stale -> fresh exactly once:
    # state 0 = before the outage, 1 = stale seen, 2 = fresh again.
    logging.debug("Checking %s", filename)
    state = 0
    with open(filename, "r") as fd:
      for line in fd:
        if line == "/zk/test_nj/vt/zkocc1/data1 = Test data 1 (NumChildren=0, Version=0, Cached=true, Stale=false)\n":
          stale = False
        elif line == "/zk/test_nj/vt/zkocc1/data1 = Test data 1 (NumChildren=0, Version=0, Cached=true, Stale=true)\n":
          stale = True
        else:
          raise utils.TestError('unexpected line: ', line)
        if state == 0:
          if stale:
            state = 1
        elif state == 1:
          if not stale:
            state = 2
        else:
          if stale:
            self.fail('unexpected stale state')
    self.assertEqual(state, 2)

    utils.zkocc_kill(zkocc_14850)

    # check that after the server is gone, the python client fails correctly
    level = root_logger.getEffectiveLevel()
    root_logger.setLevel(logging.ERROR)
    try:
      zkocc_client.get("/zk/test_nj/vt/zkocc1/data1")
      self.fail('exception expected')
    except zkocc.ZkOccError as e:
      if str(e) != "Cannot dial to any server":
        raise
    finally:
      root_logger.setLevel(level)
Exemple #5
0
def _run_test_reparent_graceful(shard_id):
  """Run the graceful-reparent scenario against test_keyspace/<shard_id>.

  Starts tablet_62344 as master with tablet_62044, tablet_41983 and
  tablet_31981 as replicas, forces a reparent onto tablet_62344, turns two
  replicas into spares, gracefully reparents onto tablet_62044, and finally
  restarts tablet_62044 on a fresh port to check that the registered master
  address follows it.
  """
  utils.zk_wipe()

  utils.run_vtctl('CreateKeyspace -force test_keyspace')

  first_master = tablet_62344
  second_master = tablet_62044
  spare_tablets = [tablet_41983, tablet_31981]
  replica_tablets = [second_master] + spare_tablets
  all_tablets = [first_master] + replica_tablets

  def host_addr(port):
    # address string in the form compared by _check_db_addr
    return utils.hostname + ':' + str(port)

  # the databases must exist up front: the vttablets are started serving
  for tablet in all_tablets:
    tablet.create_db('vt_test_keyspace')

  # bring up the master mysql and vttablet first ...
  first_master.init_tablet('master', 'test_keyspace', shard_id, start=True)

  # ... then the slaves used for testing reparenting
  for tablet in replica_tablets:
    tablet.init_tablet('replica', 'test_keyspace', shard_id, start=True)

  # The shard layout node might not be valid until it has been recomputed.
  utils.run_vtctl('RebuildShardGraph test_keyspace/' + shard_id)
  utils.validate_topology()

  # Forced reparent: assumes that all the datasets are identical.
  utils.pause("force ReparentShard?")
  utils.run_vtctl('ReparentShard -force test_keyspace/%s %s' % (shard_id, first_master.tablet_alias))
  utils.validate_topology(ping_tablets=True)

  _check_db_addr('test_keyspace.%s.master:_vtocc' % shard_id,
                 host_addr(first_master.port))

  # Two replicas become spares. Only one node is left serving traffic, but
  # the spares still need to appear in the replication graph.
  for tablet in spare_tablets:
    utils.run_vtctl('ChangeSlaveType ' + tablet.tablet_alias + ' spare')
  utils.validate_topology()
  _check_db_addr('test_keyspace.%s.replica:_vtocc' % shard_id,
                 host_addr(second_master.port))

  # Run this to make sure it succeeds.
  utils.run_vtctl('ShardReplicationPositions test_keyspace/%s' % shard_id, stdout=devnull)

  # Graceful reparent onto the remaining replica.
  utils.pause("graceful ReparentShard?")
  utils.run_vtctl('ReparentShard test_keyspace/%s %s' % (shard_id, second_master.tablet_alias), auto_log=True)
  utils.validate_topology()

  _check_db_addr('test_keyspace.%s.master:_vtocc' % shard_id,
                 host_addr(second_master.port))

  for tablet in all_tablets:
    tablet.kill_vttablet()

  # Address correction: restart the new master on another port.
  new_port = utils.reserve_ports(1)
  second_master.start_vttablet(port=new_port)
  # Give the address a moment to reregister.
  time.sleep(1.0)

  _check_db_addr('test_keyspace.%s.master:_vtocc' % shard_id,
                 host_addr(new_port))

  second_master.kill_vttablet()
0
    def test_zkocc(self):
        # preload the test_nj cell
        zkocc_14850 = utils.zkocc_start(
            extra_params=['-connect-timeout=2s', '-cache-refresh-interval=1s'])
        time.sleep(1)

        # create a python client. The first address is bad, will test the retry logic
        bad_port = utils.reserve_ports(3)
        zkocc_client = zkocc.ZkOccConnection(
            "localhost:%u,localhost:%u,localhost:%u" %
            (bad_port, utils.zkocc_port_base, bad_port + 1), "test_nj", 30)
        zkocc_client.dial()

        # test failure for a python client that cannot connect
        bad_zkocc_client = zkocc.ZkOccConnection(
            "localhost:%u,localhost:%u" % (bad_port + 2, bad_port), "test_nj",
            30)
        try:
            bad_zkocc_client.dial()
            raise utils.TestError('exception expected')
        except zkocc.ZkOccError as e:
            if str(e) != "Cannot dial to any server":
                raise
        logging.getLogger().setLevel(logging.ERROR)

        # FIXME(ryszard): This can be changed into a self.assertRaises.
        try:
            bad_zkocc_client.get("/zk/test_nj/zkocc1/data1")
            self.fail('exception expected')
        except zkocc.ZkOccError as e:
            if str(e) != "Cannot dial to any server":
                raise

        logging.getLogger().setLevel(logging.WARNING)

        # get test
        utils.prog_compile(['zkclient2'])
        out, err = utils.run(
            utils.vtroot +
            '/bin/zkclient2 -server localhost:%u /zk/test_nj/zkocc1/data1' %
            utils.zkocc_port_base,
            trap_output=True)
        self.assertEqual(
            err,
            "/zk/test_nj/zkocc1/data1 = Test data 1 (NumChildren=0, Version=0, Cached=false, Stale=false)\n"
        )

        zk_data = zkocc_client.get("/zk/test_nj/zkocc1/data1")
        self.assertDictContainsSubset(
            {
                'Data': "Test data 1",
                'Cached': True,
                'Stale': False,
            }, zk_data)
        self.assertDictContainsSubset({
            'NumChildren': 0,
            'Version': 0
        }, zk_data['Stat'])

        # getv test
        out, err = utils.run(
            utils.vtroot +
            '/bin/zkclient2 -server localhost:%u /zk/test_nj/zkocc1/data1 /zk/test_nj/zkocc1/data2 /zk/test_nj/zkocc1/data3'
            % utils.zkocc_port_base,
            trap_output=True)
        self.assertEqualNormalized(
            err,
            """[0] /zk/test_nj/zkocc1/data1 = Test data 1 (NumChildren=0, Version=0, Cached=true, Stale=false)
  [1] /zk/test_nj/zkocc1/data2 = Test data 2 (NumChildren=0, Version=0, Cached=false, Stale=false)
  [2] /zk/test_nj/zkocc1/data3 = Test data 3 (NumChildren=0, Version=0, Cached=false, Stale=false)
  """)
        zk_data = zkocc_client.getv([
            "/zk/test_nj/zkocc1/data1", "/zk/test_nj/zkocc1/data2",
            "/zk/test_nj/zkocc1/data3"
        ])['Nodes']
        self.assertEqual(len(zk_data), 3)
        for i, d in enumerate(zk_data):
            self.assertEqual(d['Data'], 'Test data %s' % (i + 1))
            self.assertTrue(d['Cached'])
            self.assertFalse(d['Stale'])
            self.assertDictContainsSubset({
                'NumChildren': 0,
                'Version': 0
            }, d['Stat'])

        # children test
        out, err = utils.run(
            utils.vtroot +
            '/bin/zkclient2 -server localhost:%u -mode children /zk/test_nj' %
            utils.zkocc_port_base,
            trap_output=True)
        self.assertEqualNormalized(
            err, """Path = /zk/test_nj
  Child[0] = zkocc1
  Child[1] = zkocc2
  NumChildren = 2
  CVersion = 4
  Cached = false
  Stale = false
  """)

        # zk command tests
        self._check_zk_output("cat /zk/test_nj/zkocc1/data1", "Test data 1")
        self._check_zk_output(
            "ls -l /zk/test_nj/zkocc1", """total: 3
  -rw-rw-rw- zk zk       11  %s data1
  -rw-rw-rw- zk zk       11  %s data2
  -rw-rw-rw- zk zk       11  %s data3
  """ % (_format_time(zk_data[0]['Stat']['MTime']),
         _format_time(zk_data[1]['Stat']['MTime']),
         _format_time(zk_data[2]['Stat']['MTime'])))

        # test /zk/local is not resolved and rejected
        out, err = utils.run(
            utils.vtroot +
            '/bin/zkclient2 -server localhost:%u /zk/local/zkocc1/data1' %
            utils.zkocc_port_base,
            trap_output=True,
            raise_on_error=False)
        self.assertIn("zkocc: cannot resolve local cell", err)

        # start a background process to query the same value over and over again
        # while we kill the zk server and restart it
        outfd = tempfile.NamedTemporaryFile(dir=utils.tmp_root, delete=False)
        filename = outfd.name
        querier = utils.run_bg(
            '/bin/bash -c "while true ; do ' + utils.vtroot +
            '/bin/zkclient2 -server localhost:%u /zk/test_nj/zkocc1/data1 ; sleep 0.1 ; done"'
            % utils.zkocc_port_base,
            stderr=outfd.file)
        outfd.close()
        time.sleep(1)

        # kill zk server, sleep a bit, restart zk server, sleep a bit
        utils.run(utils.vtroot + '/bin/zkctl -zk.cfg 1@' + utils.hostname +
                  ':%u:%u:%u shutdown' %
                  (utils.zk_port_base, utils.zk_port_base + 1,
                   utils.zk_port_base + 2))
        time.sleep(3)
        utils.run(utils.vtroot + '/bin/zkctl -zk.cfg 1@' + utils.hostname +
                  ':%u:%u:%u start' % (utils.zk_port_base, utils.zk_port_base +
                                       1, utils.zk_port_base + 2))
        time.sleep(3)

        utils.kill_sub_process(querier)

        utils.debug("Checking " + filename)
        fd = open(filename, "r")
        state = 0
        for line in fd:
            if line == "/zk/test_nj/zkocc1/data1 = Test data 1 (NumChildren=0, Version=0, Cached=true, Stale=false)\n":
                stale = False
            elif line == "/zk/test_nj/zkocc1/data1 = Test data 1 (NumChildren=0, Version=0, Cached=true, Stale=true)\n":
                stale = True
            else:
                raise utils.TestError('unexpected line: ', line)
            if state == 0:
                if stale:
                    state = 1
            elif state == 1:
                if not stale:
                    state = 2
            else:
                if stale:
                    self.fail('unexpected stale state')
        self.assertEqual(state, 2)
        fd.close()

        utils.zkocc_kill(zkocc_14850)

        # check that after the server is gone, the python client fails correctly
        logging.getLogger().setLevel(logging.ERROR)
        try:
            zkocc_client.get("/zk/test_nj/zkocc1/data1")
            self.fail('exception expected')
        except zkocc.ZkOccError as e:
            if str(e) != "Cannot dial to any server":
                raise
        logging.getLogger().setLevel(logging.WARNING)
Exemple #7
0
    def start_vttablet(self,
                       port=None,
                       auth=False,
                       memcache=False,
                       wait_for_state="OPEN",
                       customrules=None,
                       schema_override=None,
                       cert=None,
                       key=None,
                       ca_cert=None,
                       repl_extra_flags={}):
        """
        Starts a vttablet process, and returns it.
        The process is also saved in self.proc, so it's easy to kill as well.

        Args:
          port: value for -port; self.port is used when it is falsy. The
            value as given (not the fallback) is forwarded to
            wait_for_vttablet_state.
          auth: when true, pass -auth-credentials pointing at
            test/test_data/authcredentials_test.json under utils.vttop.
          memcache: when true, call self.start_memcache() and pass
            self.memcache_path as -rowcache.
          wait_for_state: state handed to wait_for_vttablet_state.
          customrules: value for -customrules; flag omitted when falsy.
          schema_override: value for -schema-override; flag omitted when
            falsy.
          cert: value for -cert. When truthy, a port is reserved, stored in
            self.secure_port and passed as -secure-port together with -key.
          key: value for -key; only used when cert is set.
          ca_cert: value for -ca-cert; only used when cert is set.
          repl_extra_flags: forwarded to self._write_db_configs_file. The
            default dict is a single object shared by all calls, so it must
            not be mutated.

        Returns:
          The process returned by utils.run_bg (also stored in self.proc).
        """
        utils.prog_compile([
            'vtaction',
            'vttablet',
        ])

        args = [
            os.path.join(utils.vtroot, 'bin', 'vttablet'), '-port',
            '%s' % (port or self.port), '-tablet-path', self.tablet_alias,
            '-logfile', self.logfile, '-log.level', 'INFO', '-db-configs-file',
            self._write_db_configs_file(repl_extra_flags),
            '-debug-querylog-file', self.querylog_file
        ]

        if memcache:
            self.start_memcache()
            args.extend(['-rowcache', self.memcache_path])

        if auth:
            args.extend([
                '-auth-credentials',
                os.path.join(utils.vttop, 'test', 'test_data',
                             'authcredentials_test.json')
            ])

        if customrules:
            args.extend(['-customrules', customrules])

        if schema_override:
            args.extend(['-schema-override', schema_override])

        if cert:
            self.secure_port = utils.reserve_ports(1)
            args.extend([
                '-secure-port',
                '%s' % self.secure_port, '-cert', cert, '-key', key
            ])
            if ca_cert:
                args.extend(['-ca-cert', ca_cert])

        # the with-block closes our copy of the stderr file even if the
        # launch fails; the handle is only needed while run_bg starts the
        # process
        stderr_path = os.path.join(self.tablet_dir, "vttablet.stderr")
        with open(stderr_path, "w") as stderr_fd:
            self.proc = utils.run_bg(args, stderr=stderr_fd)

        # wait for zookeeper PID just to be sure we have it
        utils.run(utils.vtroot + '/bin/zk wait -e ' + self.zk_pid,
                  stdout=utils.devnull)

        # wait for query service to be in the right state
        self.wait_for_vttablet_state(wait_for_state, port=port)

        return self.proc
Exemple #8
0
def run_test_zkocc():
  """Exercise a zkocc server end to end and raise on any mismatch.

  Covers python client retry and failure handling, the zkclient2 get / getv /
  children modes, the zk command checks, and the Stale flag reported while
  the zk server is shut down and restarted.

  Raises:
    utils.TestError: when any observed output differs from the expected one.
  """
  _populate_zk()
  root_logger = logging.getLogger()

  # preload the test_nj cell
  zkocc_14850 = utils.zkocc_start(extra_params=['-connect-timeout=2s', '-cache-refresh-interval=1s'])
  time.sleep(1)

  # create a python client. The first address is bad, will test the retry logic
  bad_port = utils.reserve_ports(3)
  zkocc_client = zkocc.ZkOccConnection("localhost:%u,localhost:%u,localhost:%u" % (bad_port, utils.zkocc_port_base, bad_port+1), "test_nj", 30)
  zkocc_client.dial()

  # test failure for a python client that cannot connect
  bad_zkocc_client = zkocc.ZkOccConnection("localhost:%u,localhost:%u" % (bad_port+2, bad_port), "test_nj", 30)
  try:
    bad_zkocc_client.dial()
    raise utils.TestError('exception expected')
  except zkocc.ZkOccError as e:
    if str(e) != "Cannot dial to any server":
      raise utils.TestError('Unexpected exception: ', str(e))

  # quiet the logs for the expected failure, then put back whatever level was
  # in effect before (also when the check itself fails)
  level = root_logger.getEffectiveLevel()
  root_logger.setLevel(logging.ERROR)
  try:
    bad_zkocc_client.get("/zk/test_nj/zkocc1/data1")
    raise utils.TestError('exception expected')
  except zkocc.ZkOccError as e:
    if str(e) != "Cannot dial to any server":
      raise utils.TestError('Unexpected exception: ', str(e))
  finally:
    root_logger.setLevel(level)

  # get test
  _, err = utils.run(utils.vtroot+'/bin/zkclient2 -server localhost:%u /zk/test_nj/zkocc1/data1' % utils.zkocc_port_base, trap_output=True)
  if err != "/zk/test_nj/zkocc1/data1 = Test data 1 (NumChildren=0, Version=0, Cached=false, Stale=false)\n":
    raise utils.TestError('unexpected get output: ', err)
  zkNode = zkocc_client.get("/zk/test_nj/zkocc1/data1")
  if (zkNode['Data'] != "Test data 1" or
      zkNode['Stat']['NumChildren'] != 0 or
      zkNode['Stat']['Version'] != 0 or
      zkNode['Cached'] != True or
      zkNode['Stale'] != False):
    raise utils.TestError('unexpected zkocc_client.get output: ', zkNode)

  # getv test
  _, err = utils.run(utils.vtroot+'/bin/zkclient2 -server localhost:%u /zk/test_nj/zkocc1/data1 /zk/test_nj/zkocc1/data2 /zk/test_nj/zkocc1/data3' % utils.zkocc_port_base, trap_output=True)
  if err != """[0] /zk/test_nj/zkocc1/data1 = Test data 1 (NumChildren=0, Version=0, Cached=true, Stale=false)
[1] /zk/test_nj/zkocc1/data2 = Test data 2 (NumChildren=0, Version=0, Cached=false, Stale=false)
[2] /zk/test_nj/zkocc1/data3 = Test data 3 (NumChildren=0, Version=0, Cached=false, Stale=false)
""":
    raise utils.TestError('unexpected getV output: ', err)
  zkNodes = zkocc_client.getv(["/zk/test_nj/zkocc1/data1", "/zk/test_nj/zkocc1/data2", "/zk/test_nj/zkocc1/data3"])
  # the three nodes only differ by their number, so check them in a loop
  for index in range(3):
    node = zkNodes['Nodes'][index]
    if (node['Data'] != "Test data %d" % (index + 1) or
        node['Stat']['NumChildren'] != 0 or
        node['Stat']['Version'] != 0 or
        node['Cached'] != True or
        node['Stale'] != False):
      raise utils.TestError('unexpected zkocc_client.getv output: ', zkNodes)

  # children test
  _, err = utils.run(utils.vtroot+'/bin/zkclient2 -server localhost:%u -mode children /zk/test_nj' % utils.zkocc_port_base, trap_output=True)
  if err != """Path = /zk/test_nj
Child[0] = zkocc1
Child[1] = zkocc2
NumChildren = 2
CVersion = 4
Cached = false
Stale = false
""":
    raise utils.TestError('unexpected children output: ', err)

  # zk command tests
  _check_zk_output("cat /zk/test_nj/zkocc1/data1", "Test data 1")
  _check_zk_output("ls -l /zk/test_nj/zkocc1", """total: 3
-rw-rw-rw- zk zk       11  %s data1
-rw-rw-rw- zk zk       11  %s data2
-rw-rw-rw- zk zk       11  %s data3
""" % (_format_time(zkNodes['Nodes'][0]['Stat']['MTime']),
       _format_time(zkNodes['Nodes'][1]['Stat']['MTime']),
       _format_time(zkNodes['Nodes'][2]['Stat']['MTime'])))

  # test /zk/local is not resolved and rejected
  _, err = utils.run(utils.vtroot+'/bin/zkclient2 -server localhost:%u /zk/local/zkocc1/data1' % utils.zkocc_port_base, trap_output=True, raise_on_error=False)
  if "zkocc: cannot resolve local cell" not in err:
    raise utils.TestError('unexpected get output, not local cell error: ', err)

  # start a background process to query the same value over and over again
  # while we kill the zk server and restart it
  outfd = tempfile.NamedTemporaryFile(dir=utils.tmp_root, delete=False)
  filename = outfd.name
  try:
    querier = utils.run_bg('/bin/bash -c "while true ; do '+utils.vtroot+'/bin/zkclient2 -server localhost:%u /zk/test_nj/zkocc1/data1 ; sleep 0.1 ; done"' % utils.zkocc_port_base, stderr=outfd.file)
  finally:
    # our handle is not needed once the child has (or has not) started
    outfd.close()

  # the querier loops forever, so make sure it is killed even when one of the
  # zkctl commands fails
  try:
    time.sleep(1)

    # kill zk server, sleep a bit, restart zk server, sleep a bit
    utils.run(utils.vtroot+'/bin/zkctl -zk.cfg 1@'+utils.hostname+':%u:%u:%u shutdown' % (utils.zk_port_base, utils.zk_port_base+1, utils.zk_port_base+2))
    time.sleep(3)
    utils.run(utils.vtroot+'/bin/zkctl -zk.cfg 1@'+utils.hostname+':%u:%u:%u start' % (utils.zk_port_base, utils.zk_port_base+1, utils.zk_port_base+2))
    time.sleep(3)
  finally:
    utils.kill_sub_process(querier)

  # The querier output must go fresh -> stale -> fresh exactly once:
  # state 0 = before the outage, 1 = stale seen, 2 = fresh again.
  utils.debug("Checking " + filename)
  state = 0
  with open(filename, "r") as fd:
    for line in fd:
      if line == "/zk/test_nj/zkocc1/data1 = Test data 1 (NumChildren=0, Version=0, Cached=true, Stale=false)\n":
        stale = False
      elif line == "/zk/test_nj/zkocc1/data1 = Test data 1 (NumChildren=0, Version=0, Cached=true, Stale=true)\n":
        stale = True
      else:
        raise utils.TestError('unexpected line: ', line)
      if state == 0:
        if stale:
          state = 1
      elif state == 1:
        if not stale:
          state = 2
      else:
        if stale:
          raise utils.TestError('unexpected stale state')
  if state != 2:
    raise utils.TestError('unexpected ended stale state')

  utils.zkocc_kill(zkocc_14850)

  # check that after the server is gone, the python client fails correctly
  level = root_logger.getEffectiveLevel()
  root_logger.setLevel(logging.ERROR)
  try:
    zkocc_client.get("/zk/test_nj/zkocc1/data1")
    raise utils.TestError('exception expected')
  except zkocc.ZkOccError as e:
    if str(e) != "Cannot dial to any server":
      raise utils.TestError('Unexpected exception: ', str(e))
  finally:
    root_logger.setLevel(level)