def test_workload(self):
    """Run the workload around a series of reshardings of the test table.

    The table is populated with 10000 records, the "before" phase of the
    workload is run, the table is reconfigured once for every entry in
    ``opts["sequence"]`` (waiting for all replicas after each change), and
    finally the "after" phase is run. The cluster is checked after each step.
    """
    workloadServer = self.cluster[0]

    utils.print_with_time("Inserting data")
    utils.populateTable(self.conn, self.table, records=10000)

    utils.print_with_time("Starting workload")
    runner = workload_runner.SplitOrContinuousWorkload(
        opts, workloadServer, db_name=self.dbName, table_name=self.tableName)
    with runner as workload:

        utils.print_with_time("Running workload before")
        workload.run_before()
        self.checkCluster()

        for shardCount in opts["sequence"]:
            utils.print_with_time("Sharding table to %d shards" % shardCount)

            # apply the new layout, then block until every replica is ready
            reshard = self.table.reconfigure(
                shards=shardCount, replicas=opts["num-nodes"])
            reshard.run(self.conn)
            awaitReady = self.table.wait(wait_for='all_replicas_ready')
            awaitReady.run(self.conn)
            self.checkCluster()

        utils.print_with_time("Running workload after")
        workload.run_after()
        self.checkCluster()

    utils.print_with_time("Workload complete")
def test_workload(self):
    """Exercise the workload while the table is resharded step by step.

    Populates the table with 10000 records (using the field 'val'), runs the
    workload's "before" phase, reconfigures the table for each shard count in
    ``opts["sequence"]``, then runs the "after" phase. ``checkCluster`` is
    called after every phase and every resharding.
    """
    targetServer = self.cluster[0]

    utils.print_with_time("Inserting data")
    utils.populateTable(
        self.conn, self.table, records=10000, fieldName='val')

    utils.print_with_time("Starting workload")
    workloadContext = workload_runner.SplitOrContinuousWorkload(
        opts,
        targetServer,
        db_name=self.dbName,
        table_name=self.tableName)
    with workloadContext as workload:

        utils.print_with_time("Running workload before")
        workload.run_before()
        self.checkCluster()

        for numShards in opts["sequence"]:
            utils.print_with_time("Sharding table to %d shards" % numShards)

            # change the sharding, then wait for the replicas to catch up
            reconfigureQuery = self.table.reconfigure(
                shards=numShards, replicas=opts["num-nodes"])
            reconfigureQuery.run(self.conn)
            waitQuery = self.table.wait(wait_for='all_replicas_ready')
            waitQuery.run(self.conn)
            self.checkCluster()

        utils.print_with_time("Running workload after")
        workload.run_after()
        self.checkCluster()

    utils.print_with_time("Workload complete")
def populateTable(self, conn=None, table=None, records=None, fieldName=None):
    """Populate a table via ``utils.populateTable``, defaulting to this test's settings.

    Any of ``conn``, ``table`` or ``records`` left as ``None`` is replaced by
    ``self.conn``, ``self.table`` or ``self.recordsToGenerate`` respectively;
    ``fieldName`` is passed through unchanged.
    """
    utils.populateTable(
        conn=self.conn if conn is None else conn,
        table=self.table if table is None else table,
        records=self.recordsToGenerate if records is None else records,
        fieldName=fieldName,
    )
def make_table(name, shards, conn):
    """Create a table named "name" with the given shard configuration, and
    populate it with some data.

    The table is created in the module-level `dbName` database by inserting a
    row into `rethinkdb.table_config`, then filled with `docs_per_table`
    documents through `utils.populateTable`.

    Args:
        name: name of the table to create.
        shards: value stored as the new table's "shards" configuration.
        conn: open driver connection used to run every query.

    Raises:
        AssertionError: if the insert, the wait, or the population step does
            not report the expected count; the server response is attached.
    """
    utils.print_with_time("Preparing table '%s'" % name)

    res = r.db("rethinkdb").table("table_config").insert(
        {"name": name, "db": dbName, "shards": shards}).run(conn)
    assert res.get("inserted") == 1, res

    # Address the table through dbName explicitly: it was created there, and a
    # bare r.table(name) would resolve against the connection's default db.
    res = r.db(dbName).table(name).wait(
        wait_for="all_replicas_ready").run(conn)
    assert res.get("ready") == 1, res

    res = utils.populateTable(
        conn=conn, table=name, db=dbName, records=docs_per_table,
        fieldName='number')
    assert res.get("inserted") == docs_per_table, res
# Connect to the first server of the cluster
server = cluster[0]
conn = r.connect(host=server.host, port=server.driver_port)

# Start from a freshly created table, creating the db if it is missing
utils.print_with_time("Creating db/table %s/%s" % (dbName, tableName))
existingDbs = r.db_list().run(conn)
if dbName not in existingDbs:
    r.db_create(dbName).run(conn)
existingTables = r.db(dbName).table_list().run(conn)
if tableName in existingTables:
    r.db(dbName).table_drop(tableName).run(conn)
r.db(dbName).table_create(tableName).run(conn)
tbl = r.db(dbName).table(tableName)

utils.print_with_time("Inserting some data")
utils.populateTable(conn, tbl, records=10000)
cluster.check()

# Split into two shards and then merge back, checking the cluster each time
for stepMessage, shardCount, replicaCount in (
        ("Splitting into two shards", 2, 2),
        ("Merging shards together again", 1, 1)):
    utils.print_with_time(stepMessage)
    tbl.reconfigure(shards=shardCount, replicas=replicaCount).run(conn)
    r.db(dbName).wait(wait_for="all_replicas_ready").run(conn)
    cluster.check()

utils.print_with_time("Cleaning up")
utils.print_with_time("Done.")
utils.print_with_time("Creating a table") if dbName not in r.db_list().run(conn): r.db_create(dbName).run(conn) if tableName in r.db(dbName).table_list().run(conn): r.db(dbName).table_drop(tableName).run(conn) r.db(dbName).table_create(tableName).run(conn) res = r.db(dbName).table(tableName).config() \ .update({"shards": [{"primary_replica": "s1", "replicas": ["s1"]}]}).run(conn) assert res["errors"] == 0 r.db(dbName).table(tableName).wait(wait_for="all_replicas_ready").run(conn) # Insert some data so distribution queries can work utils.print_with_time("Adding data") utils.populateTable(conn, r.db(dbName).table(tableName), fieldName='x') utils.print_with_time("Test reconfigure dry_run") # Generate many configurations using `dry_run=True` and check to make sure they # satisfy the constraints def test_reconfigure(num_shards, num_replicas, primary_replica_tag, nonvoting_replica_tags): utils.print_with_time( "Making configuration num_shards=%d num_replicas=%r primary_replica_tag=%r " "nonvoting_replica_tags=%r" % (num_shards, num_replicas, primary_replica_tag, nonvoting_replica_tags)) res = r.db(dbName).table(tableName).reconfigure( shards=num_shards,
def setUp(self):
    """Make sure a healthy cluster, the test database and a clean test table exist.

    Steps, in order:
      1. If a cluster is already stored on the class, verify it with
         `checkCluster`; on failure stop it (best-effort) and reset the
         class-level `cluster`, `_conn` and `table` to None.
      2. Create a `driver.Cluster` if there is none, start any servers named
         in `self.servers`, then start anonymous servers until there are at
         least `shards * replicas` (or the requested number) of them.
      3. Create `self.dbName` if it is set and missing.
      4. If `self.tableName` is set: drop and re-create the table, optionally
         populate it, and assign one primary per shard plus `replicas - 1`
         further servers as replicas.

    Raises:
        AssertionError: if updating the table's shard configuration reports
            errors.
    """

    # -- start the servers

    # - check on an existing cluster

    if self.cluster is not None:
        try:
            self.checkCluster()
        except Exception:
            # The cluster is unusable: stop it, ignoring failures, and forget
            # it so that a fresh one is created below. `except Exception`
            # rather than a bare `except` lets KeyboardInterrupt/SystemExit
            # propagate.
            try:
                self.cluster.check_and_stop()
            except Exception:
                pass
            self.__class__.cluster = None
            self.__class__._conn = None
            self.__class__.table = None

    # - ensure we have a cluster

    if self.cluster is None:
        self.__class__.cluster = driver.Cluster()

    # - make sure we have any named servers

    # self.servers is either an iterable of server names or a plain count
    serversAreNamed = hasattr(self.servers, '__iter__')
    if serversAreNamed:
        for name in self.servers:
            # only block on startup for the very first server
            firstServer = len(self.cluster) == 0
            if name not in self.cluster:
                driver.Process(
                    cluster=self.cluster,
                    name=name,
                    console_output=True,
                    command_prefix=self.server_command_prefix,
                    extra_options=self.server_extra_options,
                    wait_until_ready=firstServer)

    # - ensure we have the proper number of servers
    # note: we start up enough servers to make sure they each have only one role

    # Treat an unset (None) count as 0: Python 2's max() silently ranked None
    # below any int, whereas Python 3 raises TypeError on the comparison.
    requestedServers = len(self.servers) if serversAreNamed else (self.servers or 0)
    serverCount = max(self.shards * self.replicas, requestedServers)
    for _ in range(serverCount - len(self.cluster)):
        firstServer = len(self.cluster) == 0
        driver.Process(
            cluster=self.cluster,
            wait_until_ready=firstServer,
            command_prefix=self.server_command_prefix,
            extra_options=self.server_extra_options)

    self.cluster.wait_until_ready()

    # -- ensure db is available

    if self.dbName is not None and self.dbName not in self.r.db_list().run(self.conn):
        self.r.db_create(self.dbName).run(self.conn)

    # -- setup test table

    if self.tableName is not None:

        # - ensure we have a clean table

        if self.tableName in self.r.db(self.dbName).table_list().run(self.conn):
            self.r.db(self.dbName).table_drop(self.tableName).run(self.conn)
        self.r.db(self.dbName).table_create(self.tableName).run(self.conn)

        self.__class__.table = self.r.db(self.dbName).table(self.tableName)

        # - add initial records

        if self.recordsToGenerate:
            utils.populateTable(
                conn=self.conn,
                table=self.table,
                records=self.recordsToGenerate,
                fieldName=self.fieldName)

        # - shard and replicate the table

        # the first `shards` servers become primaries; the rest are handed
        # out as replicas, `replicas - 1` per shard
        primaries = iter(self.cluster[:self.shards])
        replicas = iter(self.cluster[self.shards:])

        shardPlan = []
        for primary in primaries:
            # next() builtin instead of the Python-2-only .next() method
            chosenReplicas = [next(replicas).name for _ in range(0, self.replicas - 1)]
            shardPlan.append({
                'primary_replica': primary.name,
                'replicas': [primary.name] + chosenReplicas
            })
        res = self.r.db(self.dbName).table(self.tableName).config().update(
            {'shards': shardPlan}).run(self.conn)
        assert res['errors'] == 0, res
        self.r.db(self.dbName).table(self.tableName).wait().run(self.conn)
def test_backfill(self):
    """Check that a backfill appears in `rethinkdb.jobs`, progresses, and then goes away.

    The table is populated with 10000 records and reconfigured to one shard
    with two replicas to trigger a backfill. The jobs table is then polled
    until `self.timeout` seconds have passed. The test fails if:
      * more than one backfill job is listed at once,
      * the reported progress drops 10 times,
      * the job disappears before 2 progress increases were observed, or
      * the job never appears / never completes before the deadline.
    """
    valueDropsLimit = 10
    recordsToGenerate = 10000
    fieldName = 'x'

    # - generate some records

    utils.populateTable(
        conn=self.conn, table=self.table, records=recordsToGenerate,
        fieldName=fieldName)

    # - trigger a backfill with a reconfigure

    reconfigureResponse = self.table.reconfigure(shards=1, replicas=2).run(self.conn)
    shardConfig = reconfigureResponse["config_changes"][0]["new_val"]["shards"][0]
    primaryReplica = shardConfig["primary_replica"]
    # Wrap the name in a list: set(primaryReplica) on a string yields a set of
    # its characters, which never removed the primary from the replica set.
    secondaryReplicas = set(shardConfig["replicas"]) - set([primaryReplica])

    # - watch for the entry in rethinkdb.jobs

    query = self.r.db("rethinkdb").table("jobs").filter(
        {"type": "backfill"}).coerce_to("ARRAY")

    latestValue = None
    valueIncreases = 0
    deadline = time.time() + self.timeout
    valueDropMessages = []
    while time.time() < deadline:
        response = query.run(self.conn)
        self.assertTrue(
            len(response) in (0, 1),
            'Expected only one backfill job, got: %r' % response)

        if len(response) == 1:
            job = response[0]

            # - check content

            # NOTE(review): every failure in this block is swallowed by the
            # `except Exception: pass` below (AssertionError is an Exception),
            # so these checks can never fail the test. Left as-is because it
            # may be deliberate; confirm before making them binding.
            try:
                self.assertTrue('id' in job, job)
                self.assertEqual(job["id"][0], "backfill")
                self.assertTrue('servers' in job, job)
                self.assertEqual(len(job["servers"]), self.servers)

                self.assertTrue(
                    'info' in job and isinstance(job['info'], dict), job)
                self.assertTrue(
                    'db' in job['info'] and job['info']['db'] == self.dbName,
                    job)
                self.assertTrue(
                    'table' in job['info'] and job['info']['table'] == self.tableName,
                    job)
                self.assertTrue(
                    'progress' in job['info'] and job['info']['progress'] >= 0,
                    job)
                self.assertTrue(
                    'source_server' in job["info"]
                    and job["info"]['source_server'] == primaryReplica)
                self.assertTrue(
                    'destination_server' in job["info"]
                    and job["info"]['destination_server'] in secondaryReplicas)

                self.assertTrue(
                    'duration_sec' in job and 0 < job["duration_sec"] <= self.timeout,
                    job)
            except Exception:
                pass

            # - ensure we are making progress

            progress = job["info"]["progress"]
            if latestValue is None or progress > latestValue:
                latestValue = progress
                valueIncreases += 1
            elif progress < latestValue:
                # progress went backwards: tolerated a limited number of times
                valueDropsLimit -= 1
                valueDropMessages.append('from %f to %f' % (latestValue, progress))
                latestValue = progress
                if valueDropsLimit < 1:
                    self.fail('progress value dropped too many times: %s' %
                              (', '.join(valueDropMessages)))

        elif latestValue is not None:
            # we have seen a backfill, and now it is gone
            if valueIncreases < 2:
                self.fail(
                    'Did not see at least 2 value increases before backfill entry dissapeared'
                )
            break

        time.sleep(.01)
    else:
        # the loop ran out of time without reaching the `break` above
        if latestValue is None:
            self.fail(
                'Timed out after %.1f seconds waiting for backfill appear' %
                self.timeout)
        else:
            self.fail(
                'Timed out after %.1f seconds waiting for backfill to be complete'
                % self.timeout)
def test_index_construction(self):
    """Watch an index_construction job in `rethinkdb.jobs` progress and then vanish.

    500 records are generated and a secondary index on 'x' is created. The
    jobs table is polled (filtered to this db/table/index) until
    `self.timeout` seconds have elapsed. The test fails when more than one
    matching job is listed, when progress drops 10 times, when the job
    disappears before 2 progress increases were seen, or on timeout.
    """
    dropsRemaining = 10
    recordCount = 500
    indexField = 'x'

    jobFilter = {
        "type": "index_construction",
        "info": {
            "db": self.dbName,
            "table": self.tableName,
            "index": indexField
        }
    }
    jobsQuery = self.r.db("rethinkdb").table("jobs").filter(jobFilter).coerce_to("ARRAY")

    # - generate some records

    utils.populateTable(
        conn=self.conn, table=self.table, records=recordCount,
        fieldName=indexField)

    # - create an index

    self.table.index_create(indexField).run(self.conn)

    # - watch the index_construction entry progress and then go away

    lastProgress = None
    increasesSeen = 0
    deadline = time.time() + self.timeout
    dropNotes = []
    while time.time() < deadline:
        jobs = jobsQuery.run(self.conn)
        self.assertTrue(
            len(jobs) in (0, 1),
            'Expected only one filtered job, got: %r' % jobs)

        if len(jobs) != 1:
            if lastProgress is not None:
                # we have seen an index_construction, and now it is gone
                if increasesSeen < 2:
                    self.fail(
                        'Did not see at least 2 value increases before index_construction entry dissapeared'
                    )
                break
        else:
            job = jobs[0]

            # - check content (any failure in here is deliberately ignored
            #   by the except clause below)

            try:
                self.assertTrue('id' in job, job)
                self.assertEqual(job["id"][0], "index_construction")
                self.assertTrue('servers' in job, job)
                self.assertEqual(len(job["servers"]), 1)

                self.assertTrue(
                    'info' in job and isinstance(job['info'], dict), job)
                info = job['info']
                self.assertTrue(
                    'db' in info and info['db'] == self.dbName, job)
                self.assertTrue(
                    'table' in info and info['table'] == self.tableName, job)
                self.assertTrue(
                    'index' in info and info['index'] == indexField, job)
                self.assertTrue(
                    'progress' in info and info['progress'] >= 0, job)

                self.assertTrue(
                    'duration_sec' in job and 0 < job["duration_sec"] <= self.timeout,
                    job)
            except Exception:
                pass

            # - ensure we are making progress

            progress = job["info"]["progress"]
            if lastProgress is None or progress > lastProgress:
                lastProgress = progress
                increasesSeen += 1
            elif progress < lastProgress:
                dropsRemaining -= 1
                dropNotes.append('from %f to %f' % (lastProgress, progress))
                lastProgress = progress
                if dropsRemaining < 1:
                    self.fail('progress value dropped too many times: %s' %
                              (', '.join(dropNotes)))

        time.sleep(.01)
    else:
        # deadline reached without the job having come and gone
        timeoutTemplate = (
            'Timed out after %.1f seconds waiting for index_construction appear'
            if lastProgress is None else
            'Timed out after %.1f seconds waiting for index_construction to be complete'
        )
        self.fail(timeoutTemplate % self.timeout)