def test_workload(self):
    connServer = self.cluster[0]

    utils.print_with_time("Inserting data")
    utils.populateTable(self.conn, self.table, records=10000)

    utils.print_with_time("Starting workload")
    with workload_runner.SplitOrContinuousWorkload(opts, connServer, db_name=self.dbName, table_name=self.tableName) as workload:

        utils.print_with_time("Running workload before")
        workload.run_before()
        self.checkCluster()

        for currentShards in opts["sequence"]:

            utils.print_with_time("Sharding table to %d shards" % currentShards)
            self.table.reconfigure(shards=currentShards, replicas=opts["num-nodes"]).run(self.conn)
            self.table.wait(wait_for='all_replicas_ready').run(self.conn)
            self.checkCluster()

        utils.print_with_time("Running workload after")
        workload.run_after()
        self.checkCluster()

    utils.print_with_time("Workload complete")
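# A minimal standalone sketch of the reshard-and-wait step above, assuming the classic
# `import rethinkdb as r` driver interface and a locally reachable server; the host,
# port, db/table names, and shard sequence below are illustrative, not part of the suite.
def example_reshard_and_wait(host='localhost', port=28015, db='test', table='stress',
                             shard_sequence=(1, 2, 4), replicas=2):
    import rethinkdb as r
    conn = r.connect(host=host, port=port)
    tbl = r.db(db).table(table)
    for shards in shard_sequence:
        # reconfigure() rebalances the table across `shards` shards; wait() blocks until
        # every replica reports ready, mirroring the loop in test_workload above.
        tbl.reconfigure(shards=shards, replicas=replicas).run(conn)
        tbl.wait(wait_for='all_replicas_ready').run(conn)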
def test_workload(self):
    alpha = self.getPrimaryForShard(0)
    beta = self.getReplicaForShard(0)

    workload_ports = workload_runner.RDBPorts(host=alpha.host, http_port=alpha.http_port, rdb_port=alpha.driver_port, db_name=self.dbName, table_name=self.tableName)
    with workload_runner.SplitOrContinuousWorkload(opts, workload_ports) as workload:

        utils.print_with_time('Workloads:\n%s' % pprint.pformat(workload.opts))
        utils.print_with_time("Running before workload")
        workload.run_before()
        utils.print_with_time("Before workload complete")
        self.checkCluster()
        workload.check()

        utils.print_with_time("Demoting primary")
        shardConfig = self.table.config()['shards'].run(self.conn)
        shardConfig[0]['primary_replica'] = beta.name
        self.table.config().update({'shards': shardConfig}).run(self.conn)
        self.table.wait(wait_for='all_replicas_ready').run(self.conn)
        self.checkCluster()

        utils.print_with_time("Running after workload")
        workload.run_after()
        self.checkCluster()
        utils.print_with_time("After workload complete")
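# Standalone sketch of the primary-demotion step above: rewrite the shard configuration
# so a chosen replica becomes 'primary_replica', then wait for the table to settle.
# The helper name and the single-shard assumption (index 0) are illustrative only.
def example_demote_primary(conn, table, new_primary_name):
    shard_config = table.config()['shards'].run(conn)
    shard_config[0]['primary_replica'] = new_primary_name
    table.config().update({'shards': shard_config}).run(conn)
    table.wait(wait_for='all_replicas_ready').run(conn)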
def test_kill_secondary(self):
    primary = self.getPrimaryForShard(0)
    secondary = self.getReplicaForShard(0)

    conn = self.r.connect(host=primary.host, port=primary.driver_port)

    issues = list(self.r.db('rethinkdb').table('current_issues').run(conn))
    self.assertEqual(issues, [], 'The issues list was not empty: %r' % issues)

    workload_ports = workload_runner.RDBPorts(host=primary.host, http_port=primary.http_port, rdb_port=primary.driver_port, db_name=self.dbName, table_name=self.tableName)
    with workload_runner.SplitOrContinuousWorkload(opts, workload_ports) as workload:

        print_with_time("Starting workload")
        workload.run_before()
        self.cluster.check()
        issues = list(self.r.db('rethinkdb').table('current_issues').run(conn))
        self.assertEqual(issues, [], 'The issues list was not empty: %r' % issues)

        print_with_time("Killing the secondary")
        secondary.kill()

        print_with_time("Checking that the table_availability issue shows up")
        deadline = time.time() + 5
        last_error = None
        while time.time() < deadline:
            try:
                issues = list(self.r.db('rethinkdb').table('current_issues').filter({'type': 'table_availability', 'info': {'db': self.dbName, 'table': self.tableName}}).run(conn))
                self.assertEqual(len(issues), 1, 'The server did not record the single issue for the killed secondary server:\n%s' % pformat(issues))
                issue = issues[0]
                self.assertEqual(issue['critical'], False)
                self.assertEqual(issue['info']['status']['ready_for_reads'], True)
                self.assertEqual(issue['info']['status']['ready_for_writes'], True)
                break
            except Exception as e:
                last_error = e
                time.sleep(.2)
        else:
            raise last_error

        print_with_time("Running after workload")
        workload.run_after()

    print_with_time("Done")
secondary_dc = http.add_datacenter()
http.move_server_to_datacenter(secondary.files.machine_name, secondary_dc)
ns = scenario_common.prepare_table_for_workload(opts, http, primary=primary_dc, affinities={primary_dc: 0, secondary_dc: 1})
http.set_namespace_ack_expectations(ns, {secondary_dc: 1})
http.wait_until_blueprint_satisfied(ns)
cluster.check()
http.check_no_issues()

workload_ports = scenario_common.get_workload_ports(opts, ns, [secondary])
with workload_runner.SplitOrContinuousWorkload(opts, opts["protocol"], workload_ports) as workload:
    workload.run_before()
    cluster.check()
    http.check_no_issues()

    print("Killing the primary...")
    primary.close()
    http.declare_machine_dead(primary.files.machine_name)
    http.move_namespace_to_datacenter(ns, secondary_dc)
    http.set_namespace_affinities(ns, {secondary_dc: 0})
    http.wait_until_blueprint_satisfied(ns)
    cluster.check()
    http.check_no_issues()

    workload.run_after()

http.check_no_issues()
cluster.check_and_stop()
for process in processes:
    process.wait_until_started_up()

print("Creating table...")
http = http_admin.ClusterAccess([("localhost", p.http_port) for p in processes])
primary_dc = http.add_datacenter()
secondary_dc = http.add_datacenter()
machines = list(http.machines.keys())
http.move_server_to_datacenter(machines[0], primary_dc)
http.move_server_to_datacenter(machines[1], secondary_dc)
ns = scenario_common.prepare_table_for_workload(http, primary=primary_dc)
http.wait_until_blueprint_satisfied(ns)
cluster.check()
http.check_no_issues()

workload_ports = scenario_common.get_workload_ports(ns, processes)
with workload_runner.SplitOrContinuousWorkload(opts, workload_ports) as workload:
    workload.run_before()
    cluster.check()
    http.check_no_issues()

    http.move_table_to_datacenter(ns, secondary_dc)
    http.wait_until_blueprint_satisfied(ns)
    rdb_workload_common.wait_for_table(host=workload_ports.host, port=workload_ports.rdb_port, table=workload_ports.table_name)
    cluster.check()
    http.check_no_issues()

    workload.run_after()

cluster.check_and_stop()
def test_failover(self):
    '''Run a workload while killing a server to cause a failover to a secondary'''

    # - setup
    primary = self.getPrimaryForShard(0)
    stable = self.getReplicaForShard(0)
    stableConn = self.r.connect(host=stable.host, port=stable.driver_port)

    workload_ports = workload_runner.RDBPorts(host=stable.host, http_port=stable.http_port, rdb_port=stable.driver_port, db_name=dbName, table_name=tableName)

    # - run test
    with workload_runner.SplitOrContinuousWorkload(opts, workload_ports) as workload:

        print_with_time("Starting workload before")
        workload.run_before()
        self.cluster.check()
        issues = list(self.r.db('rethinkdb').table('current_issues').run(stableConn))
        self.assertEqual(issues, [], 'The server recorded the following issues after the run_before:\n%s' % pformat(issues))

        print_with_time("Shutting down the primary")
        primary.close()

        print_with_time("Checking that the table_availability issue shows up")
        deadline = time.time() + 5
        last_error = None
        while time.time() < deadline:
            try:
                issues = list(self.r.db('rethinkdb').table('current_issues').filter({'type': 'table_availability', 'info': {'db': dbName, 'table': tableName}}).run(stableConn))
                self.assertEqual(len(issues), 1, 'The server did not record the single issue for the killed server:\n%s' % pformat(issues))
                break
            except Exception as e:
                last_error = e
                time.sleep(.2)
        else:
            raise last_error

        print_with_time("Waiting for the table to become available again")
        timeout = 30
        try:
            self.table.wait(wait_for='ready_for_writes', timeout=timeout).run(stableConn)
        except self.r.ReqlRuntimeError as e:
            raise AssertionError('Table did not become available after %d seconds.' % timeout)

        print_with_time("Running workload after")
        workload.run_after()

    print_with_time("Cleaning up")
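# test_kill_secondary and test_failover share a poll-until-deadline pattern: retry a
# check until it stops raising or the deadline expires, then re-raise the last failure.
# A generic sketch of that pattern; the helper name and defaults are illustrative,
# not part of the suite.
def example_retry_until(check, timeout=5, interval=0.2):
    '''Call `check()` repeatedly until it succeeds or `timeout` seconds elapse.'''
    import time
    deadline = time.time() + timeout
    last_error = None
    while time.time() < deadline:
        try:
            return check()
        except Exception as e:
            last_error = e
            time.sleep(interval)
    raise last_error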