def readAndCompare(keys, table, conn):
    '''Fetch all of the records for keys and compare their values.

    Every row returned by `table.get_all(*keys)` must have an `id` that is one
    of `keys` and a `val` equal to that `id`, and every key must come back.
    A `ReqlError` is retried until `errorToleranceSecs` have elapsed when
    `opts["tolerate_errors"]` is set; otherwise it propagates immediately.
    If the deadline passes, the last tolerated error is raised.

    `keys` itself is left untouched.
    '''

    if not keys:
        return

    startKey = keys[0]
    endKey = keys[-1]
    keyCount = len(keys)

    # - check that all records look correct

    deadline = time.time() + errorToleranceSecs
    lastError = None
    while time.time() < deadline:
        try:
            # Work on a fresh copy: a retry then re-checks every key, and the
            # caller's sequence is not consumed as a side effect.
            remaining = list(keys)
            for row in table.get_all(*keys).run(conn):
                assert row['id'] in remaining, 'Unexpected id in fetched result: %r' % row
                assert 'val' in row and row['id'] == row['val'], 'Val did not match id: %r' % row
                remaining.remove(row['id'])
            assert remaining == [], 'Database did not have all expected values, missing at least: %r' % remaining
            utils.print_with_time('\tVerified keys %r to %r... %d records' % (startKey, endKey, keyCount))
            break
        except conn._r.ReqlError as e:
            if opts["tolerate_errors"]:
                lastError = e
                time.sleep(.1)
            else:
                raise
    else:
        if lastError:
            raise lastError
def readAndCompare(keys, table, conn):
    '''Fetch all of the records for keys and compare their values.

    Every row returned by `table.get_all(*keys)` must have an `id` that is one
    of `keys` and a `val` equal to that `id`, and every key must come back.
    A `ReqlError` is retried until `errorToleranceSecs` have elapsed when
    `opts["tolerate_errors"]` is set; otherwise it propagates immediately.
    If the deadline passes, the last tolerated error is raised.

    `keys` itself is left untouched.
    '''

    if not keys:
        return

    startKey = keys[0]
    endKey = keys[-1]
    keyCount = len(keys)

    # - check that all records look correct

    deadline = time.time() + errorToleranceSecs
    lastError = None
    while time.time() < deadline:
        try:
            # Work on a fresh copy: a retry then re-checks every key, and the
            # caller's sequence is not consumed as a side effect.
            remaining = list(keys)
            for row in table.get_all(*keys).run(conn):
                assert row['id'] in remaining, 'Unexpected id in fetched result: %r' % row
                assert 'val' in row and row['id'] == row['val'], 'Val did not match id: %r' % row
                remaining.remove(row['id'])
            assert remaining == [], 'Database did not have all expected values, missing at least: %r' % remaining
            utils.print_with_time('\tVerified keys %r to %r... %d records' % (startKey, endKey, keyCount))
            break
        except conn._r.ReqlError as e:
            if opts["tolerate_errors"]:
                lastError = e
                time.sleep(.1)
            else:
                raise
    else:
        if lastError:
            raise lastError
def set_cache_size(size_mb):
    """Write `size_mb` into `cache_size_mb` of the `rethinkdb.server_config`
    table and assert that the update reported no errors."""
    utils.print_with_time("Setting cache size to %.2f MB." % size_mb)
    server_config = r.db("rethinkdb").table("server_config")
    new_setting = {"cache_size_mb": size_mb}
    res = server_config.update(new_setting).run(conn)
    assert res["errors"] == 0, res
def repair(name, repair_type, expect):
    """Runs an emergency repair of the given type on the given table, and
    expects the computed shard config to be "expect".

    The repair is first issued with `dry_run=True` (nothing may be applied),
    then for real; afterwards the table config is polled for up to 5 seconds
    until it matches `expect`.
    """
    utils.print_with_time("Repairing table '%s' with mode '%s'" % (name, repair_type))
    table = r.table(name)

    # First we run in `dry_run=True` mode, to check that the `dry_run` flag works
    dry_res = table.reconfigure(emergency_repair=repair_type, dry_run=True).run(conn)
    dry_change = dry_res["config_changes"][0]
    utils.print_with_time("New config, as expected:", dry_change["new_val"]["shards"])
    assert dry_res.get("repaired") == 0, dry_res

    # Make sure that the correct config was calculated, but that it wasn't actually applied
    assert eq_config(dry_change["new_val"]["shards"], expect), \
        "res=%s, expect=%s" % (dry_res, expect)
    old_config = table.config().run(conn)
    assert eq_config(dry_change["old_val"]["shards"], old_config["shards"]), \
        "res=%s, old_config=%s" % (dry_res, old_config)

    # Now we run again for real
    real_res = table.reconfigure(emergency_repair=repair_type).run(conn)
    assert real_res.get("repaired") == 1, real_res

    # Make sure that the correct config was calculated and also applied
    assert eq_config(real_res["config_changes"][0]["new_val"]["shards"], expect), \
        "res=%s, expect=%s" % (real_res, expect)

    deadline = time.time() + 5
    lastError = None
    while time.time() < deadline:
        try:
            new_config = table.config().run(conn)
            assert eq_config(new_config["shards"], expect), \
                "new_config=%s, expect=%s" % (new_config, expect)
        except Exception as e:
            lastError = e
            time.sleep(0.05)
        else:
            break
    else:
        raise (lastError or AssertionError('Should not get here without an error'))
def search_test_map(collection, search_engine):
    """Return the mean of `average_precision` over the collection's queries.

    Queries come from `collection.queries_translated` when `search_engine` is
    exactly a `BilingualEmbeddingSearchEngine`, otherwise from
    `collection.queries`. Queries with no entry in `collection.relevance` are
    skipped and do not count towards the mean. Returns 0.0 when no query
    could be evaluated (previously this divided by zero).
    """
    # document text -> document id
    doc_ids = {doc_text: doc_id for doc_id, doc_text in collection.documents.items()}

    if type(search_engine) is BilingualEmbeddingSearchEngine:
        queries = collection.queries_translated
    else:
        queries = collection.queries

    total_average_precision = 0
    evaluated = 0
    for query_id, query in queries.items():
        if query_id not in collection.relevance:
            continue
        expected = collection.relevance[query_id]
        precision = query_result(search_engine, query_id, query, expected, doc_ids, 10,
                                 verbose=False, metric=average_precision)
        print_with_time("{} {}".format(query_id, precision))
        total_average_precision += precision
        evaluated += 1

    if evaluated == 0:
        return 0.0
    return total_average_precision / evaluated
def extract_stock(code):
    """Fetch the real-time header for stock `code` and upsert it into `stock`.

    Nothing is written when the fetch returns None or when an expected field
    is missing from the response (the URL and traceback are logged instead).
    """
    # table column -> key in the fetched JSON object
    fields = {
        'code': 'stockcode',
        'name': 'stockname',
        'fieldcode': 'fieldcode',
        'fieldname': 'fieldname',
        'fieldjp': 'fieldjp',
        'syl': 'syl',
        'xj': 'xj',
    }

    url = "http://stockpage.10jqka.com.cn/spService/%s/Header/realHeader"%(code)
    jo = utils.fetch_json(url)
    if jo is None:
        return

    columns = list(fields)
    try:
        values = [jo[fields[column]] or '' for column in columns]
    except (KeyError, TypeError):
        utils.print_with_time("url=%s"%(url))
        traceback.print_exc()
        return

    # Column names come from the literal dict above; the fetched values are
    # passed as query parameters rather than spliced into the SQL text.
    # NOTE(review): assumes the driver behind mysqllib uses the DB-API
    # 'format' paramstyle (%s), as MySQL drivers commonly do - confirm.
    sql = "INSERT INTO stock (%s) VALUES (%s) ON DUPLICATE KEY UPDATE %s" % (
        ', '.join(columns),
        ', '.join(['%s'] * len(columns)),
        ', '.join([column + '=%s' for column in columns]))

    cnx = mysqllib.get_connection()
    try:
        cursor = cnx.cursor()
        try:
            # values are needed twice: once for VALUES, once for UPDATE
            cursor.execute(sql, values + values)
            cnx.commit()
        finally:
            cursor.close()
    finally:
        cnx.close()
def main():
    """Fetch the new-stock application list and upsert every row into
    `new_stock` (columns `code`, `name`, `date`).

    Each row is committed on its own, and the cursor and connection are
    closed even if a row fails.
    """
    url = "http://www.jisilu.cn/jisiludata/newstock.php?qtype=apply"
    jo = utils.fetch_json(url)

    # Fetched values are passed as query parameters instead of being quoted by
    # hand, so the string-concatenation guard of the old code is not needed.
    # NOTE(review): assumes the driver behind mysqllib uses the DB-API
    # 'format' paramstyle (%s) - confirm.
    sql = ("INSERT INTO new_stock (`code`, `name`, `date`) VALUES (%s, %s, %s) "
           "ON DUPLICATE KEY UPDATE `code`=%s, `name`=%s, `date`=%s")

    cnx = mysqllib.get_connection()
    try:
        cursor = cnx.cursor()
        try:
            for row in jo['rows']:
                cell = row['cell']
                name = row['id']
                sid = cell['stock_cd']
                apply_dt = transform_date(cell['apply_dt'])
                utils.print_with_time("%s %s %s" % (sid, name, apply_dt))

                vals = [sid, name, apply_dt]
                # values are needed twice: once for VALUES, once for UPDATE
                cursor.execute(sql, vals + vals)
                cnx.commit()
        finally:
            cursor.close()
    finally:
        cnx.close()
def extract_stock2(code):
    """Fetch the `last.js` quote payload for stock `code` and upsert the
    `sjl`, `zsz` and `ltsz` columns of its `stock` row.

    Items '3541450' and '3475914' are divided by 100,000,000 and formatted
    with two decimals before being stored. Nothing is written when the
    payload has no items or an expected item is missing or malformed.
    """
    # table column -> item id in the fetched payload
    fields = {
        'sjl': '592920',
        'zsz': '3541450',
        'ltsz': '3475914',
    }

    url = "http://d.10jqka.com.cn/v2/realhead/hs_%s/last.js"%(code)
    data = utils.fetch_url(url)
    # strip the `quotebridge...(` ... `)` wrapper to get at the JSON inside
    data = re.sub(r'quotebridge.*?\((.*)\)', r'\1', data)
    jo = json.loads(data)['items']
    if jo is None:
        return

    columns = list(fields)
    try:
        jo['3541450'] = "%.2f"%(float(jo['3541450']) / 100000000)
        jo['3475914'] = "%.2f"%(float(jo['3475914']) / 100000000)
        values = [jo[fields[column]] or '' for column in columns]
    except (KeyError, TypeError, ValueError):
        utils.print_with_time("url=%s"%(url))
        traceback.print_exc()
        return
    columns.append('code')
    values.append("%s"%(code))

    # Column names are literals; fetched values go in as query parameters.
    # NOTE(review): assumes the driver behind mysqllib uses the DB-API
    # 'format' paramstyle (%s) - confirm.
    sql = "INSERT INTO stock (%s) VALUES (%s) ON DUPLICATE KEY UPDATE %s" % (
        ', '.join(columns),
        ', '.join(['%s'] * len(columns)),
        ', '.join([column + '=%s' for column in columns]))

    cnx = mysqllib.get_connection()
    try:
        cursor = cnx.cursor()
        try:
            # values are needed twice: once for VALUES, once for UPDATE
            cursor.execute(sql, values + values)
            cnx.commit()
        finally:
            cursor.close()
    finally:
        cnx.close()
def main():
    """Command-line entry point of the stock extractor.

    With `-s CODE...` the given codes are refreshed; otherwise the codes are
    read from the `mystock` table (restricted to `type` = `-m` when it is
    non-zero). `-a` switches from `extract_stock` to `extract_code`. The
    function sleeps `settings.sleepTime` after every code.
    """
    parser = argparse.ArgumentParser(description='stock extractor')
    parser.add_argument('-a', '--all', required=False, action='store_true', help='refresh all info')
    parser.add_argument('-s', '--stock_codes', type=str, nargs='*', help='stock codees')
    parser.add_argument('-m', '--mystock', required=False, type=int, default=0, help='mystock type')
    res = parser.parse_args()

    def refresh(code):
        # One refresh step, shared by both code sources.
        if res.all:
            extract_code(code)
        else:
            extract_stock(code)
        utils.print_with_time("extract_stock %s"%(code))
        time.sleep(settings.sleepTime)

    if res.stock_codes is not None:
        for code in res.stock_codes:
            refresh(code)
    elif res.mystock is not None:
        cnx = mysqllib.get_connection()
        try:
            cursor = cnx.cursor()
            sql = "SELECT code from mystock"
            if res.mystock != 0:
                sql = sql + " where `type`=%d"%(res.mystock)
            cursor.execute(sql)
            for (code,) in cursor:
                refresh(code)
            cursor.close()
        finally:
            cnx.close()
    else:
        # NOTE(review): `--mystock` defaults to 0, so it is never None and
        # this branch cannot currently be reached.
        parser.print_help()
def _spin_continuous_workloads(self, seconds): assert self.opts["workload-during"] if seconds != 0: utils.print_with_time("Letting %s run for %d seconds..." % (" and ".join(repr(x) for x in self.opts["workload-during"]), seconds)) for i in xrange(seconds): time.sleep(1) self.check()
def check_error_stats(query, reads, writes):
    """Run a query that is expected to raise `r.ReqlError`, log a message if
    it does not, then compare the stats against `reads` / `writes`."""
    try:
        query.run()
        # only reached when the query did not raise
        printed_query = r.errors.QueryPrinter(query).print_query()
        utils.print_with_time("Failed to error in query (%s)" % printed_query)
    except r.ReqlError:
        # the error is the expected outcome
        pass
    check_stats_internal(query, reads, writes)
def transition_cluster(servers, state):
    """Start or stop servers so that the cluster matches `state`.

    `servers` holds a server object under 'primary' and lists of server
    objects under 'replicas' and 'nvrs'; `state` has the same layout with the
    strings 'up' / 'down' in place of the servers. 'nvrs' may be missing or
    None in either mapping and is then treated as empty.
    """
    assert all((x in servers and x in state) for x in ['primary', 'replicas'])

    # Normalise the optional non-voting replica lists once, so every use below
    # agrees on how a missing/None entry is handled.
    server_nvrs = (servers['nvrs'] if 'nvrs' in servers else None) or []
    state_nvrs = (state['nvrs'] if 'nvrs' in state else None) or []

    assert len(servers['replicas']) == len(state['replicas'])
    assert len(server_nvrs) == len(state_nvrs)
    assert all(x in ['up', 'down'] for x in [state['primary']] + state['replicas'] + state_nvrs)

    message_parts = []
    if state['primary'] == 'up':
        message_parts.append('primary')
    if 'up' in state['replicas']:
        count = state['replicas'].count('up')
        message_parts.append('%d replica' % count + ('s' if count > 1 else ''))
    if 'up' in state_nvrs:
        count = state_nvrs.count('up')
        message_parts.append('%d nonvoting replica' % count + ('s' if count > 1 else ''))
    if len(message_parts) == 0:
        message_parts.append('no replicas')
    utils.print_with_time("Transitioning to %s up" % ' and '.join(message_parts))

    def up_down_server(server, new_state):
        # Only act when the server is not already in the requested state.
        if new_state == 'up' and not server.running:
            server.start()
        elif new_state == 'down' and server.running:
            server.stop()

    up_down_server(servers['primary'], state['primary'])
    for server, server_state in zip(servers['replicas'], state['replicas']):
        up_down_server(server, server_state)
    for server, server_state in zip(server_nvrs, state_nvrs):
        up_down_server(server, server_state)
def exploit_and_explore(connect_str_or_path, population_id):
    """Run one exploit/explore step over a population.

    Requires every task to report the same `intervals_trained`. Task ids are
    taken in the order returned by `get_task_ids_and_scores`; for each id in
    the last 20% a random id from the first 20% is picked, both checkpoints
    are loaded, `bot_trainer.exploit_and_explore(top_trainer, ...)` is run and
    the bottom checkpoint is saved again. Every task is then marked as not
    ready for exploitation and given the same fresh shuffling seed; bottom
    tasks also have their score reset to None.

    Raises:
        Exception: if the tasks disagree on `intervals_trained`.
    """
    intervals_trained_col = get_col_from_populations(
        connect_str_or_path, USE_SQLITE, population_id, "intervals_trained")
    intervals_trained_col = np.array(intervals_trained_col)
    if not np.all(intervals_trained_col == intervals_trained_col[0]):
        msg = """The exploiter seems to be exploiting before all the models have finished training. Check for bad race conditions with respect to the database."""
        raise Exception(msg)

    # Sorted by scores, desc
    task_ids, scores = get_task_ids_and_scores(connect_str_or_path, USE_SQLITE, population_id)
    print_with_time("Exploiting interval %s. Best score: %.4f" %
                    (intervals_trained_col[0] - 1, max(scores)))
    seed_for_shuffling = np.random.randint(10 ** 5)

    fraction = 0.20
    cutoff = int(np.ceil(fraction * len(task_ids)))
    top_ids = task_ids[:cutoff]
    bottom_ids = task_ids[len(task_ids) - cutoff:]
    nonbottom_ids = task_ids[:len(task_ids) - cutoff]

    for bottom_id in bottom_ids:
        top_id = np.random.choice(top_ids)

        model = dnn_model()
        optimizer = get_optimizer()
        top_trainer = Trainer(model=model, optimizer=optimizer)
        top_checkpoint_path = (checkpoint_str % (population_id, top_id))
        top_trainer.load_checkpoint(top_checkpoint_path)

        model = dnn_model()
        optimizer = get_optimizer()
        bot_trainer = Trainer(model=model, optimizer=optimizer)
        bot_checkpoint_path = (checkpoint_str % (population_id, bottom_id))
        # TODO BUG (marker carried over from the original; it gives no detail)
        bot_trainer.load_checkpoint(bot_checkpoint_path)

        bot_trainer.exploit_and_explore(top_trainer, HYPERPARAM_NAMES)
        bot_trainer.save_checkpoint(bot_checkpoint_path)

        key_value_pairs = dict(
            ready_for_exploitation=ready_for_exploitation_False,
            score=None,
            seed_for_shuffling=seed_for_shuffling)
        update_task(connect_str_or_path, USE_SQLITE, population_id,
                    bottom_id, key_value_pairs)

        # The original cleanup deleted `trainer`, a name never bound in this
        # function; drop the references that actually exist here instead.
        del top_trainer, bot_trainer, model, optimizer

    for nonbottom_id in nonbottom_ids:
        key_value_pairs = dict(
            ready_for_exploitation=ready_for_exploitation_False,
            seed_for_shuffling=seed_for_shuffling)
        update_task(connect_str_or_path, USE_SQLITE, population_id,
                    nonbottom_id, key_value_pairs)

    tf.keras.backend.clear_session()
def revive_random_servers(cluster):
    """Start a random, non-empty selection of the servers in `cluster` that
    are not running, without waiting for them to become ready."""
    dead_procs = [proc for proc in cluster if not proc.running]
    revive_count = random.randint(1, len(dead_procs))
    chosen_procs = random.sample(dead_procs, revive_count)
    remaining = len(cluster) - len(dead_procs) + len(chosen_procs) + 1
    utils.print_with_time("\nReviving %d servers, %d remain" % (len(chosen_procs), remaining))
    for proc in chosen_procs:
        proc.start(wait_until_ready=False)
def check_table_half(name, wait_for="ready_for_writes"):
    """Checks that approximately half of the data in the table has been lost.

    "Approximately half" means the row count lies strictly between 25% and
    75% of `docs_per_table`.
    """
    utils.print_with_time("Checking contents of table '%s' (expect half erased)" % name)
    wait_for_table(name, wait_for)
    count = r.table(name).count().run(conn)
    lower_bound = 0.25 * docs_per_table
    upper_bound = 0.75 * docs_per_table
    assert lower_bound < count < upper_bound, \
        "Found %d rows, expected about %d" % (count, 0.50 * docs_per_table)
def kill_random_servers(cluster):
    """Kill a random, non-empty selection of the running servers in `cluster`."""
    alive_procs = [proc for proc in cluster if proc.running]
    kill_count = random.randint(1, len(alive_procs))
    chosen_procs = random.sample(alive_procs, kill_count)
    remaining = len(alive_procs) - len(chosen_procs) + 1
    utils.print_with_time("\nKilling %d servers, %d remain" % (len(chosen_procs), remaining))
    for proc in chosen_procs:
        proc.kill()
def wait_for_table(name, wait_for="ready_for_writes"):
    """Blocks until the given table reaches the `wait_for` readiness level
    (ready for writes by default), with a 30 second timeout.

    On `r.ReqlRuntimeError` the table status is printed and the error is
    re-raised.
    """
    try:
        res = r.table(name).wait(wait_for=wait_for, timeout=30).run(conn)
        assert res["ready"] == 1, res
    except r.ReqlRuntimeError:
        utils.print_with_time(pprint.pformat(r.table(name).status().run(conn)))
        raise
def bad_repair(name, repair_type, msg):
    """Runs an emergency repair on the given table, and expects it to fail.
    `msg` should be a substring of the error message.

    Raises:
        AssertionError: if the error message does not contain `msg`, or if
            the repair succeeds although it was expected to fail.
    """
    utils.print_with_time("Repairing table '%s' with mode '%s' (this should fail)" % (name, repair_type))
    try:
        r.table(name).reconfigure(emergency_repair=repair_type).run(conn)
    except r.ReqlRuntimeError as e:
        utils.print_with_time("As expected, it failed: %s" % str(e).split(" in:")[0])
        assert msg in str(e), e
    else:
        raise AssertionError(
            "Emergency repair of table '%s' with mode '%s' unexpectedly succeeded" % (name, repair_type))
def make_table(name, shards, conn):
    """Create a table named "name" with the given shard configuration, and
    populate it with some data."""
    utils.print_with_time("Preparing table '%s'" % name)

    table_config = {"name": name, "db": dbName, "shards": shards}
    res = r.db("rethinkdb").table("table_config").insert(table_config).run(conn)
    assert res.get("inserted") == 1, res

    res = r.table(name).wait(wait_for="all_replicas_ready").run(conn)
    assert res.get("ready") == 1, res

    res = utils.populateTable(
        conn=conn,
        table=name,
        db=dbName,
        records=docs_per_table,
        fieldName='number',
    )
    assert res.get("inserted") == docs_per_table
def create_tables(conn):
    """Create database `dbName` if it is missing, then create and populate
    every table in `tables` with the matching entry of `table_counts`."""
    assert len(tables) == len(table_counts)

    if dbName not in r.db_list().run(conn):
        r.db_create(dbName).run(conn)

    utils.print_with_time("Creating %d tables" % len(tables))
    for table_name, row_count in zip(tables, table_counts):
        # Use the database that was just checked/created (`dbName`); the old
        # code addressed it through a different name here (`db`).
        r.db(dbName).table_create(table_name).run(conn)
        populate_table(conn, table_name, row_count)
def tran(x_train, y_train, x_test, y_test, epochs, batch_size, task_id,
         population_id, ready_for_exploitation_False,
         ready_for_exploitation_True, active_False, active_True,
         connect_str_or_path, intervals_trained, seed_for_shuffling):
    """Train one population member for one interval and record the result.

    Builds a `Trainer`, restores its checkpoint if the file exists, trains,
    evaluates, saves the checkpoint and updates the task row
    (`intervals_trained` + 1, ready for exploitation, inactive, score).
    When training raises `LossIsNaN` the score is stored as -1 and the model
    is neither evaluated nor saved. On `KeyboardInterrupt` only `active` is
    reset. The session and the trainer are released in every case.
    """
    # Train
    print(os.getpid())
    optimizer = get_optimizer()
    model = dnn_model()
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        x_train=x_train,
        y_train=y_train,
        x_test=x_test,
        y_test=y_test,
        epochs=epochs,
        batch_size=batch_size,
        task_id=task_id)
    checkpoint_path = (checkpoint_str % (population_id, task_id))
    if os.path.isfile(checkpoint_path):
        trainer.load_checkpoint(checkpoint_path)
    interval_is_odd = intervals_trained % 2 == 1
    score = None
    try:
        try:
            trainer.train(interval_is_odd, seed_for_shuffling)
            time.sleep(1)
        except LossIsNaN:
            print_with_time("Setting score to -1.")
            score = -1
        if score != -1:
            score = float(trainer.eval(intervals_trained))
            trainer.save_checkpoint(checkpoint_path)
        key_value_pairs = dict(
            intervals_trained=intervals_trained + 1,
            ready_for_exploitation=ready_for_exploitation_True,
            active=active_False,
            score=score)
        update_task(connect_str_or_path, USE_SQLITE, population_id, task_id,
                    key_value_pairs)
    except KeyboardInterrupt:
        # Don't save work.
        key_value_pairs = dict(active=active_False)
        update_task(connect_str_or_path, USE_SQLITE, population_id, task_id,
                    key_value_pairs)
    finally:
        # Single cleanup path: previously duplicated in both branches above
        # and skipped entirely when any other exception escaped.
        sess.close()
        del trainer.model
        del trainer
        tf.keras.backend.clear_session()
def main():
    """Walk quote pages 1..56 and run `extractor.extract_code` on every
    stock code listed on them, sleeping `settings.sleepTime` after each page
    fetch. Pages for which the fetch returns None are skipped."""
    for page in range(1, 57):
        url = "http://q.10jqka.com.cn/interface/stock/fl/zdf/desc/%d/hsa/quote" % (
            page)
        utils.print_with_time(url)
        jo = utils.fetch_json(url)
        time.sleep(settings.sleepTime)
        if jo is None:
            # nothing usable came back for this page; move on to the next one
            utils.print_with_time("no data for %s, skipping" % (url))
            continue
        for st in jo['data']:
            extractor.extract_code(st['stockcode'])
def get_saved_announcements(self):
    """Load the previously saved announcements from
    `self.announcements_filepath`.

    Returns the parsed JSON content, or an empty dict when the file does not
    exist or cannot be read or decoded.
    """
    try:
        with open(self.announcements_filepath, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print_with_time("No previously saved announcements are found.")
        return {}
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        # A bare `except:` here would also swallow KeyboardInterrupt and
        # SystemExit.
        print_with_time(
            "File that stores previously saved announcements is corrupt.")
        return {}
def replace_rows(host, port, ready_event, start_event):
    """Worker: update rows 0 .. num_rows // 2 - 1 one by one until the
    connection is closed underneath it.

    Sets `ready_event` once connected and waits for `start_event` before
    starting. Being interrupted by a closed or broken connection is the
    expected outcome; any other error, or finishing without interruption, is
    reported as an ERROR line.
    """
    conn = r.connect(host, port, db=dbName)
    ready_event.set()
    start_event.wait()
    try:
        for i in xrange(num_rows // 2):
            r.db(dbName).table(tableName).get(i).update({'data': r.random(1, 100)}, non_atomic=True).run(conn)
    except Exception as ex:
        # `.message` is not available on every exception (Python 3 removed it
        # from the base class), so fall back to the string form.
        message = getattr(ex, 'message', None) or str(ex)
        if message not in ("Connection is closed.", "Connection is broken."):
            utils.print_with_time("ERROR: Expected to be interrupted by the connection closing, actual error: %s" % message)
        return
    utils.print_with_time("ERROR: Was not interrupted while replacing entries.")
def delete_rows(host, port, ready_event, start_event):
    """Worker: delete rows num_rows // 2 .. num_rows - 1 one by one until the
    connection is closed underneath it.

    Sets `ready_event` once connected and waits for `start_event` before
    starting. Being interrupted by a closed or broken connection is the
    expected outcome; any other error, or finishing without interruption, is
    reported as an ERROR line.
    """
    conn = r.connect(host, port, db=dbName)
    ready_event.set()
    start_event.wait()
    try:
        for i in xrange(num_rows // 2, num_rows):
            r.db(dbName).table(tableName).get(i).delete().run(conn)
    except Exception as ex:
        # `.message` is not available on every exception (Python 3 removed it
        # from the base class), so fall back to the string form.
        message = getattr(ex, 'message', None) or str(ex)
        if message not in ("Connection is closed.", "Connection is broken."):
            utils.print_with_time("ERROR: Expected to be interrupted by the connection closing, actual error: %s" % message)
        return
    utils.print_with_time("ERROR: Was not interrupted while deleting entries.")
def check_stats_internal(query, reads, writes):
    """Compare the read/write counters of `cluster[0]` against the values
    remembered from the previous call and log a message for each delta that
    differs from `reads` / `writes`. The current counters are then stored on
    the function object for the next call."""
    stats = get_stats(cluster[0])
    delta_reads = stats['reads'] - check_stats_internal.last_reads
    delta_writes = stats['writes'] - check_stats_internal.last_writes

    if reads != delta_reads:
        printed_query = r.errors.QueryPrinter(query).print_query()
        utils.print_with_time('Error in query %s: Expected %d reads but got %d' %
                              (printed_query, reads, delta_reads))
    if writes != delta_writes:
        printed_query = r.errors.QueryPrinter(query).print_query()
        utils.print_with_time('Error in query %s: Expected %d writes but got %d' %
                              (printed_query, writes, delta_writes))

    check_stats_internal.last_reads = stats['reads']
    check_stats_internal.last_writes = stats['writes']
def start(self):
    """Launch `self.command_line` through the shell with the port settings
    added to a copy of the environment, mark the workload as running and
    call `self.check()`. The child calls `os.setpgrp` before exec."""
    assert not self.running
    utils.print_with_time("Starting workload %r..." % self.command_line)

    # Set up environment
    workload_environ = dict(os.environ)
    self.ports.add_to_environ(workload_environ)

    self.proc = subprocess.Popen(
        self.command_line,
        shell=True,
        env=workload_environ,
        preexec_fn=os.setpgrp,
    )
    self.running = True

    self.check()
def test_workload(self):
    """Run the configured workload against the first server of the cluster,
    or against a freshly started proxy when `opts["use-proxy"]` is set."""
    workload = opts["workload"]
    workloadServer = self.cluster[0]

    # -- add a proxy node if called for
    if opts["use-proxy"]:
        utils.print_with_time('Using proxy')
        workloadServer = driver.ProxyProcess(
            self.cluster,
            console_output='proxy-output',
            command_prefix=command_prefix,
            extra_options=serve_options,
        )
        self.cluster.wait_until_ready()

    # -- run workload
    workload_runner.run(
        workload,
        workloadServer,
        opts["timeout"],
        db_name=self.dbName,
        table_name=self.tableName,
    )
    utils.print_with_time("Ended workload: %s" % opts["workload"])
def start(self):
    """Launch `self.command_line` through the shell with the port settings
    added to a copy of the environment, mark the workload as running and
    call `self.check()`. The child calls `os.setpgid(0, 0)` before exec."""
    assert not self.running
    utils.print_with_time("Starting workload %r..." % self.command_line)

    # Set up environment
    workload_environ = dict(os.environ)
    self.ports.add_to_environ(workload_environ)

    self.proc = subprocess.Popen(
        self.command_line,
        shell=True,
        env=workload_environ,
        preexec_fn=lambda: os.setpgid(0, 0),
    )
    self.running = True

    self.check()
def start(self):
    """Poll the website forever: fetch the current announcements, process
    them when the fetch succeeded, log the outcome and sleep 300 seconds."""
    while True:
        # get_current_announcements() returns a (success, announcements) pair;
        # `success` is true when the fetch worked.
        success, announcements = self.get_current_announcements()
        self.current_announcements = announcements
        if not success:
            print_with_time(
                "Unable to fetch announcements from the website.")
        else:
            self.update_announcements_and_notify()
            print_with_time("Fetched announcements from the website.")
        time.sleep(300)
def test_workload(self):
    """Run a continuous workload against the primary while killing a server
    that does not serve the table, checking the cluster before the kill,
    right after it, and after 10 more seconds of workload."""
    primary = self.getPrimaryForShard(0)
    # servers hosting shard 0; looked up once instead of once per candidate
    servingServers = self.getReplicasForShard(0) + [primary]

    idleServer = None
    for server in self.cluster:
        if server not in servingServers:
            idleServer = server
            break
    else:
        raise Exception(
            'Could not find a server that was not serving this table')
    assert primary != idleServer

    with workload_runner.ContinuousWorkload(
            opts["workload"],
            primary,
            db_name=self.dbName,
            table_name=self.tableName) as workload:
        utils.print_with_time(
            "Starting workload and running for 10 seconds")
        workload.start()
        time.sleep(10)
        # checkCluster must be called; the bare attribute access was a no-op
        self.checkCluster()

        utils.print_with_time("Killing the idle server")
        idleServer.kill()  # this removes it from the cluster
        self.checkCluster()

        utils.print_with_time("Running workload for 10 more seconds")
        time.sleep(10)
        self.checkCluster()

    utils.print_with_time("Done with kill_access_point")
def test_workload(self):
    """Run a continuous workload against the primary while killing a server
    that does not serve the table, checking the cluster before the kill,
    right after it, and after 10 more seconds of workload."""
    primary = self.getPrimaryForShard(0)
    # servers hosting shard 0; looked up once instead of once per candidate
    servingServers = self.getReplicasForShard(0) + [primary]

    idleServer = None
    for server in self.cluster:
        if server not in servingServers:
            idleServer = server
            break
    else:
        raise Exception('Could not find a server that was not serving this table')
    assert primary != idleServer

    with workload_runner.ContinuousWorkload(opts["workload"], primary, db_name=self.dbName, table_name=self.tableName) as workload:
        utils.print_with_time("Starting workload and running for 10 seconds")
        workload.start()
        time.sleep(10)
        # checkCluster must be called; the bare attribute access was a no-op
        self.checkCluster()

        utils.print_with_time("Killing the idle server")
        idleServer.kill()  # this removes it from the cluster
        self.checkCluster()

        utils.print_with_time("Running workload for 10 more seconds")
        time.sleep(10)
        self.checkCluster()

    utils.print_with_time("Done with kill_access_point")
def create_index(params, data_frame, content_index):
    """Embed the rows of `data_frame` and add them to the index
    `content_index`, logging how long each stage took.

    Reads `use_model`, `stop_words` and `default_batch_size` from `params`.
    Exceptions from any stage propagate to the caller unchanged.
    """
    es = get_es_instance()
    use_model = params.get('use_model')
    stop_words = params.get('stop_words')
    default_batch_size = params.get('default_batch_size')

    start_time = time.time()
    embed_func = hub.Module(use_model)
    end_time = time.time()
    print_with_time('Load the module: {}'.format(end_time - start_time))

    start_time = time.time()
    sentences = tf.compat.v1.placeholder(dtype=tf.string, shape=[None])
    embedding = embed_func(sentences)
    end_time = time.time()
    print_with_time('Init sentences embedding: {}'.format(end_time - start_time))

    start_time = time.time()
    content_array = data_frame.to_numpy()
    end_time = time.time()
    print_with_time('Read Data Time: {}'.format(end_time - start_time))

    start_time = time.time()
    add_to_es_index(es, embedding, default_batch_size, sentences,
                    content_array, stop_words, content_index)
    end_time = time.time()
    print_with_time('Add to ES Index Time: {}'.format(end_time - start_time))
def test_kill_secondary(self):
    """Kill the secondary of shard 0 between the two halves of a workload
    and check that exactly one non-critical `table_availability` issue is
    reported while the table stays ready for reads and writes."""
    primary = self.getPrimaryForShard(0)
    secondary = self.getReplicaForShard(0)

    conn = self.r.connect(host=primary.host, port=primary.driver_port)

    def assertNoIssues():
        # every issue except memory errors counts as unexpected
        found = list(self.r.db('rethinkdb').table('current_issues').filter(self.r.row["type"] != "memory_error").run(self.conn))
        self.assertEqual(found, [], 'The issues list was not empty:\n%r' % utils.RePrint.pformat(found))

    assertNoIssues()

    workload_ports = workload_runner.RDBPorts(
        host=primary.host,
        http_port=primary.http_port,
        rdb_port=primary.driver_port,
        db_name=self.dbName,
        table_name=self.tableName,
    )
    with workload_runner.SplitOrContinuousWorkload(opts, workload_ports) as workload:

        print_with_time("Starting workload")
        workload.run_before()
        self.cluster.check()
        assertNoIssues()

        print_with_time("Killing the secondary")
        secondary.kill()

        print_with_time("Checking that the table_availability issue shows up")
        availability_filter = {'type':'table_availability', 'info':{'db':self.dbName, 'table':self.tableName}}
        deadline = time.time() + 5
        last_error = None
        while time.time() < deadline:
            try:
                issues = list(self.r.db('rethinkdb').table('current_issues').filter(availability_filter).run(conn))
                self.assertEqual(len(issues), 1, 'The server did not record the single issue for the killed secondary server:\n%s' % pformat(issues))

                issue = issues[0]
                status = issue['info']['status']
                self.assertEqual(issue['critical'], False)
                self.assertEqual(status['ready_for_reads'], True)
                self.assertEqual(issue['info']['status']['ready_for_writes'], True)
            except Exception as e:
                last_error = e
                time.sleep(.2)
            else:
                break
        else:
            raise last_error

        print_with_time("Running after workload")
        workload.run_after()

    print_with_time("Done")
def stop(self):
    """Send SIGINT to the workload's process group and wait up to 10 seconds
    for it to exit.

    Raises:
        RuntimeError: if the process exits with a code other than 0 or
            -SIGINT, or does not exit within the grace period.
    """
    self.check()
    utils.print_with_time("Stopping %r..." % self.command_line)
    os.killpg(self.proc.pid, signal.SIGINT)

    shutdown_grace_period = 10  # seconds
    end_time = time.time() + shutdown_grace_period
    while time.time() < end_time:
        result = self.proc.poll()
        if result is None:
            # still running; look again in a second
            time.sleep(1)
            continue
        if result == 0 or result == -signal.SIGINT:
            utils.print_with_time("OK")
            self.running = False
            return
        self.running = False
        raise RuntimeError("workload '%s' failed when interrupted with error code %d" % (self.command_line, result))
    raise RuntimeError("workload '%s' failed to terminate within %d seconds of SIGINT" % (self.command_line, shutdown_grace_period))
def test_workload(self):
    """Run `workload1`, restart the server, wait for all replicas of the
    database to be ready, then run `workload2` against the same server."""
    server = self.cluster[0]

    def runWorkload(option_name):
        workload_runner.run(
            opts[option_name],
            server,
            opts["timeout"],
            db_name=self.dbName,
            table_name=self.tableName,
        )

    utils.print_with_time("Running first workload")
    runWorkload("workload1")

    utils.print_with_time("Restarting server")
    server.check_and_stop()
    server.start()
    self.cluster.check()
    self.r.db(self.dbName).wait(wait_for="all_replicas_ready").run(self.conn)

    utils.print_with_time("Running second workload")
    runWorkload("workload2")
def run(command_line, ports, timeout, db_name=None, table_name=None):
    """Run `command_line` through the shell as a workload and wait for it.

    `ports` is either an `RDBPorts` (whose `db_name` / `table_name` are
    overridden when given) or an object with `host`, `http_port` and
    `driver_port` attributes, in which case `db_name` and `table_name` are
    required. Returns normally when the command exits with status 0 within
    `timeout` seconds. On a non-zero exit status or a timeout a message is
    written to stderr and the interpreter exits with status 1. In every case
    SIGTERM is sent to the command's process group before leaving.
    """
    if isinstance(ports, RDBPorts):
        if db_name is not None:
            ports.db_name = db_name
        if table_name is not None:
            ports.table_name = table_name
    else:
        # probably a driver.Process or subclass
        assert db_name is not None, 'When using a non-RDBPorts ports, db_name must be supplied'
        assert table_name is not None, 'When using a non-RDBPorts ports, table_name must be supplied'
        assert hasattr(ports, 'http_port'), 'When using a non-RDBPorts ports, the ports object must have a http_port attribute: %r' % ports
        assert hasattr(ports, 'driver_port'), 'When using a non-RDBPorts ports, the ports object must have a driver_port attribute: %r' % ports
        ports = RDBPorts(host=ports.host, http_port=ports.http_port, rdb_port=ports.driver_port, db_name=db_name, table_name=table_name)

    start_time = time.time()
    end_time = start_time + timeout
    utils.print_with_time("Running workload %r..." % command_line)

    # Set up environment
    new_environ = os.environ.copy()
    ports.add_to_environ(new_environ)

    proc = subprocess.Popen(command_line, shell=True, env=new_environ, preexec_fn=lambda: os.setpgid(0, 0))

    try:
        while time.time() < end_time:
            result = proc.poll()
            if result is None:
                time.sleep(1)
            elif result == 0:
                utils.print_with_time("Done")
                return
            else:
                utils.print_with_time("Failed")
                sys.stderr.write("workload '%s' failed with error code %d\n" % (command_line, result))
                # sys.exit rather than the `exit` helper, which the site
                # module only injects for interactive use
                sys.exit(1)
        sys.stderr.write("\nWorkload timed out after %d seconds (%s)\n" % (time.time() - start_time, command_line))
    finally:
        try:
            os.killpg(proc.pid, signal.SIGTERM)
        except OSError:
            # the process group is already gone
            pass
    sys.exit(1)
def test_failover(self):
    '''Run a workload while killing a server to cause a failover to a secondary'''

    # - setup

    primary = self.getPrimaryForShard(0)
    stable = self.getReplicaForShard(0)

    stableConn = self.r.connect(host=stable.host, port=stable.driver_port)

    workload_ports = workload_runner.RDBPorts(
        host=stable.host,
        http_port=stable.http_port,
        rdb_port=stable.driver_port,
        db_name=dbName,
        table_name=tableName,
    )
    current_issues = self.r.db('rethinkdb').table('current_issues')

    # - run test

    with workload_runner.SplitOrContinuousWorkload(opts, workload_ports) as workload:

        print_with_time("Starting workload before")
        workload.run_before()
        self.cluster.check()
        issues = list(current_issues.filter(self.r.row["type"] != "memory_error").run(stableConn))
        self.assertEqual(issues, [], 'The server recorded the following issues after the run_before:\n%s' % pformat(issues))

        print_with_time("Shutting down the primary")
        primary.close()

        print_with_time("Checking that the table_availability issue shows up")
        availability_filter = {'type':'table_availability', 'info':{'db':dbName, 'table':tableName}}
        deadline = time.time() + 5
        last_error = None
        while time.time() < deadline:
            try:
                issues = list(current_issues.filter(availability_filter).run(stableConn))
                self.assertEqual(len(issues), 1, 'The server did not record the single issue for the killed server:\n%s' % pformat(issues))
            except Exception as e:
                last_error = e
                time.sleep(.2)
            else:
                break
        else:
            raise last_error

        print_with_time("Waiting for the table to become available again")
        timeout = 30
        try:
            self.table.wait(wait_for='ready_for_writes', timeout=timeout).run(stableConn)
        except self.r.ReqlRuntimeError:
            raise AssertionError('Table did not become available after %d seconds.' % timeout)

        print_with_time("Running workload after")
        workload.run_after()

        print_with_time("Cleaning up")
utils.print_with_time("ERROR: Was not interrupted while replacing entries.") def delete_rows(host, port, ready_event, start_event): conn = r.connect(host, port, db=dbName) ready_event.set() start_event.wait() try: for i in xrange(num_rows // 2, num_rows): r.db(dbName).table(tableName).get(i).delete().run(conn) except Exception as ex: if ex.message != "Connection is closed." and ex.message != "Connection is broken.": utils.print_with_time("ERROR: Expected to be interrupted by the connection closing, actual error: %s" % ex.message) return utils.print_with_time("ERROR: Was not interrupted interrupted while deleting entries.") utils.print_with_time("Spinning a cluster with one server") with driver.Process(name='.', command_prefix=command_prefix, extra_options=serve_options) as process: utils.print_with_time("Establishing ReQL connection") conn = r.connect(process.host, process.driver_port, db=dbName) utils.print_with_time("Starting replace/delete processes") # Get the replace/delete processes ready ahead of time - # Time is critical during the sindex post-construction ready_events = [multiprocessing.Event(), multiprocessing.Event()] start_event = multiprocessing.Event() replace_proc = multiprocessing.Process(target=replace_rows, args=(process.host, process.driver_port, ready_events[0], start_event))
def test_replicaChanges(self):
    '''Check that table data files follow replica changes on disk.

    Raises the table to two replicas and expects a data file named after the
    table's UUID on every server, drops back to a single replica and expects
    the file to disappear from the former replica, then raises the replica
    count again (without waiting) and polls `rethinkdb.jobs` for a backfill job.
    '''

    print() # solve a formatting issue with unittest reporting

    tableUUID = self.table.info()['id'].run(self.conn)

    utils.print_with_time("Increasing replication factor")
    self.table.reconfigure(shards=1, replicas=2).run(self.conn)
    self.table.wait(wait_for='all_replicas_ready').run(self.conn)
    self.checkCluster()

    utils.print_with_time("Checking that both servers have a data file")
    deadline = time.time() + 5
    lastError = None
    while time.time() < deadline:
        for server in self.cluster:
            dataPath = os.path.join(server.data_path, tableUUID)
            if not os.path.exists(dataPath):
                lastError = 'Server %s does not have a data file at %s' % (server.name, dataPath)
                break
        else:
            # every server has its file
            break
        # pause between polls instead of spinning on the filesystem
        time.sleep(0.05)
    else:
        raise Exception(lastError or 'Timed out in a weird way')

    master = self.getPrimaryForShard(0)
    slave = self.getReplicaForShard(0)

    utils.print_with_time("Decreasing replication factor")
    self.table.config().update({'shards':[{'primary_replica':master.name, 'replicas':[master.name]}]}).run(self.conn)
    self.table.wait(wait_for='all_replicas_ready').run(self.conn)
    self.checkCluster()

    utils.print_with_time("Waiting for file deletion on the slave")
    deadline = time.time() + 5
    slaveDataPath = os.path.join(slave.data_path, tableUUID)
    while time.time() < deadline:
        if not os.path.exists(slaveDataPath):
            break
        # pause between polls instead of spinning on the filesystem
        time.sleep(0.05)
    else:
        raise Exception('File deletion had not happend after 5 seconds, file still exists at: %s' % slaveDataPath)

    utils.print_with_time("Increasing replication factor again")
    # noreply: do not wait for the reconfigure, the backfill should be observable while it runs
    self.table.reconfigure(shards=1, replicas=2).run(self.conn, noreply=True)

    utils.print_with_time("Confirming that the progress meter indicates a backfill happening")
    deadline = time.time() + 5
    last_error = None
    while time.time() < deadline:
        try:
            assert r.db("rethinkdb") \
                .table("jobs") \
                .filter({"type": "backfill", "info": {"table": self.tableName}}) \
                .count() \
                .run(self.conn) == 1, "No backfill job found in `rethinkdb.jobs`."
            break
        except Exception as e:
            last_error = e
            time.sleep(0.02)
'''Merging shards causes a server crash'''

from __future__ import print_function

import sys, os

# make the shared test helpers in the sibling `common` directory importable
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, 'common')))
import driver, utils

# number of servers started for this test
numNodes = 2

# ReQL driver module and the db/table names, both supplied by the test helpers
r = utils.import_python_driver()
dbName, tableName = utils.get_test_db_table()

utils.print_with_time("Starting cluster of %d servers" % numNodes)
with driver.Cluster(initial_servers=numNodes, output_folder='.', wait_until_ready=True) as cluster:

    # all queries go through the first server of the cluster
    server = cluster[0]
    conn = r.connect(host=server.host, port=server.driver_port)

    utils.print_with_time("Creating db/table %s/%s" % (dbName, tableName))

    # create the database only when it is missing
    if dbName not in r.db_list().run(conn):
        r.db_create(dbName).run(conn)

    # always start from a freshly created table: drop any existing one first
    if tableName in r.db(dbName).table_list().run(conn):
        r.db(dbName).table_drop(tableName).run(conn)
    r.db(dbName).table_create(tableName).run(conn)
    tbl = r.db(dbName).table(tableName)
else: raise Exception('File deletion had not happend after 5 seconds, file still exists at: %s' % slaveDataPath) utils.print_with_time("Increasing replication factor again") self.table.reconfigure(shards=1, replicas=2).run(self.conn, noreply=True) utils.print_with_time("Confirming that the progress meter indicates a backfill happening") deadline = time.time() + 5 last_error = None while time.time() < deadline: try: assert r.db("rethinkdb") \ .table("jobs") \ .filter({"type": "backfill", "info": {"table": self.tableName}}) \ .count() \ .run(self.conn) == 1, "No backfill job found in `rethinkdb.jobs`." break except Exception, e: last_error = e time.sleep(0.02) else: pass #raise last_error utils.print_with_time("Killing the cluster") # The large backfill might take time, and for this test we don't care about it succeeding for server in self.cluster[:]: server.kill() if __name__ == '__main__': rdb_unittest.main()
# Copyright 2015-2016 RethinkDB, all rights reserved. import os, sys, time sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, 'common'))) import driver, scenario_common, utils, vcoptparse op = vcoptparse.OptParser() scenario_common.prepare_option_parser_mode_flags(op) _, command_prefix, server_options = scenario_common.parse_mode_flags(op.parse(sys.argv)) r = utils.import_python_driver() with driver.Cluster(initial_servers=["a", "b"], output_folder='.', command_prefix=command_prefix, extra_options=server_options) as cluster: utils.print_with_time("Establishing ReQl connections") conn = r.connect(host=cluster[0].host, port=cluster[0].driver_port) utils.print_with_time("Creating a table") r.db_create("test").run(conn) res = r.table_create("test", replicas=2).run(conn) table_uuid = res["config_changes"][0]["new_val"]["id"] r.table("test").wait(wait_for="all_replicas_ready").run(conn) r.table("test").insert(r.range(1000).map({"value": r.row})).run(conn) assert os.path.exists(os.path.join(cluster[0].data_path, table_uuid)) assert os.path.exists(os.path.join(cluster[1].data_path, table_uuid)) utils.print_with_time("Removing one replica") r.table("test").config().update({"shards": [{"primary_replica": "a", "replicas": ["a"]}]}).run(conn)
# how long readAndCompare keeps retrying before giving up
errorToleranceSecs = 5
batchSize = 100

# command-line options: connection flags plus the workload-specific ones below
op = rdb_workload_common.option_parser_for_connect()
op["num_rows"] = vcoptparse.IntFlag("--num-rows", 5000)
op["sequential"] = vcoptparse.BoolFlag("--sequential")
# "w" = write phase only, "r" = read phase only, "wr" = both (default)
op["phase"] = vcoptparse.ChoiceFlag("--phase", ["w", "r", "wr"], "wr")
# NOTE(review): invert=True presumably flips the flag's default/meaning - confirm against vcoptparse
op["tolerate_errors"] = vcoptparse.BoolFlag("--tolerate-errors", invert=True)
opts = op.parse(sys.argv)

with rdb_workload_common.make_table_and_connection(opts) as (table, conn):
    keys = None

    if "w" in opts["phase"]:
        utils.print_with_time("Inserting rows")

        # - generate the ids

        keys = []
        if opts["sequential"]:
            # ids in ascending order
            keys = xrange(opts["num_rows"])
        else:
            # same ids, in random order
            keys = [x for x in xrange(opts["num_rows"])]
            random.shuffle(keys)

        # - open key file if not in 'wr' mode (pwd is the test output folder)

        keys_file = None
        if "r" not in opts["phase"]:
            keys_file = open("keys", "w")
r = utils.import_python_driver() dbName, tableName = utils.get_test_db_table() numReplicas = opts["sequence"].peak() with driver.Cluster( output_folder=".", initial_servers=numReplicas + 1, console_output=True, command_prefix=command_prefix, extra_options=server_options, ) as cluster: primary = cluster[0] replicaPool = cluster[1:] utils.print_with_time("Establishing ReQL connection") conn = r.connect(host=primary.host, port=primary.driver_port) utils.print_with_time("Creating db/table %s/%s" % (dbName, tableName)) if dbName not in r.db_list().run(conn): r.db_create(dbName).run(conn) if tableName in r.db(dbName).table_list().run(conn): r.db(dbName).table_drop(tableName).run(conn) r.db(dbName).table_create(tableName).run(conn) utils.print_with_time("Setting inital table replication settings") res = ( r.db(dbName) .table(tableName)
import os, pprint, sys, time

# make the shared test helpers in the sibling `common` directory importable
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, 'common')))
import driver, scenario_common, utils, vcoptparse

# command-line options: the number of rows plus the common mode flags
op = vcoptparse.OptParser()
op["num_rows"] = vcoptparse.IntFlag("--num-rows", 50000)
scenario_common.prepare_option_parser_mode_flags(op)
opts = op.parse(sys.argv)
_, command_prefix, server_options = scenario_common.parse_mode_flags(opts)

r = utils.import_python_driver()
dbName, tableName = utils.get_test_db_table()

# number of identical shard entries written into the table config below
num_shards = 16

utils.print_with_time("Starting cluster of three servers")
with driver.Cluster(initial_servers=['source1', 'source2', 'target'], output_folder='.', console_output=True, command_prefix=command_prefix, extra_options=server_options) as cluster:
    source_a = cluster['source1']
    source_b = cluster['source2']
    target = cluster['target']
    conn = r.connect(host=source_a.host, port=source_a.driver_port)

    utils.print_with_time("Creating a table")
    if dbName not in r.db_list().run(conn):
        r.db_create(dbName).run(conn)
    if tableName in r.db(dbName).table_list().run(conn):
        # a ReQL term does nothing until it is run; without .run(conn) the
        # existing table was never actually dropped
        r.db(dbName).table_drop(tableName).run(conn)
    # create the table by writing its configuration directly: every shard has
    # source1 as primary and source1 + source2 as replicas
    r.db("rethinkdb").table("table_config").insert({
        "name":tableName,
        "db": dbName,
        "shards": [{"primary_replica":"source1", "replicas":["source1", "source2"]}] * num_shards
        }).run(conn)
"""Check that sharding then re-merging keeps all data"""

import os, sys

# make the shared test helpers in the sibling `common` directory importable
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, "common")))
import driver, utils

r = utils.import_python_driver()
dbName, tableName = utils.get_test_db_table()

# test parameters: cluster size, sharding/replication targets and data volume
numNodes = 2
numShards = 2
numReplicas = 2
numRecords = 500

utils.print_with_time("Starting cluster of %d servers" % numNodes)
with driver.Cluster(initial_servers=numNodes, output_folder=".", wait_until_ready=True) as cluster:

    utils.print_with_time("Establishing ReQL connection")
    # all queries go through the first server of the cluster
    server = cluster[0]
    conn = r.connect(host=server.host, port=server.driver_port)

    utils.print_with_time("Creating db/table %s/%s" % (dbName, tableName))
    # create the database only when it is missing
    if dbName not in r.db_list().run(conn):
        r.db_create(dbName).run(conn)
    # always start from a freshly created table: drop any existing one first
    if tableName in r.db(dbName).table_list().run(conn):
        r.db(dbName).table_drop(tableName).run(conn)
    r.db(dbName).table_create(tableName).run(conn)
    tbl = r.db(dbName).table(tableName)

    utils.print_with_time("Adding data to table")
#!/usr/bin/env python # Copyright 2015-2016 RethinkDB, all rights reserved. import os, pprint, sys sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, 'common'))) import driver, utils r = utils.import_python_driver() utils.print_with_time("Starting one server") with driver.Process() as process: utils.print_with_time("Establishing ReQl connections") conn = r.connect(host=process.host, port=process.driver_port) utils.print_with_time("Creating a table") r.db_create("test").run(conn) table_uuid = r.table_create("test").run(conn)["config_changes"][0]["new_val"]["id"] r.table("test").wait(wait_for="all_replicas_ready").run(conn) utils.print_with_time("Creating an index") r.table("test").index_create("test").run(conn) r.table("test").index_wait().run(conn) old_indexes = r.table("test").index_list().run(conn) utils.print_with_time("Modifying table configuration through `rethinkdb.table_config`") r.db("rethinkdb").table("table_config").get(table_uuid).update( {"durability": "soft", "write_acks": "single"} ).run(conn) utils.print_with_time("Verifying configuration")
def test_workload(self):
    '''Run the split workload around handing shard 0's primary role to its replica.

    The workload is pointed at the server that is primary when the test
    starts. Between the "before" and "after" phases the shard configuration
    is rewritten so that the former replica becomes the primary.
    '''
    log = utils.print_with_time

    oldPrimary = self.getPrimaryForShard(0)
    newPrimary = self.getReplicaForShard(0)

    # the workload keeps talking to the server that is about to be demoted
    ports = workload_runner.RDBPorts(
        host=oldPrimary.host,
        http_port=oldPrimary.http_port,
        rdb_port=oldPrimary.driver_port,
        db_name=self.dbName,
        table_name=self.tableName,
    )

    with workload_runner.SplitOrContinuousWorkload(opts, ports) as workload:
        log('Workloads:\n%s' % pprint.pformat(workload.opts))

        # --- before phase
        log("Running before workload")
        workload.run_before()
        log("Before workload complete")
        self.checkCluster()
        workload.check()

        # --- swap the primary of the first shard and wait for the table to settle
        log("Demoting primary")
        shards = self.table.config()['shards'].run(self.conn)
        shards[0]['primary_replica'] = newPrimary.name
        newConfig = {'shards': shards}
        self.table.config().update(newConfig).run(self.conn)
        self.table.wait(wait_for='all_replicas_ready').run(self.conn)
        self.checkCluster()

        # --- after phase
        log("Running after workload")
        workload.run_after()
        self.checkCluster()
        log("After workload complete")
# Copyright 2014-2016 RethinkDB, all rights reserved. '''This runs a bunch of the ReQL tests against the `rethinkdb._debug_scratch` artificial table to check that `artificial_table_t` works properly.''' import os, subprocess, sys sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, 'common'))) import driver, scenario_common, utils, vcoptparse op = vcoptparse.OptParser() scenario_common.prepare_option_parser_mode_flags(op) _, command_prefix, serve_options = scenario_common.parse_mode_flags(op.parse(sys.argv)) r = utils.import_python_driver() utils.print_with_time("Spinning up a server") with driver.Process(name='.', command_prefix=command_prefix, extra_options=serve_options) as server: server.check() conn = r.connect(host=server.host, port=server.driver_port) # note: these tests are impicitly run in sequence because of the --table option command_line = [ os.path.join(os.path.dirname(os.path.abspath(__file__)), os.path.pardir, 'rql_test', 'test-runner'), '--driver-port', '%s:%d' % (server.host, server.driver_port), '--table', 'rethinkdb._debug_scratch'] command_line.extend('polyglot/' + name for name in [ 'control', 'joins', 'match', 'mutation/atomic_get_set', 'mutation/delete', 'mutation/insert', 'mutation/replace', 'mutation/update', 'polymorphism'])
def test_workload(self):
    '''Run the split workload around a sequence of resharding operations.

    Populates the table with 10000 records, runs the "before" phase, then
    reconfigures the table once for every shard count in `opts["sequence"]`
    (always with `opts["num-nodes"]` replicas) before running the "after" phase.
    '''
    log = utils.print_with_time
    entryServer = self.cluster[0]

    log("Inserting data")
    utils.populateTable(self.conn, self.table, records=10000, fieldName='val')

    log("Starting workload")
    workloadContext = workload_runner.SplitOrContinuousWorkload(
        opts,
        entryServer,
        db_name=self.dbName,
        table_name=self.tableName,
    )
    with workloadContext as workload:

        log("Running workload before")
        workload.run_before()
        self.checkCluster()

        # apply each requested shard count in turn, waiting for the table to
        # settle and verifying the cluster after every step
        for shardCount in opts["sequence"]:
            log("Sharding table to %d shards" % shardCount)
            reshard = self.table.reconfigure(shards=shardCount, replicas=opts["num-nodes"])
            reshard.run(self.conn)
            self.table.wait(wait_for='all_replicas_ready').run(self.conn)
            self.checkCluster()

        log("Running workload after")
        workload.run_after()
        self.checkCluster()

    log("Workload complete")
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, 'common'))) import driver, scenario_common, utils, vcoptparse, workload_runner op = vcoptparse.OptParser() scenario_common.prepare_option_parser_mode_flags(op) op["workload1"] = vcoptparse.StringFlag("--workload-before", None) op["workload2"] = vcoptparse.StringFlag("--workload-after", None) op["timeout"] = vcoptparse.IntFlag("--timeout", 600) opts = op.parse(sys.argv) _, command_prefix, serve_options = scenario_common.parse_mode_flags(opts) r = utils.import_python_driver() dbName, tableName = utils.get_test_db_table() utils.print_with_time("Starting cluster with one server") with driver.Cluster(initial_servers=['first'], output_folder='.', command_prefix=command_prefix, extra_options=serve_options, wait_until_ready=True) as cluster: server1 = cluster[0] workload_ports1 = workload_runner.RDBPorts(host=server1.host, http_port=server1.http_port, rdb_port=server1.driver_port, db_name=dbName, table_name=tableName) utils.print_with_time("Establishing ReQL connection") conn1 = r.connect(server1.host, server1.driver_port) utils.print_with_time("Creating db/table %s/%s" % (dbName, tableName)) if dbName not in r.db_list().run(conn1): r.db_create(dbName).run(conn1) if tableName in r.db(dbName).table_list().run(conn1):
# Copyright 2014-2015 RethinkDB, all rights reserved. import os, sys sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, 'common'))) import driver, scenario_common, utils, vcoptparse op = vcoptparse.OptParser() scenario_common.prepare_option_parser_mode_flags(op) _, command_prefix, serve_options = scenario_common.parse_mode_flags(op.parse(sys.argv)) r = utils.import_python_driver() num_servers = 5 utils.print_with_time("Starting cluster of %d servers" % num_servers) with driver.Cluster(output_folder='.', initial_servers=["s%d" % i for i in range(1, num_servers+1)]) as cluster: utils.print_with_time("Establishing ReQl connections") conn = r.connect(host=cluster[0].host, port=cluster[0].driver_port) utils.print_with_time("Renaming a single server") res = r.db("rethinkdb").table("server_config").filter({"name": "s1"}).update({"name": "foo"}).run(conn) assert res["replaced"] == 1 and res["errors"] == 0 assert r.db("rethinkdb").table("server_config").filter({"name": "foo"}).count().run(conn) == 1 utils.print_with_time("Renaming all servers") res = r.db("rethinkdb").table("server_config").update({"name": "bar"}).run(conn) assert res["replaced"] == 1 and res["errors"] == num_servers - 1, repr(res) utils.print_with_time("Cleaning up")
import sys, os

# make the shared test helpers in the sibling `common` directory importable
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, 'common')))
import driver, scenario_common, utils, vcoptparse

# parse the mode flags from the command line; only the command prefix and
# extra server options are used here
op = vcoptparse.OptParser()
scenario_common.prepare_option_parser_mode_flags(op)
_, command_prefix, serve_options = scenario_common.parse_mode_flags(op.parse(sys.argv))

# number of servers started for this test
numNodes = 2

r = utils.import_python_driver()
dbName, tableName = utils.get_test_db_table()

utils.print_with_time("Starting cluster of %d servers" % numNodes)
with driver.Cluster(initial_servers=numNodes, output_folder='.', wait_until_ready=True, command_prefix=command_prefix, extra_options=serve_options) as cluster:

    server1 = cluster[0]
    server2 = cluster[1]
    # queries are issued through the first server
    conn = r.connect(host=server1.host, port=server1.driver_port)

    utils.print_with_time("Creating db/table %s/%s" % (dbName, tableName))

    # create the database only when it is missing
    if dbName not in r.db_list().run(conn):
        r.db_create(dbName).run(conn)

    # always start from a freshly created table: drop any existing one first
    if tableName in r.db(dbName).table_list().run(conn):
        r.db(dbName).table_drop(tableName).run(conn)
    r.db(dbName).table_create(tableName).run(conn)
    tbl = r.db(dbName).table(tableName)