def shift_logs():
    """Age the rotated log files by one slot and drop the oldest one.

    Reads ``old_log_fmt`` (a ``%d`` format producing the path of the n-th
    rotated log) and ``max_logs_to_keep`` from the enclosing scope.

    The file in the last slot (``max_logs_to_keep - 1``) is deleted if it
    exists; after that every slot ``j`` from ``max_logs_to_keep - 2`` down
    to 1 is renamed to slot ``j + 1``.  Walking from the highest slot
    downwards guarantees a rename never lands on a file that still has to
    be moved.
    """
    last_slot = max_logs_to_keep - 1
    oldest = old_log_fmt % last_slot
    if X.fileExists(oldest):
        # Delete through ``os`` like the renames below; the ``fs`` object
        # used here before is not referenced by any other visible code.
        os.remove(oldest)

    for slot in range(last_slot - 1, 0, -1):
        log_to_move = old_log_fmt % slot
        if X.fileExists(log_to_move):
            os.rename(log_to_move, old_log_fmt % (slot + 1))
def testRemoveDirs(self):
    # After addNode + createDirs + removeDirs, neither the node's log
    # directory nor its db directory may exist any more.
    cluster_id = self._clusterId
    node_name = '%s_%i' % (cluster_id, 0)

    cluster = self._getCluster()
    cluster.addNode(node_name)
    cluster.createDirs(node_name)
    cluster.removeDirs(node_name)

    log_path = '/'.join([X.logDir, cluster_id, node_name])
    db_path = '/'.join([X.varDir, "db", cluster_id, node_name])
    for removed_path in (log_path, db_path):
        assert_false(X.fileExists(removed_path))
def test_local_collapse():
    # Scenario: fill the cluster, run a local collapse on the first node,
    # then wipe and restart the second node and wait until the client
    # reports that progress is possible again.
    logging.info("starting test_local_collapse")
    zero = Common.node_names[0]
    one = Common.node_names[1]
    n = 29876
    Common.iterate_n_times(n, Common.simple_set)
    logging.info("did %i sets, now going into collapse scenario" % n)

    rc = Common.local_collapse(zero, 1)
    assert_equal(rc, 0)
    head_name = '%s/%s/head/head.db' % (Common.data_base_dir, zero)
    logging.info(head_name)
    assert_true(X.fileExists(head_name))
    #
    logging.info("collapsing done")

    Common.stopOne(one)
    Common.wipe(one)
    Common.startOne(one)
    cli = Common.get_client()
    logging.info("cli class:%s", cli.__class__)
    # Right after the wipe the client is expected to report no progress.
    assert_false(cli.expectProgressPossible())

    # Poll once per second, at most max_polls times.
    max_polls = 100
    up2date = False
    counter = 0
    while not up2date and counter < max_polls:
        time.sleep(1.0)
        counter = counter + 1
        up2date = cli.expectProgressPossible()

    # Without this check a timed-out wait would still be reported as a
    # finished catchup and the test would pass.
    assert_true(up2date,
                "progress still not possible after %i polls" % counter)
    logging.info("catchup from collapsed node finished")
def test_copy_db_to_head2():
    # Scenario: fill the cluster, copy the db of the second node to its
    # head, then wipe and restart the first node and wait until the client
    # reports that progress is possible again.
    logging.info("test_copy_db_to_head")
    zero = C.node_names[0]
    one = C.node_names[1]
    n = 29876
    C.iterate_n_times(n, C.simple_set)
    logging.info("did %i sets, now copying db to head" % n)

    C.copyDbToHead(one, 1)
    head_name = '%s/%s/head/head.db' % (C.data_base_dir, one)
    logging.info(head_name)
    assert_true(X.fileExists(head_name))

    C.stopOne(zero)
    C.wipe(zero)
    C.startOne(zero)
    cli = C.get_client()
    logging.info("cli class:%s", cli.__class__)
    # Right after the wipe the client is expected to report no progress.
    assert_false(cli.expectProgressPossible())

    # Poll once per second, at most max_polls times.
    max_polls = 100
    up2date = False
    counter = 0
    while not up2date and counter < max_polls:
        time.sleep(1.0)
        counter = counter + 1
        up2date = cli.expectProgressPossible()

    # Without this check a timed-out wait would still be reported as a
    # finished catchup and the test would pass.
    assert_true(up2date,
                "progress still not possible after %i polls" % counter)
    logging.info("catchup from 'collapsed' node finished")
def test_mixed_tlog_formats():
    # Writes one batch of sets, switches tlog compression to bz2, writes a
    # second batch, and then checks that both .tlx and .tlf files are
    # present and that catchup and collapse still succeed.
    cluster = C._getCluster()
    cluster.disableFsync(C.node_names[:2])
    batch_size = 10500

    logging.info("going to do %i sets", batch_size)
    C.iterate_n_times(batch_size, C.simple_set)
    C.stop_all()

    cluster.enableTlogCompression(compressor='bz2')
    C.start_all()
    logging.info("another %i sets", batch_size)
    C.iterate_n_times(batch_size, C.simple_set)
    C.stop_all()

    # do we have both .tlf and .tlx files?
    n0 = C.node_names[0]
    n1 = C.node_names[1]
    config = C.getConfig(n0)
    # fall back to the home directory when no tlf_dir is configured
    tlx_dir = config.get('tlf_dir') or config.get('home')
    dir_entries = os.listdir(tlx_dir)
    tlx_files = [name for name in dir_entries if name.endswith(".tlx")]
    tlf_files = [name for name in dir_entries if name.endswith(".tlf")]
    assert_true(len(tlx_files) > 5, "we should have .tlx files")
    assert_true(len(tlf_files) > 5, "we should have .tlf files")

    # does catchup still work?
    C.wipe(n0)
    C.startOne(n1)
    # wait for n1 to respond to client requests...
    time.sleep(5)
    catchup_rc = cluster.catchupOnly(n0)
    logging.info("catchup had rc=%i", catchup_rc)
    C.flush_store(n1)
    C.stop_all()
    C.compare_stores(n0, n1)

    C.start_all()
    collapse_rc = C.collapse(name=n1, n=2)
    logging.info("collapse had rc=%i", collapse_rc)
    assert_true(collapse_rc == 0, "this should not have failed")

    db_file = C.build_node_dir_names(n1)[3] + "/head.db"
    time.sleep(1.0)  # give it time to move
    assert_true(X.fileExists(db_file), "%s should exist" % db_file)
def regenerateClientConfig(cluster_id):
    """Regenerate the client configuration of cluster ``cluster_id``.

    If the 'arakoonclients' config has a section for the cluster, the
    ``<cluster_id>_client.cfg`` file in that section's ``path`` is removed
    when present.  In all cases a fresh client config is then generated
    from the server config.
    """
    registry_path = '/'.join([X.cfgDir, 'arakoonclients'])
    registry = X.getConfig(registry_path)

    if cluster_id in registry.sections():
        cluster_dir = registry.get(cluster_id, "path")
        stale_cfg = '/'.join([cluster_dir, "%s_client.cfg" % cluster_id])
        if X.fileExists(stale_cfg):
            X.removeFile(stale_cfg)

    client_cfg = ArakoonClient.ArakoonClient().getClientConfig(cluster_id)
    client_cfg.generateFromServerConfig()
def _check_tlog_dirs(node, n):
    """Assert that ``node`` has ``n`` tlog files in total and a head.db.

    Counts ``*.tlog`` files in the node's home directory plus ``*.tlx``
    files in its tlf directory, and checks that ``head.db`` exists in the
    head directory.
    """
    (home, _, tlx_home, head) = Common.build_node_dir_names(node)
    tlog_files = X.listFilesInDir(home, filter="*.tlog")
    tlx_files = X.listFilesInDir(tlx_home, filter="*.tlx")
    logging.info("tlogs:%s", tlog_files)
    logging.info("tlxs:%s", tlx_files)
    print(tlx_files)

    found = len(tlog_files) + len(tlx_files)
    complaint = "(%s + %s) should have %i file(s)" % (tlog_files, tlx_files, n)
    assert_equals(found, n, msg=complaint)
    assert_true(X.fileExists(head + "/head.db"))
    logging.info("tlog_dirs are as expected")
def decorate(*args, **kwargs):
    """Run the wrapped ``func`` inside a freshly set-up test directory.

    Points the module-level ``data_base_dir`` at a directory named after
    the wrapped function, clears leftovers from a previous run, runs the
    setup and then calls ``func``.  Any ``Exception`` from ``func`` is
    logged with its traceback and remembered in ``fatal_ex``.

    NOTE(review): within this block the remembered exception is not
    re-raised -- confirm against the complete decorator.
    """
    global data_base_dir
    global test_failed
    data_base_dir = '/'.join(
        [X.tmpDir, 'arakoon_system_tests', func.func_name])
    test_failed = False
    fatal_ex = None
    home_dir = data_base_dir

    if X.fileExists(data_base_dir):
        remove_dirs()
    self.__setup(home_dir)

    try:
        func(*args, **kwargs)
    except Exception as outer:
        logging.fatal(traceback.format_exc())
        fatal_ex = outer
def test_catchup_rollover_on_size():
    # Runs the shared scenario, then lets the third node fall behind while
    # the first node collapses; after restarting the lagging node, progress
    # must be possible, the lagger must have no head.db and its last i must
    # be in sync with the first node.
    _scenario()
    lagger = Common.node_names[2]
    print("now with collapsing as well")

    Common.stopOne(lagger)
    Common.iterate_n_times(1234, Common.simple_set)
    Common.collapse(Common.node_names[0], 20)
    Common.stopOne(Common.node_names[1])
    Common.startOne(lagger)
    time.sleep(20)

    cli = Common.get_client()
    assert_true(cli.expectProgressPossible())
    Common.stop_all()

    head_file = Common.build_node_dir_names(lagger)[3] + "/head.db"
    print(head_file)
    assert_false(X.fileExists(head_file))
    Common.assert_last_i_in_sync(Common.node_names[0], lagger)
def mount_ram_fs(node_index):
    """Mount a 20 MB tmpfs on the first directory of a node.

    The mount target is the first path returned by
    ``Common.build_node_dir_names`` for ``Common.node_names[node_index]``.
    If that path already exists, the node is stopped, the path is
    unmounted and its directory tree removed before it is recreated.

    Raises:
        Exception: if the target is not a directory after creation, or if
            the mount command returns a non-zero exit code.
    """
    node_name = Common.node_names[node_index]
    (mount_target, _log_dir, _tlf_dir, _head_dir) = \
        Common.build_node_dir_names(node_name)

    if X.fileExists(mount_target):
        Common.stopOne(node_name)
        # NOTE(review): umount runs without sudo while mount below uses
        # sudo -- confirm this asymmetry is intended.
        cmd = ["umount", mount_target]
        X.subprocess.check_call(cmd)
        X.removeDirTree(mount_target)

    X.createDir(mount_target)
    if not os.path.isdir(mount_target):
        # The placeholder used to be left unfilled, so the message showed
        # a literal "%s" instead of the offending path.
        raise Exception(
            "%s is not valid mount target as it is not a directory"
            % mount_target)

    cmd = ["sudo", "mount", "-t", "tmpfs", "-o", "size=20m", "tmpfs",
           mount_target]
    (rc, out, err) = X.run(cmd)
    if rc:
        logging.info("out=%s", out)
        logging.info("err = %s", err)
        raise Exception("Mounting failed (rc=%s)" % rc)
def test_copy_db_to_head():
    # Checks copyDbToHead argument validation, then verifies that after a
    # successful call on the slave fewer than 5 tlog/tlf files remain, a
    # head.db exists and the slave's i is at least 5000.

    # fill cluster until they have at least 10 tlogs
    C.iterate_n_times(5000, C.set_get_and_delete)
    slave = C.node_names[1]

    # n < 1 fails
    assert_raises(Exception, lambda: C.copyDbToHead(slave, 0))
    # fails on master
    assert_raises(Exception, lambda: C.copyDbToHead(C.node_names[0], 2))

    C.copyDbToHead(slave, 1)
    C.stop_all()

    (home, _, tlf_home, head) = C.build_node_dir_names(slave)
    n_tlogs = len(X.listFilesInDir(home, filter="*.tlog"))
    n_tlfs = len(X.listFilesInDir(tlf_home, filter="*.tlf"))
    assert n_tlogs + n_tlfs < 5
    assert X.fileExists(head + "/head.db")

    slave_head_i = C.get_i(slave, True)
    logging.info("slave_head_i='%s'", slave_head_i)
    assert slave_head_i >= 5000
def rotate_log(node_name, max_logs_to_keep, compress_old_files):
    """Rotate the log file of node ``node_name``.

    Existing rotated logs (``<node>.log.N`` or ``<node>.log.N.gz``) are
    shifted up one slot and the one in slot ``max_logs_to_keep - 1`` is
    deleted.  The live log, if present, is renamed to ``<node>.log.1``;
    a running node is then sent SIGUSR1 (presumably so it reopens its log
    file -- confirm against the server).  With ``compress_old_files`` the
    renamed log is gzipped into ``<node>.log.1.gz`` and the uncompressed
    copy removed.

    Args:
        node_name: name of the node whose log is rotated.
        max_logs_to_keep: number of slots; rotated logs use 1 up to
            ``max_logs_to_keep - 1``.
        compress_old_files: gzip rotated logs when true.
    """
    cfg = getConfig(node_name)
    log_dir = cfg['log_dir']
    log_file = '/'.join([log_dir, "%s.log" % (node_name)])
    if compress_old_files:
        old_log_fmt = '/'.join([log_dir, "%s.log.%%d.gz" % (node_name)])
    else:
        old_log_fmt = '/'.join([log_dir, "%s.log.%%d" % (node_name)])
    tmp_log_file = log_file + ".1"

    def shift_logs():
        # Drop the oldest slot, then move every slot up by one, highest
        # first so a rename never lands on a file still to be moved.
        last_slot = max_logs_to_keep - 1
        log_to_remove = old_log_fmt % last_slot
        if X.fileExists(log_to_remove):
            # Deleted through ``os`` like every other file operation in
            # this function; ``fs`` is not used anywhere else here.
            os.remove(log_to_remove)
        for slot in range(last_slot - 1, 0, -1):
            log_to_move = old_log_fmt % slot
            if X.fileExists(log_to_move):
                os.rename(log_to_move, old_log_fmt % (slot + 1))

    cluster = _getCluster()
    shift_logs()
    if X.fileExists(log_file):
        os.rename(log_file, tmp_log_file)
        if cluster.getStatusOne(node_name) == X.AppStatusType.RUNNING:
            send_signal(node_name, signal.SIGUSR1)
        if compress_old_files:
            # Open the source first so a failure there does not leave an
            # empty archive behind; binary mode copies the bytes verbatim.
            orig = open(tmp_log_file, 'rb')
            try:
                cf = gzip.open(old_log_fmt % 1, 'wb')
                try:
                    cf.writelines(orig)
                finally:
                    cf.close()
            finally:
                orig.close()
            os.remove(tmp_log_file)
def make_monkey_run():
    """Set up a 3-node cluster and run monkey iterations in a loop.

    Each iteration obtains a disruption plus a work list from
    ``generate_work_list``, starts the work in threads, runs the
    disruption and joins the threads with a one-hour timeout each.  A
    thread that is still alive afterwards, or any exception other than
    ``SystemExit`` with code "0", sets the global ``monkey_dies``.  When
    nothing went wrong, ``wait_for_it`` and ``health_check`` run.

    The only explicit way out of the loop visible in this block is
    ``sys.exit(0)``, taken when a ``SystemExit`` with code "0" is caught.
    """
    global monkey_dies

    C.data_base_dir = '/'.join([X.tmpDir, "/arakoon_monkey/"])
    monitor = threading.Thread(target=memory_monitor)
    monitor.start()

    C.stop_all()
    cluster = C._getCluster(C.cluster_id)
    cluster.tearDown()
    #setup_3_nodes_forced_master()
    C.setup_3_nodes(C.data_base_dir)

    work_dir = get_monkey_work_dir()
    if X.fileExists(work_dir):
        X.removeDirTree(work_dir)
    X.createDir(work_dir)

    iteration = 0
    C.start_all()
    time.sleep(1.0)
    while True:
        iteration += 1
        logging.info("Preparing iteration %d" % iteration)
        workers = list()
        try:
            (disruption, work_items) = generate_work_list(iteration)
            logging.info("Starting iteration %d" % iteration)
            workers = C.create_and_start_thread_list(work_items)
            disruption()

            for worker in workers:
                worker.join(60.0 * 60.0)
                if worker.isAlive():
                    logging.fatal(
                        "Thread did not complete in a timely fashion.")
                    monkey_dies = True

            # checked twice on purpose: wait_for_it may flip the flag
            if not monkey_dies:
                wait_for_it()
            if not monkey_dies:
                health_check()
        except SystemExit as ex:
            if str(ex) == "0":
                sys.exit(0)
            logging.fatal("Caught SystemExit => %s: %s" %
                          (ex.__class__.__name__, ex))
            logging.fatal(traceback.format_exc())
            for worker in workers:
                worker.join()
            monkey_dies = True
        except Exception as ex:
            logging.fatal("Caught fatal exception => %s: %s" %
                          (ex.__class__.__name__, ex))
            logging.fatal(traceback.format_exc())
            for worker in workers:
                worker.join()
            monkey_dies = True
def test_272():
    # test_272 : arakoon can go down during log rotation,
    # but you need to have load to reproduce it
    #
    # Runs the benchmark as load, rotates the node's log file 100 times
    # (rename + SIGUSR1) and afterwards checks that the sequence numbers
    # in the rotated files have no holes.
    node = Common.node_names[0]
    cluster = Common._getCluster()
    path = cluster._getConfigFileName() + ".cfg"
    logging.info('path=%s', path)

    cfg = Common.getConfig(node)
    log_dir = cfg['log_dir']
    log_file = '/'.join([log_dir, "%s.log" % node])

    def target(i):
        # name of the i-th rotated log file
        return "%s.%03i" % (log_file, i)

    f = open("./outputFile", "wb")
    try:
        bench = subprocess.Popen([
            Common.CONFIG.binary_full_path, '-config', path, '--benchmark',
            '-scenario', 'master, set, set_tx, get', '-max_n', '60000'
        ], stdout=f, stderr=f)
        time.sleep(10.0)  # give it time to get up to speed

        # poll() is required here: Popen.returncode stays None until the
        # process has been polled or waited for, so reading the attribute
        # alone can never detect an early exit.
        if bench.poll() is not None:
            raise Exception("benchmark should not have finished yet.")

        for i in range(100):
            new_file = target(i)
            print("%s => %s" % (log_file, new_file))
            count = 0
            # the node may need a moment to recreate its log file
            while not X.fileExists(log_file) and count < 10:
                print("%s does not exist" % log_file)
                time.sleep(0.2)
                count += 1
            os.rename(log_file, new_file)
            Common.send_signal(node, signal.SIGUSR1)
            Common.assert_running_nodes(1)
            if bench.poll() is not None:
                raise Exception("benchmark should not have stopped")

        Common.assert_running_nodes(1)
        logging.info("now wait for benchmark to finish")
        rc = bench.wait()
    finally:
        # close the benchmark output file even when the test fails
        f.close()

    Common.assert_running_nodes(1)
    if rc != 0:
        raise Exception("benchmark exited with rc = %s" % rc)

    # now check there are no holes.
    seqn = -1
    for i in range(100):
        fn = target(i)
        with open(fn, 'r') as rotated:
            for line in rotated:
                try:
                    # the fifth " - " separated field is read as the
                    # sequence number
                    seqn_next = int(line.split(" - ")[4])
                    assert_equals(
                        seqn_next,
                        seqn + 1,
                        msg="%s:sequence numbers do not follow: %s;%s" %
                        (fn, seqn, seqn_next))
                    seqn = seqn_next
                except Exception:
                    logging.info("Error while parsing line %s" % line)
                    # bare raise keeps the original traceback
                    raise
    logging.info("last seqn:%i" % seqn)