# Drop the oldest rotated log file and shift the remaining ones up one slot.
def shift_logs():
    log_to_remove = old_log_fmt % (max_logs_to_keep - 1)
    if X.fileExists(log_to_remove):
        fs.unlink(log_to_remove)

    for i in range(1, max_logs_to_keep - 1):
        j = max_logs_to_keep - 1 - i
        log_to_move = old_log_fmt % j
        new_log_name = old_log_fmt % (j + 1)
        if X.fileExists(log_to_move):
            os.rename(log_to_move, new_log_name)
Example 2
def testRemoveDirs(self):
    cid = self._clusterId
    n0 = '%s_%i' % (cid, 0)
    cluster = self._getCluster()
    cluster.addNode(n0)
    cluster.createDirs(n0)
    cluster.removeDirs(n0)
    p0 = '/'.join([X.logDir, cid, n0])
    p1 = '/'.join([X.varDir, "db", cid, n0])
    assert_false(X.fileExists(p0))
    assert_false(X.fileExists(p1))
Example 3
def test_local_collapse():
    logging.info("starting test_local_collapse")

    zero = Common.node_names[0]
    one = Common.node_names[1]
    n = 29876
    Common.iterate_n_times(n, Common.simple_set)
    logging.info("did %i sets, now going into collapse scenario" % n)
    rc = Common.local_collapse(zero, 1)
    assert_equal(rc, 0)
    head_name = '%s/%s/head/head.db' % (Common.data_base_dir, zero)
    logging.info(head_name)
    assert_true(X.fileExists(head_name))
    #
    logging.info("collapsing done")
    Common.stopOne(one)
    Common.wipe(one)
    Common.startOne(one)
    cli = Common.get_client()
    logging.info("cli class:%s", cli.__class__)
    assert_false(cli.expectProgressPossible())
    up2date = False
    counter = 0
    while not up2date and counter < 100:
        time.sleep(1.0)
        counter = counter + 1
        up2date = cli.expectProgressPossible()
    logging.info("catchup from collapsed node finished")
def test_copy_db_to_head2():
    logging.info("test_copy_db_to_head")
    zero = C.node_names[0]
    one = C.node_names[1]
    n = 29876
    C.iterate_n_times(n, C.simple_set)
    logging.info("did %i sets, now copying db to head" % n)
    C.copyDbToHead(one, 1)

    head_name = '%s/%s/head/head.db' % (C.data_base_dir, one)

    logging.info(head_name)
    assert_true(X.fileExists(head_name))
    C.stopOne(zero)
    C.wipe(zero)
    C.startOne(zero)
    cli = C.get_client()
    logging.info("cli class:%s", cli.__class__)
    assert_false(cli.expectProgressPossible())
    up2date = False
    counter = 0
    while not up2date and counter < 100:
        time.sleep(1.0)
        counter = counter + 1
        up2date = cli.expectProgressPossible()
    logging.info("catchup from 'collapsed' node finished")
def test_mixed_tlog_formats():
    cluster = C._getCluster()
    cluster.disableFsync(C.node_names[:2])
    s0 = 10500
    logging.info("going to do %i sets",s0)
    C.iterate_n_times(s0,C.simple_set)
    C.stop_all()
    cluster.enableTlogCompression(compressor = 'bz2')

    C.start_all()
    logging.info("another %i sets", s0)
    C.iterate_n_times(s0, C.simple_set)
    C.stop_all()

    # do we have both .tlf and .tlx files?
    n0 = C.node_names[0]
    n1 = C.node_names[1]
    config = C.getConfig(n0)

    tlx_dir = config.get('tlf_dir')
    if not tlx_dir:
        tlx_dir = config.get('home')
    files = os.listdir(tlx_dir)

    tlxs = filter(lambda x: x.endswith(".tlx"), files)
    tlfs = filter(lambda x: x.endswith(".tlf"), files)
    assert_true(len(tlxs) > 5, "we should have .tlx files")
    assert_true(len(tlfs) > 5, "we should have .tlf files")

    # does catchup still work?

    C.wipe(n0)
    C.startOne(n1)

    #wait for n1 to respond to client requests...
    time.sleep(5)


    rc = cluster.catchupOnly(n0)
    logging.info("catchup had rc=%i", rc)

    C.flush_store(n1)
    C.stop_all()


    C.compare_stores(n0, n1)

    C.start_all()
    rc = C.collapse(name=n1, n=2)
    logging.info("collapse had rc=%i", rc)
    assert_true(rc == 0, "this should not have failed")
    head_dir = C.build_node_dir_names(n1)[3]

    db_file = head_dir + "/head.db"
    time.sleep(1.0) # give it time to move
    assert_true(X.fileExists(db_file), "%s should exist" % db_file)
def regenerateClientConfig(cluster_id):
    h = '/'.join([X.cfgDir,'arakoonclients'])
    p = X.getConfig(h)

    if cluster_id in p.sections():
        clusterDir = p.get(cluster_id, "path")
        clientCfgFile = '/'.join([clusterDir, "%s_client.cfg" % cluster_id])
        if X.fileExists(clientCfgFile):
            X.removeFile(clientCfgFile)

    client = ArakoonClient.ArakoonClient()
    cliCfg = client.getClientConfig(cluster_id)
    cliCfg.generateFromServerConfig()
Example 7
def _check_tlog_dirs(node, n):
    (home_dir, _, tlf_dir, head_dir) = Common.build_node_dir_names(node)

    tlogs = X.listFilesInDir(home_dir, filter="*.tlog")
    tlxs = X.listFilesInDir(tlf_dir, filter="*.tlx")
    logging.info("tlogs:%s", tlogs)
    logging.info("tlxs:%s", tlxs)
    print tlxs

    assert_equals(len(tlogs) + len(tlxs),
                  n,
                  msg="(%s + %s) should have %i file(s)" % (tlogs, tlxs, n))
    assert_true(X.fileExists(head_dir + "/head.db"))
    logging.info("tlog_dirs are as expected")
        def decorate(*args, **kwargs):

            global data_base_dir
            data_base_dir = '/'.join([X.tmpDir,'arakoon_system_tests' , func.func_name])
            global test_failed
            test_failed = False
            fatal_ex = None
            home_dir = data_base_dir
            if X.fileExists(data_base_dir):
                remove_dirs()
            self.__setup(home_dir)
            try:
                func(*args,**kwargs)
            except Exception, outer :
                tb = traceback.format_exc()
                logging.fatal( tb )
                fatal_ex = outer
def test_catchup_rollover_on_size():
    _scenario()
    lagger = Common.node_names[2]
    print("now with collapsing as well")
    Common.stopOne(lagger)
    Common.iterate_n_times(1234, Common.simple_set)
    Common.collapse(Common.node_names[0], 20)
    Common.stopOne(Common.node_names[1])
    Common.startOne(lagger)
    time.sleep(20)
    cli = Common.get_client()
    ok = cli.expectProgressPossible()
    assert_true(ok)
    Common.stop_all()
    head_dir = Common.build_node_dir_names(lagger)[3]
    head_file = head_dir + "/head.db"
    print head_file
    exists = X.fileExists(head_file)
    assert_false(exists)
    Common.assert_last_i_in_sync(Common.node_names[0], lagger)
Example 10
def mount_ram_fs(node_index):

    (mount_target, log_dir, tlf_dir, head_dir) = \
        Common.build_node_dir_names(Common.node_names[node_index])
    if X.fileExists(mount_target):
        Common.stopOne(Common.node_names[node_index])
        cmd = ["umount", mount_target]
        X.subprocess.check_call(cmd)
        X.removeDirTree(mount_target)

    X.createDir(mount_target)

    if not os.path.isdir(mount_target):
        raise Exception("%s is not a valid mount target as it is not a directory" % mount_target)

    cmd = ["sudo", "mount", "-t", "tmpfs", "-o", "size=20m", "tmpfs", mount_target]
    (rc, out, err) = X.run(cmd)
    if rc:
        logging.info("out=%s", out)
        logging.info("err=%s", err)
        raise Exception("Mounting failed (rc=%s)" % rc)
Example 11
def test_copy_db_to_head():
    # fill cluster until they have at least 10 tlogs
    C.iterate_n_times(5000, C.set_get_and_delete)

    slave = C.node_names[1]
    # n < 1 fails
    assert_raises(Exception, lambda: C.copyDbToHead(slave, 0))
    # fails on master
    assert_raises(Exception, lambda: C.copyDbToHead(C.node_names[0], 2))

    C.copyDbToHead(slave, 1)

    C.stop_all()

    (home_dir, _, tlf_dir, head_dir) = C.build_node_dir_names(slave)
    tlogs_count = len(X.listFilesInDir(home_dir, filter="*.tlog"))
    tlf_count = len(X.listFilesInDir(tlf_dir, filter="*.tlf"))
    assert(tlogs_count + tlf_count < 5)
    assert(X.fileExists(head_dir + "/head.db"))
    a = C.get_i(slave, True)
    logging.info("slave_head_i='%s'", a)
    assert(a >= 5000)
Example 12
def rotate_log(node_name, max_logs_to_keep, compress_old_files):
    cfg = getConfig(node_name)
    log_dir = cfg['log_dir']

    log_file = '/'.join([log_dir, "%s.log" % (node_name)])
    if compress_old_files:
        old_log_fmt = '/'.join([log_dir, "%s.log.%%d.gz" % (node_name)])
    else:
        old_log_fmt = '/'.join([log_dir, "%s.log.%%d" % (node_name)])

    tmp_log_file = log_file + ".1"

    def shift_logs():
        log_to_remove = old_log_fmt % (max_logs_to_keep - 1)
        if X.fileExists(log_to_remove):
            fs.unlink(log_to_remove)

        for i in range(1, max_logs_to_keep - 1):
            j = max_logs_to_keep - 1 - i
            log_to_move = old_log_fmt % j
            new_log_name = old_log_fmt % (j + 1)
            if X.fileExists(log_to_move):
                os.rename(log_to_move, new_log_name)

    cluster = _getCluster()
    shift_logs()
    if X.fileExists(log_file):
        os.rename(log_file, tmp_log_file)
        if cluster.getStatusOne(node_name) == X.AppStatusType.RUNNING:
            send_signal(node_name, signal.SIGUSR1)

        if compress_old_files:
            cf = gzip.open(old_log_fmt % 1, 'w')
            orig = open(tmp_log_file, 'r')
            cf.writelines(orig)
            cf.close()
            orig.close()
            os.remove(tmp_log_file)
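For reference, a minimal standalone sketch (no Arakoon dependencies; file names are illustrative only) of the index arithmetic performed by shift_logs: with max_logs_to_keep = 3, the oldest archive slot is dropped and each remaining archive moves up one slot, freeing slot 1 for the log about to be rotated.
# Standalone sketch of the shift_logs() index arithmetic, operating on a set
# of file names instead of the real file system. Illustrative names only.
def simulate_shift(existing, old_log_fmt, max_logs_to_keep):
    existing = set(existing)
    # drop the oldest archive slot
    existing.discard(old_log_fmt % (max_logs_to_keep - 1))
    # move each remaining archive up one slot, highest index first
    for i in range(1, max_logs_to_keep - 1):
        j = max_logs_to_keep - 1 - i
        src = old_log_fmt % j
        dst = old_log_fmt % (j + 1)
        if src in existing:
            existing.discard(src)
            existing.add(dst)
    return sorted(existing)

print simulate_shift(['node0.log.1.gz', 'node0.log.2.gz'],
                     'node0.log.%d.gz', 3)
# -> ['node0.log.2.gz']: the old slot 2 was dropped and slot 1 moved into it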
Example 13
def make_monkey_run():

    global monkey_dies

    C.data_base_dir = '/'.join([X.tmpDir, "/arakoon_monkey/"])

    t = threading.Thread(target=memory_monitor)
    t.start()

    C.stop_all()
    cluster = C._getCluster(C.cluster_id)
    cluster.tearDown()
    #setup_3_nodes_forced_master()
    C.setup_3_nodes(C.data_base_dir)
    monkey_dir = get_monkey_work_dir()
    if X.fileExists(monkey_dir):
        X.removeDirTree(monkey_dir)
    X.createDir(monkey_dir)
    iteration = 0
    C.start_all()
    time.sleep(1.0)
    while (True):
        iteration += 1
        logging.info("Preparing iteration %d" % iteration)
        thr_list = list()
        try:
            (disruption, f_list) = generate_work_list(iteration)
            logging.info("Starting iteration %d" % iteration)
            thr_list = C.create_and_start_thread_list(f_list)

            disruption()

            for thr in thr_list:
                thr.join(60.0 * 60.0)
                if thr.isAlive():
                    logging.fatal(
                        "Thread did not complete in a timely fashion.")
                    monkey_dies = True

            if not monkey_dies:
                wait_for_it()

            if not monkey_dies:
                health_check()

        except SystemExit, ex:
            if str(ex) == "0":
                sys.exit(0)
            else:
                logging.fatal("Caught SystemExit => %s: %s" %
                              (ex.__class__.__name__, ex))
                tb = traceback.format_exc()
                logging.fatal(tb)
                for thr in thr_list:
                    thr.join()
                monkey_dies = True

        except Exception, ex:
            logging.fatal("Caught fatal exception => %s: %s" %
                          (ex.__class__.__name__, ex))
            tb = traceback.format_exc()
            logging.fatal(tb)
            for thr in thr_list:
                thr.join()
            monkey_dies = True
Example 14
def test_272():
    """
    test_272 : arakoon can go down during log rotation, but you need to have load to reproduce it
    """
    node = Common.node_names[0]
    cluster = Common._getCluster()
    path = cluster._getConfigFileName() + ".cfg"
    logging.info('path=%s', path)

    f = open("./outputFile", "wb")
    bench = subprocess.Popen([
        Common.CONFIG.binary_full_path, '-config', path, '--benchmark',
        '-scenario', 'master, set, set_tx, get', '-max_n', '60000'
    ],
                             stdout=f,
                             stderr=f)

    time.sleep(10.0)  # give it time to get up to speed
    rc = bench.poll()
    if rc is not None:
        raise Exception("benchmark should not have finished yet.")

    cfg = Common.getConfig(node)
    log_dir = cfg['log_dir']
    log_file = '/'.join([log_dir, "%s.log" % node])

    def target(i):
        fn = "%s.%03i" % (log_file, i)
        return fn

    for i in xrange(100):
        new_file = target(i)
        print "%s => %s" % (log_file, new_file)
        count = 0
        while not X.fileExists(log_file) and count < 10:
            print "%s does not exist" % log_file
            time.sleep(0.2)
            count += 1

        os.rename(log_file, new_file)
        Common.send_signal(node, signal.SIGUSR1)
        Common.assert_running_nodes(1)
        rc = bench.poll()
        if rc is not None:
            raise Exception("benchmark should not have stopped")

    Common.assert_running_nodes(1)
    logging.info("now wait for benchmark to finish")
    rc = bench.wait()
    f.close()
    Common.assert_running_nodes(1)
    if rc != 0:
        raise Exception("benchmark exited with rc = %s" % rc)

    # now check there are no holes.

    seqn = -1
    for i in xrange(100):
        fn = target(i)
        with open(fn, 'r') as f:
            lines = f.readlines()
            for line in lines:
                try:
                    parts = line.split(" - ")
                    s = parts[4]
                    seqn_next = int(s)
                    assert_equals(
                        seqn_next,
                        seqn + 1,
                        msg="%s:sequence numbers do not follow: %s;%s" %
                        (fn, seqn, seqn_next))
                    seqn = seqn_next
                except Exception as ex:
                    logging.info("Error while parsing line %s" % line)
                    raise ex
    logging.info("last seqn:%i" % seqn)