def quorum_available_during_failure_test(self):
        CL = 'QUORUM'
        RF = 3

        debug("Creating a ring")
        cluster = self.cluster
        if ENABLE_VNODES:
            tokens = cluster.balanced_tokens(3)
            cluster.populate(3, tokens=tokens).start()
        else:
            cluster.populate(3).start()
        [node1, node2, node3] = cluster.nodelist()

        debug("Set to talk to node 2")
        cursor = self.cql_connection(node2).cursor()
        self.create_ks(cursor, 'ks', RF)
        create_c1c2_table(self, cursor)

        debug("Generating some data")
        for n in xrange(100):
            insert_c1c2(cursor, n, CL)

        debug("Taking down node1")
        node1.stop(wait_other_notice=True)

        debug("Reading back data.")
        for n in xrange(100):
            query_c1c2(cursor, n, CL)
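Every snippet here leans on the same c1c2 helpers. A minimal sketch of what they are assumed to do in the session-based tests (the real dtest tools use prepared statements and handle retries; the older cursor-based snippets use an earlier signature that takes a single key and a consistency name as a string):

from cassandra import ConsistencyLevel
from cassandra.query import SimpleStatement

def create_c1c2_table(tester, session, read_repair=None):
    # One table with two text columns, shared by every test below.
    tester.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'},
                     read_repair=read_repair)

def insert_c1c2(session, keys=None, n=None, consistency=ConsistencyLevel.QUORUM):
    # n=100 writes keys k0..k99; keys=range(10, 20) writes that exact set.
    if keys is None:
        keys = range(n)
    for k in keys:
        session.execute(SimpleStatement(
            "INSERT INTO cf (key, c1, c2) VALUES ('k%d', 'value1', 'value2')" % k,
            consistency_level=consistency))

def query_c1c2(session, key, consistency=ConsistencyLevel.QUORUM,
               tolerate_missing=False, must_be_missing=False):
    # Read one key back and sanity-check both columns.
    rows = list(session.execute(SimpleStatement(
        "SELECT c1, c2 FROM cf WHERE key='k%d'" % key,
        consistency_level=consistency)))
    if must_be_missing:
        assert len(rows) == 0
        return
    if not rows and tolerate_missing:
        return
    assert len(rows) == 1 and tuple(rows[0]) == ('value1', 'value2')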
    def decommission_test(self):
        cluster = self.cluster

        tokens = cluster.balanced_tokens(4)
        cluster.populate(4, tokens=tokens).start()
        node1, node2, node3, node4 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 2)
        self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=30000, consistency=ConsistencyLevel.QUORUM)

        cluster.flush()
        sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
        init_size = sizes[0]
        assert_almost_equal(*sizes)

        time.sleep(.5)
        node4.decommission()
        node4.stop()
        cluster.cleanup()
        time.sleep(.5)

        # Check we can get all the keys
        for n in xrange(0, 30000):
            query_c1c2(session, n, ConsistencyLevel.QUORUM)

        sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
        debug(sizes)
        assert_almost_equal(sizes[0], sizes[1])
        assert_almost_equal((2.0 / 3.0) * sizes[0], sizes[2])
        assert_almost_equal(sizes[2], init_size)
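Background on the size assertions above, assuming a balanced ring holding T bytes of data in total:

        # With RF=2 on 4 balanced nodes, each node stores 2/4 of the data,
        # so init_size ~= T/2. Decommissioning node4 streams its primary
        # range to its successor and a new replica of that range to the
        # following node: those two grow to 3T/4 each, while the remaining
        # node stays at T/2. Hence sizes[0] == sizes[1] (~3T/4),
        # sizes[2] == (2.0 / 3.0) * sizes[0] (~T/2), and
        # sizes[2] == init_size, exactly what is asserted.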
    def movement_test(self):
        cluster = self.cluster

        # Create an unbalanced ring
        cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 1)
        self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ONE)

        cluster.flush()

        # Move nodes to balance the cluster
        balancing_tokens = cluster.balanced_tokens(3)
        escformat = '%s'
        node1.move(escformat % balancing_tokens[0])  # can't assume 0 is balanced with m3p
        node2.move(escformat % balancing_tokens[1])
        node3.move(escformat % balancing_tokens[2])
        time.sleep(1)

        cluster.cleanup()

        # Check we can get all the keys
        for n in xrange(0, 10000):
            query_c1c2(session, n, ConsistencyLevel.ONE)

        # Now the load should be basically even
        sizes = [node.data_size() for node in [node1, node2, node3]]

        assert_almost_equal(sizes[0], sizes[1])
        assert_almost_equal(sizes[0], sizes[2])
        assert_almost_equal(sizes[1], sizes[2])
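cluster.balanced_tokens(3) comes from ccm; a minimal sketch (an assumption about the computation, not a copy of ccm's code) of the equivalent for the Murmur3 partitioner:

def balanced_murmur3_tokens(n):
    # Murmur3 tokens live in [-2**63, 2**63); spacing n tokens evenly
    # across that range gives each node an equal primary range.
    return [(i * (2**64 // n)) - 2**63 for i in range(n)]

# balanced_murmur3_tokens(3) ->
# [-9223372036854775808, -3074457345618258603, 3074457345618258602]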
    def _do_hinted_handoff(self, node1, node2, enabled):
        """
        Test that if we stop one node the other one
        will store hints only when hinted handoff is enabled
        """
        session = self.patient_exclusive_cql_connection(node1)
        self.create_ks(session, 'ks', 2)
        create_c1c2_table(self, session)

        node2.stop(wait_other_notice=True)

        insert_c1c2(session, n=100, consistency=ConsistencyLevel.ONE)

        log_mark = node1.mark_log()
        node2.start(wait_other_notice=True)

        if enabled:
            node1.watch_log_for(["Finished hinted"], from_mark=log_mark, timeout=120)

        node1.stop(wait_other_notice=True)

        # Check node2 for all the keys: they should be present if hints were enabled, and missing otherwise
        session = self.patient_exclusive_cql_connection(node2, keyspace='ks')
        for n in xrange(0, 100):
            if enabled:
                query_c1c2(session, n, ConsistencyLevel.ONE)
            else:
                query_c1c2(session, n, ConsistencyLevel.ONE, tolerate_missing=True, must_be_missing=True)
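A hypothetical pair of drivers for the parameterized helper above (the test names are illustrative; hinted_handoff_enabled is the standard cassandra.yaml option):

    def hintedhandoff_enabled_test(self):
        self.cluster.set_configuration_options(values={'hinted_handoff_enabled': True})
        self.cluster.populate(2).start(wait_for_binary_proto=True)
        node1, node2 = self.cluster.nodelist()
        self._do_hinted_handoff(node1, node2, True)

    def hintedhandoff_disabled_test(self):
        self.cluster.set_configuration_options(values={'hinted_handoff_enabled': False})
        self.cluster.populate(2).start(wait_for_binary_proto=True)
        node1, node2 = self.cluster.nodelist()
        self._do_hinted_handoff(node1, node2, False)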
    def quorum_quorum_test(self):
        cluster = self.cluster

        cluster.populate(3).start()
        [node1, node2, node3] = cluster.nodelist()

        cursor1 = self.patient_cql_connection(node1).cursor()
        self.create_ks(cursor1, 'ks', 3)
        create_c1c2_table(self, cursor1)

        cursor2 = self.patient_cql_connection(node2, 'ks').cursor()

        # insert and get at CL.QUORUM
        for n in xrange(0, 100):
            insert_c1c2(cursor1, n, "QUORUM")
            query_c1c2(cursor2, n, "QUORUM")

        # shutdown a node and test again
        node3.stop(wait_other_notice=True)
        for n in xrange(100, 200):
            insert_c1c2(cursor1, n, "QUORUM")
            query_c1c2(cursor2, n, "QUORUM")

        # shutdown another node and test that we get an unavailable exception
        node2.stop(wait_other_notice=True)
        assert_unavailable(insert_c1c2, cursor1, 200, "QUORUM")
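Why the last write fails while the earlier ones succeed:

        # quorum(RF) = RF // 2 + 1, so with RF=3 every QUORUM operation
        # needs 2 live replicas. One node down still leaves 2 of 3, and
        # inserts and reads keep working; two nodes down leaves only 1,
        # so the coordinator fails fast with an Unavailable error, which
        # assert_unavailable expects.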
Example #6
    def move_single_node_test(self):
        """ Test moving a node in a single-node cluster (#4200) """
        cluster = self.cluster

        # Create an unbalanced ring
        cluster.populate(1, tokens=[0]).start()
        node1 = cluster.nodelist()[0]
        time.sleep(0.2)

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 1)
        self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ONE)

        cluster.flush()

        node1.move(2**25)
        time.sleep(1)

        cluster.cleanup()

        # Check we can get all the keys
        for n in xrange(0, 10000):
            query_c1c2(session, n, ConsistencyLevel.ONE)
    def readrepair_test(self):
        cluster = self.cluster
        cluster.set_configuration_options(
            values={'hinted_handoff_enabled': False})

        if DISABLE_VNODES:
            cluster.populate(2).start()
        else:
            tokens = cluster.balanced_tokens(2)
            cluster.populate(2, tokens=tokens).start()
        [node1, node2] = cluster.nodelist()

        cursor = self.patient_cql_connection(node1).cursor()
        self.create_ks(cursor, 'ks', 2)
        create_c1c2_table(self, cursor, read_repair=1.0)

        node2.stop(wait_other_notice=True)

        for n in xrange(0, 10000):
            insert_c1c2(cursor, n, "ONE")

        node2.start(wait_other_notice=True)
        time.sleep(5)
        # query everything to cause RR
        for n in xrange(0, 10000):
            query_c1c2(cursor, n, "QUORUM")

        node1.stop(wait_other_notice=True)

        # Check node2 for all the keys that should have been repaired
        cursor = self.patient_cql_connection(node2, keyspace='ks').cursor()
        for n in xrange(0, 10000):
            query_c1c2(cursor, n, "ONE")
    def hintedhandoff_test(self):
        cluster = self.cluster

        if DISABLE_VNODES:
            cluster.populate(2).start()
        else:
            tokens = cluster.balanced_tokens(2)
            cluster.populate(2, tokens=tokens).start()
        [node1, node2] = cluster.nodelist()

        cursor = self.patient_cql_connection(node1).cursor()
        self.create_ks(cursor, 'ks', 2)
        create_c1c2_table(self, cursor)

        node2.stop(wait_other_notice=True)

        for n in xrange(0, 100):
            insert_c1c2(cursor, n, "ONE")

        log_mark = node1.mark_log()
        node2.start()
        node1.watch_log_for(["Finished hinted"],
                            from_mark=log_mark,
                            timeout=90)

        node1.stop(wait_other_notice=True)

        # Check node2 for all the keys that should have been delivered via HH
        cursor = self.patient_cql_connection(node2, keyspace='ks').cursor()
        for n in xrange(0, 100):
            query_c1c2(cursor, n, "ONE")
    def _deprecated_repair_jmx(self, method, arguments):
        cluster = self.cluster

        debug("Starting cluster..")
        cluster.populate([1, 1])
        node1, node2 = cluster.nodelist()
        remove_perf_disable_shared_mem(node1)
        cluster.start()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 2)
        self.create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=1000, consistency=ConsistencyLevel.ALL)

        # Run repair
        mbean = make_mbean('db', 'StorageService')
        with JolokiaAgent(node1) as jmx:
            # assert repair runs and returns valid cmd number
            self.assertEqual(jmx.execute_method(mbean, method, arguments), 1)
        # wait for log to start
        node1.watch_log_for("Starting repair command")
        # get repair parameters from the log
        l = node1.grep_log(r"Starting repair command #1, repairing keyspace ks with repair options \(parallelism: (?P<parallelism>\w+), primary range: (?P<pr>\w+), incremental: (?P<incremental>\w+), job threads: (?P<jobs>\d+), ColumnFamilies: (?P<cfs>.+), dataCenters: (?P<dc>.+), hosts: (?P<hosts>.+), # of ranges: (?P<ranges>\d+)\)")
        self.assertEqual(len(l), 1)
        line, m = l[0]
        return {"parallelism": m.group("parallelism"),
                "primary_range": m.group("pr"),
                "incremental": m.group("incremental"),
                "job_threads": m.group("jobs"),
                "column_families": m.group("cfs"),
                "data_centers": m.group("dc"),
                "hosts": m.group("hosts"),
                "ranges": m.group("ranges")}
    def readrepair_test(self):
        cluster = self.cluster
        cluster.set_configuration_options(values={"hinted_handoff_enabled": False})

        if DISABLE_VNODES:
            cluster.populate(2).start()
        else:
            tokens = cluster.balanced_tokens(2)
            cluster.populate(2, tokens=tokens).start()
        node1, node2 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, "ks", 2)
        create_c1c2_table(self, session, read_repair=1.0)

        node2.stop(wait_other_notice=True)

        insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ONE)

        node2.start(wait_other_notice=True)

        # query everything to cause RR
        for n in xrange(0, 10000):
            query_c1c2(session, n, ConsistencyLevel.QUORUM)

        node1.stop(wait_other_notice=True)

        # Check node2 for all the keys that should have been repaired
        session = self.patient_cql_connection(node2, keyspace="ks")
        for n in xrange(0, 10000):
            query_c1c2(session, n, ConsistencyLevel.ONE)
    def blacklisted_directory_test(self):
        cluster = self.cluster
        cluster.set_datadir_count(3)
        cluster.populate(1)
        [node] = cluster.nodelist()
        remove_perf_disable_shared_mem(node)
        cluster.start(wait_for_binary_proto=True)

        session = self.patient_cql_connection(node)
        self.create_ks(session, 'ks', 1)
        create_c1c2_table(self, session)
        insert_c1c2(session, n=10000)
        node.flush()
        for k in xrange(0, 10000):
            query_c1c2(session, k)

        node.compact()
        mbean = make_mbean('db', type='BlacklistedDirectories')
        with JolokiaAgent(node) as jmx:
            jmx.execute_method(mbean, 'markUnwritable', [os.path.join(node.get_path(), 'data0')])

        for k in xrange(0, 10000):
            query_c1c2(session, k)

        node.nodetool('relocatesstables')

        for k in xrange(0, 10000):
            query_c1c2(session, k)
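Background on the JMX call and the nodetool command above (a hedged summary of the expected behaviour):

        # markUnwritable blacklists the data0 directory for writes only,
        # so sstables already there stay readable, which the second query
        # loop checks. relocatesstables then rewrites sstables onto the
        # remaining writable data directories, and the last loop verifies
        # that nothing was lost in the move.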
    def quorum_available_during_failure_test(self):
        CL = ConsistencyLevel.QUORUM
        RF = 3

        debug("Creating a ring")
        cluster = self.cluster
        if DISABLE_VNODES:
            cluster.populate(3).start()
        else:
            tokens = cluster.balanced_tokens(3)
            cluster.populate(3, tokens=tokens).start()
        node1, node2, node3 = cluster.nodelist()

        debug("Set to talk to node 2")
        session = self.patient_cql_connection(node2)
        self.create_ks(session, 'ks', RF)
        create_c1c2_table(self, session)

        debug("Generating some data")
        insert_c1c2(session, n=100, consistency=CL)

        debug("Taking down node1")
        node1.stop(wait_other_notice=True)

        debug("Reading back data.")
        for n in xrange(100):
            query_c1c2(session, n, CL)
Example #21
    def consistent_reads_after_bootstrap_test(self):
        debug("Creating a ring")
        cluster = self.cluster
        cluster.set_configuration_options(values={'hinted_handoff_enabled': False,
                                                  'write_request_timeout_in_ms': 60000,
                                                  'read_request_timeout_in_ms': 60000,
                                                  'dynamic_snitch_badness_threshold': 0.0},
                                          batch_commitlog=True)

        cluster.populate(2).start(wait_for_binary_proto=True, wait_other_notice=True)
        node1, node2 = cluster.nodelist()

        debug("Set to talk to node 2")
        n2session = self.patient_cql_connection(node2)
        self.create_ks(n2session, 'ks', 2)
        create_c1c2_table(self, n2session)

        debug("Generating some data for all nodes")
        insert_c1c2(n2session,
                    keys=range(10, 20),
                    consistency=ConsistencyLevel.ALL)

        node1.flush()
        debug("Taking down node1")
        node1.stop(wait_other_notice=True)

        debug("Writing data to only node2")
        insert_c1c2(n2session,
                    keys=range(30, 1000),
                    consistency=ConsistencyLevel.ONE)
        node2.flush()

        debug("Restart node1")
        node1.start(wait_other_notice=True)

        debug("Bootstraping node3")
        node3 = new_node(cluster)
        node3.start(wait_for_binary_proto=True)

        n3session = self.patient_cql_connection(node3)
        n3session.execute("USE ks")
        debug("Checking that no data was lost")
        for n in xrange(10, 20):
            query_c1c2(n3session, n, ConsistencyLevel.ALL)

        for n in xrange(30, 1000):
            query_c1c2(n3session, n, ConsistencyLevel.ALL)
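Why node3 must serve every key at CL.ALL:

        # Bootstrap streams each of node3's new ranges from an existing
        # replica, so once it has joined, all keys acknowledged before the
        # bootstrap (whether written at ALL, or at ONE while node1 was
        # down) have to be readable through it; a miss here would mean the
        # range movement dropped acknowledged writes.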
    def quorum_quorum_test(self):
        session, session2 = self.cl_cl_prepare(ConsistencyLevel.QUORUM, ConsistencyLevel.QUORUM)

        # Stop a node and retest
        self.cluster.nodelist()[2].stop()
        for n in xrange(0, 100):
            insert_c1c2(session, n, ConsistencyLevel.QUORUM)
            query_c1c2(session2, n, ConsistencyLevel.QUORUM)

        self.cluster.nodelist()[1].stop()
        assert_unavailable(insert_c1c2, session, 100, ConsistencyLevel.QUORUM)
    def _deprecated_repair_jmx(self, method, arguments):
        """
        * Launch a two node, two DC cluster
        * Create a keyspace and table
        * Insert some data
        * Call the deprecated repair JMX API based on the arguments passed into this method
        * Check the node log to see if the correct repair was performed based on the jmx args
        """
        cluster = self.cluster

        debug("Starting cluster..")
        cluster.populate([1, 1])
        node1, node2 = cluster.nodelist()
        remove_perf_disable_shared_mem(node1)
        cluster.start()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 2)
        self.create_cf(session,
                       'cf',
                       read_repair=0.0,
                       columns={
                           'c1': 'text',
                           'c2': 'text'
                       })

        insert_c1c2(session, n=1000, consistency=ConsistencyLevel.ALL)

        # Run repair
        mbean = make_mbean('db', 'StorageService')
        with JolokiaAgent(node1) as jmx:
            # assert repair runs and returns valid cmd number
            self.assertEqual(jmx.execute_method(mbean, method, arguments), 1)
        # wait for log to start
        node1.watch_log_for("Starting repair command")
        # get repair parameters from the log
        l = node1.grep_log((
            r"Starting repair command #1, repairing keyspace ks with repair options \(parallelism: (?P<parallelism>\w+), primary range: (?P<pr>\w+), "
            r"incremental: (?P<incremental>\w+), job threads: (?P<jobs>\d+), ColumnFamilies: (?P<cfs>.+), dataCenters: (?P<dc>.+), "
            r"hosts: (?P<hosts>.+), # of ranges: (?P<ranges>\d+)\)"))
        self.assertEqual(len(l), 1)
        line, m = l[0]
        return {
            "parallelism": m.group("parallelism"),
            "primary_range": m.group("pr"),
            "incremental": m.group("incremental"),
            "job_threads": m.group("jobs"),
            "column_families": m.group("cfs"),
            "data_centers": m.group("dc"),
            "hosts": m.group("hosts"),
            "ranges": m.group("ranges")
        }
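A hypothetical caller of the helper above; the method string is illustrative of one of the deprecated forceRepairAsync overloads on the StorageService MBean, and the expected option values are assumptions:

    def force_repair_async_test(self):
        opts = self._deprecated_repair_jmx(
            "forceRepairAsync(java.lang.String,boolean,java.util.Collection,"
            "java.util.Collection,boolean,boolean,[Ljava.lang.String;)",
            ['ks', True, [], [], False, False, []])
        self.assertEqual(opts["primary_range"], "false")
        self.assertEqual(opts["incremental"], "false")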
    def consistent_reads_after_move_test(self):
        debug("Creating a ring")
        cluster = self.cluster
        cluster.set_configuration_options(values={'hinted_handoff_enabled': False,
                                                  'write_request_timeout_in_ms': 60000,
                                                  'read_request_timeout_in_ms': 60000,
                                                  'dynamic_snitch_badness_threshold': 0.0},
                                          batch_commitlog=True)

        cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
        [node1, node2, node3] = cluster.nodelist()

        debug("Set to talk to node 2")
        n2cursor = self.patient_cql_connection(node2).cursor()
        self.create_ks(n2cursor, 'ks', 2)
        create_c1c2_table(self, n2cursor)

        debug("Generating some data for all nodes")
        for n in xrange(10, 20):
            insert_c1c2(n2cursor, n, 'ALL')

        node1.flush()
        debug("Taking down node1")
        node1.stop(wait_other_notice=True)

        debug("Writing data to node2")
        for n in xrange(30, 1000):
            insert_c1c2(n2cursor, n, 'ONE')
        node2.flush()

        debug("Restart node1")
        node1.start(wait_other_notice=True)

        debug("Move token on node3")
        node3.move(2)

        debug("Checking that no data was lost")
        for n in xrange(10, 20):
            query_c1c2(n2cursor, n, 'ALL')

        for n in xrange(30, 1000):
            query_c1c2(n2cursor, n, 'ALL')
    def one_all_test(self):
        session, session2 = self.cl_cl_prepare(ConsistencyLevel.ONE, ConsistencyLevel.ALL)

        # Stop a node and retest
        self.cluster.nodelist()[2].stop()
        for n in xrange(0, 100):
            insert_c1c2(session, n, ConsistencyLevel.ONE)
        assert_unavailable(query_c1c2, session2, 100, ConsistencyLevel.ALL)

        # Stop a second node and retest
        self.cluster.nodelist()[1].stop()
        for n in xrange(0, 100):
            insert_c1c2(session, n, ConsistencyLevel.ONE)
        assert_unavailable(query_c1c2, session2, 100, ConsistencyLevel.ALL)
Example #26
    def non_local_read_test(self):
        """ This test reads from a coordinator we know has no copy of the data """
        cluster = self.cluster

        cluster.populate(3).start()
        node1, node2, node3 = cluster.nodelist()

        cursor = self.patient_cql_connection(node1)
        self.create_ks(cursor, 'ks', 2)
        create_c1c2_table(self, cursor)

        # insert and get at CL.QUORUM (since RF=2, node1 won't have all keys locally)
        for n in xrange(0, 1000):
            tools.insert_c1c2(cursor, n, ConsistencyLevel.QUORUM)
            tools.query_c1c2(cursor, n, ConsistencyLevel.QUORUM)
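Why QUORUM and not ONE here:

        # quorum(2) = 2, so every QUORUM operation must involve both
        # replicas. In a 3-node RF=2 ring, node1 holds no replica for
        # roughly a third of the keys, so for those keys the coordinator
        # does purely remote reads and writes, which is the path this
        # test is meant to exercise.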
Example #27
    def hintedhandoff_decom_test(self):
        self.cluster.populate(4).start(wait_for_binary_proto=True)
        [node1, node2, node3, node4] = self.cluster.nodelist()
        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 2)
        create_c1c2_table(self, session)
        node4.stop(wait_other_notice=True)
        insert_c1c2(session, n=100, consistency=ConsistencyLevel.ONE)
        node1.decommission()
        node4.start(wait_for_binary_proto=True)
        node2.decommission()
        node3.decommission()
        time.sleep(5)
        for x in xrange(0, 100):
            query_c1c2(session, x, ConsistencyLevel.ONE)
Example #28
    def non_local_read_test(self):
        """ This test reads from a coordinator we know has no copy of the data """
        cluster = self.cluster

        cluster.populate(3).start()
        [node1, node2, node3] = cluster.nodelist()

        cursor = self.cql_connection(node1).cursor()
        self.create_ks(cursor, 'ks', 2)
        self.create_cf(cursor, 'cf')

        # insert and get at CL.QUORUM (since RF=2, node1 won't have all keys locally)
        for n in xrange(0, 1000):
            tools.insert_c1c2(cursor, n, "QUORUM")
            tools.query_c1c2(cursor, n, "QUORUM")
Example #31
    def non_local_read_test(self):
        """ This test reads from a coordinator we know has no copy of the data """
        cluster = self.cluster

        cluster.populate(3).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 2)
        create_c1c2_table(self, session)

        # insert and get at CL.QUORUM (since RF=2, node1 won't have all keys locally)
        tools.insert_c1c2(session, n=1000, consistency=ConsistencyLevel.QUORUM)
        for n in xrange(0, 1000):
            tools.query_c1c2(session, n, ConsistencyLevel.QUORUM)
    def one_one_test(self):
        session, session2 = self.cl_cl_prepare(
            ConsistencyLevel.ONE, ConsistencyLevel.ONE, tolerate_missing=True)

        # Stop a node and retest
        self.cluster.nodelist()[2].stop()
        for n in xrange(0, 100):
            insert_c1c2(session, n, ConsistencyLevel.ONE)
            query_c1c2(session2, n, ConsistencyLevel.ONE, tolerate_missing=True)

        # Stop a second node and retest
        self.cluster.nodelist()[1].stop()
        for n in xrange(0, 100):
            insert_c1c2(session, n, ConsistencyLevel.ONE)
            query_c1c2(session2, n, ConsistencyLevel.ONE, tolerate_missing=False)
    def cl_cl_prepare(self, write_cl, read_cl, tolerate_missing=False):
        cluster = self.cluster

        cluster.populate(3).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 3)
        create_c1c2_table(self, session)

        session2 = self.patient_cql_connection(node2, 'ks')

        # insert at write_cl and read back at read_cl
        for n in xrange(0, 100):
            insert_c1c2(session, n, write_cl)
            query_c1c2(session2, n, read_cl, tolerate_missing)

        return session, session2
Example #34
    def movement_test(self):
        cluster = self.cluster

        # Create an unbalanced ring
        cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 1)
        self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=30000, consistency=ConsistencyLevel.ONE)

        cluster.flush()

        # Move nodes to balance the cluster
        def move_node(node, token, ip):
            mark = node.mark_log()
            node.move(token)  # can't assume 0 is balanced with m3p
            node.watch_log_for('{} state jump to NORMAL'.format(ip),
                               from_mark=mark,
                               timeout=180)
            time.sleep(3)

        balancing_tokens = cluster.balanced_tokens(3)

        move_node(node1, balancing_tokens[0], '127.0.0.1')
        move_node(node2, balancing_tokens[1], '127.0.0.2')
        move_node(node3, balancing_tokens[2], '127.0.0.3')

        time.sleep(1)

        cluster.cleanup()

        # Check we can get all the keys
        for n in xrange(0, 30000):
            query_c1c2(session, n, ConsistencyLevel.ONE)

        # Now the load should be basically even
        sizes = [node.data_size() for node in [node1, node2, node3]]

        assert_almost_equal(sizes[0], sizes[1])
        assert_almost_equal(sizes[0], sizes[2])
        assert_almost_equal(sizes[1], sizes[2])
    def consistent_reads_after_bootstrap_test(self):
        debug("Creating a ring")
        cluster = self.cluster
        cluster.set_configuration_options(values={'hinted_handoff_enabled': False, 'write_request_timeout_in_ms': 60000,
                                          'read_request_timeout_in_ms': 60000, 'dynamic_snitch_badness_threshold': 0.0}, batch_commitlog=True)

        cluster.populate(2).start(wait_for_binary_proto=True, wait_other_notice=True)
        node1, node2 = cluster.nodelist()

        debug("Set to talk to node 2")
        n2session = self.patient_cql_connection(node2)
        self.create_ks(n2session, 'ks', 2)
        create_c1c2_table(self, n2session)

        debug("Generating some data for all nodes")
        for n in xrange(10, 20):
            insert_c1c2(n2session, n, ConsistencyLevel.ALL)

        node1.flush()
        debug("Taking down node1")
        node1.stop(wait_other_notice=True)

        debug("Writing data to only node2")
        for n in xrange(30, 1000):
            insert_c1c2(n2session, n, ConsistencyLevel.ONE)
        node2.flush()

        debug("Restart node1")
        node1.start(wait_other_notice=True)

        debug("Boostraping node3")
        node3 = new_node(cluster)
        node3.start(wait_for_binary_proto=True)

        n3session = self.patient_cql_connection(node3)
        n3session.execute("USE ks")
        debug("Checking that no data was lost")
        for n in xrange(10, 20):
            query_c1c2(n3session, n, ConsistencyLevel.ALL)

        for n in xrange(30, 1000):
            query_c1c2(n3session, n, ConsistencyLevel.ALL)
    def movement_test(self):
        cluster = self.cluster

        # Create an unbalanced ring
        cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 1)
        self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ONE)

        cluster.flush()

        # Move nodes to balance the cluster
        def move_node(node, token, ip):
            mark = node.mark_log()
            node.move(token)  # can't assume 0 is balanced with m3p
            node.watch_log_for('{} state jump to NORMAL'.format(ip), from_mark=mark, timeout=180)
            time.sleep(3)

        balancing_tokens = cluster.balanced_tokens(3)

        move_node(node1, balancing_tokens[0], '127.0.0.1')
        move_node(node2, balancing_tokens[1], '127.0.0.2')
        move_node(node3, balancing_tokens[2], '127.0.0.3')

        time.sleep(1)

        cluster.cleanup()

        # Check we can get all the keys
        for n in xrange(0, 10000):
            query_c1c2(session, n, ConsistencyLevel.ONE)

        # Now the load should be basically even
        sizes = [node.data_size() for node in [node1, node2, node3]]

        assert_almost_equal(sizes[0], sizes[1])
        assert_almost_equal(sizes[0], sizes[2])
        assert_almost_equal(sizes[1], sizes[2])
    def all_all_test(self):
        cluster = self.cluster

        cluster.populate(3).start()
        [node1, node2, node3] = cluster.nodelist()

        cursor1 = self.cql_connection(node1).cursor()
        self.create_ks(cursor1, 'ks', 3)
        create_c1c2_table(self, cursor1)

        cursor2 = self.cql_connection(node2, 'ks').cursor()

        # insert and get at CL.ALL
        for n in xrange(0, 100):
            insert_c1c2(cursor1, n, "ALL")
            query_c1c2(cursor2, n, "ALL")

        # shutdown one node and test that we get an unavailable exception
        node3.stop(wait_other_notice=True)
        assert_unavailable(insert_c1c2, cursor1, 100, "ALL")
    def all_one_test(self):
        cluster = self.cluster

        cluster.populate(3).start()
        [node1, node2, node3] = cluster.nodelist()

        cursor1 = self.patient_cql_connection(node1).cursor()
        self.create_ks(cursor1, "ks", 3)
        create_c1c2_table(self, cursor1)

        cursor2 = self.patient_cql_connection(node2, "ks").cursor()

        # insert at CL.ALL and get at CL.ONE
        for n in xrange(0, 100):
            insert_c1c2(cursor1, n, "ALL")
            query_c1c2(cursor2, n, "ONE")

        # shutdown a node and test again
        node3.stop(wait_other_notice=True)
        assert_unavailable(insert_c1c2, cursor1, 100, "ALL")
Example #40
    def tracing_from_system_traces_test(self):
        self.cluster.populate(1).start(wait_for_binary_proto=True)

        node1, = self.cluster.nodelist()

        session = self.patient_cql_connection(node1)

        self.create_ks(session, 'ks', 1)
        create_c1c2_table(self, session)

        for n in xrange(100):
            insert_c1c2(session, n)

        out, err = self.run_cqlsh(node1, 'TRACING ON; SELECT * FROM ks.cf')
        self.assertIn('Tracing session: ', out)

        out, err = self.run_cqlsh(node1, 'TRACING ON; SELECT * FROM system_traces.events')
        self.assertNotIn('Tracing session: ', out)

        out, err = self.run_cqlsh(node1, 'TRACING ON; SELECT * FROM system_traces.sessions')
        self.assertNotIn('Tracing session: ', out)
    def one_one_test(self):
        cluster = self.cluster

        cluster.populate(3).start()
        [node1, node2, node3] = cluster.nodelist()

        cursor1 = self.patient_cql_connection(node1).cursor()
        self.create_ks(cursor1, "ks", 3)
        create_c1c2_table(self, cursor1)

        cursor2 = self.patient_cql_connection(node2, "ks").cursor()

        # insert and get at CL.ONE
        for n in xrange(0, 100):
            insert_c1c2(cursor1, n, "ONE")
            retry_till_success(query_c1c2, cursor2, n, "ONE", timeout=5)

        # shutdown a node and test again
        node3.stop(wait_other_notice=True)
        for n in xrange(100, 200):
            insert_c1c2(cursor1, n, "ONE")
            retry_till_success(query_c1c2, cursor2, n, "ONE", timeout=5)

        # shutdown a second node and test again
        node2.stop(wait_other_notice=True)
        for n in xrange(200, 300):
            insert_c1c2(cursor1, n, "ONE")
            retry_till_success(query_c1c2, cursor1, n, "ONE", timeout=5)
Example #42
    def _populate_cluster(self, start=True):
        cluster = self.cluster

        # Disable hinted handoff and set batch commit log so this doesn't
        # interfere with the test (this must be after the populate)
        cluster.set_configuration_options(values={'hinted_handoff_enabled': False})
        cluster.set_batch_commitlog(enabled=True)
        debug("Starting cluster..")
        cluster.populate(3).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        session.cluster.default_retry_policy = FlakyRetryPolicy(max_retries=15)
        self.create_ks(session, 'ks', 3)
        self.create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})

        # Insert 1000 keys, kill node 3, insert 1 key, restart node 3, insert 1000 more keys
        debug("Inserting data...")
        insert_c1c2(session, n=1000, consistency=ConsistencyLevel.ALL)
        node3.flush()
        node3.stop(wait_other_notice=True)
        insert_c1c2(session, keys=(1000, ), consistency=ConsistencyLevel.TWO)
        node3.start(wait_other_notice=True, wait_for_binary_proto=True)
        insert_c1c2(session, keys=range(1001, 2001), consistency=ConsistencyLevel.ALL)

        cluster.flush()
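A minimal sketch of a FlakyRetryPolicy like the one installed above, assuming the python-driver's cassandra.policies.RetryPolicy interface (retry transient failures up to max_retries, then rethrow):

from cassandra.policies import RetryPolicy

class FlakyRetryPolicy(RetryPolicy):
    # Retries timed-out or unavailable operations a bounded number of
    # times so a briefly flapping node does not fail the whole test.
    def __init__(self, max_retries=10):
        self.max_retries = max_retries

    def _decide(self, consistency, retry_num):
        if retry_num < self.max_retries:
            return self.RETRY, consistency
        return self.RETHROW, None

    def on_read_timeout(self, query, consistency, required_responses,
                        received_responses, data_retrieved, retry_num):
        return self._decide(consistency, retry_num)

    def on_write_timeout(self, query, consistency, write_type,
                         required_responses, received_responses, retry_num):
        return self._decide(consistency, retry_num)

    def on_unavailable(self, query, consistency, required_replicas,
                       alive_replicas, retry_num):
        return self._decide(consistency, retry_num)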
    def consistent_reads_after_move_test(self):
        debug("Creating a ring")
        cluster = self.cluster
        cluster.set_configuration_options(values={'hinted_handoff_enabled': False,
                                                  'write_request_timeout_in_ms': 60000,
                                                  'read_request_timeout_in_ms': 60000,
                                                  'dynamic_snitch_badness_threshold': 0.0},
                                          batch_commitlog=True)

        cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
        [node1, node2, node3] = cluster.nodelist()

        debug("Set to talk to node 2")
        n2cursor = self.patient_cql_connection(node2)
        self.create_ks(n2cursor, 'ks', 2)
        create_c1c2_table(self, n2cursor)

        debug("Generating some data for all nodes")
        for n in xrange(10, 20):
            insert_c1c2(n2cursor, n, ConsistencyLevel.ALL)

        node1.flush()
        debug("Taking down node1")
        node1.stop(wait_other_notice=True)

        debug("Writing data to node2")
        for n in xrange(30, 1000):
            insert_c1c2(n2cursor, n, ConsistencyLevel.ONE)
        node2.flush()

        debug("Restart node1")
        node1.start(wait_other_notice=True)

        debug("Move token on node3")
        node3.move(2)

        debug("Checking that no data was lost")
        for n in xrange(10, 20):
            query_c1c2(n2cursor, n, ConsistencyLevel.ALL)

        for n in xrange(30, 1000):
            query_c1c2(n2cursor, n, ConsistencyLevel.ALL)
    def multiple_repair_test(self):
        cluster = self.cluster
        cluster.populate(3).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 3)
        self.create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})

        debug("insert data")

        insert_c1c2(session, keys=range(1, 50), consistency=ConsistencyLevel.ALL)
        node1.flush()

        debug("bringing down node 3")
        node3.flush()
        node3.stop(gently=False)

        debug("inserting additional data into node 1 and 2")
        insert_c1c2(session, keys=range(50, 100), consistency=ConsistencyLevel.TWO)
        node1.flush()
        node2.flush()

        debug("restarting and repairing node 3")
        node3.start(wait_for_binary_proto=True)

        if cluster.version() >= "2.2":
            node3.repair()
        else:
            node3.nodetool("repair -par -inc")

        # wait for stream handlers to be closed on Windows
        # after the session is finished (see CASSANDRA-10644)
        if is_win:
            time.sleep(2)

        debug("stopping node 2")
        node2.stop(gently=False)

        debug("inserting data in nodes 1 and 3")
        insert_c1c2(session, keys=range(100, 150), consistency=ConsistencyLevel.TWO)
        node1.flush()
        node3.flush()

        debug("start and repair node 2")
        node2.start(wait_for_binary_proto=True)

        if cluster.version() >= "2.2":
            node2.repair()
        else:
            node2.nodetool("repair -par -inc")

        debug("replace node and check data integrity")
        node3.stop(gently=False)
        node5 = Node('node5', cluster, True, ('127.0.0.5', 9160), ('127.0.0.5', 7000), '7500', '0', None, ('127.0.0.5', 9042))
        cluster.add(node5, False)
        node5.start(replace_address='127.0.0.3', wait_other_notice=True)

        assert_one(session, "SELECT COUNT(*) FROM ks.cf LIMIT 200", [149])
    def movement_test(self):
        cluster = self.cluster

        # Create an unbalanced ring
        cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 1)
        self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ONE)

        cluster.flush()

        # Move nodes to balance the cluster
        balancing_tokens = cluster.balanced_tokens(3)
        escformat = '\\%s'
        if cluster.version() >= '2.1':
            escformat = '%s'
        node1.move(escformat % balancing_tokens[0])  # can't assume 0 is balanced with m3p
        node2.move(escformat % balancing_tokens[1])
        node3.move(escformat % balancing_tokens[2])
        time.sleep(1)

        cluster.cleanup()

        # Check we can get all the keys
        for n in xrange(0, 10000):
            query_c1c2(session, n, ConsistencyLevel.ONE)

        # Now the load should be basically even
        sizes = [node.data_size() for node in [node1, node2, node3]]

        assert_almost_equal(sizes[0], sizes[1])
        assert_almost_equal(sizes[0], sizes[2])
        assert_almost_equal(sizes[1], sizes[2])
    def multiple_repair_test(self):
        cluster = self.cluster
        cluster.populate(3).start()
        [node1, node2, node3] = cluster.nodelist()

        cursor = self.patient_cql_connection(node1)
        self.create_ks(cursor, 'ks', 3)
        self.create_cf(cursor, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})

        debug("insert data")

        for x in range(1, 50):
            insert_c1c2(cursor, x, ConsistencyLevel.ALL)
        node1.flush()

        debug("bringing down node 3")
        node3.flush()
        node3.stop(gently=False)

        debug("inserting additional data into node 1 and 2")
        for y in range(50, 100):
            insert_c1c2(cursor, y, ConsistencyLevel.TWO)
        node1.flush()
        node2.flush()

        debug("restarting and repairing node 3")
        node3.start()

        if cluster.version() >= "3.0":
            node3.repair()
        else:
            node3.nodetool("repair -par -inc")

        debug("stopping node 2")
        node2.stop(gently=False)

        debug("inserting data in nodes 1 and 3")
        for z in range(100, 150):
            insert_c1c2(cursor, z, ConsistencyLevel.TWO)
        node1.flush()
        node3.flush()

        debug("start and repair node 2")
        node2.start()

        if cluster.version() >= "3.0":
            node2.repair()
        else:
            node2.nodetool("repair -par -inc")

        debug("replace node and check data integrity")
        node3.stop(gently=False)
        node5 = Node('node5', cluster, True, ('127.0.0.5', 9160), ('127.0.0.5', 7000), '7500', '0', None, ('127.0.0.5', 9042))
        cluster.add(node5, False)
        node5.start(replace_address='127.0.0.3', wait_other_notice=True)

        assert_one(cursor, "SELECT COUNT(*) FROM ks.cf LIMIT 200", [149])
    def _setup_multi_dc(self):
        """
        Sets up 3 DCs (2 nodes in 'dc1', and one each in 'dc2' and 'dc3').
        After set up, node2 in dc1 lacks some data and needs to be repaired.
        """
        cluster = self.cluster

        # Disable hinted handoff and set batch commit log so this doesn't
        # interfere with the test (this must be after the populate)
        cluster.set_configuration_options(
            values={'hinted_handoff_enabled': False}, batch_commitlog=True)
        debug("Starting cluster..")
        # populate 2 nodes in dc1, and one node each in dc2 and dc3
        cluster.populate([2, 1, 1]).start()
        version = cluster.version()

        [node1, node2, node3, node4] = cluster.nodelist()
        session = self.patient_cql_connection(node1)
        session.execute(
            "CREATE KEYSPACE ks WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': 2, 'dc2': 1, 'dc3':1};"
        )
        session.execute("USE ks")
        self.create_cf(session,
                       'cf',
                       read_repair=0.0,
                       columns={
                           'c1': 'text',
                           'c2': 'text'
                       })

        # Insert 1000 keys, kill node 3, insert 1 key, restart node 3, insert 1000 more keys
        debug("Inserting data...")
        insert_c1c2(session, n=1000, consistency=ConsistencyLevel.ALL)
        node2.flush()
        node2.stop(wait_other_notice=True)
        insert_c1c2(session, keys=(1000, ), consistency=ConsistencyLevel.THREE)
        node2.start(wait_for_binary_proto=True, wait_other_notice=True)
        node1.watch_log_for_alive(node2)
        insert_c1c2(session,
                    keys=range(1001, 2001),
                    consistency=ConsistencyLevel.ALL)

        cluster.flush()

        # Verify that node2 has only 2000 keys while the others have 2001
        debug("Checking data...")
        self.check_rows_on_node(node2, 2000, missings=[1000])
        for node in [node1, node3, node4]:
            self.check_rows_on_node(node, 2001, found=[1000])
        return cluster
Example #50
    def _setup_multi_dc(self):
        """
        Sets up 3 DCs (2 nodes in 'dc1', and one each in 'dc2' and 'dc3').
        After set up, node2 in dc1 lacks some data and needs to be repaired.
        """
        cluster = self.cluster

        # Disable hinted handoff and set batch commit log so this doesn't
        # interfere with the test (this must be after the populate)
        cluster.set_configuration_options(values={'hinted_handoff_enabled': False}, batch_commitlog=True)
        debug("Starting cluster..")
        # populate 2 nodes in dc1, and one node each in dc2 and dc3
        cluster.populate([2, 1, 1]).start()
        version = cluster.version()

        [node1, node2, node3, node4] = cluster.nodelist()
        session = self.patient_cql_connection(node1)
        session.execute("CREATE KEYSPACE ks WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': 2, 'dc2': 1, 'dc3':1};")
        session.execute("USE ks")
        self.create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})

        # Insert 1000 keys, kill node 3, insert 1 key, restart node 3, insert 1000 more keys
        debug("Inserting data...")
        for i in xrange(0, 1000):
            insert_c1c2(session, i, ConsistencyLevel.ALL)
        node2.flush()
        node2.stop()
        insert_c1c2(session, 1000, ConsistencyLevel.THREE)
        node2.start(wait_for_binary_proto=True, wait_other_notice=True)
        node1.watch_log_for_alive(node2)
        for i in xrange(1001, 2001):
            insert_c1c2(session, i, ConsistencyLevel.ALL)

        cluster.flush()

        # Verify that node2 has only 2000 keys while the others have 2001
        debug("Checking data...")
        self.check_rows_on_node(node2, 2000, missings=[1000])
        for node in [node1, node3, node4]:
            self.check_rows_on_node(node, 2001, found=[1000])
        return cluster
Example #51
    def _simple_repair(self,
                       order_preserving_partitioner=False,
                       sequential=True):
        cluster = self.cluster

        if order_preserving_partitioner:
            cluster.set_partitioner(
                'org.apache.cassandra.dht.ByteOrderedPartitioner')

        # Disable hinted handoff and set batch commit log so this doesn't
        # interfere with the test (this must be after the populate)
        cluster.set_configuration_options(
            values={'hinted_handoff_enabled': False}, batch_commitlog=True)
        debug("Starting cluster..")
        cluster.populate(3).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 3)
        self.create_cf(session,
                       'cf',
                       read_repair=0.0,
                       columns={
                           'c1': 'text',
                           'c2': 'text'
                       })

        # Insert 1000 keys, kill node 3, insert 1 key, restart node 3, insert 1000 more keys
        debug("Inserting data...")
        insert_c1c2(session, n=1000, consistency=ConsistencyLevel.ALL)
        node3.flush()
        node3.stop(wait_other_notice=True)
        insert_c1c2(session, keys=(1000, ), consistency=ConsistencyLevel.TWO)
        node3.start(wait_other_notice=True, wait_for_binary_proto=True)
        insert_c1c2(session,
                    keys=range(1001, 2001),
                    consistency=ConsistencyLevel.ALL)

        cluster.flush()

        # Verify that node3 has only 2000 keys
        debug("Checking data on node3...")
        self.check_rows_on_node(node3, 2000, missings=[1000])

        # Verify that node1 has 2001 keys
        debug("Checking data on node1...")
        self.check_rows_on_node(node1, 2001, found=[1000])

        # Verify that node2 has 2001 keys
        debug("Checking data on node2...")
        self.check_rows_on_node(node2, 2001, found=[1000])

        time.sleep(10)  # see CASSANDRA-4373
        # Run repair
        start = time.time()
        debug("starting repair...")
        node1.repair(self._repair_options(ks='ks', sequential=sequential))
        debug("Repair time: {end}".format(end=time.time() - start))

        # Validate that only one range was transferred
        out_of_sync_logs = node1.grep_log(
            "/([0-9.]+) and /([0-9.]+) have ([0-9]+) range\(s\) out of sync")
        if cluster.version() > "1":
            self.assertEqual(
                len(out_of_sync_logs), 2,
                "Lines matching: " + str([elt[0] for elt in out_of_sync_logs]))
        else:
            # In pre-1.0, we should have only one line
            self.assertEqual(
                len(out_of_sync_logs), 1,
                "Lines matching: " + str([elt[0] for elt in out_of_sync_logs]))
        valid = [(node1.address(), node3.address()),
                 (node3.address(), node1.address()),
                 (node2.address(), node3.address()),
                 (node3.address(), node2.address())]
        for line, m in out_of_sync_logs:
            self.assertEqual(
                int(m.group(3)), 1,
                "Expecting 1 range out of sync, got " + m.group(3))
            self.assertIn((m.group(1), m.group(2)), valid,
                          str((m.group(1), m.group(2))))
            valid.remove((m.group(1), m.group(2)))
            valid.remove((m.group(2), m.group(1)))

        # Check node3 now has the key
        self.check_rows_on_node(node3, 2001, found=[1000], restart=False)
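# For reference, a hedged sketch of how the grep_log pattern above decomposes
# a repair log line; the sample line is illustrative, not captured output:
import re

pattern = r"/([0-9.]+) and /([0-9.]+) have ([0-9]+) range\(s\) out of sync"
sample = "Endpoints /127.0.0.1 and /127.0.0.3 have 1 range(s) out of sync for cf"
m = re.search(pattern, sample)
assert m.group(1) == '127.0.0.1'  # first endpoint
assert m.group(2) == '127.0.0.3'  # second endpoint
assert int(m.group(3)) == 1       # number of out-of-sync ranges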
    def decommission_test(self):
        cluster = self.cluster

        tokens = cluster.balanced_tokens(4)
        cluster.populate(4, tokens=tokens).start()
        node1, node2, node3, node4 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 2)
        self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=10000, consistency=ConsistencyLevel.QUORUM)

        cluster.flush()
        sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
        init_size = sizes[0]
        assert_almost_equal(*sizes)

        time.sleep(.5)
        node4.decommission()
        node4.stop()
        cluster.cleanup()
        time.sleep(.5)

        # Check we can get all the keys
        for n in xrange(0, 10000):
            query_c1c2(session, n, ConsistencyLevel.QUORUM)

        sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
        three_node_sizes = sizes
        assert_almost_equal(sizes[0], sizes[1])
        assert_almost_equal((2.0 / 3.0) * sizes[0], sizes[2])
        assert_almost_equal(sizes[2], init_size)

        if cluster.version() <= '1.2':
            node3.stop(wait_other_notice=True)
            node1.removeToken(tokens[2])
            time.sleep(.5)
            cluster.cleanup()
            time.sleep(.5)

            # Check we can get all the keys
            for n in xrange(0, 10000):
                query_c1c2(session, n, ConsistencyLevel.QUORUM)

            sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
            assert_almost_equal(*sizes)
            assert_almost_equal(sizes[0], 2 * init_size)

            node5 = new_node(cluster, token=(tokens[2] + 1)).start()
            time.sleep(.5)
            cluster.cleanup()
            time.sleep(.5)
            cluster.compact()
            time.sleep(.5)

            # Check we can get all the keys
            for n in xrange(0, 10000):
                query_c1c2(session, n, ConsistencyLevel.QUORUM)

            sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
            # We should be back to the earlier 3-node situation
            for i in xrange(0, len(sizes)):
                assert_almost_equal(sizes[i], three_node_sizes[i])
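# assert_almost_equal compares on-disk sizes that never match byte-for-byte.
# A hedged sketch of the tolerance check it performs (the 16% default error
# bound is an assumption for illustration, not the dtest assertions module):
def assert_almost_equal_sketch(*args, **kwargs):
    error = kwargs.get('error', 0.16)
    vmax, vmin = max(args), min(args)
    assert vmin > vmax * (1.0 - error), \
        "values not within %.0f%% of the max: %s" % (error * 100, str(args))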
Example #53
    def simple_rebuild_test(self):
        """
        @jira_ticket CASSANDRA-9119

        Test rebuild from other dc works as expected.
        """

        keys = 1000

        cluster = self.cluster
        cluster.set_configuration_options(values={
            'endpoint_snitch': 'org.apache.cassandra.locator.PropertyFileSnitch'
        })
        node1 = cluster.create_node('node1',
                                    False, ('127.0.0.1', 9160),
                                    ('127.0.0.1', 7000),
                                    '7100',
                                    '2000',
                                    None,
                                    binary_interface=('127.0.0.1', 9042))
        cluster.add(node1, True, data_center='dc1')

        # start node in dc1
        node1.start(wait_for_binary_proto=True)

        # populate data in dc1
        session = self.patient_exclusive_cql_connection(node1)
        self.create_ks(session, 'ks', {'dc1': 1})
        self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=keys, consistency=ConsistencyLevel.LOCAL_ONE)

        # check data
        for i in xrange(0, keys):
            query_c1c2(session, i, ConsistencyLevel.LOCAL_ONE)
        session.shutdown()

        # Bootstrapping a new node in dc2 with auto_bootstrap: false
        node2 = cluster.create_node('node2',
                                    False, ('127.0.0.2', 9160),
                                    ('127.0.0.2', 7000),
                                    '7200',
                                    '2001',
                                    None,
                                    binary_interface=('127.0.0.2', 9042))
        cluster.add(node2, False, data_center='dc2')
        node2.start(wait_other_notice=True, wait_for_binary_proto=True)

        # wait for snitch to reload
        time.sleep(60)
        # alter keyspace to replicate to dc2
        session = self.patient_exclusive_cql_connection(node2)
        session.execute(
            "ALTER KEYSPACE ks WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};"
        )
        # alter system_auth -- rebuilding it no longer possible after
        # CASSANDRA-11848 prevented local node from being considered a source
        session.execute(
            "ALTER KEYSPACE system_auth WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};"
        )
        session.execute('USE ks')

        self.rebuild_errors = 0

        # rebuild dc2 from dc1
        def rebuild():
            try:
                node2.nodetool('rebuild dc1')
            except ToolError as e:
                if 'Node is still rebuilding' in e.stdout:
                    self.rebuild_errors += 1
                else:
                    raise e

        class Runner(Thread):
            def __init__(self, func):
                Thread.__init__(self)
                self.func = func
                self.thread_exc_info = None

            def run(self):
                """
                Closes over self to catch any exceptions raised by func and
                register them at self.thread_exc_info
                Based on http://stackoverflow.com/a/1854263
                """
                try:
                    self.func()
                except Exception:
                    import sys
                    self.thread_exc_info = sys.exc_info()

        cmd1 = Runner(rebuild)
        cmd1.start()

        # concurrent rebuild should not be allowed (CASSANDRA-9119)
        # (following sleep is needed to avoid conflict in 'nodetool()' method setting up env.)
        time.sleep(.1)
        # we don't need to manually raise exceptions here -- already handled
        rebuild()

        cmd1.join()

        # manually raise exception from cmd1 thread
        # see http://stackoverflow.com/a/1854263
        if cmd1.thread_exc_info is not None:
            raise cmd1.thread_exc_info[1], None, cmd1.thread_exc_info[2]

        # exactly 1 of the two nodetool calls should fail
        # usually it will be the one in the main thread,
        # but occasionally it wins the race with the one in the secondary thread,
        # so we check that one succeeded and the other failed
        self.assertEqual(
            self.rebuild_errors, 1,
            msg='rebuild errors should be 1, but found {}. Concurrent rebuild '
                'should not be allowed, but one rebuild command should have '
                'succeeded.'.format(self.rebuild_errors))

        # check data
        for i in xrange(0, keys):
            query_c1c2(session, i, ConsistencyLevel.LOCAL_ONE)
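# The Runner/rebuild pairing above generalizes to racing any two nodetool
# invocations. A hedged usage sketch, reusing the Runner thread class defined
# above (assumed available at this scope; the stagger value is an assumption
# that gives the background call a head start):
def race_two_commands(first, second, stagger=0.1):
    # Start `first` on a background Runner, let it get going, run `second`
    # inline, then surface any background failure after the join.
    runner = Runner(first)
    runner.start()
    time.sleep(stagger)
    second()
    runner.join()
    return runner.thread_exc_info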
    def taketoken_test(self):
        debug("Creating a ring")
        cluster = self.cluster
        cluster.set_configuration_options(values={
            'initial_token': None,
            'num_tokens': 10,
            'hinted_handoff_enabled': False,
            'write_request_timeout_in_ms': 60000,
            'read_request_timeout_in_ms': 60000,
            'dynamic_snitch_badness_threshold': 0.0
        }, batch_commitlog=True)

        cluster.populate(3).start()
        [node1, node2, node3] = cluster.nodelist()
        cluster.start()

        debug("Set to talk to node 2")
        n2cursor = self.patient_cql_connection(node2).cursor()
        self.create_ks(n2cursor, 'ks', 2)
        create_c1c2_table(self, n2cursor)

        debug("Generating some data for all nodes")
        for n in xrange(10, 20):
            insert_c1c2(n2cursor, n, 'ALL')

        node1.flush()

        debug("Writing data to node2")
        for n in xrange(30, 1000):
            insert_c1c2(n2cursor, n, 'ONE')
        node2.flush()

        debug("Getting token from node 1")
        n1cursor = self.patient_cql_connection(node1).cursor()
        n1cursor.execute('SELECT tokens FROM system.local')
        n1tokens = n1cursor.fetchone()

        n3cursor = self.patient_cql_connection(node3).cursor()
        n3cursor.execute('SELECT tokens FROM system.local')
        n3tokens = n3cursor.fetchone()

        debug("Relocate tokens from node1 to node3")
        tl = " ".join('\\%s' % t for t in list(n1tokens[0])[:8])

        cmd = "taketoken %s" % tl
        debug(cmd)
        node3.nodetool(cmd)
        time.sleep(1)

        debug("Check that the tokens were really moved")
        n3cursor.execute('SELECT tokens FROM system.local')
        n3tokens = n3cursor.fetchone()

        n1cursor.execute('SELECT tokens FROM system.local')
        n1tokens = n1cursor.fetchone()

        debug("n1 %s n3 %s" % (n1tokens, n3tokens))
        assert len(n3tokens[0]) == 18
        assert len(n1tokens[0]) == 2

        debug("Checking that no data was lost")
        for n in xrange(10, 20):
            query_c1c2(n2cursor, n, 'ALL')

        for n in xrange(30, 1000):
            query_c1c2(n2cursor, n, 'ALL')
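# Pulling a node's tokens out of system.local and escaping them for a
# nodetool command is fiddly; a hedged helper sketch of the pattern used
# above (the backslash escape guards negative Murmur3 tokens on the shell):
def first_tokens_as_nodetool_args(cursor, count=8):
    cursor.execute('SELECT tokens FROM system.local')
    tokens = list(cursor.fetchone()[0])[:count]
    return " ".join('\\%s' % t for t in tokens)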
Example #55
    def _simple_repair(self,
                       order_preserving_partitioner=False,
                       sequential=True):
        """
        * Configure a three node cluster to not use hinted handoff, and to use batch commitlog
        * Launch the cluster
        * Create a keyspace at RF 3 and table
        * Insert one thousand rows at CL ALL
        * Flush on node3 and shut it down
        * Insert one row at CL TWO
        * Restart node3
        * Insert one thousand more rows at CL ALL
        * Flush all nodes
        * Check node3 only has 2000 keys
        * Check node1 and node2 have 2001 keys
        * Perform the repair type specified by the parent test
        * Assert the appropriate messages are logged
        * Assert node3 now has all data

        @jira_ticket CASSANDRA-4373
        """
        cluster = self.cluster

        if order_preserving_partitioner:
            cluster.set_partitioner(
                'org.apache.cassandra.dht.ByteOrderedPartitioner')

        # Disable hinted handoff and set batch commit log so this doesn't
        # interfere with the test (this must be after the populate)
        cluster.set_configuration_options(
            values={'hinted_handoff_enabled': False}, batch_commitlog=True)
        debug("Starting cluster..")
        cluster.populate(3).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 3)
        self.create_cf(session,
                       'cf',
                       read_repair=0.0,
                       columns={
                           'c1': 'text',
                           'c2': 'text'
                       })

        # Insert 1000 keys, kill node 3, insert 1 key, restart node 3, insert 1000 more keys
        debug("Inserting data...")
        insert_c1c2(session, n=1000, consistency=ConsistencyLevel.ALL)
        node3.flush()
        node3.stop(wait_other_notice=True)
        insert_c1c2(session, keys=(1000, ), consistency=ConsistencyLevel.TWO)
        node3.start(wait_other_notice=True, wait_for_binary_proto=True)
        insert_c1c2(session,
                    keys=range(1001, 2001),
                    consistency=ConsistencyLevel.ALL)

        cluster.flush()

        # Verify that node3 has only 2000 keys
        debug("Checking data on node3...")
        self.check_rows_on_node(node3, 2000, missings=[1000])

        # Verify that node1 has 2001 keys
        debug("Checking data on node1...")
        self.check_rows_on_node(node1, 2001, found=[1000])

        # Verify that node2 has 2001 keys
        debug("Checking data on node2...")
        self.check_rows_on_node(node2, 2001, found=[1000])

        time.sleep(10)  # see CASSANDRA-4373
        # Run repair
        start = time.time()
        debug("starting repair...")
        node1.repair(
            _repair_options(self.cluster.version(),
                            ks='ks',
                            sequential=sequential))
        debug("Repair time: {end}".format(end=time.time() - start))

        # Validate that only one range was transferred
        out_of_sync_logs = node1.grep_log(
            "/([0-9.]+) and /([0-9.]+) have ([0-9]+) range\(s\) out of sync")

        self.assertEqual(
            len(out_of_sync_logs), 2,
            "Lines matching: " + str([elt[0] for elt in out_of_sync_logs]))

        valid_out_of_sync_pairs = [{node1.address(),
                                    node3.address()},
                                   {node2.address(),
                                    node3.address()}]

        for line, m in out_of_sync_logs:
            num_out_of_sync_ranges, out_of_sync_nodes = m.group(3), {
                m.group(1), m.group(2)
            }
            self.assertEqual(
                int(num_out_of_sync_ranges), 1,
                "Expecting 1 range out of sync for {}, but saw {}".format(
                    out_of_sync_nodes, line))
            self.assertIn(out_of_sync_nodes, valid_out_of_sync_pairs,
                          str(out_of_sync_nodes))

        # Check node3 now has the key
        self.check_rows_on_node(node3, 2001, found=[1000], restart=False)
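# _repair_options assembles version-appropriate nodetool repair arguments. A
# hedged sketch of its shape (the flag spellings are assumptions; the real
# helper lives elsewhere in the dtest suite):
def _repair_options_sketch(version, ks='', cf=None, sequential=True):
    opts = []
    if not sequential:
        opts.append('-par')  # parallel repair instead of the sequential default
    if ks:
        opts.append(ks)
    if cf:
        opts.extend(cf)
    return opts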
    def multiple_repair_test(self):
        """
        * Launch a three node cluster
        * Create a keyspace with RF 3 and a table
        * Insert 49 rows
        * Stop node3
        * Insert 50 more rows
        * Restart node3
        * Issue an incremental repair on node3
        * Stop node2
        * Insert a final 50 rows
        * Restart node2
        * Issue an incremental repair on node2
        * Replace node3 with a new node
        * Verify data integrity
        # TODO: Several more verifications of data need to be interspersed throughout the test. The final assertion is insufficient.
        @jira_ticket CASSANDRA-10644
        """
        cluster = self.cluster
        cluster.populate(3).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 3)
        self.create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})

        debug("insert data")

        insert_c1c2(session, keys=range(1, 50), consistency=ConsistencyLevel.ALL)
        node1.flush()

        debug("bringing down node 3")
        node3.flush()
        node3.stop(gently=False)

        debug("inserting additional data into node 1 and 2")
        insert_c1c2(session, keys=range(50, 100), consistency=ConsistencyLevel.TWO)
        node1.flush()
        node2.flush()

        debug("restarting and repairing node 3")
        node3.start(wait_for_binary_proto=True)

        if cluster.version() >= "2.2":
            node3.repair()
        else:
            node3.nodetool("repair -par -inc")

        # wait for stream handlers to be closed on Windows
        # after the session is finished (see CASSANDRA-10644)
        if is_win:
            time.sleep(2)

        debug("stopping node 2")
        node2.stop(gently=False)

        debug("inserting data in nodes 1 and 3")
        insert_c1c2(session, keys=range(100, 150), consistency=ConsistencyLevel.TWO)
        node1.flush()
        node3.flush()

        debug("start and repair node 2")
        node2.start(wait_for_binary_proto=True)

        if cluster.version() >= "2.2":
            node2.repair()
        else:
            node2.nodetool("repair -par -inc")

        debug("replace node and check data integrity")
        node3.stop(gently=False)
        node5 = Node('node5', cluster, True, ('127.0.0.5', 9160), ('127.0.0.5', 7000), '7500', '0', None, ('127.0.0.5', 9042))
        cluster.add(node5, False)
        node5.start(replace_address='127.0.0.3', wait_other_notice=True)

        assert_one(session, "SELECT COUNT(*) FROM ks.cf LIMIT 200", [149])
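# assert_one checks that a query returns exactly the one expected row. A
# hedged sketch of its behavior (the real helper lives in the dtest
# assertions module; this is an illustration only):
def assert_one_sketch(session, query, expected):
    rows = [list(row) for row in session.execute(query)]
    assert rows == [expected], "Expected %s, got %s" % ([expected], rows)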
    def consistent_reads_after_relocate_test(self):
        debug("Creating a ring")
        cluster = self.cluster
        cluster.set_configuration_options(values={
            'initial_token': None,
            'num_tokens': 10,
            'hinted_handoff_enabled': False,
            'write_request_timeout_in_ms': 60000,
            'read_request_timeout_in_ms': 60000,
            'dynamic_snitch_badness_threshold': 0.0
        }, batch_commitlog=True)

        cluster.populate(3).start()
        [node1, node2, node3] = cluster.nodelist()
        cluster.start()

        debug("Set to talk to node 2")
        n2cursor = self.patient_cql_connection(node2).cursor()
        self.create_ks(n2cursor, 'ks', 2)
        create_c1c2_table(self, n2cursor)

        debug("Generating some data for all nodes")
        for n in xrange(10, 20):
            insert_c1c2(n2cursor, n, 'ALL')

        node1.flush()
        debug("Taking down node1")
        node3.stop(wait_other_notice=True)

        debug("Writing data to node2")
        for n in xrange(30, 1000):
            insert_c1c2(n2cursor, n, 'ONE')
        node2.flush()

        debug("Restart node1")
        node3.start(wait_other_notice=True)

        debug("Getting token from node 1")
        n1cursor = self.patient_cql_connection(node1).cursor()
        n1cursor.execute('SELECT tokens FROM system.local')
        tokens = n1cursor.fetchone()

        debug("Relocate tokens from node1 to node3")
        tl = " ".join(str(t) for t in list(tokens[0])[:8])
        cmd = "taketoken %s" % tl
        debug(cmd)
        node3.nodetool(cmd)

        n1cursor.execute('SELECT tokens FROM system.local')
        tokens = n1cursor.fetchone()
        debug("%s" % tokens)
        assert len(tokens[0]) == 2

        debug("Checking that no data was lost")
        for n in xrange(10, 20):
            query_c1c2(n2cursor, n, 'ALL')

        for n in xrange(30, 1000):
            query_c1c2(n2cursor, n, 'ALL')
    def simple_rebuild_test(self):
        """
        @jira_ticket CASSANDRA-9119

        Test rebuild from other dc works as expected.
        """

        keys = 1000

        cluster = self.cluster
        cluster.set_configuration_options(values={
            'endpoint_snitch': 'org.apache.cassandra.locator.PropertyFileSnitch'
        })
        node1 = cluster.create_node('node1',
                                    False, ('127.0.0.1', 9160),
                                    ('127.0.0.1', 7000),
                                    '7100',
                                    '2000',
                                    None,
                                    binary_interface=('127.0.0.1', 9042))
        cluster.add(node1, True, data_center='dc1')

        # start node in dc1
        node1.start(wait_for_binary_proto=True)

        # populate data in dc1
        session = self.patient_exclusive_cql_connection(node1)
        self.create_ks(session, 'ks', {'dc1': 1})
        self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)

        # check data
        for i in xrange(0, keys):
            query_c1c2(session, i, ConsistencyLevel.ALL)
        session.shutdown()

        # Bootstrapping a new node in dc2 with auto_bootstrap: false
        node2 = cluster.create_node('node2',
                                    False, ('127.0.0.2', 9160),
                                    ('127.0.0.2', 7000),
                                    '7200',
                                    '2001',
                                    None,
                                    binary_interface=('127.0.0.2', 9042))
        cluster.add(node2, False, data_center='dc2')
        node2.start(wait_other_notice=True, wait_for_binary_proto=True)

        # wait for snitch to reload
        time.sleep(60)
        # alter keyspace to replicate to dc2
        session = self.patient_exclusive_cql_connection(node2)
        session.execute(
            "ALTER KEYSPACE ks WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};"
        )
        session.execute('USE ks')

        self.rebuild_errors = 0

        # rebuild dc2 from dc1
        def rebuild():
            try:
                node2.nodetool('rebuild dc1')
            except NodetoolError as e:
                if 'Node is still rebuilding' in e.message:
                    self.rebuild_errors += 1
                else:
                    raise

        cmd1 = Thread(target=rebuild)
        cmd1.start()

        # concurrent rebuild should not be allowed (CASSANDRA-9119)
        # (following sleep is needed to avoid conflict in 'nodetool()' method setting up env.)
        time.sleep(.1)
        try:
            node2.nodetool('rebuild dc1')
        except NodetoolError:
            self.rebuild_errors += 1

        cmd1.join()

        # exactly 1 of the two nodetool calls should fail
        # usually it will be the one in the main thread,
        # but occasionally it wins the race with the one in the secondary thread,
        # so we check that one succeeded and the other failed
        self.assertEqual(
            self.rebuild_errors, 1,
            msg='concurrent rebuild should not be allowed, but one rebuild '
                'command should have succeeded.')

        # check data
        for i in xrange(0, keys):
            query_c1c2(session, i, ConsistencyLevel.ALL)
Example #59
    def rebuild_ranges_test(self):
        """
        @jira_ticket CASSANDRA-10406
        """
        keys = 1000

        cluster = self.cluster
        tokens = cluster.balanced_tokens_across_dcs(['dc1', 'dc2'])
        cluster.set_configuration_options(values={
            'endpoint_snitch': 'org.apache.cassandra.locator.PropertyFileSnitch'
        })
        cluster.set_configuration_options(values={'num_tokens': 1})
        node1 = cluster.create_node('node1',
                                    False, ('127.0.0.1', 9160),
                                    ('127.0.0.1', 7000),
                                    '7100',
                                    '2000',
                                    tokens[0],
                                    binary_interface=('127.0.0.1', 9042))
        node1.set_configuration_options(values={'initial_token': tokens[0]})
        cluster.add(node1, True, data_center='dc1')
        node1 = cluster.nodelist()[0]

        # start node in dc1
        node1.start(wait_for_binary_proto=True)

        # populate data in dc1
        session = self.patient_exclusive_cql_connection(node1)
        # ks1 will be rebuilt in node2
        self.create_ks(session, 'ks1', {'dc1': 1})
        self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)
        # ks2 will not be rebuilt in node2
        self.create_ks(session, 'ks2', {'dc1': 1})
        self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)
        session.shutdown()

        # Bootstrapping a new node in dc2 with auto_bootstrap: false
        node2 = cluster.create_node('node2',
                                    False, ('127.0.0.2', 9160),
                                    ('127.0.0.2', 7000),
                                    '7200',
                                    '2001',
                                    tokens[1],
                                    binary_interface=('127.0.0.2', 9042))
        node2.set_configuration_options(values={'initial_token': tokens[1]})
        cluster.add(node2, False, data_center='dc2')
        node2.start(wait_other_notice=True, wait_for_binary_proto=True)

        # wait for snitch to reload
        time.sleep(60)
        # alter keyspace to replicate to dc2
        session = self.patient_exclusive_cql_connection(node2)
        session.execute(
            "ALTER KEYSPACE ks1 WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};"
        )
        session.execute(
            "ALTER KEYSPACE ks2 WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};"
        )
        session.execute('USE ks1')

        # rebuild only ks1 with range that is node1's replica
        node2.nodetool('rebuild -ks ks1 -ts (%s,%s] dc1' %
                       (tokens[1], str(pow(2, 63) - 1)))

        # check data is sent by stopping node1
        node1.stop()
        for i in xrange(0, keys):
            query_c1c2(session, i, ConsistencyLevel.ONE)
        # ks2 should not be streamed
        session.execute('USE ks2')
        for i in xrange(0, keys):
            query_c1c2(session,
                       i,
                       ConsistencyLevel.ONE,
                       tolerate_missing=True,
                       must_be_missing=True)
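# The -ts argument above spans from node2's token to the top of the Murmur3
# token space. A hedged sketch of building that range string (assumes the
# standard Murmur3Partitioner range of [-2**63, 2**63 - 1]):
def murmur3_upper_range(start_token):
    max_token = 2 ** 63 - 1
    return '(%s,%s]' % (start_token, max_token)

# e.g. node2.nodetool('rebuild -ks ks1 -ts %s dc1' % murmur3_upper_range(tokens[1]))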