Code example #1
    def blacklisted_directory_test(self):
        cluster = self.cluster
        cluster.set_datadir_count(3)
        cluster.populate(1)
        [node] = cluster.nodelist()
        remove_perf_disable_shared_mem(node)
        cluster.start(wait_for_binary_proto=True)

        session = self.patient_cql_connection(node)
        create_ks(session, 'ks', 1)
        create_c1c2_table(self, session)
        insert_c1c2(session, n=10000)
        node.flush()
        for k in xrange(0, 10000):
            query_c1c2(session, k)

        node.compact()
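        # Mark the first data directory unwritable via the BlacklistedDirectories MBean,
        # then verify the data is still readable afterwards.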
        mbean = make_mbean('db', type='BlacklistedDirectories')
        with JolokiaAgent(node) as jmx:
            jmx.execute_method(mbean, 'markUnwritable', [os.path.join(node.get_path(), 'data0')])

        for k in xrange(0, 10000):
            query_c1c2(session, k)

        node.nodetool('relocatesstables')

        for k in xrange(0, 10000):
            query_c1c2(session, k)
Code example #2
    def test_blacklisted_directory(self):
        cluster = self.cluster
        cluster.set_datadir_count(3)
        cluster.populate(1)
        [node] = cluster.nodelist()
        cluster.start()

        session = self.patient_cql_connection(node)
        create_ks(session, 'ks', 1)
        create_c1c2_table(self, session)
        insert_c1c2(session, n=10000)
        node.flush()
        for k in range(0, 10000):
            query_c1c2(session, k)

        node.compact()
        mbean = make_mbean('db', type='BlacklistedDirectories')
        with JolokiaAgent(node) as jmx:
            jmx.execute_method(mbean, 'markUnwritable',
                               [os.path.join(node.get_path(), 'data1')])

        for k in range(0, 10000):
            query_c1c2(session, k)

        node.nodetool('relocatesstables')

        for k in range(0, 10000):
            query_c1c2(session, k)
Code example #3
    def _do_hinted_handoff(self, node1, node2, enabled, keyspace='ks'):
        """
        Test that if we stop one node, the other one
        will store hints only when hinted handoff is enabled
        """
        session = self.patient_exclusive_cql_connection(node1)
        create_ks(session, keyspace, 2)
        create_c1c2_table(self, session)

        node2.stop(wait_other_notice=True)

        insert_c1c2(session, n=100, consistency=ConsistencyLevel.ONE)

        log_mark = node1.mark_log()
        node2.start(wait_other_notice=True)

        if enabled:
            node1.watch_log_for(["Finished hinted"], from_mark=log_mark, timeout=120)

        node1.stop(wait_other_notice=True)

        # Check node2 for all the keys: they should have been delivered via HH if enabled, and be missing otherwise
        session = self.patient_exclusive_cql_connection(node2, keyspace=keyspace)
        for n in xrange(0, 100):
            if enabled:
                query_c1c2(session, n, ConsistencyLevel.ONE)
            else:
                query_c1c2(session, n, ConsistencyLevel.ONE, tolerate_missing=True, must_be_missing=True)
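A minimal, hypothetical caller for the helper above (the test name and cluster setup are assumptions, not part of the original snippet):

    def test_hintedhandoff_enabled(self):
        # Hypothetical sketch: two-node cluster with hinted handoff left enabled,
        # then delegate the actual assertions to _do_hinted_handoff(..., enabled=True).
        self.cluster.set_configuration_options(values={'hinted_handoff_enabled': True})
        self.cluster.populate(2).start(wait_other_notice=True)
        node1, node2 = self.cluster.nodelist()
        self._do_hinted_handoff(node1, node2, True)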
Code example #4
    def test_decommission(self):
        cluster = self.cluster

        tokens = cluster.balanced_tokens(4)
        cluster.populate(4, tokens=tokens).start()
        node1, node2, node3, node4 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 2)
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=30000, consistency=ConsistencyLevel.QUORUM)

        cluster.flush()
        sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
        init_size = sizes[0]
        assert_almost_equal(*sizes)

        time.sleep(.5)
        node4.decommission()
        node4.stop()
        cluster.cleanup()
        time.sleep(.5)

        # Check we can get all the keys
        for n in range(0, 30000):
            query_c1c2(session, n, ConsistencyLevel.QUORUM)

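        # After decommissioning node4 and cleaning up, the load should have been
        # redistributed across the three remaining nodes.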
        sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
        logger.debug(sizes)
        assert_almost_equal(sizes[0], sizes[1])
        assert_almost_equal((2.0 / 3.0) * sizes[0], sizes[2])
        assert_almost_equal(sizes[2], init_size)
Code example #5
    def test_move_single_node(self):
        """ Test moving a node in a single-node cluster (#4200) """
        cluster = self.cluster

        # Create an unbalanced ring
        cluster.populate(1, tokens=[0]).start()
        node1 = cluster.nodelist()[0]
        time.sleep(0.2)

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 1)
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ONE)

        cluster.flush()

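        # Moving the only node just changes its token; every key must stay readable afterwards.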
        node1.move(2**25)
        time.sleep(1)

        cluster.cleanup()

        # Check we can get all the keys
        for n in range(0, 10000):
            query_c1c2(session, n, ConsistencyLevel.ONE)
Code example #6
    def quorum_available_during_failure_test(self):
        CL = ConsistencyLevel.QUORUM
        RF = 3

        debug("Creating a ring")
        cluster = self.cluster
        if DISABLE_VNODES:
            cluster.populate(3).start()
        else:
            tokens = cluster.balanced_tokens(3)
            cluster.populate(3, tokens=tokens).start()
        node1, node2, node3 = cluster.nodelist()

        debug("Set to talk to node 2")
        session = self.patient_cql_connection(node2)
        self.create_ks(session, 'ks', RF)
        create_c1c2_table(self, session)

        debug("Generating some data")
        insert_c1c2(session, n=100, consistency=CL)

        debug("Taking down node1")
        node1.stop(wait_other_notice=True)

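        # With RF=3, QUORUM needs 2 replicas, so reads must still succeed with one node down.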
        debug("Reading back data.")
        for n in xrange(100):
            query_c1c2(session, n, CL)
Code example #7
    def _do_hinted_handoff(self, node1, node2, enabled, keyspace='ks'):
        """
        Test that if we stop one node, the other one
        will store hints only when hinted handoff is enabled
        """
        session = self.patient_exclusive_cql_connection(node1)
        create_ks(session, keyspace, 2)
        create_c1c2_table(self, session)

        node2.stop(wait_other_notice=True)

        insert_c1c2(session, n=100, consistency=ConsistencyLevel.ONE)

        log_mark = node1.mark_log()
        node2.start()

        if enabled:
            node1.watch_log_for(["Finished hinted"],
                                from_mark=log_mark,
                                timeout=120)

        node1.stop(wait_other_notice=True)

        # Check node2 for all the keys: they should have been delivered via HH if enabled, and be missing otherwise
        session = self.patient_exclusive_cql_connection(node2,
                                                        keyspace=keyspace)
        for n in range(0, 100):
            if enabled:
                query_c1c2(session, n, ConsistencyLevel.ONE)
            else:
                query_c1c2(session,
                           n,
                           ConsistencyLevel.ONE,
                           tolerate_missing=True,
                           must_be_missing=True)
Code example #8
    def readrepair_test(self):
        cluster = self.cluster
        cluster.set_configuration_options(
            values={'hinted_handoff_enabled': False})

        if DISABLE_VNODES:
            cluster.populate(2).start()
        else:
            tokens = cluster.balanced_tokens(2)
            cluster.populate(2, tokens=tokens).start()
        node1, node2 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 2)
        create_c1c2_table(self, session, read_repair=1.0)
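        # read_repair=1.0 sets the table's read_repair_chance, so the QUORUM reads below trigger read repair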

        node2.stop(wait_other_notice=True)

        insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ONE)

        node2.start(wait_other_notice=True)

        # query everything to cause RR
        for n in xrange(0, 10000):
            query_c1c2(session, n, ConsistencyLevel.QUORUM)

        node1.stop(wait_other_notice=True)

        # Check node2 for all the keys that should have been repaired
        session = self.patient_cql_connection(node2, keyspace='ks')
        for n in xrange(0, 10000):
            query_c1c2(session, n, ConsistencyLevel.ONE)
Code example #9
    def quorum_available_during_failure_test(self):
        CL = ConsistencyLevel.QUORUM
        RF = 3

        debug("Creating a ring")
        cluster = self.cluster
        if DISABLE_VNODES:
            cluster.populate(3).start()
        else:
            tokens = cluster.balanced_tokens(3)
            cluster.populate(3, tokens=tokens).start()
        node1, node2, node3 = cluster.nodelist()

        debug("Set to talk to node 2")
        session = self.patient_cql_connection(node2)
        create_ks(session, 'ks', RF)
        create_c1c2_table(self, session)

        debug("Generating some data")
        insert_c1c2(session, n=100, consistency=CL)

        debug("Taking down node1")
        node1.stop(wait_other_notice=True)

        debug("Reading back data.")
        for n in xrange(100):
            query_c1c2(session, n, CL)
Code example #10
    def readrepair_test(self):
        cluster = self.cluster
        cluster.set_configuration_options(values={'hinted_handoff_enabled': False})

        if DISABLE_VNODES:
            cluster.populate(2).start()
        else:
            tokens = cluster.balanced_tokens(2)
            cluster.populate(2, tokens=tokens).start()
        node1, node2 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 2)
        create_c1c2_table(self, session, read_repair=1.0)

        node2.stop(wait_other_notice=True)

        insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ONE)

        node2.start(wait_for_binary_proto=True, wait_other_notice=True)

        # query everything to cause RR
        for n in xrange(0, 10000):
            query_c1c2(session, n, ConsistencyLevel.QUORUM)

        node1.stop(wait_other_notice=True)

        # Check node2 for all the keys that should have been repaired
        session = self.patient_cql_connection(node2, keyspace='ks')
        for n in xrange(0, 10000):
            query_c1c2(session, n, ConsistencyLevel.ONE)
Code example #11
    def test_resumable_decommission(self):
        """
        @jira_ticket CASSANDRA-12008

        Test decommission operation is resumable
        """
        self.fixture_dtest_setup.ignore_log_patterns = [r'Streaming error occurred',
                                                        r'Error while decommissioning node',
                                                        r'Remote peer 127.0.0.2 failed stream session',
                                                        r'Remote peer 127.0.0.2:7000 failed stream session']
        cluster = self.cluster
        cluster.set_configuration_options(values={'stream_throughput_outbound_megabits_per_sec': 1})
        cluster.populate(3, install_byteman=True).start(wait_other_notice=True)
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node2)
        # reduce system_distributed RF to 2 so we don't require forceful decommission
        session.execute("ALTER KEYSPACE system_distributed WITH REPLICATION = {'class':'SimpleStrategy', 'replication_factor':'2'};")
        create_ks(session, 'ks', 2)
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ALL)

        # Execute the first decommission, which should fail
        with pytest.raises(ToolError):
            if cluster.version() >= '4.0':
                script = ['./byteman/4.0/decommission_failure_inject.btm']
            else:
                script = ['./byteman/pre4.0/decommission_failure_inject.btm']
            node2.byteman_submit(script)
            node2.nodetool('decommission')

        # Make sure previous ToolError is due to decommission
        node2.watch_log_for('Error while decommissioning node')

        # Decommission again
        mark = node2.mark_log()
        node2.nodetool('decommission')

        # Check decommission is done and we skipped transferred ranges
        node2.watch_log_for('DECOMMISSIONED', from_mark=mark)
        node2.grep_log("Skipping transferred range .* of keyspace ks, endpoint {}".format(node2.address_for_current_version_slashy()), filename='debug.log')

        # Check data is correctly forwarded to node1 and node3
        cluster.remove(node2)
        node3.stop(gently=False)
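        # With node2 removed from the cluster and node3 stopped, the reads below can only be served by node1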
        session = self.patient_exclusive_cql_connection(node1)
        session.execute('USE ks')
        for i in range(0, 10000):
            query_c1c2(session, i, ConsistencyLevel.ONE)
        node1.stop(gently=False)
        node3.start()
        session.shutdown()
        mark = node3.mark_log()
        node3.watch_log_for('Starting listening for CQL clients', from_mark=mark)
        session = self.patient_exclusive_cql_connection(node3)
        session.execute('USE ks')
        for i in range(0, 10000):
            query_c1c2(session, i, ConsistencyLevel.ONE)
Code example #12
    def resumable_decommission_test(self):
        """
        @jira_ticket CASSANDRA-12008

        Test decommission operation is resumable
        """
        self.ignore_log_patterns = [
            r'Streaming error occurred', r'Error while decommissioning node',
            r'Remote peer 127.0.0.2 failed stream session'
        ]
        cluster = self.cluster
        cluster.set_configuration_options(
            values={'stream_throughput_outbound_megabits_per_sec': 1})
        cluster.populate(3, install_byteman=True).start(wait_other_notice=True)
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node2)
        self.create_ks(session, 'ks', 2)
        self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ALL)

        # Execute the first decommission, which should fail
        with self.assertRaises(ToolError):
            script = ['./byteman/decommission_failure_inject.btm']
            node2.byteman_submit(script)
            node2.nodetool('decommission')

        # Make sure previous ToolError is due to decommission
        node2.watch_log_for('Error while decommissioning node')

        # Decommission again
        mark = node2.mark_log()
        node2.nodetool('decommission')

        # Check decommission is done and we skipped transferred ranges
        node2.watch_log_for('DECOMMISSIONED', from_mark=mark)
        node2.grep_log(
            "Skipping transferred range .* of keyspace ks, endpoint /127.0.0.3",
            filename='debug.log')

        # Check data is correctly forwarded to node1 and node3
        cluster.remove(node2)
        node3.stop(gently=False)
        session = self.patient_exclusive_cql_connection(node1)
        session.execute('USE ks')
        for i in xrange(0, 10000):
            query_c1c2(session, i, ConsistencyLevel.ONE)
        node1.stop(gently=False)
        node3.start()
        session.shutdown()
        mark = node3.mark_log()
        node3.watch_log_for('Starting listening for CQL clients',
                            from_mark=mark)
        session = self.patient_exclusive_cql_connection(node3)
        session.execute('USE ks')
        for i in xrange(0, 10000):
            query_c1c2(session, i, ConsistencyLevel.ONE)
Code example #13
    def test_consistent_reads_after_bootstrap(self):
        logger.debug("Creating a ring")
        cluster = self.cluster
        cluster.set_configuration_options(
            values={
                'hinted_handoff_enabled': False,
                'write_request_timeout_in_ms': 60000,
                'read_request_timeout_in_ms': 60000,
                'dynamic_snitch_badness_threshold': 0.0
            })
        cluster.set_batch_commitlog(enabled=True)

        cluster.populate(2)
        node1, node2 = cluster.nodelist()
        cluster.start(wait_for_binary_proto=True, wait_other_notice=True)

        logger.debug("Set to talk to node 2")
        n2session = self.patient_cql_connection(node2)
        create_ks(n2session, 'ks', 2)
        create_c1c2_table(self, n2session)

        logger.debug("Generating some data for all nodes")
        insert_c1c2(n2session,
                    keys=list(range(10, 20)),
                    consistency=ConsistencyLevel.ALL)

        node1.flush()
        logger.debug("Taking down node1")
        node1.stop(wait_other_notice=True)

        logger.debug("Writing data to only node2")
        insert_c1c2(n2session,
                    keys=list(range(30, 1000)),
                    consistency=ConsistencyLevel.ONE)
        node2.flush()

        logger.debug("Restart node1")
        node1.start(wait_other_notice=True)

        logger.debug("Bootstraping node3")
        node3 = new_node(cluster)
        node3.start(wait_for_binary_proto=True)

        n3session = self.patient_cql_connection(node3)
        n3session.execute("USE ks")
        logger.debug("Checking that no data was lost")
        for n in range(10, 20):
            query_c1c2(n3session, n, ConsistencyLevel.ALL)

        for n in range(30, 1000):
            query_c1c2(n3session, n, ConsistencyLevel.ALL)
Code example #14
    def _deprecated_repair_jmx(self, method, arguments):
        """
        * Launch a two node, two DC cluster
        * Create a keyspace and table
        * Insert some data
        * Call the deprecated repair JMX API based on the arguments passed into this method
        * Check the node log to see if the correct repair was performed based on the jmx args
        """
        cluster = self.cluster

        logger.debug("Starting cluster..")
        cluster.populate([1, 1])
        node1, node2 = cluster.nodelist()
        remove_perf_disable_shared_mem(node1)
        cluster.start()
        supports_pull_repair = cluster.version() >= LooseVersion('3.10')

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 2)
        create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=1000, consistency=ConsistencyLevel.ALL)

        # Run repair
        mbean = make_mbean('db', 'StorageService')
        with JolokiaAgent(node1) as jmx:
            # assert repair runs and returns valid cmd number
            assert jmx.execute_method(mbean, method, arguments) == 1
        # wait for log to start
        node1.watch_log_for("Starting repair command")
        # get repair parameters from the log
        line = node1.grep_log(("Starting repair command #1" + (" \([^\)]+\)" if cluster.version() >= LooseVersion("3.10") else "") +
                               ", repairing keyspace ks with repair options \(parallelism: (?P<parallelism>\w+), primary range: (?P<pr>\w+), "
                               "incremental: (?P<incremental>\w+), job threads: (?P<jobs>\d+), ColumnFamilies: (?P<cfs>.+), dataCenters: (?P<dc>.+), "
                               "hosts: (?P<hosts>.+), # of ranges: (?P<ranges>\d+)(, pull repair: (?P<pullrepair>true|false))?\)"))

        assert_length_equal(line, 1)
        line, m = line[0]

        if supports_pull_repair:
            assert m.group("pullrepair"), "false" == "Pull repair cannot be enabled through the deprecated API so the pull repair option should always be false."

        return {"parallelism": m.group("parallelism"),
                "primary_range": m.group("pr"),
                "incremental": m.group("incremental"),
                "job_threads": m.group("jobs"),
                "column_families": m.group("cfs"),
                "data_centers": m.group("dc"),
                "hosts": m.group("hosts"),
                "ranges": m.group("ranges")}
Code example #15
    def resumable_decommission_test(self):
        """
        @jira_ticket CASSANDRA-12008

        Test decommission operation is resumable
        """
        self.ignore_log_patterns = [r'Streaming error occurred', r'Error while decommissioning node', r'Remote peer 127.0.0.2 failed stream session']
        cluster = self.cluster
        cluster.set_configuration_options(values={'stream_throughput_outbound_megabits_per_sec': 1})
        cluster.populate(3, install_byteman=True).start(wait_other_notice=True)
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node2)
        create_ks(session, 'ks', 2)
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ALL)

        # Execute the first decommission, which should fail
        with self.assertRaises(ToolError):
            script = ['./byteman/decommission_failure_inject.btm']
            node2.byteman_submit(script)
            node2.nodetool('decommission')

        # Make sure previous ToolError is due to decommission
        node2.watch_log_for('Error while decommissioning node')

        # Decommission again
        mark = node2.mark_log()
        node2.nodetool('decommission')

        # Check decommission is done and we skipped transferred ranges
        node2.watch_log_for('DECOMMISSIONED', from_mark=mark)
        node2.grep_log("Skipping transferred range .* of keyspace ks, endpoint /127.0.0.3", filename='debug.log')

        # Check data is correctly forwarded to node1 and node3
        cluster.remove(node2)
        node3.stop(gently=False)
        session = self.patient_exclusive_cql_connection(node1)
        session.execute('USE ks')
        for i in xrange(0, 10000):
            query_c1c2(session, i, ConsistencyLevel.ONE)
        node1.stop(gently=False)
        node3.start()
        session.shutdown()
        mark = node3.mark_log()
        node3.watch_log_for('Starting listening for CQL clients', from_mark=mark)
        session = self.patient_exclusive_cql_connection(node3)
        session.execute('USE ks')
        for i in xrange(0, 10000):
            query_c1c2(session, i, ConsistencyLevel.ONE)
Code example #16
    def test_movement(self):
        cluster = self.cluster

        # Create an unbalanced ring
        cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 1)
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=30000, consistency=ConsistencyLevel.ONE)

        cluster.flush()

        # Move nodes to balance the cluster
        def move_node(node, token):
            mark = node.mark_log()
            node.move(token)  # can't assume 0 is balanced with m3p
            node.watch_log_for('{} state jump to NORMAL'.format(
                node.address_for_current_version()),
                               from_mark=mark,
                               timeout=180)
            time.sleep(3)

        balancing_tokens = cluster.balanced_tokens(3)

        move_node(node1, balancing_tokens[0])
        move_node(node2, balancing_tokens[1])
        move_node(node3, balancing_tokens[2])

        time.sleep(1)
        cluster.cleanup()
        for node in cluster.nodelist():
            # after moving nodes we need to relocate any sstables that ended up in the wrong places, and after
            # doing that we might have overlapping tokens on the disks, so run a major compaction to even out the balance
            if cluster.version() >= '3.2':
                node.nodetool("relocatesstables")
            node.nodetool("compact")

        # Check we can get all the keys
        for n in range(0, 30000):
            query_c1c2(session, n, ConsistencyLevel.ONE)

        # Now the load should be basically even
        sizes = [node.data_size() for node in [node1, node2, node3]]

        assert_almost_equal(sizes[0], sizes[1], error=0.05)
        assert_almost_equal(sizes[0], sizes[2], error=0.05)
        assert_almost_equal(sizes[1], sizes[2], error=0.05)
Code example #17
    def _deprecated_repair_jmx(self, method, arguments):
        """
        * Launch a two node, two DC cluster
        * Create a keyspace and table
        * Insert some data
        * Call the deprecated repair JMX API based on the arguments passed into this method
        * Check the node log to see if the correct repair was performed based on the jmx args
        """
        cluster = self.cluster

        logger.debug("Starting cluster..")
        cluster.populate([1, 1])
        node1, node2 = cluster.nodelist()
        cluster.start()
        supports_pull_repair = cluster.version() >= LooseVersion('3.10')

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 2)
        create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=1000, consistency=ConsistencyLevel.ALL)

        # Run repair
        mbean = make_mbean('db', 'StorageService')
        with JolokiaAgent(node1) as jmx:
            # assert repair runs and returns valid cmd number
            assert jmx.execute_method(mbean, method, arguments) == 1
        # wait for log to start
        node1.watch_log_for("Starting repair command")
        # get repair parameters from the log
        line = node1.grep_log((r"Starting repair command #1" + (r" \([^\)]+\)" if cluster.version() >= LooseVersion("3.10") else "") +
                               r", repairing keyspace ks with repair options \(parallelism: (?P<parallelism>\w+), primary range: (?P<pr>\w+), "
                               r"incremental: (?P<incremental>\w+), job threads: (?P<jobs>\d+), ColumnFamilies: (?P<cfs>.+), dataCenters: (?P<dc>.+), "
                               r"hosts: (?P<hosts>.+), # of ranges: (?P<ranges>\d+)(, pull repair: (?P<pullrepair>true|false))?(, ignore unreplicated keyspaces: (?P<ignoreunrepl>true|false))?\)"))

        assert_length_equal(line, 1)
        line, m = line[0]

        if supports_pull_repair:
            assert m.group("pullrepair"), "false" == "Pull repair cannot be enabled through the deprecated API so the pull repair option should always be false."

        return {"parallelism": m.group("parallelism"),
                "primary_range": m.group("pr"),
                "incremental": m.group("incremental"),
                "job_threads": m.group("jobs"),
                "column_families": m.group("cfs"),
                "data_centers": m.group("dc"),
                "hosts": m.group("hosts"),
                "ranges": m.group("ranges")}
Code example #18
    def test_non_local_read(self):
        """ This test reads from a coordinator we know has no copy of the data """
        cluster = self.cluster

        cluster.populate(3).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 2)
        create_c1c2_table(self, session)

        # insert and get at CL.QUORUM (since RF=2, node1 won't have all keys locally)
        insert_c1c2(session, n=1000, consistency=ConsistencyLevel.QUORUM)
        for n in range(0, 1000):
            query_c1c2(session, n, ConsistencyLevel.QUORUM)
Code example #19
 def hintedhandoff_decom_test(self):
     self.cluster.populate(4).start(wait_for_binary_proto=True)
     [node1, node2, node3, node4] = self.cluster.nodelist()
     session = self.patient_cql_connection(node1)
     create_ks(session, 'ks', 2)
     create_c1c2_table(self, session)
     node4.stop(wait_other_notice=True)
     insert_c1c2(session, n=100, consistency=ConsistencyLevel.ONE)
     node1.decommission()
     node4.start(wait_for_binary_proto=True)
     node2.decommission()
     node3.decommission()
     time.sleep(5)
     for x in xrange(0, 100):
         query_c1c2(session, x, ConsistencyLevel.ONE)
Code example #20
 def hintedhandoff_decom_test(self):
     self.cluster.populate(4).start(wait_for_binary_proto=True)
     [node1, node2, node3, node4] = self.cluster.nodelist()
     session = self.patient_cql_connection(node1)
     self.create_ks(session, 'ks', 2)
     create_c1c2_table(self, session)
     node4.stop(wait_other_notice=True)
     insert_c1c2(session, n=100, consistency=ConsistencyLevel.ONE)
     node1.decommission()
     node4.start(wait_for_binary_proto=True)
     node2.decommission()
     node3.decommission()
     time.sleep(5)
     for x in xrange(0, 100):
         query_c1c2(session, x, ConsistencyLevel.ONE)
Code example #21
    def test_non_local_read(self):
        """ This test reads from a coordinator we know has no copy of the data """
        cluster = self.cluster

        cluster.populate(3).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 2)
        create_c1c2_table(self, session)

        # insert and get at CL.QUORUM (since RF=2, node1 won't have all keys locally)
        insert_c1c2(session, n=1000, consistency=ConsistencyLevel.QUORUM)
        for n in range(0, 1000):
            query_c1c2(session, n, ConsistencyLevel.QUORUM)
Code example #22
    def test_movement(self):
        cluster = self.cluster

        # Create an unbalanced ring
        cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 1)
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=30000, consistency=ConsistencyLevel.ONE)

        cluster.flush()

        # Move nodes to balance the cluster
        def move_node(node, token):
            mark = node.mark_log()
            node.move(token)  # can't assume 0 is balanced with m3p
            node.watch_log_for('{} state jump to NORMAL'.format(node.address_for_current_version()), from_mark=mark, timeout=180)
            time.sleep(3)

        balancing_tokens = cluster.balanced_tokens(3)

        move_node(node1, balancing_tokens[0])
        move_node(node2, balancing_tokens[1])
        move_node(node3, balancing_tokens[2])

        time.sleep(1)
        cluster.cleanup()
        for node in cluster.nodelist():
            # after moving nodes we need to relocate any sstables that ended up in the wrong places, and after
            # doing that we might have overlapping tokens on the disks, so run a major compaction to even out the balance
            if cluster.version() >= '3.2':
                node.nodetool("relocatesstables")
            node.nodetool("compact")

        # Check we can get all the keys
        for n in range(0, 30000):
            query_c1c2(session, n, ConsistencyLevel.ONE)

        # Now the load should be basically even
        sizes = [node.data_size() for node in [node1, node2, node3]]

        assert_almost_equal(sizes[0], sizes[1], error=0.05)
        assert_almost_equal(sizes[0], sizes[2], error=0.05)
        assert_almost_equal(sizes[1], sizes[2], error=0.05)
Code example #23
    def test_consistent_reads_after_move(self):
        logger.debug("Creating a ring")
        cluster = self.cluster
        cluster.set_configuration_options(
            values={
                'hinted_handoff_enabled': False,
                'write_request_timeout_in_ms': 60000,
                'read_request_timeout_in_ms': 60000,
                'dynamic_snitch_badness_threshold': 0.0
            })
        cluster.set_batch_commitlog(enabled=True)

        cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
        node1, node2, node3 = cluster.nodelist()

        logger.debug("Set to talk to node 2")
        n2session = self.patient_cql_connection(node2)
        create_ks(n2session, 'ks', 2)
        create_c1c2_table(self, n2session)

        logger.debug("Generating some data for all nodes")
        insert_c1c2(n2session,
                    keys=list(range(10, 20)),
                    consistency=ConsistencyLevel.ALL)

        node1.flush()
        logger.debug("Taking down node1")
        node1.stop(wait_other_notice=True)

        logger.debug("Writing data to node2")
        insert_c1c2(n2session,
                    keys=list(range(30, 1000)),
                    consistency=ConsistencyLevel.ONE)
        node2.flush()

        logger.debug("Restart node1")
        node1.start()

        logger.debug("Move token on node3")
        node3.move(2)

        logger.debug("Checking that no data was lost")
        for n in range(10, 20):
            query_c1c2(n2session, n, ConsistencyLevel.ALL, max_attempts=3)

        for n in range(30, 1000):
            query_c1c2(n2session, n, ConsistencyLevel.ALL, max_attempts=3)
Code example #24
    def test_movement(self):
        cluster = self.cluster

        # Create an unbalanced ring
        cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 1)
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=30000, consistency=ConsistencyLevel.ONE)

        cluster.flush()

        # Move nodes to balance the cluster
        def move_node(node, token):
            mark = node.mark_log()
            node.move(token)  # can't assume 0 is balanced with m3p
            node.watch_log_for('{} state jump to NORMAL'.format(
                node.address_for_current_version()),
                               from_mark=mark,
                               timeout=180)
            time.sleep(3)

        balancing_tokens = cluster.balanced_tokens(3)

        move_node(node1, balancing_tokens[0])
        move_node(node2, balancing_tokens[1])
        move_node(node3, balancing_tokens[2])

        time.sleep(1)

        cluster.cleanup()

        # Check we can get all the keys
        for n in range(0, 30000):
            query_c1c2(session, n, ConsistencyLevel.ONE)

        # Now the load should be basically even
        sizes = [node.data_size() for node in [node1, node2, node3]]

        assert_almost_equal(sizes[0], sizes[1])
        assert_almost_equal(sizes[0], sizes[2])
        assert_almost_equal(sizes[1], sizes[2])
Code example #25
    def test_consistent_reads_after_bootstrap(self):
        logger.debug("Creating a ring")
        cluster = self.cluster
        cluster.set_configuration_options(values={'hinted_handoff_enabled': False,
                                                  'write_request_timeout_in_ms': 60000,
                                                  'read_request_timeout_in_ms': 60000,
                                                  'dynamic_snitch_badness_threshold': 0.0})
        cluster.set_batch_commitlog(enabled=True)

        cluster.populate(2)
        node1, node2 = cluster.nodelist()
        cluster.start(wait_for_binary_proto=True, wait_other_notice=True)

        logger.debug("Set to talk to node 2")
        n2session = self.patient_cql_connection(node2)
        create_ks(n2session, 'ks', 2)
        create_c1c2_table(self, n2session)

        logger.debug("Generating some data for all nodes")
        insert_c1c2(n2session, keys=list(range(10, 20)), consistency=ConsistencyLevel.ALL)

        node1.flush()
        logger.debug("Taking down node1")
        node1.stop(wait_other_notice=True)

        logger.debug("Writing data to only node2")
        insert_c1c2(n2session, keys=list(range(30, 1000)), consistency=ConsistencyLevel.ONE)
        node2.flush()

        logger.debug("Restart node1")
        node1.start(wait_other_notice=True)

        logger.debug("Bootstraping node3")
        node3 = new_node(cluster)
        node3.start(wait_for_binary_proto=True)

        n3session = self.patient_cql_connection(node3)
        n3session.execute("USE ks")
        logger.debug("Checking that no data was lost")
        for n in range(10, 20):
            query_c1c2(n3session, n, ConsistencyLevel.ALL)

        for n in range(30, 1000):
            query_c1c2(n3session, n, ConsistencyLevel.ALL)
Code example #26
    def _test_streaming(self, op_zerocopy, op_partial, num_partial, num_zerocopy,
                        compaction_strategy='LeveledCompactionStrategy', num_keys=1000, rf=3, num_nodes=3):
        keys = num_keys

        cluster = self.cluster
        tokens = cluster.balanced_tokens(num_nodes)
        cluster.set_configuration_options(values={'endpoint_snitch': 'org.apache.cassandra.locator.PropertyFileSnitch'})
        cluster.set_configuration_options(values={'num_tokens': 1})

        cluster.populate(num_nodes)
        nodes = cluster.nodelist()

        for i in range(0, len(nodes)):
            nodes[i].set_configuration_options(values={'initial_token': tokens[i]})

        cluster.start(wait_for_binary_proto=True)

        session = self.patient_cql_connection(nodes[0])

        create_ks(session, name='ks2', rf=rf)

        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'},
                  compaction_strategy=compaction_strategy)
        insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)

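        # Truncating system.available_ranges resets the streamed-range bookkeeping so the rebuild below streams data again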
        session_n2 = self.patient_exclusive_cql_connection(nodes[1])
        session_n2.execute("TRUNCATE system.available_ranges;")

        mark = nodes[1].mark_log()
        nodes[1].nodetool('rebuild -ks ks2')

        nodes[1].watch_log_for('Completed submission of build tasks', filename='debug.log', timeout=120)
        zerocopy_streamed_sstable = len(
            nodes[1].grep_log('.*CassandraEntireSSTableStreamReader.*?Finished receiving Data.*', filename='debug.log',
                              from_mark=mark))
        partial_streamed_sstable = len(
            nodes[1].grep_log('.*CassandraStreamReader.*?Finished receiving file.*', filename='debug.log',
                              from_mark=mark))

        assert op_zerocopy(zerocopy_streamed_sstable, num_zerocopy), "%s %s %s" % (num_zerocopy, opmap.get(op_zerocopy),
                                                                                   zerocopy_streamed_sstable)
        assert op_partial(partial_streamed_sstable, num_partial), "%s %s %s" % (num_partial, op_partial,
                                                                                partial_streamed_sstable)
Code example #27
    def test_movement(self):
        cluster = self.cluster

        # Create an unbalanced ring
        cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 1)
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=30000, consistency=ConsistencyLevel.ONE)

        cluster.flush()

        # Move nodes to balance the cluster
        def move_node(node, token):
            mark = node.mark_log()
            node.move(token)  # can't assume 0 is balanced with m3p
            node.watch_log_for('{} state jump to NORMAL'.format(node.address_for_current_version()), from_mark=mark, timeout=180)
            time.sleep(3)

        balancing_tokens = cluster.balanced_tokens(3)

        move_node(node1, balancing_tokens[0])
        move_node(node2, balancing_tokens[1])
        move_node(node3, balancing_tokens[2])

        time.sleep(1)

        cluster.cleanup()

        # Check we can get all the keys
        for n in range(0, 30000):
            query_c1c2(session, n, ConsistencyLevel.ONE)

        # Now the load should be basically even
        sizes = [node.data_size() for node in [node1, node2, node3]]

        assert_almost_equal(sizes[0], sizes[1])
        assert_almost_equal(sizes[0], sizes[2])
        assert_almost_equal(sizes[1], sizes[2])
Code example #28
    def concurrent_decommission_not_allowed_test(self):
        """
        Test concurrent decommission is not allowed
        """
        cluster = self.cluster
        cluster.set_configuration_options(
            values={'stream_throughput_outbound_megabits_per_sec': 1})
        cluster.populate(2).start(wait_other_notice=True)
        node1, node2 = cluster.nodelist()

        session = self.patient_cql_connection(node2)
        create_ks(session, 'ks', 1)
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ALL)

        mark = node2.mark_log()

        def decommission():
            node2.nodetool('decommission')

        # Launch the first decommission in an external thread
        t = Thread(target=decommission)
        t.start()

        # Make sure first decommission is initialized before second decommission
        node2.watch_log_for('DECOMMISSIONING', filename='debug.log')

        # Launch a second decommission, should fail
        with self.assertRaises(ToolError):
            node2.nodetool('decommission')

        # Check data is correctly forwarded to node1 after node2 is decommissioned
        t.join()
        node2.watch_log_for('DECOMMISSIONED', from_mark=mark)
        session = self.patient_cql_connection(node1)
        session.execute('USE ks')
        for n in xrange(0, 10000):
            query_c1c2(session, n, ConsistencyLevel.ONE)
Code example #29
    def consistent_reads_after_move_test(self):
        debug("Creating a ring")
        cluster = self.cluster
        cluster.set_configuration_options(values={'hinted_handoff_enabled': False, 'write_request_timeout_in_ms': 60000,
                                                  'read_request_timeout_in_ms': 60000, 'dynamic_snitch_badness_threshold': 0.0})
        cluster.set_batch_commitlog(enabled=True)

        cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
        node1, node2, node3 = cluster.nodelist()

        debug("Set to talk to node 2")
        n2session = self.patient_cql_connection(node2)
        create_ks(n2session, 'ks', 2)
        create_c1c2_table(self, n2session)

        debug("Generating some data for all nodes")
        insert_c1c2(n2session, keys=range(10, 20), consistency=ConsistencyLevel.ALL)

        node1.flush()
        debug("Taking down node1")
        node1.stop(wait_other_notice=True)

        debug("Writing data to node2")
        insert_c1c2(n2session, keys=range(30, 1000), consistency=ConsistencyLevel.ONE)
        node2.flush()

        debug("Restart node1")
        node1.start(wait_other_notice=True)

        debug("Move token on node3")
        node3.move(2)

        debug("Checking that no data was lost")
        for n in xrange(10, 20):
            query_c1c2(n2session, n, ConsistencyLevel.ALL)

        for n in xrange(30, 1000):
            query_c1c2(n2session, n, ConsistencyLevel.ALL)
Code example #30
    def test_concurrent_decommission_not_allowed(self):
        """
        Test concurrent decommission is not allowed
        """
        cluster = self.cluster
        cluster.set_configuration_options(values={'stream_throughput_outbound_megabits_per_sec': 1})
        cluster.populate(2).start(wait_other_notice=True)
        node1, node2 = cluster.nodelist()

        session = self.patient_cql_connection(node2)
        create_ks(session, 'ks', 1)
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ALL)

        mark = node2.mark_log()

        def decommission():
            node2.nodetool('decommission')

        # Launch the first decommission in an external thread
        t = Thread(target=decommission)
        t.start()

        # Make sure first decommission is initialized before second decommission
        node2.watch_log_for('DECOMMISSIONING', filename='debug.log')

        # Launch a second decommission, should fail
        with pytest.raises(ToolError):
            node2.nodetool('decommission')

        # Check data is correctly forwarded to node1 after node2 is decommissioned
        t.join()
        node2.watch_log_for('DECOMMISSIONED', from_mark=mark)
        session = self.patient_cql_connection(node1)
        session.execute('USE ks')
        for n in range(0, 10000):
            query_c1c2(session, n, ConsistencyLevel.ONE)
Code example #31
    def test_bloom_filter_false_ratio(self):
        """
        Test for CASSANDRA-15834

        Verifies that BloomFilterFalseRatio takes true negatives into account. Without this fix, the following
        scenario (many reads for non-existing rows) would yield BloomFilterFalseRatio=1.0. With the fix we expect
        it to be less than the default bloom_filter_fp_chance.
        """
        cluster = self.cluster
        cluster.populate(1)
        node = cluster.nodelist()[0]
        cluster.start(wait_for_binary_proto=True)

        session = self.patient_exclusive_cql_connection(node)

        keyspace = 'bloom_ratio_test_ks'
        create_ks(session, keyspace, 1)
        create_c1c2_table(self, session)
        insert_c1c2(session, n=10)
        node.nodetool("flush " + keyspace)

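        # Only 10 rows were inserted, so almost every read below is a true-negative bloom filter check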
        for key in range(10000):
            session.execute("SELECT * from cf where key = '{0}'".format(key))

        bloom_filter_false_ratios = [
            make_mbean('metrics', type='Table', name='RecentBloomFilterFalseRatio'),
            make_mbean('metrics', type='Table', keyspace=keyspace, scope='cf', name='BloomFilterFalseRatio'),
            make_mbean('metrics', type='Table', name='BloomFilterFalseRatio'),
            make_mbean('metrics', type='Table', keyspace=keyspace, scope='cf', name='RecentBloomFilterFalseRatio'),
        ]

        with JolokiaAgent(node) as jmx:
            for metric in bloom_filter_false_ratios:
                ratio = jmx.read_attribute(metric, "Value")
                # Bloom filter false positive ratio should not be greater than the default bloom_filter_fp_chance.
                assert ratio < 0.01
Code example #32
    def test_multiple_repair(self):
        """
        * Launch a three node cluster
        * Create a keyspace with RF 3 and a table
        * Insert 49 rows
        * Stop node3
        * Insert 50 more rows
        * Restart node3
        * Issue an incremental repair on node3
        * Stop node2
        * Insert a final 50 rows
        * Restart node2
        * Issue an incremental repair on node2
        * Replace node3 with a new node
        * Verify data integrity
        # TODO: Several more verifications of data need to be interspersed throughout the test. The final assertion is insufficient.
        @jira_ticket CASSANDRA-10644
        """
        cluster = self.cluster
        cluster.populate(3).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 3)
        if cluster.version() < '4.0':
            create_cf(session,
                      'cf',
                      read_repair=0.0,
                      columns={
                          'c1': 'text',
                          'c2': 'text'
                      })
        else:
            create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

        logger.debug("insert data")

        insert_c1c2(session,
                    keys=list(range(1, 50)),
                    consistency=ConsistencyLevel.ALL)
        node1.flush()

        logger.debug("bringing down node 3")
        node3.flush()
        node3.stop(gently=False)

        logger.debug("inserting additional data into node 1 and 2")
        insert_c1c2(session,
                    keys=list(range(50, 100)),
                    consistency=ConsistencyLevel.TWO)
        node1.flush()
        node2.flush()

        logger.debug("restarting and repairing node 3")
        node3.start(wait_for_binary_proto=True)

        if cluster.version() >= "2.2":
            node3.repair()
        else:
            node3.nodetool("repair -par -inc")

        # wait for stream handlers to be closed on Windows
        # after the session is finished (see CASSANDRA-10644)
        if is_win:
            time.sleep(2)

        logger.debug("stopping node 2")
        node2.stop(gently=False)

        logger.debug("inserting data in nodes 1 and 3")
        insert_c1c2(session,
                    keys=list(range(100, 150)),
                    consistency=ConsistencyLevel.TWO)
        node1.flush()
        node3.flush()

        logger.debug("start and repair node 2")
        node2.start(wait_for_binary_proto=True)

        if cluster.version() >= "2.2":
            node2.repair()
        else:
            node2.nodetool("repair -par -inc")

        logger.debug("replace node and check data integrity")
        node3.stop(gently=False)
        node5 = Node('node5', cluster, True, ('127.0.0.5', 9160),
                     ('127.0.0.5', 7000), '7500', '0', None,
                     ('127.0.0.5', 9042))
        cluster.add(node5, False, data_center="dc1")
        node5.start(replace_address='127.0.0.3')

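        # node5 took over node3's token via replace_address; the count check verifies all 149 rows are still present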
        assert_one(session, "SELECT COUNT(*) FROM ks.cf LIMIT 200", [149])
Code example #33
    def rebuild_with_specific_sources_test(self):
        """
        @jira_ticket CASSANDRA-9875
        Verifies that an operator can specify specific sources to use
        when rebuilding.

        1. Set up a 2 node cluster across dc1 and dc2
        2. Create new keyspaces with replication factor 2 (one replica in each datacenter)
        3. Populate nodes with data
        4. Create a new node in dc3 and update the keyspace replication
        5. Run rebuild on the new node with a specific source in dc2
        6. Assert that streaming only occurred between the new node and the specified source
        7. Assert that the rebuild was successful by checking the data
        """
        keys = 1000

        cluster = self.cluster
        tokens = cluster.balanced_tokens_across_dcs(['dc1', 'dc2', 'dc3'])
        cluster.set_configuration_options(values={
            'endpoint_snitch':
            'org.apache.cassandra.locator.PropertyFileSnitch'
        })
        cluster.set_configuration_options(values={'num_tokens': 1})

        cluster.populate([1, 1], tokens=tokens[:2])
        node1, node2 = cluster.nodelist()

        cluster.start(wait_for_binary_proto=True)

        # populate data in dc1, dc2
        session = self.patient_exclusive_cql_connection(node1)
        # ks1 will be rebuilt in node3
        create_ks(session, 'ks1', {'dc1': 1, 'dc2': 1})
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)
        # ks2 will not be rebuilt in node3
        create_ks(session, 'ks2', {'dc1': 1, 'dc2': 1})
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)
        session.shutdown()

        # bootstrap a new node in dc3 with auto_bootstrap: false
        node3 = cluster.create_node('node3',
                                    False, ('127.0.0.3', 9160),
                                    ('127.0.0.3', 7000),
                                    '7300',
                                    '2002',
                                    tokens[2],
                                    binary_interface=('127.0.0.3', 9042))
        cluster.add(node3, False, data_center='dc3')
        node3.start(wait_other_notice=True, wait_for_binary_proto=True)

        # wait for snitch to reload
        time.sleep(60)
        # alter keyspace to replicate to dc3
        session = self.patient_exclusive_cql_connection(node3)
        session.execute(
            "ALTER KEYSPACE ks1 WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1, 'dc3':1};"
        )
        session.execute(
            "ALTER KEYSPACE ks2 WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1, 'dc3':1};"
        )
        session.execute('USE ks1')

        node2_address = node2.network_interfaces['binary'][0]
        node3_address = node3.network_interfaces['binary'][0]

        # rebuild only ks1, restricting the source to node2
        node3.nodetool('rebuild -ks ks1 -ts (%s,%s] -s %s' %
                       (tokens[2], str(pow(2, 63) - 1), node2_address))

        # verify that node2 streamed to node3
        log_matches = node2.grep_log('Session with /%s is complete' %
                                     node3_address)
        self.assertTrue(len(log_matches) > 0)

        # verify that node1 did not participate
        log_matches = node1.grep_log('streaming plan for Rebuild')
        self.assertEqual(len(log_matches), 0)

        # check data is sent by stopping node1, node2
        node1.stop()
        node2.stop()
        for i in xrange(0, keys):
            query_c1c2(session, i, ConsistencyLevel.ONE)
        # ks2 should not be streamed
        session.execute('USE ks2')
        for i in xrange(0, keys):
            query_c1c2(session,
                       i,
                       ConsistencyLevel.ONE,
                       tolerate_missing=True,
                       must_be_missing=True)
Code example #34
    def rebuild_ranges_test(self):
        """
        @jira_ticket CASSANDRA-10406
        """
        keys = 1000

        cluster = self.cluster
        tokens = cluster.balanced_tokens_across_dcs(['dc1', 'dc2'])
        cluster.set_configuration_options(values={
            'endpoint_snitch':
            'org.apache.cassandra.locator.PropertyFileSnitch'
        })
        cluster.set_configuration_options(values={'num_tokens': 1})
        node1 = cluster.create_node('node1',
                                    False, ('127.0.0.1', 9160),
                                    ('127.0.0.1', 7000),
                                    '7100',
                                    '2000',
                                    tokens[0],
                                    binary_interface=('127.0.0.1', 9042))
        node1.set_configuration_options(values={'initial_token': tokens[0]})
        cluster.add(node1, True, data_center='dc1')
        node1 = cluster.nodelist()[0]

        # start node in dc1
        node1.start(wait_for_binary_proto=True)

        # populate data in dc1
        session = self.patient_exclusive_cql_connection(node1)
        # ks1 will be rebuilt in node2
        create_ks(session, 'ks1', {'dc1': 1})
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)
        # ks2 will not be rebuilt in node2
        create_ks(session, 'ks2', {'dc1': 1})
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)
        session.shutdown()

        # Bootstrapping a new node in dc2 with auto_bootstrap: false
        node2 = cluster.create_node('node2',
                                    False, ('127.0.0.2', 9160),
                                    ('127.0.0.2', 7000),
                                    '7200',
                                    '2001',
                                    tokens[1],
                                    binary_interface=('127.0.0.2', 9042))
        node2.set_configuration_options(values={'initial_token': tokens[1]})
        cluster.add(node2, False, data_center='dc2')
        node2.start(wait_other_notice=True, wait_for_binary_proto=True)

        # wait for snitch to reload
        time.sleep(60)
        # alter keyspace to replicate to dc2
        session = self.patient_exclusive_cql_connection(node2)
        session.execute(
            "ALTER KEYSPACE ks1 WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};"
        )
        session.execute(
            "ALTER KEYSPACE ks2 WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};"
        )
        session.execute('USE ks1')

        # rebuild only ks1 with range that is node1's replica
        node2.nodetool('rebuild -ks ks1 -ts (%s,%s] dc1' %
                       (tokens[1], str(pow(2, 63) - 1)))

        # check data is sent by stopping node1
        node1.stop()
        for i in xrange(0, keys):
            query_c1c2(session, i, ConsistencyLevel.ONE)
        # ks2 should not be streamed
        session.execute('USE ks2')
        for i in xrange(0, keys):
            query_c1c2(session,
                       i,
                       ConsistencyLevel.ONE,
                       tolerate_missing=True,
                       must_be_missing=True)
Code example #35
    def simple_rebuild_test(self):
        """
        @jira_ticket CASSANDRA-9119

        Test rebuild from other dc works as expected.
        """

        keys = 1000

        cluster = self.cluster
        cluster.set_configuration_options(values={
            'endpoint_snitch':
            'org.apache.cassandra.locator.PropertyFileSnitch'
        })
        node1 = cluster.create_node('node1',
                                    False,
                                    None, ('127.0.0.1', 7000),
                                    '7100',
                                    '2000',
                                    None,
                                    binary_interface=('127.0.0.1', 9042))
        cluster.add(node1, True, data_center='dc1')

        # start node in dc1
        node1.start(wait_for_binary_proto=True)

        # populate data in dc1
        session = self.patient_exclusive_cql_connection(node1)
        create_ks(session, 'ks', {'dc1': 1})
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=keys, consistency=ConsistencyLevel.LOCAL_ONE)

        # check data
        for i in xrange(0, keys):
            query_c1c2(session, i, ConsistencyLevel.LOCAL_ONE)
        session.shutdown()

        # Bootstrapping a new node in dc2 with auto_bootstrap: false
        node2 = cluster.create_node('node2',
                                    False, ('127.0.0.2', 9160),
                                    ('127.0.0.2', 7000),
                                    '7200',
                                    '2001',
                                    None,
                                    binary_interface=('127.0.0.2', 9042))
        cluster.add(node2, False, data_center='dc2')
        node2.start(wait_other_notice=True, wait_for_binary_proto=True)

        # wait for snitch to reload
        time.sleep(60)
        # alter keyspace to replicate to dc2
        session = self.patient_exclusive_cql_connection(node2)
        session.execute(
            "ALTER KEYSPACE ks WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};"
        )
        if self.cluster.version() >= '2.2':
            # alter system_auth -- rebuilding it is no longer possible after
            # CASSANDRA-11848 prevented the local node from being considered a source
            # Only do this on 2.2+, because on 2.1, this keyspace only
            # exists if auth is enabled, which it isn't in this test
            session.execute(
                "ALTER KEYSPACE system_auth WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};"
            )
        session.execute('USE ks')

        self.rebuild_errors = 0

        # rebuild dc2 from dc1
        def rebuild():
            try:
                node2.nodetool('rebuild dc1')
            except ToolError as e:
                if 'Node is still rebuilding' in e.stdout:
                    self.rebuild_errors += 1
                else:
                    raise e

        class Runner(Thread):
            def __init__(self, func):
                Thread.__init__(self)
                self.func = func
                self.thread_exc_info = None

            def run(self):
                """
                Closes over self to catch any exceptions raised by func and
                register them at self.thread_exc_info
                Based on http://stackoverflow.com/a/1854263
                """
                try:
                    self.func()
                except Exception:
                    import sys
                    self.thread_exc_info = sys.exc_info()

        cmd1 = Runner(rebuild)
        cmd1.start()

        # concurrent rebuild should not be allowed (CASSANDRA-9119)
        # (the following sleep is needed to avoid a conflict in the 'nodetool()' method's env setup)
        time.sleep(.1)
        # we don't need to manually raise exceptions here -- already handled
        rebuild()

        cmd1.join()

        # manually raise exception from cmd1 thread
        # see http://stackoverflow.com/a/1854263
        if cmd1.thread_exc_info is not None:
            raise cmd1.thread_exc_info[1], None, cmd1.thread_exc_info[2]

        # exactly 1 of the two nodetool calls should fail
        # usually it will be the one in the main thread,
        # but occasionally it wins the race with the one in the secondary thread,
        # so we check that one succeeded and the other failed
        self.assertEqual(
            self.rebuild_errors,
            1,
            msg=
            'rebuild errors should be 1, but found {}. Concurrent rebuild should not be allowed, but one rebuild command should have succeeded.'
            .format(self.rebuild_errors))

        # check data
        for i in xrange(0, keys):
            query_c1c2(session, i, ConsistencyLevel.LOCAL_ONE)
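
The Runner class above exists only so that an exception raised on the background rebuild thread can be re-raised in the main thread. On Python 3 the same effect could be had with concurrent.futures; a sketch (not part of the original test, assuming the rebuild() closure and the time import from the test above):

# Sketch: Future.result() re-raises the worker's exception in the caller,
# replacing the hand-rolled Runner class (Python 3 only, not in the original test).
from concurrent.futures import ThreadPoolExecutor

with ThreadPoolExecutor(max_workers=1) as executor:
    background = executor.submit(rebuild)  # first rebuild, run in the background
    time.sleep(.1)                         # give it a head start, as above
    rebuild()                              # concurrent rebuild from the main thread
    background.result()                    # re-raises any exception from the worker
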
Code Example #36
    def resumable_rebuild_test(self):
        """
        @jira_ticket CASSANDRA-10810

        Test rebuild operation is resumable
        """
        self.ignore_log_patterns = list(self.ignore_log_patterns) + [
            r'Error while rebuilding node',
            r'Streaming error occurred on session with peer 127.0.0.3',
            r'Remote peer 127.0.0.3 failed stream session'
        ]
        cluster = self.cluster
        cluster.set_configuration_options(values={
            'endpoint_snitch':
            'org.apache.cassandra.locator.PropertyFileSnitch'
        })

        # Create 2 nodes on dc1
        node1 = cluster.create_node('node1',
                                    False, ('127.0.0.1', 9160),
                                    ('127.0.0.1', 7000),
                                    '7100',
                                    '2000',
                                    None,
                                    binary_interface=('127.0.0.1', 9042))
        node2 = cluster.create_node('node2',
                                    False, ('127.0.0.2', 9160),
                                    ('127.0.0.2', 7000),
                                    '7200',
                                    '2001',
                                    None,
                                    binary_interface=('127.0.0.2', 9042))

        cluster.add(node1, True, data_center='dc1')
        cluster.add(node2, True, data_center='dc1')

        node1.start(wait_for_binary_proto=True)
        node2.start(wait_for_binary_proto=True)

        # Insert data into node1 and node2
        session = self.patient_exclusive_cql_connection(node1)
        create_ks(session, 'ks', {'dc1': 1})
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ALL)
        key = list(range(10000, 20000))
        session = self.patient_exclusive_cql_connection(node2)
        session.execute('USE ks')
        insert_c1c2(session, keys=key, consistency=ConsistencyLevel.ALL)
        session.shutdown()

        # Create a new node3 on dc2
        node3 = cluster.create_node('node3',
                                    False, ('127.0.0.3', 9160),
                                    ('127.0.0.3', 7000),
                                    '7300',
                                    '2002',
                                    None,
                                    binary_interface=('127.0.0.3', 9042),
                                    byteman_port='8300')

        cluster.add(node3, False, data_center='dc2')

        node3.start(wait_other_notice=False, wait_for_binary_proto=True)

        # Wait for snitch to be refreshed
        time.sleep(5)

        # Alter necessary keyspace for rebuild operation
        session = self.patient_exclusive_cql_connection(node3)
        session.execute(
            "ALTER KEYSPACE ks WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};"
        )
        session.execute(
            "ALTER KEYSPACE system_auth WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};"
        )

        # Path to byteman script which makes the streaming to node2 throw an exception, making rebuild fail
        if cluster.version() < '4.0':
            script = ['./byteman/pre4.0/inject_failure_streaming_to_node2.btm']
        else:
            script = ['./byteman/4.0/inject_failure_streaming_to_node2.btm']
        node3.byteman_submit(script)

        # First rebuild must fail and data must be incomplete
        with self.assertRaises(ToolError, msg='Unexpected: SUCCEED'):
            debug('Executing first rebuild -> ')
            node3.nodetool('rebuild dc1')
        debug('Expected: FAILED')

        session.execute('USE ks')
        with self.assertRaises(AssertionError, msg='Unexpected: COMPLETE'):
            debug('Checking data is complete -> ')
            for i in xrange(0, 20000):
                query_c1c2(session, i, ConsistencyLevel.LOCAL_ONE)
        debug('Expected: INCOMPLETE')

        debug('Executing second rebuild -> ')
        node3.nodetool('rebuild dc1')
        debug('Expected: SUCCEED')

        # Check that all streaming sessions completed, that already-streamed ranges are skipped, and verify the streamed data
        node3.watch_log_for('All sessions completed')
        node3.watch_log_for('Skipping streaming those ranges.')
        debug('Checking data is complete -> ')
        for i in xrange(0, 20000):
            query_c1c2(session, i, ConsistencyLevel.LOCAL_ONE)
        debug('Expected: COMPLETE')
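
The byteman script path above is selected by cluster version; the same switch could be factored into a small helper so other tests injecting the node2 streaming failure can reuse it. A sketch (the helper name is hypothetical, the paths are the ones used above):

# Sketch: version-dependent byteman script selection as a reusable helper.
def failure_injection_script(cluster):
    subdir = 'pre4.0' if cluster.version() < '4.0' else '4.0'
    return ['./byteman/{}/inject_failure_streaming_to_node2.btm'.format(subdir)]
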
Code Example #37
    def rebuild_with_specific_sources_test(self):
        """
        @jira_ticket CASSANDRA-9875
        Verifies that an operator can specify specific sources to use
        when rebuilding.

        1. Set up a 2 node cluster across dc1 and dc2
        2. Create new keyspaces with replication factor 2 (one replica in each datacenter)
        3. Populate nodes with data
        4. Create a new node in dc3 and update the keyspace replication
        5. Run rebuild on the new node with a specific source in dc2
        6. Assert that streaming only occurred between the new node and the specified source
        7. Assert that the rebuild was successful by checking the data
        """
        keys = 1000

        cluster = self.cluster
        tokens = cluster.balanced_tokens_across_dcs(['dc1', 'dc2', 'dc3'])
        cluster.set_configuration_options(values={'endpoint_snitch': 'org.apache.cassandra.locator.PropertyFileSnitch'})
        cluster.set_configuration_options(values={'num_tokens': 1})

        cluster.populate([1, 1], tokens=tokens[:2])
        node1, node2 = cluster.nodelist()

        cluster.start(wait_for_binary_proto=True)

        # populate data in dc1, dc2
        session = self.patient_exclusive_cql_connection(node1)
        # ks1 will be rebuilt in node3
        create_ks(session, 'ks1', {'dc1': 1, 'dc2': 1})
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)
        # ks2 will not be rebuilt in node3
        create_ks(session, 'ks2', {'dc1': 1, 'dc2': 1})
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)
        session.shutdown()

        # bootstrap a new node in dc3 with auto_bootstrap: false
        node3 = cluster.create_node('node3', False,
                                    ('127.0.0.3', 9160),
                                    ('127.0.0.3', 7000),
                                    '7300', '2002', tokens[2],
                                    binary_interface=('127.0.0.3', 9042))
        cluster.add(node3, False, data_center='dc3')
        node3.start(wait_other_notice=True, wait_for_binary_proto=True)

        # wait for snitch to reload
        time.sleep(60)
        # alter keyspace to replicate to dc3
        session = self.patient_exclusive_cql_connection(node3)
        session.execute("ALTER KEYSPACE ks1 WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1, 'dc3':1};")
        session.execute("ALTER KEYSPACE ks2 WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1, 'dc3':1};")
        session.execute('USE ks1')

        node2_address = node2.network_interfaces['binary'][0]
        node3_address = node3.network_interfaces['binary'][0]

        # rebuild only ks1, restricting the source to node2
        node3.nodetool('rebuild -ks ks1 -ts (%s,%s] -s %s' % (tokens[2], str(pow(2, 63) - 1), node2_address))

        # verify that node2 streamed to node3
        log_matches = node2.grep_log('Session with /%s is complete' % node3_address)
        self.assertTrue(len(log_matches) > 0)

        # verify that node1 did not participate
        log_matches = node1.grep_log('streaming plan for Rebuild')
        self.assertEqual(len(log_matches), 0)

        # check the data was streamed by stopping node1 and node2 (reads must now come from node3)
        node1.stop()
        node2.stop()
        for i in xrange(0, keys):
            query_c1c2(session, i, ConsistencyLevel.ONE)
        # ks2 should not be streamed
        session.execute('USE ks2')
        for i in xrange(0, keys):
            query_c1c2(session, i, ConsistencyLevel.ONE, tolerate_missing=True, must_be_missing=True)
Code Example #38
    def multiple_repair_test(self):
        """
        * Launch a three node cluster
        * Create a keyspace with RF 3 and a table
        * Insert 49 rows
        * Stop node3
        * Insert 50 more rows
        * Restart node3
        * Issue an incremental repair on node3
        * Stop node2
        * Insert a final 50 rows
        * Restart node2
        * Issue an incremental repair on node2
        * Replace node3 with a new node
        * Verify data integrity
        # TODO: Several more verifications of data need to be interspersed throughout the test. The final assertion is insufficient.
        @jira_ticket CASSANDRA-10644
        """
        cluster = self.cluster
        cluster.populate(3).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 3)
        create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})

        debug("insert data")

        insert_c1c2(session, keys=range(1, 50), consistency=ConsistencyLevel.ALL)
        node1.flush()

        debug("bringing down node 3")
        node3.flush()
        node3.stop(gently=False)

        debug("inserting additional data into node 1 and 2")
        insert_c1c2(session, keys=range(50, 100), consistency=ConsistencyLevel.TWO)
        node1.flush()
        node2.flush()

        debug("restarting and repairing node 3")
        node3.start(wait_for_binary_proto=True)

        if cluster.version() >= "2.2":
            node3.repair()
        else:
            node3.nodetool("repair -par -inc")

        # wait for stream handlers to be closed on Windows
        # after the session is finished (see CASSANDRA-10644)
        if is_win:
            time.sleep(2)

        debug("stopping node 2")
        node2.stop(gently=False)

        debug("inserting data in nodes 1 and 3")
        insert_c1c2(session, keys=range(100, 150), consistency=ConsistencyLevel.TWO)
        node1.flush()
        node3.flush()

        debug("start and repair node 2")
        node2.start(wait_for_binary_proto=True)

        if cluster.version() >= "2.2":
            node2.repair()
        else:
            node2.nodetool("repair -par -inc")

        debug("replace node and check data integrity")
        node3.stop(gently=False)
        node5 = Node('node5', cluster, True, ('127.0.0.5', 9160), ('127.0.0.5', 7000), '7500', '0', None, ('127.0.0.5', 9042))
        cluster.add(node5, False)
        node5.start(replace_address='127.0.0.3', wait_other_notice=True)
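        # three rounds of inserts were made: 49 + 50 + 50 = 149 rows in total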

        assert_one(session, "SELECT COUNT(*) FROM ks.cf LIMIT 200", [149])
Code Example #39
    def simple_rebuild_test(self):
        """
        @jira_ticket CASSANDRA-9119

        Test rebuild from other dc works as expected.
        """

        keys = 1000

        cluster = self.cluster
        cluster.set_configuration_options(values={'endpoint_snitch': 'org.apache.cassandra.locator.PropertyFileSnitch'})
        node1 = cluster.create_node('node1', False,
                                    ('127.0.0.1', 9160),
                                    ('127.0.0.1', 7000),
                                    '7100', '2000', None,
                                    binary_interface=('127.0.0.1', 9042))
        cluster.add(node1, True, data_center='dc1')

        # start node in dc1
        node1.start(wait_for_binary_proto=True)

        # populate data in dc1
        session = self.patient_exclusive_cql_connection(node1)
        create_ks(session, 'ks', {'dc1': 1})
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=keys, consistency=ConsistencyLevel.LOCAL_ONE)

        # check data
        for i in xrange(0, keys):
            query_c1c2(session, i, ConsistencyLevel.LOCAL_ONE)
        session.shutdown()

        # Bootstrapping a new node in dc2 with auto_bootstrap: false
        node2 = cluster.create_node('node2', False,
                                    ('127.0.0.2', 9160),
                                    ('127.0.0.2', 7000),
                                    '7200', '2001', None,
                                    binary_interface=('127.0.0.2', 9042))
        cluster.add(node2, False, data_center='dc2')
        node2.start(wait_other_notice=True, wait_for_binary_proto=True)

        # wait for snitch to reload
        time.sleep(60)
        # alter keyspace to replicate to dc2
        session = self.patient_exclusive_cql_connection(node2)
        session.execute("ALTER KEYSPACE ks WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};")
        if self.cluster.version() >= '2.2':
            # alter system_auth -- rebuilding it is no longer possible after
            # CASSANDRA-11848 prevented the local node from being considered a source
            # Only do this on 2.2+, because on 2.1, this keyspace only
            # exists if auth is enabled, which it isn't in this test
            session.execute("ALTER KEYSPACE system_auth WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};")
        session.execute('USE ks')

        self.rebuild_errors = 0

        # rebuild dc2 from dc1
        def rebuild():
            try:
                node2.nodetool('rebuild dc1')
            except ToolError as e:
                if 'Node is still rebuilding' in e.stdout:
                    self.rebuild_errors += 1
                else:
                    raise e

        class Runner(Thread):
            def __init__(self, func):
                Thread.__init__(self)
                self.func = func
                self.thread_exc_info = None

            def run(self):
                """
                Closes over self to catch any exceptions raised by func and
                register them at self.thread_exc_info
                Based on http://stackoverflow.com/a/1854263
                """
                try:
                    self.func()
                except Exception:
                    import sys
                    self.thread_exc_info = sys.exc_info()

        cmd1 = Runner(rebuild)
        cmd1.start()

        # concurrent rebuild should not be allowed (CASSANDRA-9119)
        # (the following sleep is needed to avoid a conflict in the 'nodetool()' method's env setup)
        time.sleep(.1)
        # we don't need to manually raise exceptions here -- already handled
        rebuild()

        cmd1.join()

        # manually raise exception from cmd1 thread
        # see http://stackoverflow.com/a/1854263
        if cmd1.thread_exc_info is not None:
            raise cmd1.thread_exc_info[1], None, cmd1.thread_exc_info[2]

        # exactly 1 of the two nodetool calls should fail
        # usually it will be the one in the main thread,
        # but occasionally it wins the race with the one in the secondary thread,
        # so we check that one succeeded and the other failed
        self.assertEqual(self.rebuild_errors, 1,
                         msg='rebuild errors should be 1, but found {}. Concurrent rebuild should not be allowed, but one rebuild command should have succeeded.'.format(self.rebuild_errors))

        # check data
        for i in xrange(0, keys):
            query_c1c2(session, i, ConsistencyLevel.LOCAL_ONE)
Code Example #40
    def test_hintedhandoff_window(self):
        """
        Test that we only store at a maximum the hint window worth of hints.
        Prior to CASSANDRA-14309 we would store another window worth of hints
        if the down node was brought up and then taken back down immediately.
        We would also store another window of hints on a live node if the live
        node was restarted.
        @jira_ticket CASSANDRA-14309
        """

        # hint_window_persistent_enabled is set to true by default
        self.cluster.set_configuration_options({
            'max_hint_window_in_ms': 5000,
            'hinted_handoff_enabled': True,
            'max_hints_delivery_threads': 1,
            'hints_flush_period_in_ms': 100,
        })
        self.cluster.populate(2).start()
        node1, node2 = self.cluster.nodelist()
        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 2)
        create_c1c2_table(self, session)

        # Stop handoff until very end and take node2 down for first round of hints
        node1.nodetool('pausehandoff')
        node2.nodetool('disablebinary')
        node2.nodetool('disablegossip')
        # First round of hints. We expect these to be replayed and the only
        # hints within the window
        insert_c1c2(session, n=(0, 100), consistency=ConsistencyLevel.ONE)
        # Let hint window pass
        time.sleep(10)
        # Re-enable and disable the node. Prior to CASSANDRA-14215 this should make the hint window on node1 reset.
        node2.nodetool('enablegossip')
        node2.nodetool('disablegossip')
        # Second round of inserts. We do not expect hints to be stored.
        insert_c1c2(session, n=(100, 200), consistency=ConsistencyLevel.ONE)

        # Restart node1. Prior to CASSANDRA-14215 this would reset node1's hint window.
        node1.stop()
        node1.start(wait_for_binary_proto=True, wait_other_notice=False)
        session = self.patient_exclusive_cql_connection(node1)
        session.execute('USE ks')
        # Third round of inserts. We do not expect hints to be stored.
        insert_c1c2(session, n=(200, 300), consistency=ConsistencyLevel.ONE)

        # Enable node2 and wait for hints to be replayed
        node2.nodetool('enablegossip')
        node2.nodetool('enablebinary')
        node1.nodetool('resumehandoff')
        node1.watch_log_for('Finished hinted handoff')
        # Stop node1 so that we only query node2
        node1.stop()

        session = self.patient_exclusive_cql_connection(node2)
        session.execute('USE ks')
        # Ensure first dataset is present (through replayed hints)
        for x in range(0, 100):
            query_c1c2(session, x, ConsistencyLevel.ONE)

        # Ensure second and third datasets are not present
        for x in range(100, 300):
            query_c1c2(session,
                       x,
                       ConsistencyLevel.ONE,
                       tolerate_missing=True,
                       must_be_missing=True)
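
The time.sleep(10) in the middle of the test above works because it comfortably exceeds the 5000 ms hint window configured at the top of the test. A sketch of deriving the wait from the setting instead of hardcoding it (the local variable is illustrative, not part of the test):

# Sketch: wait out the configured hint window with some margin.
max_hint_window_in_ms = 5000                    # matches the cluster option above
time.sleep(2 * max_hint_window_in_ms / 1000.0)  # sleep well past the window
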
Code Example #41
    def rebuild_ranges_test(self):
        """
        @jira_ticket CASSANDRA-10406
        """
        keys = 1000

        cluster = self.cluster
        tokens = cluster.balanced_tokens_across_dcs(['dc1', 'dc2'])
        cluster.set_configuration_options(values={'endpoint_snitch': 'org.apache.cassandra.locator.PropertyFileSnitch'})
        cluster.set_configuration_options(values={'num_tokens': 1})
        node1 = cluster.create_node('node1', False,
                                    ('127.0.0.1', 9160),
                                    ('127.0.0.1', 7000),
                                    '7100', '2000', tokens[0],
                                    binary_interface=('127.0.0.1', 9042))
        node1.set_configuration_options(values={'initial_token': tokens[0]})
        cluster.add(node1, True, data_center='dc1')
        node1 = cluster.nodelist()[0]

        # start node in dc1
        node1.start(wait_for_binary_proto=True)

        # populate data in dc1
        session = self.patient_exclusive_cql_connection(node1)
        # ks1 will be rebuilt in node2
        create_ks(session, 'ks1', {'dc1': 1})
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)
        # ks2 will not be rebuilt in node2
        create_ks(session, 'ks2', {'dc1': 1})
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)
        session.shutdown()

        # Bootstrapping a new node in dc2 with auto_bootstrap: false
        node2 = cluster.create_node('node2', False,
                                    ('127.0.0.2', 9160),
                                    ('127.0.0.2', 7000),
                                    '7200', '2001', tokens[1],
                                    binary_interface=('127.0.0.2', 9042))
        node2.set_configuration_options(values={'initial_token': tokens[1]})
        cluster.add(node2, False, data_center='dc2')
        node2.start(wait_other_notice=True, wait_for_binary_proto=True)

        # wait for snitch to reload
        time.sleep(60)
        # alter keyspace to replicate to dc2
        session = self.patient_exclusive_cql_connection(node2)
        session.execute("ALTER KEYSPACE ks1 WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};")
        session.execute("ALTER KEYSPACE ks2 WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};")
        session.execute('USE ks1')

        # rebuild only ks1, using the range that is node1's replica
        node2.nodetool('rebuild -ks ks1 -ts (%s,%s] dc1' % (tokens[1], str(pow(2, 63) - 1)))

        # check the data was streamed by stopping node1 (reads must now come from node2)
        node1.stop()
        for i in xrange(0, keys):
            query_c1c2(session, i, ConsistencyLevel.ONE)
        # ks2 should not be streamed
        session.execute('USE ks2')
        for i in xrange(0, keys):
            query_c1c2(session, i, ConsistencyLevel.ONE, tolerate_missing=True, must_be_missing=True)
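
The -ts range passed to nodetool rebuild above runs from node2's token up to the maximum Murmur3 token, i.e. roughly the upper half of the token ring. A sketch with illustrative token values (the test derives its actual tokens from cluster.balanced_tokens_across_dcs):

# Sketch: the rebuild range spans from node2's token to the maximum Murmur3 token.
MAX_MURMUR3_TOKEN = 2 ** 63 - 1
tokens = [-2 ** 63, 0]                     # two roughly balanced single tokens
rebuild_range = '(%s,%s]' % (tokens[1], MAX_MURMUR3_TOKEN)
print(rebuild_range)                       # -> "(0,9223372036854775807]"
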
Code Example #42
    def resumable_rebuild_test(self):
        """
        @jira_ticket CASSANDRA-10810

        Test rebuild operation is resumable
        """
        self.ignore_log_patterns = list(self.ignore_log_patterns) + [r'Error while rebuilding node',
                                                                     r'Streaming error occurred on session with peer 127.0.0.3',
                                                                     r'Remote peer 127.0.0.3 failed stream session']
        cluster = self.cluster
        cluster.set_configuration_options(values={'endpoint_snitch': 'org.apache.cassandra.locator.PropertyFileSnitch'})

        # Create 2 nodes on dc1
        node1 = cluster.create_node('node1', False,
                                    ('127.0.0.1', 9160),
                                    ('127.0.0.1', 7000),
                                    '7100', '2000', None,
                                    binary_interface=('127.0.0.1', 9042))
        node2 = cluster.create_node('node2', False,
                                    ('127.0.0.2', 9160),
                                    ('127.0.0.2', 7000),
                                    '7200', '2001', None,
                                    binary_interface=('127.0.0.2', 9042))

        cluster.add(node1, True, data_center='dc1')
        cluster.add(node2, True, data_center='dc1')

        node1.start(wait_for_binary_proto=True)
        node2.start(wait_for_binary_proto=True)

        # Insert data into node1 and node2
        session = self.patient_exclusive_cql_connection(node1)
        create_ks(session, 'ks', {'dc1': 1})
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
        insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ALL)
        key = list(range(10000, 20000))
        session = self.patient_exclusive_cql_connection(node2)
        session.execute('USE ks')
        insert_c1c2(session, keys=key, consistency=ConsistencyLevel.ALL)
        session.shutdown()

        # Create a new node3 on dc2
        node3 = cluster.create_node('node3', False,
                                    ('127.0.0.3', 9160),
                                    ('127.0.0.3', 7000),
                                    '7300', '2002', None,
                                    binary_interface=('127.0.0.3', 9042),
                                    byteman_port='8300')

        cluster.add(node3, False, data_center='dc2')

        node3.start(wait_other_notice=False, wait_for_binary_proto=True)

        # Wait for snitch to be refreshed
        time.sleep(5)

        # Alter necessary keyspace for rebuild operation
        session = self.patient_exclusive_cql_connection(node3)
        session.execute("ALTER KEYSPACE ks WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};")
        session.execute("ALTER KEYSPACE system_auth WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};")

        # Path to byteman script which makes the streaming to node2 throw an exception, making rebuild fail
        script = ['./byteman/inject_failure_streaming_to_node2.btm']
        node3.byteman_submit(script)

        # First rebuild must fail and data must be incomplete
        with self.assertRaises(ToolError, msg='Unexpected: SUCCEED'):
            debug('Executing first rebuild -> ')
            node3.nodetool('rebuild dc1')
        debug('Expected: FAILED')

        session.execute('USE ks')
        with self.assertRaises(AssertionError, msg='Unexpected: COMPLETE'):
            debug('Checking data is complete -> ')
            for i in xrange(0, 20000):
                query_c1c2(session, i, ConsistencyLevel.LOCAL_ONE)
        debug('Expected: INCOMPLETE')

        debug('Executing second rebuild -> ')
        node3.nodetool('rebuild dc1')
        debug('Expected: SUCCEED')

        # Check that all streaming sessions completed, that already-streamed ranges are skipped, and verify the streamed data
        node3.watch_log_for('All sessions completed')
        node3.watch_log_for('Skipping streaming those ranges.')
        debug('Checking data is complete -> ')
        for i in xrange(0, 20000):
            query_c1c2(session, i, ConsistencyLevel.LOCAL_ONE)
        debug('Expected: COMPLETE')