Example #1
    def table_metric_mbeans_test(self):
        """
        Test some basic table metric mbeans with simple writes.
        """
        cluster = self.cluster
        cluster.populate(3)
        node1, node2, node3 = cluster.nodelist()
        remove_perf_disable_shared_mem(node1)
        cluster.start(wait_for_binary_proto=True)

        version = cluster.version()
        node1.stress(['write', 'n=10K', 'no-warmup', '-schema', 'replication(factor=3)'])

        typeName = "ColumnFamily" if version <= '2.2.X' else 'Table'
        debug('Version {} typeName {}'.format(version, typeName))

        # TODO the keyspace and table name are capitalized in 2.0
        memtable_size = make_mbean('metrics', type=typeName, keyspace='keyspace1', scope='standard1', name='AllMemtablesHeapSize')
        disk_size = make_mbean('metrics', type=typeName, keyspace='keyspace1', scope='standard1', name='LiveDiskSpaceUsed')
        sstable_count = make_mbean('metrics', type=typeName, keyspace='keyspace1', scope='standard1', name='LiveSSTableCount')

        with JolokiaAgent(node1) as jmx:
            mem_size = jmx.read_attribute(memtable_size, "Value")
            self.assertGreater(int(mem_size), 10000)

            on_disk_size = jmx.read_attribute(disk_size, "Count")
            self.assertEquals(int(on_disk_size), 0)

            node1.flush()

            on_disk_size = jmx.read_attribute(disk_size, "Count")
            self.assertGreater(int(on_disk_size), 10000)

            sstables = jmx.read_attribute(sstable_count, "Value")
            self.assertGreaterEqual(int(sstables), 1)
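For context, make_mbean (from the dtest jmxutils helpers) only assembles the JMX ObjectName string that Jolokia later resolves. The sketch below is an assumption about its behaviour, not the actual helper; the produced format simply matches the names used throughout these examples, e.g. org.apache.cassandra.metrics:type=Table,keyspace=keyspace1,scope=standard1,name=LiveSSTableCount.

def make_mbean_sketch(package, type, **kwargs):
    # Approximation only: build an ObjectName string such as
    # org.apache.cassandra.metrics:type=Table,keyspace=keyspace1,scope=standard1,name=LiveSSTableCount
    name = 'org.apache.cassandra.{}:type={}'.format(package, type)
    name += ''.join(',{}={}'.format(k, v) for k, v in kwargs.items())
    return name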
    def query_and_check_repaired_mismatches(
            self,
            jmx,
            session,
            query,
            expect_read_repair=True,
            expect_unconfirmed_inconsistencies=False,
            expect_confirmed_inconsistencies=False):

        rr_count = make_mbean('metrics',
                              type='ReadRepair',
                              name='ReconcileRead')
        unconfirmed_count = make_mbean(
            'metrics',
            type='Table,keyspace=ks',
            name='RepairedDataInconsistenciesUnconfirmed,scope=tbl')
        confirmed_count = make_mbean(
            'metrics',
            type='Table,keyspace=ks',
            name='RepairedDataInconsistenciesConfirmed,scope=tbl')

        rr_before = self.get_attribute_count(jmx, rr_count)
        uc_before = self.get_attribute_count(jmx, unconfirmed_count)
        cc_before = self.get_attribute_count(jmx, confirmed_count)

        stmt = SimpleStatement(query)
        stmt.consistency_level = ConsistencyLevel.ALL
        session.execute(stmt)

        rr_after = self.get_attribute_count(jmx, rr_count)
        uc_after = self.get_attribute_count(jmx, unconfirmed_count)
        cc_after = self.get_attribute_count(jmx, confirmed_count)

        logger.debug("Read Repair Count: {before}, {after}".format(
            before=rr_before, after=rr_after))
        logger.debug(
            "Unconfirmed Inconsistency Count: {before}, {after}".format(
                before=uc_before, after=uc_after))
        logger.debug("Confirmed Inconsistency Count: {before}, {after}".format(
            before=cc_before, after=cc_after))

        if expect_read_repair:
            assert rr_after > rr_before
        else:
            assert rr_after == rr_before

        if expect_unconfirmed_inconsistencies:
            assert uc_after > uc_before
        else:
            assert uc_after == uc_before

        if expect_confirmed_inconsistencies:
            assert cc_after > cc_before
        else:
            assert cc_after == cc_before
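The get_attribute_count helper used above is not shown in this excerpt; a plausible minimal sketch follows, assuming it reads the Count attribute and treats an MBean that has not been registered yet as zero, mirroring the has_mbean check seen in the tracing examples further down. This is an assumption, not the verbatim helper.

    def get_attribute_count(self, jmx, mbean):
        # Counters are registered lazily, so a missing MBean simply means
        # the corresponding event has never occurred on this node.
        if jmx.has_mbean(mbean):
            return jmx.read_attribute(mbean, 'Count')
        return 0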
    def __init__(self, node, scope):
        self.node = node

        self.local_requests_mbean = make_mbean('metrics',
                                               type='ClientRequest',
                                               scope=scope,
                                               name='LocalRequests')

        self.remote_requests_mbean = make_mbean('metrics',
                                                type='ClientRequest',
                                                scope=scope,
                                                name='RemoteRequests')
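A short sketch of how such a wrapper might read both counters in one Jolokia session; the method name and the Count attribute are assumptions for illustration, while JolokiaAgent and read_attribute are used exactly as in the surrounding examples.

    def local_and_remote_counts(self):
        # Open one agent session and read both coordinator-side counters.
        with JolokiaAgent(self.node) as jmx:
            local = jmx.read_attribute(self.local_requests_mbean, 'Count')
            remote = jmx.read_attribute(self.remote_requests_mbean, 'Count')
        return local, remote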
Example #4
    def table_metric_mbeans_test(self):
        """
        Test some basic table metric mbeans with simple writes.
        """
        cluster = self.cluster
        cluster.populate(3)
        node1, node2, node3 = cluster.nodelist()
        remove_perf_disable_shared_mem(node1)
        cluster.start(wait_for_binary_proto=True)

        version = cluster.version()
        node1.stress([
            'write', 'n=10K', 'no-warmup', '-schema', 'replication(factor=3)'
        ])

        typeName = "ColumnFamily" if version <= '2.2.X' else 'Table'
        debug('Version {} typeName {}'.format(version, typeName))

        # TODO the keyspace and table name are capitalized in 2.0
        memtable_size = make_mbean('metrics',
                                   type=typeName,
                                   keyspace='keyspace1',
                                   scope='standard1',
                                   name='AllMemtablesHeapSize')
        disk_size = make_mbean('metrics',
                               type=typeName,
                               keyspace='keyspace1',
                               scope='standard1',
                               name='LiveDiskSpaceUsed')
        sstable_count = make_mbean('metrics',
                                   type=typeName,
                                   keyspace='keyspace1',
                                   scope='standard1',
                                   name='LiveSSTableCount')

        with JolokiaAgent(node1) as jmx:
            mem_size = jmx.read_attribute(memtable_size, "Value")
            self.assertGreater(int(mem_size), 10000)

            on_disk_size = jmx.read_attribute(disk_size, "Count")
            self.assertEquals(int(on_disk_size), 0)

            node1.flush()

            on_disk_size = jmx.read_attribute(disk_size, "Count")
            self.assertGreater(int(on_disk_size), 10000)

            sstables = jmx.read_attribute(sstable_count, "Value")
            self.assertGreaterEqual(int(sstables), 1)
Example #5
    def test_table_metric_mbeans(self):
        """
        Test some basic table metric mbeans with simple writes.
        """
        cluster = self.cluster
        cluster.populate(3)
        node1, node2, node3 = cluster.nodelist()
        cluster.start()

        version = cluster.version()
        node1.stress([
            'write', 'n=10K', 'no-warmup', '-schema', 'replication(factor=3)'
        ])

        typeName = "ColumnFamily" if version < '3.0' else 'Table'
        logger.debug('Version {} typeName {}'.format(version, typeName))

        # TODO the keyspace and table name are capitalized in 2.0
        memtable_size = make_mbean('metrics',
                                   type=typeName,
                                   keyspace='keyspace1',
                                   scope='standard1',
                                   name='AllMemtablesHeapSize')
        disk_size = make_mbean('metrics',
                               type=typeName,
                               keyspace='keyspace1',
                               scope='standard1',
                               name='LiveDiskSpaceUsed')
        sstable_count = make_mbean('metrics',
                                   type=typeName,
                                   keyspace='keyspace1',
                                   scope='standard1',
                                   name='LiveSSTableCount')

        with JolokiaAgent(node1) as jmx:
            mem_size = jmx.read_attribute(memtable_size, "Value")
            assert int(mem_size) > 10000

            on_disk_size = jmx.read_attribute(disk_size, "Count")
            assert int(on_disk_size) == 0

            node1.flush()

            on_disk_size = jmx.read_attribute(disk_size, "Count")
            assert int(on_disk_size) > 10000

            sstables = jmx.read_attribute(sstable_count, "Value")
            assert int(sstables) >= 1
Example #6
    def _batchlog_replay_compatibility_test(self, coordinator_idx,
                                            current_nodes, previous_version,
                                            previous_nodes, protocol_version):
        session = self.prepare_mixed(coordinator_idx,
                                     current_nodes,
                                     previous_version,
                                     previous_nodes,
                                     protocol_version=protocol_version,
                                     install_byteman=True)

        coordinator = self.cluster.nodelist()[coordinator_idx]
        coordinator.byteman_submit(
            [mk_bman_path('fail_after_batchlog_write.btm')])
        logger.debug(
            "Injected byteman scripts to enable batchlog replay {}".format(
                coordinator.name))

        query = """
            BEGIN BATCH
            INSERT INTO users (id, firstname, lastname) VALUES (0, 'Jack', 'Sparrow')
            INSERT INTO users (id, firstname, lastname) VALUES (1, 'Will', 'Turner')
            APPLY BATCH
        """
        session.execute(query)

        # batchlog replay skips over all entries that are younger than
        # 2 * write_request_timeout_in_ms ms: 1x timeout for all mutations to be written,
        # and another 1x timeout for batch remove mutation to be received.
        delay = 2 * coordinator.get_conf_option(
            'write_request_timeout_in_ms') / 1000.0 + 1
        logger.debug(
            'Sleeping for {}s for the batches to not be skipped'.format(delay))
        time.sleep(delay)

        total_batches_replayed = 0
        blm = make_mbean('db', type='BatchlogManager')

        for n in self.cluster.nodelist():
            if n == coordinator:
                continue

            with JolokiaAgent(n) as jmx:
                logger.debug('Forcing batchlog replay for {}'.format(n.name))
                jmx.execute_method(blm, 'forceBatchlogReplay')
                batches_replayed = jmx.read_attribute(blm,
                                                      'TotalBatchesReplayed')
                logger.debug('{} batches replayed on node {}'.format(
                    batches_replayed, n.name))
                total_batches_replayed += batches_replayed

        assert total_batches_replayed >= 2

        for node in self.cluster.nodelist():
            session = self.patient_exclusive_cql_connection(
                node, protocol_version=protocol_version)
            rows = sorted(
                session.execute(
                    'SELECT id, firstname, lastname FROM ks.users'))
            assert [[0, 'Jack', 'Sparrow'],
                    [1, 'Will', 'Turner']] == [list(rows[0]), list(rows[1])]
 def get_table_metric(self, keyspace, table, metric, attr="Count"):
     mbean = make_mbean("metrics",
                        keyspace=keyspace,
                        scope=table,
                        type="Table",
                        name=metric)
     return self.jmx.read_attribute(mbean, attr)
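A usage sketch for the helper above (hypothetical caller inside a test method of the same class; assumes self.jmx is a started JolokiaAgent and that the stress-generated keyspace1.standard1 table exists):

# Hypothetical usage: LiveSSTableCount is a gauge, so read its Value attribute.
sstables = self.get_table_metric('keyspace1', 'standard1', 'LiveSSTableCount', attr='Value')
assert sstables >= 1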
Example #8
    def test_blacklisted_directory(self):
        cluster = self.cluster
        cluster.set_datadir_count(3)
        cluster.populate(1)
        [node] = cluster.nodelist()
        cluster.start()

        session = self.patient_cql_connection(node)
        create_ks(session, 'ks', 1)
        create_c1c2_table(self, session)
        insert_c1c2(session, n=10000)
        node.flush()
        for k in range(0, 10000):
            query_c1c2(session, k)

        node.compact()
        mbean = make_mbean('db', type='BlacklistedDirectories')
        with JolokiaAgent(node) as jmx:
            jmx.execute_method(mbean, 'markUnwritable',
                               [os.path.join(node.get_path(), 'data1')])

        for k in range(0, 10000):
            query_c1c2(session, k)

        node.nodetool('relocatesstables')

        for k in range(0, 10000):
            query_c1c2(session, k)
Example #9
    def test_oversized_mutation(self):
        """
        Test that multi-DC write failures return operation failed rather than a timeout.
        @jira_ticket CASSANDRA-16334.
        """

        cluster = self.cluster
        cluster.populate([2, 2])
        cluster.set_configuration_options(
            values={'max_mutation_size_in_kb': 128})
        cluster.start()

        node1 = cluster.nodelist()[0]
        session = self.patient_exclusive_cql_connection(node1)

        session.execute(
            "CREATE KEYSPACE k WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': 2, 'dc2': 2}"
        )
        session.execute("CREATE TABLE k.t (key int PRIMARY KEY, val blob)")

        payload = '1' * 1024 * 256
        query = "INSERT INTO k.t (key, val) VALUES (1, textAsBlob('{}'))".format(
            payload)

        assert_write_failure(session, query, ConsistencyLevel.LOCAL_ONE)
        assert_write_failure(session, query, ConsistencyLevel.ONE)

        # verify that no hints are created
        with JolokiaAgent(node1) as jmx:
            assert 0 == jmx.read_attribute(
                make_mbean('metrics', type='Storage', name='TotalHints'),
                'Count')
Example #10
    def test_tracing_does_not_interfere_with_digest_calculation(self):
        """
        Test that enabling tracing doesn't interfere with digest responses when using RandomPartitioner.
        The use of a threadlocal MessageDigest for generating both DigestResponse messages and for
        calculating tokens meant that the DigestResponse was always incorrect when both RP and tracing
        were enabled, leading to unnecessary data reads.

        @jira_ticket CASSANDRA-13964
        """

        session = self.prepare(random_partitioner=True)
        self.trace(session)

        node1 = self.cluster.nodelist()[0]

        rr_count = make_mbean('metrics',
                              type='ReadRepair',
                              name='RepairedBlocking')
        with JolokiaAgent(node1) as jmx:
            # the MBean may not have been initialized, in which case Jolokia agent will return
            # a HTTP 404 response. If we receive such, we know that no digest mismatch was reported
            # If we are able to read the MBean attribute, assert that the count is 0
            if jmx.has_mbean(rr_count):
                # expect 0 digest mismatches
                assert 0 == jmx.read_attribute(rr_count, 'Count')
            else:
                pass
    def test_tracing_does_not_interfere_with_digest_calculation(self):
        """
        Test that enabling tracing doesn't interfere with digest responses when using RandomPartitioner.
        The use of a threadlocal MessageDigest for generating both DigestResponse messages and for
        calculating tokens meant that the DigestResponse was always incorrect when both RP and tracing
        were enabled, leading to unnecessary data reads.

        @jira_ticket CASSANDRA-13964
        """
        cluster = self.cluster
        cluster.populate(3)
        cluster.set_configuration_options(
            values={
                'write_request_timeout_in_ms': 30000,
                'read_request_timeout_in_ms': 30000
            })
        cluster.set_partitioner("org.apache.cassandra.dht.RandomPartitioner")
        cluster.start(
            jvm_args=['-Dcassandra.wait_for_tracing_events_timeout_secs=15'])

        node1 = cluster.nodelist()[0]
        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 3)

        session.execute("""
            CREATE TABLE ks.users (
                userid uuid PRIMARY KEY,
                firstname text,
                lastname text,
                age int
            );
        """)

        insert = session.prepare(
            "INSERT INTO ks.users (userid, firstname, lastname, age) "
            "VALUES (?, 'Frodo', 'Baggins', 32)")
        insert.consistency_level = ConsistencyLevel.ALL

        select = session.prepare("SELECT firstname, lastname "
                                 "FROM ks.users WHERE userid = ?")
        select.consistency_level = ConsistencyLevel.ALL

        for _ in range(10):
            id = uuid4()
            session.execute(insert.bind((id, )), timeout=30)
            res = session.execute(select.bind((id, )), timeout=30, trace=True)
            assert 1 == len(res.response_future.get_query_trace_ids())

        rr_count = make_mbean('metrics',
                              type='ReadRepair',
                              name='RepairedBlocking')
        with JolokiaAgent(node1) as jmx:
            # the MBean may not have been initialized, in which case Jolokia agent will return
            # a HTTP 404 response. If we receive such, we know that no digest mismatch was reported
            # If we are able to read the MBean attribute, assert that the count is 0
            if jmx.has_mbean(rr_count):
                # expect 0 digest mismatches
                assert 0 == jmx.read_attribute(rr_count, 'Count')
            else:
                pass
    def blacklisted_directory_test(self):
        cluster = self.cluster
        cluster.set_datadir_count(3)
        cluster.populate(1)
        [node] = cluster.nodelist()
        remove_perf_disable_shared_mem(node)
        cluster.start(wait_for_binary_proto=True)

        session = self.patient_cql_connection(node)
        create_ks(session, 'ks', 1)
        create_c1c2_table(self, session)
        insert_c1c2(session, n=10000)
        node.flush()
        for k in xrange(0, 10000):
            query_c1c2(session, k)

        node.compact()
        mbean = make_mbean('db', type='BlacklistedDirectories')
        with JolokiaAgent(node) as jmx:
            jmx.execute_method(mbean, 'markUnwritable', [os.path.join(node.get_path(), 'data0')])

        for k in xrange(0, 10000):
            query_c1c2(session, k)

        node.nodetool('relocatesstables')

        for k in xrange(0, 10000):
            query_c1c2(session, k)
    def test_tracing_does_not_interfere_with_digest_calculation(self):
        """
        Test that enabling tracing doesn't interfere with digest responses when using RandomPartitioner.
        The use of a threadlocal MessageDigest for generating both DigestResponse messages and for
        calculating tokens meant that the DigestResponse was always incorrect when both RP and tracing
        were enabled, leading to unnecessary data reads.

        @jira_ticket CASSANDRA-13964
        """

        session = self.prepare(random_partitioner=True)
        self.trace(session)

        node1 = self.cluster.nodelist()[0]

        rr_count = make_mbean('metrics', type='ReadRepair', name='RepairedBlocking')
        with JolokiaAgent(node1) as jmx:
            # the MBean may not have been initialized, in which case Jolokia agent will return
            # a HTTP 404 response. If we receive such, we know that no digest mismatch was reported
            # If we are able to read the MBean attribute, assert that the count is 0
            if jmx.has_mbean(rr_count):
                # expect 0 digest mismatches
                assert 0 == jmx.read_attribute(rr_count, 'Count')
            else:
                pass
    def overlapping_data_folders(self):
        """
        @jira_ticket CASSANDRA-10902
        """
        self.cluster.populate(1)
        node1 = self.cluster.nodelist()[0]
        default_path = node1.data_directories()[0]
        node1.set_configuration_options({
            'saved_caches_directory':
            os.path.join(default_path, 'saved_caches')
        })
        remove_perf_disable_shared_mem(node1)
        self.cluster.start(wait_for_binary_proto=True)

        session = self.patient_exclusive_cql_connection(node1)
        session.execute(
            "CREATE KEYSPACE ks WITH REPLICATION = {'class': 'SimpleStrategy', 'replication_factor': 1}"
        )
        session.execute("CREATE TABLE ks.tab (key int PRIMARY KEY, a int)")
        session.execute("INSERT INTO ks.tab (key, a) VALUES (%s, %s)", [0, 0])
        session.execute("SELECT * FROM ks.tab WHERE key = %s", [0])

        cache_service = make_mbean('db', type="Caches")
        with JolokiaAgent(node1) as jmx:
            jmx.execute_method(cache_service, 'saveCaches')

        self.cluster.stop()
        self.cluster.start(wait_for_binary_proto=True)
Example #15
    def test_reloadlocalschema(self):
        """
        @jira_ticket CASSANDRA-13954

        Test that `nodetool reloadlocalschema` works as intended
        """
        cluster = self.cluster
        cluster.populate(1)
        node = cluster.nodelist()[0]
        remove_perf_disable_shared_mem(node)  # for jmx
        cluster.start()

        session = self.patient_cql_connection(node)

        query = "CREATE KEYSPACE IF NOT EXISTS test WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': 2};"
        session.execute(query)

        query = 'CREATE TABLE test.test (pk int, ck int, PRIMARY KEY (pk, ck));'
        session.execute(query)

        ss = make_mbean('db', type='StorageService')

        schema_version = ''

        # get initial schema version
        with JolokiaAgent(node) as jmx:
            schema_version = jmx.read_attribute(ss, 'SchemaVersion')

        # manually add a regular column 'val' to test.test
        query = """
            INSERT INTO system_schema.columns
                (keyspace_name, table_name, column_name, clustering_order,
                 column_name_bytes, kind, position, type)
            VALUES
                ('test', 'test', 'val', 'none',
                 0x76616c, 'regular', -1, 'int');"""
        session.execute(query)

        # validate that schema version wasn't automatically updated
        with JolokiaAgent(node) as jmx:
            self.assertEqual(schema_version,
                             jmx.read_attribute(ss, 'SchemaVersion'))

        # make sure the new column wasn't automagically picked up
        assert_invalid(
            session, 'INSERT INTO test.test (pk, ck, val) VALUES (0, 1, 2);')

        # force the node to reload schema from disk
        node.nodetool('reloadlocalschema')

        # validate that schema version changed
        with JolokiaAgent(node) as jmx:
            self.assertNotEqual(schema_version,
                                jmx.read_attribute(ss, 'SchemaVersion'))

        # try an insert with the new column again and validate it succeeds this time
        session.execute(
            'INSERT INTO test.test (pk, ck, val) VALUES (0, 1, 2);')
        assert_all(session, 'SELECT pk, ck, val FROM test.test;', [[0, 1, 2]])
Example #16
def table_metric(node, keyspace, table, name):
    version = node.get_cassandra_version()
    typeName = "ColumnFamily" if version < '3.0' else 'Table'
    with JolokiaAgent(node) as jmx:
        mbean = make_mbean('metrics', type=typeName,
                           name=name, keyspace=keyspace, scope=table)
        value = jmx.read_attribute(mbean, 'Value')

    return value
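A usage sketch for table_metric, assuming a cluster started as in the tests above and the keyspace1.standard1 table created by cassandra-stress:

# Hypothetical usage: after a flush, at least one live SSTable should exist.
node1 = cluster.nodelist()[0]
node1.flush()
assert table_metric(node1, 'keyspace1', 'standard1', 'LiveSSTableCount') >= 1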
Example #17
 def __init__(self, node, keyspace, table):
     assert isinstance(node, Node)
     self.jmx = JolokiaAgent(node)
     self.write_latency_mbean = make_mbean("metrics",
                                           type="Table",
                                           name="WriteLatency",
                                           keyspace=keyspace,
                                           scope=table)
     self.speculative_reads_mbean = make_mbean("metrics",
                                               type="Table",
                                               name="SpeculativeRetries",
                                               keyspace=keyspace,
                                               scope=table)
     self.transient_writes_mbean = make_mbean("metrics",
                                              type="Table",
                                              name="TransientWrites",
                                              keyspace=keyspace,
                                              scope=table)
 def __init__(self, scope, name):
     self.scope = scope
     self.name = name
     self.mbean = make_mbean('metrics',
                             type='ClientRequest',
                             scope=scope,
                             name=name)
     self.values = {}
     self.init()
def table_metric(node, keyspace, table, name):
    version = node.get_cassandra_version()
    typeName = "ColumnFamily" if version < '3.0' else 'Table'
    with JolokiaAgent(node) as jmx:
        mbean = make_mbean('metrics', type=typeName,
                           name=name, keyspace=keyspace, scope=table)
        value = jmx.read_attribute(mbean, 'Value')

    return value
Example #20
    def test_reloadlocalschema(self):
        """
        @jira_ticket CASSANDRA-13954

        Test that `nodetool reloadlocalschema` works as intended
        """
        cluster = self.cluster
        cluster.populate(1)
        node = cluster.nodelist()[0]
        remove_perf_disable_shared_mem(node)  # for jmx
        cluster.start()

        session = self.patient_cql_connection(node)

        query = "CREATE KEYSPACE IF NOT EXISTS test WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': 2};"
        session.execute(query)

        query = 'CREATE TABLE test.test (pk int, ck int, PRIMARY KEY (pk, ck));'
        session.execute(query)

        ss = make_mbean('db', type='StorageService')

        schema_version = ''

        # get initial schema version
        with JolokiaAgent(node) as jmx:
            schema_version = jmx.read_attribute(ss, 'SchemaVersion')

        # manually add a regular column 'val' to test.test
        query = """
            INSERT INTO system_schema.columns
                (keyspace_name, table_name, column_name, clustering_order,
                 column_name_bytes, kind, position, type)
            VALUES
                ('test', 'test', 'val', 'none',
                 0x76616c, 'regular', -1, 'int');"""
        session.execute(query)

        # validate that schema version wasn't automatically updated
        with JolokiaAgent(node) as jmx:
            self.assertEqual(schema_version, jmx.read_attribute(ss, 'SchemaVersion'))

        # make sure the new column wasn't automagically picked up
        assert_invalid(session, 'INSERT INTO test.test (pk, ck, val) VALUES (0, 1, 2);')

        # force the node to reload schema from disk
        node.nodetool('reloadlocalschema')

        # validate that schema version changed
        with JolokiaAgent(node) as jmx:
            self.assertNotEqual(schema_version, jmx.read_attribute(ss, 'SchemaVersion'))

        # try an insert with the new column again and validate it succeeds this time
        session.execute('INSERT INTO test.test (pk, ck, val) VALUES (0, 1, 2);')
        assert_all(session, 'SELECT pk, ck, val FROM test.test;', [[0, 1, 2]])
Example #21
    def test_closing_connections(self):
        """
        @jira_ticket CASSANDRA-6546

        Test CASSANDRA-6546 - do connections get closed when disabling / re-enabling thrift service?
        """
        cluster = self.cluster
        cluster.set_configuration_options(values={
            'start_rpc': 'true',
            'rpc_server_type': 'hsha',
            'rpc_max_threads': 20
        })

        cluster.populate(1)
        (node1, ) = cluster.nodelist()
        remove_perf_disable_shared_mem(node1)
        cluster.start()

        session = self.patient_cql_connection(node1)
        create_ks(session, 'test', 1)
        session.execute(
            "CREATE TABLE \"CF\" (key text PRIMARY KEY, val text) WITH COMPACT STORAGE;"
        )

        def make_connection():
            host, port = node1.network_interfaces['thrift']
            client = get_thrift_client(host, port)
            client.transport.open()
            return client

        pools = []
        connected_thrift_clients = make_mbean('metrics',
                                              type='Client',
                                              name='connectedThriftClients')
        for i in range(10):
            logger.debug("Creating connection pools..")
            for x in range(3):
                pools.append(make_connection())
            logger.debug(
                "Disabling/Enabling thrift iteration #{i}".format(i=i))
            node1.nodetool('disablethrift')
            node1.nodetool('enablethrift')
            logger.debug("Closing connections from the client side..")
            for client in pools:
                client.transport.close()

            with JolokiaAgent(node1) as jmx:
                num_clients = jmx.read_attribute(connected_thrift_clients,
                                                 "Value")
                assert int(
                    num_clients
                ) == 0, "There are still open Thrift connections after stopping service " + str(
                    num_clients)
    def query_and_check_repaired_mismatches(self, jmx, session, query,
                                            expect_read_repair=True,
                                            expect_unconfirmed_inconsistencies=False,
                                            expect_confirmed_inconsistencies=False):

        rr_count = make_mbean('metrics', type='ReadRepair', name='ReconcileRead')
        unconfirmed_count = make_mbean('metrics', type='Table,keyspace=ks', name='RepairedDataInconsistenciesUnconfirmed,scope=tbl')
        confirmed_count = make_mbean('metrics', type='Table,keyspace=ks', name='RepairedDataInconsistenciesConfirmed,scope=tbl')

        rr_before = self.get_attribute_count(jmx, rr_count)
        uc_before = self.get_attribute_count(jmx, unconfirmed_count)
        cc_before = self.get_attribute_count(jmx, confirmed_count)

        stmt = SimpleStatement(query)
        stmt.consistency_level = ConsistencyLevel.ALL
        session.execute(stmt)

        rr_after = self.get_attribute_count(jmx, rr_count)
        uc_after = self.get_attribute_count(jmx, unconfirmed_count)
        cc_after = self.get_attribute_count(jmx, confirmed_count)

        logger.debug("Read Repair Count: {before}, {after}".format(before=rr_before, after=rr_after))
        logger.debug("Unconfirmed Inconsistency Count: {before}, {after}".format(before=uc_before, after=uc_after))
        logger.debug("Confirmed Inconsistency Count: {before}, {after}".format(before=cc_before, after=cc_after))

        if expect_read_repair:
            assert rr_after > rr_before
        else:
            assert rr_after == rr_before

        if expect_unconfirmed_inconsistencies:
            assert uc_after > uc_before
        else:
            assert uc_after == uc_before

        if expect_confirmed_inconsistencies:
            assert cc_after > cc_before
        else:
            assert cc_after == cc_before
    def test_closing_connections(self):
        """
        @jira_ticket CASSANDRA-6546

        Test CASSANDRA-6546 - do connections get closed when disabling / re-enabling thrift service?
        """
        cluster = self.cluster
        cluster.set_configuration_options(values={
            'start_rpc': 'true',
            'rpc_server_type': 'hsha',
            'rpc_max_threads': 20
        })

        cluster.populate(1)
        (node1, ) = cluster.nodelist()
        remove_perf_disable_shared_mem(node1)
        cluster.start(wait_for_binary_proto=True)

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'test', 1)
        session.execute(
            "CREATE TABLE \"CF\" (key text PRIMARY KEY, val text) WITH COMPACT STORAGE;"
        )

        def make_connection():
            pool = pycassa.ConnectionPool('test', timeout=None)
            cf = pycassa.ColumnFamily(pool, 'CF')
            return pool

        pools = []
        connected_thrift_clients = make_mbean('metrics',
                                              type='Client',
                                              name='connectedThriftClients')
        for i in xrange(10):
            debug("Creating connection pools..")
            for x in xrange(3):
                pools.append(make_connection())
            debug("Disabling/Enabling thrift iteration #{i}".format(i=i))
            node1.nodetool('disablethrift')
            node1.nodetool('enablethrift')
            debug("Closing connections from the client side..")
            for pool in pools:
                pool.dispose()

            with JolokiaAgent(node1) as jmx:
                num_clients = jmx.read_attribute(connected_thrift_clients,
                                                 "Value")
                self.assertEqual(
                    int(num_clients), 0,
                    "There are still open Thrift connections after stopping service"
                )
    def _deprecated_repair_jmx(self, method, arguments):
        """
        * Launch a two node, two DC cluster
        * Create a keyspace and table
        * Insert some data
        * Call the deprecated repair JMX API based on the arguments passed into this method
        * Check the node log to see if the correct repair was performed based on the jmx args
        """
        cluster = self.cluster

        logger.debug("Starting cluster..")
        cluster.populate([1, 1])
        node1, node2 = cluster.nodelist()
        remove_perf_disable_shared_mem(node1)
        cluster.start()
        supports_pull_repair = cluster.version() >= LooseVersion('3.10')

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 2)
        create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=1000, consistency=ConsistencyLevel.ALL)

        # Run repair
        mbean = make_mbean('db', 'StorageService')
        with JolokiaAgent(node1) as jmx:
            # assert repair runs and returns valid cmd number
            assert jmx.execute_method(mbean, method, arguments) == 1
        # wait for log to start
        node1.watch_log_for("Starting repair command")
        # get repair parameters from the log
        line = node1.grep_log(("Starting repair command #1" + (" \([^\)]+\)" if cluster.version() >= LooseVersion("3.10") else "") +
                               ", repairing keyspace ks with repair options \(parallelism: (?P<parallelism>\w+), primary range: (?P<pr>\w+), "
                               "incremental: (?P<incremental>\w+), job threads: (?P<jobs>\d+), ColumnFamilies: (?P<cfs>.+), dataCenters: (?P<dc>.+), "
                               "hosts: (?P<hosts>.+), # of ranges: (?P<ranges>\d+)(, pull repair: (?P<pullrepair>true|false))?\)"))

        assert_length_equal(line, 1)
        line, m = line[0]

        if supports_pull_repair:
            assert m.group("pullrepair"), "false" == "Pull repair cannot be enabled through the deprecated API so the pull repair option should always be false."

        return {"parallelism": m.group("parallelism"),
                "primary_range": m.group("pr"),
                "incremental": m.group("incremental"),
                "job_threads": m.group("jobs"),
                "column_families": m.group("cfs"),
                "data_centers": m.group("dc"),
                "hosts": m.group("hosts"),
                "ranges": m.group("ranges")}
    def _deprecated_repair_jmx(self, method, arguments):
        """
        * Launch a two node, two DC cluster
        * Create a keyspace and table
        * Insert some data
        * Call the deprecated repair JMX API based on the arguments passed into this method
        * Check the node log to see if the correct repair was performed based on the jmx args
        """
        cluster = self.cluster

        logger.debug("Starting cluster..")
        cluster.populate([1, 1])
        node1, node2 = cluster.nodelist()
        cluster.start()
        supports_pull_repair = cluster.version() >= LooseVersion('3.10')

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 2)
        create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=1000, consistency=ConsistencyLevel.ALL)

        # Run repair
        mbean = make_mbean('db', 'StorageService')
        with JolokiaAgent(node1) as jmx:
            # assert repair runs and returns valid cmd number
            assert jmx.execute_method(mbean, method, arguments) == 1
        # wait for log to start
        node1.watch_log_for("Starting repair command")
        # get repair parameters from the log
        line = node1.grep_log((r"Starting repair command #1" + (r" \([^\)]+\)" if cluster.version() >= LooseVersion("3.10") else "") +
                               r", repairing keyspace ks with repair options \(parallelism: (?P<parallelism>\w+), primary range: (?P<pr>\w+), "
                               r"incremental: (?P<incremental>\w+), job threads: (?P<jobs>\d+), ColumnFamilies: (?P<cfs>.+), dataCenters: (?P<dc>.+), "
                               r"hosts: (?P<hosts>.+), # of ranges: (?P<ranges>\d+)(, pull repair: (?P<pullrepair>true|false))?(, ignore unreplicated keyspaces: (?P<ignoreunrepl>true|false))?\)"))

        assert_length_equal(line, 1)
        line, m = line[0]

        if supports_pull_repair:
            assert m.group("pullrepair"), "false" == "Pull repair cannot be enabled through the deprecated API so the pull repair option should always be false."

        return {"parallelism": m.group("parallelism"),
                "primary_range": m.group("pr"),
                "incremental": m.group("incremental"),
                "job_threads": m.group("jobs"),
                "column_families": m.group("cfs"),
                "data_centers": m.group("dc"),
                "hosts": m.group("hosts"),
                "ranges": m.group("ranges")}
    def _batchlog_replay_compatibility_test(self, coordinator_idx,
                                            current_nodes, previous_version,
                                            previous_nodes, protocol_version):
        session = self.prepare_mixed(coordinator_idx,
                                     current_nodes,
                                     previous_version,
                                     previous_nodes,
                                     protocol_version=protocol_version,
                                     install_byteman=True)

        coordinator = self.cluster.nodelist()[coordinator_idx]
        coordinator.byteman_submit(['./byteman/fail_after_batchlog_write.btm'])
        debug("Injected byteman scripts to enable batchlog replay {}".format(
            coordinator.name))

        query = """
            BEGIN BATCH
            INSERT INTO users (id, firstname, lastname) VALUES (0, 'Jack', 'Sparrow')
            INSERT INTO users (id, firstname, lastname) VALUES (1, 'Will', 'Turner')
            APPLY BATCH
        """
        session.execute(query)

        total_batches_replayed = 0
        blm = make_mbean('db', type='BatchlogManager')

        for n in self.cluster.nodelist():
            if n == coordinator:
                continue

            with JolokiaAgent(n) as jmx:
                debug('Forcing batchlog replay for {}'.format(n.name))
                jmx.execute_method(blm, 'forceBatchlogReplay')
                batches_replayed = jmx.read_attribute(blm,
                                                      'TotalBatchesReplayed')
                debug('{} batches replayed on node {}'.format(
                    batches_replayed, n.name))
                total_batches_replayed += batches_replayed

        assert_greater_equal(total_batches_replayed, 2)

        for node in self.cluster.nodelist():
            session = self.patient_exclusive_cql_connection(
                node, protocol_version=protocol_version)
            rows = sorted(
                session.execute(
                    'SELECT id, firstname, lastname FROM ks.users'))
            self.assertEqual([[0, 'Jack', 'Sparrow'], [1, 'Will', 'Turner']],
                             [list(rows[0]), list(rows[1])])
Example #27
    def test_bloom_filter_false_ratio(self):
        """
        Test for CASSANDRA-15834

        Verifies that BloomFilterFalseRatio takes true negatives into account. Without this fix, the following
        scenario (many reads for non-existing rows) would yield BloomFilterFalseRatio=1.0. With the fix we expect
        it to be less than the default bloom_filter_fp_chance.
        """
        cluster = self.cluster
        cluster.populate(1)
        node = cluster.nodelist()[0]
        cluster.start(wait_for_binary_proto=True)

        session = self.patient_exclusive_cql_connection(node)

        keyspace = 'bloom_ratio_test_ks'
        create_ks(session, keyspace, 1)
        create_c1c2_table(self, session)
        insert_c1c2(session, n=10)
        node.nodetool("flush " + keyspace)

        for key in range(10000):
            session.execute("SELECT * from cf where key = '{0}'".format(key))

        bloom_filter_false_ratios = [
            make_mbean('metrics', type='Table', name='RecentBloomFilterFalseRatio'),
            make_mbean('metrics', type='Table', keyspace=keyspace, scope='cf', name='BloomFilterFalseRatio'),
            make_mbean('metrics', type='Table', name='BloomFilterFalseRatio'),
            make_mbean('metrics', type='Table', keyspace=keyspace, scope='cf', name='RecentBloomFilterFalseRatio'),
        ]

        with JolokiaAgent(node) as jmx:
            for metric in bloom_filter_false_ratios:
                ratio = jmx.read_attribute(metric, "Value")
                # Bloom filter false positive ratio should not be greater than the default bloom_filter_fp_chance.
                assert ratio < 0.01
Example #28
    def test_set_get_batchlog_replay_throttle(self):
        """
        @jira_ticket CASSANDRA-13614

        Test that the batchlog replay throttle can be set and read through JMX
        """
        cluster = self.cluster
        cluster.populate(2)
        node = cluster.nodelist()[0]
        cluster.start()

        # Set and get throttle with JMX, ensuring that the rate change is logged
        with JolokiaAgent(node) as jmx:
            mbean = make_mbean('db', 'StorageService')
            jmx.write_attribute(mbean, 'BatchlogReplayThrottleInKB', 4096)
            assert len(node.grep_log('Updating batchlog replay throttle to 4096 KB/s, 2048 KB/s per endpoint',
                                     filename='debug.log')) > 0
            assert 4096 == jmx.read_attribute(mbean, 'BatchlogReplayThrottleInKB')
    def test_closing_connections(self):
        """
        @jira_ticket CASSANDRA-6546

        Test CASSANDRA-6546 - do connections get closed when disabling / re-enabling thrift service?
        """
        cluster = self.cluster
        cluster.set_configuration_options(values={
            'start_rpc': 'true',
            'rpc_server_type': 'hsha',
            'rpc_max_threads': 20
        })

        cluster.populate(1)
        (node1,) = cluster.nodelist()
        remove_perf_disable_shared_mem(node1)
        cluster.start(wait_for_binary_proto=True)

        session = self.patient_cql_connection(node1)
        create_ks(session, 'test', 1)
        session.execute("CREATE TABLE \"CF\" (key text PRIMARY KEY, val text) WITH COMPACT STORAGE;")

        def make_connection():
            host, port = node1.network_interfaces['thrift']
            client = get_thrift_client(host, port)
            client.transport.open()
            return client

        pools = []
        connected_thrift_clients = make_mbean('metrics', type='Client', name='connectedThriftClients')
        for i in range(10):
            logger.debug("Creating connection pools..")
            for x in range(3):
                pools.append(make_connection())
            logger.debug("Disabling/Enabling thrift iteration #{i}".format(i=i))
            node1.nodetool('disablethrift')
            node1.nodetool('enablethrift')
            logger.debug("Closing connections from the client side..")
            for client in pools:
                client.transport.close()

            with JolokiaAgent(node1) as jmx:
                num_clients = jmx.read_attribute(connected_thrift_clients, "Value")
                assert int(num_clients) == 0, "There are still open Thrift connections after stopping service " + str(num_clients)
Example #30
    def _batchlog_replay_compatibility_test(self, coordinator_idx, current_nodes, previous_version, previous_nodes, protocol_version):
        session = self.prepare_mixed(coordinator_idx, current_nodes, previous_version, previous_nodes,
                                     protocol_version=protocol_version, install_byteman=True)

        coordinator = self.cluster.nodelist()[coordinator_idx]
        coordinator.byteman_submit(['./byteman/fail_after_batchlog_write.btm'])
        logger.debug("Injected byteman scripts to enable batchlog replay {}".format(coordinator.name))

        query = """
            BEGIN BATCH
            INSERT INTO users (id, firstname, lastname) VALUES (0, 'Jack', 'Sparrow')
            INSERT INTO users (id, firstname, lastname) VALUES (1, 'Will', 'Turner')
            APPLY BATCH
        """
        session.execute(query)

        # batchlog replay skips over all entries that are younger than
        # 2 * write_request_timeout_in_ms ms: 1x timeout for all mutations to be written,
        # and another 1x timeout for batch remove mutation to be received.
        delay = 2 * coordinator.get_conf_option('write_request_timeout_in_ms') / 1000.0 + 1
        logger.debug('Sleeping for {}s for the batches to not be skipped'.format(delay))
        time.sleep(delay)

        total_batches_replayed = 0
        blm = make_mbean('db', type='BatchlogManager')

        for n in self.cluster.nodelist():
            if n == coordinator:
                continue

            with JolokiaAgent(n) as jmx:
                logger.debug('Forcing batchlog replay for {}'.format(n.name))
                jmx.execute_method(blm, 'forceBatchlogReplay')
                batches_replayed = jmx.read_attribute(blm, 'TotalBatchesReplayed')
                logger.debug('{} batches replayed on node {}'.format(batches_replayed, n.name))
                total_batches_replayed += batches_replayed

        assert total_batches_replayed >= 2

        for node in self.cluster.nodelist():
            session = self.patient_exclusive_cql_connection(node, protocol_version=protocol_version)
            rows = sorted(session.execute('SELECT id, firstname, lastname FROM ks.users'))
            assert [[0, 'Jack', 'Sparrow'], [1, 'Will', 'Turner']] == [list(rows[0]), list(rows[1])]
    def test_set_get_batchlog_replay_throttle(self):
        """
        @jira_ticket CASSANDRA-13614

        Test that the batchlog replay throttle can be set and read through JMX
        """
        cluster = self.cluster
        cluster.populate(2)
        node = cluster.nodelist()[0]
        remove_perf_disable_shared_mem(node)
        cluster.start()

        # Set and get throttle with JMX, ensuring that the rate change is logged
        with JolokiaAgent(node) as jmx:
            mbean = make_mbean('db', 'StorageService')
            jmx.write_attribute(mbean, 'BatchlogReplayThrottleInKB', 4096)
            assert len(node.grep_log('Updating batchlog replay throttle to 4096 KB/s, 2048 KB/s per endpoint',
                                     filename='debug.log')) > 0
            assert 4096 == jmx.read_attribute(mbean, 'BatchlogReplayThrottleInKB')
    def test_closing_connections(self):
        """
        @jira_ticket CASSANDRA-6546

        Test CASSANDRA-6546 - do connections get closed when disabling / re-enabling thrift service?
        """
        cluster = self.cluster
        cluster.set_configuration_options(values={
            'start_rpc': 'true',
            'rpc_server_type': 'hsha',
            'rpc_max_threads': 20
        })

        cluster.populate(1)
        (node1,) = cluster.nodelist()
        remove_perf_disable_shared_mem(node1)
        cluster.start(wait_for_binary_proto=True)

        session = self.patient_cql_connection(node1)
        create_ks(session, 'test', 1)
        session.execute("CREATE TABLE \"CF\" (key text PRIMARY KEY, val text) WITH COMPACT STORAGE;")

        def make_connection():
            pool = pycassa.ConnectionPool('test', timeout=None)
            cf = pycassa.ColumnFamily(pool, 'CF')
            return pool

        pools = []
        connected_thrift_clients = make_mbean('metrics', type='Client', name='connectedThriftClients')
        for i in xrange(10):
            debug("Creating connection pools..")
            for x in xrange(3):
                pools.append(make_connection())
            debug("Disabling/Enabling thrift iteration #{i}".format(i=i))
            node1.nodetool('disablethrift')
            node1.nodetool('enablethrift')
            debug("Closing connections from the client side..")
            for pool in pools:
                pool.dispose()

            with JolokiaAgent(node1) as jmx:
                num_clients = jmx.read_attribute(connected_thrift_clients, "Value")
                self.assertEqual(int(num_clients), 0, "There are still open Thrift connections after stopping service")
    def overlapping_data_folders(self):
        """
        @jira_ticket CASSANDRA-10902
        """
        self.cluster.populate(1)
        node1 = self.cluster.nodelist()[0]
        default_path = node1.data_directories()[0]
        node1.set_configuration_options({'saved_caches_directory': os.path.join(default_path, 'saved_caches')})
        remove_perf_disable_shared_mem(node1)
        self.cluster.start(wait_for_binary_proto=True)

        session = self.patient_exclusive_cql_connection(node1)
        session.execute("CREATE KEYSPACE ks WITH REPLICATION = {'class': 'SimpleStrategy', 'replication_factor': 1}")
        session.execute("CREATE TABLE ks.tab (key int PRIMARY KEY, a int)")
        session.execute("INSERT INTO ks.tab (key, a) VALUES (%s, %s)", [0, 0])
        session.execute("SELECT * FROM ks.tab WHERE key = %s", [0])

        cache_service = make_mbean('db', type="Caches")
        with JolokiaAgent(node1) as jmx:
            jmx.execute_method(cache_service, 'saveCaches')

        self.cluster.stop()
        self.cluster.start(wait_for_binary_proto=True)
Example #34
def preview_failure_count(node):
    mbean = make_mbean('metrics', type='Repair', name='PreviewFailures')
    with JolokiaAgent(node) as jmx:
        return jmx.read_attribute(mbean, 'Count')
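A usage sketch, assuming node1 is a started ccm Node and that a preview repair completes without any mismatches; the nodetool invocation is illustrative only:

# Hypothetical check: a clean preview repair should not raise the failure count.
failures_before = preview_failure_count(node1)
node1.nodetool('repair --preview')
assert preview_failure_count(node1) == failures_before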
 def __init__(self, node, keyspace, table):
     assert isinstance(node, Node)
     self.jmx = JolokiaAgent(node)
     self.write_latency_mbean = make_mbean("metrics", type="Table", name="WriteLatency", keyspace=keyspace, scope=table)
     self.speculative_reads_mbean = make_mbean("metrics", type="Table", name="SpeculativeRetries", keyspace=keyspace, scope=table)
     self.transient_writes_mbean = make_mbean("metrics", type="Table", name="TransientWrites", keyspace=keyspace, scope=table)
Example #36
    def test_multidatacenter_local_quorum(self):
        '''
        @jira_ticket CASSANDRA-13074

        If we only do local-datacenter reads in a multidatacenter DES setup,
        DES should take effect and route around a degraded node
        '''

        def no_cross_dc(scores, cross_dc_nodes):
            return all('/' + k.address() not in scores for k in cross_dc_nodes)

        def snitchable(scores_before, scores_after, needed_nodes):
            return all('/' + k.address() in scores_before and '/' + k.address()
                       in scores_after for k in needed_nodes)

        cluster = self.cluster
        cluster.populate([3, 3])
        coordinator_node, healthy_node, degraded_node, node4, node5, node6 = cluster.nodelist()
        # increase DES reset/update interval so we clear any cross-DC startup reads faster
        cluster.set_configuration_options(values={'dynamic_snitch_reset_interval_in_ms': 10000,
                                                  'dynamic_snitch_update_interval_in_ms': 50,
                                                  'phi_convict_threshold': 12})
        remove_perf_disable_shared_mem(coordinator_node)
        remove_perf_disable_shared_mem(degraded_node)
        # Delay reads on the degraded node by 50 milliseconds
        degraded_node.start(jvm_args=['-Dcassandra.test.read_iteration_delay_ms=50',
                                      '-Dcassandra.allow_unsafe_join=true'])
        cluster.start(wait_for_binary_proto=30, wait_other_notice=True)

        des = make_mbean('db', type='DynamicEndpointSnitch')
        read_stage = make_mbean('metrics', type='ThreadPools', path='request',
                                scope='ReadStage', name='CompletedTasks')
        session = self.patient_exclusive_cql_connection(coordinator_node)
        session.execute("CREATE KEYSPACE snitchtestks WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': 3, 'dc2': 3}")
        session.execute("CREATE TABLE snitchtestks.tbl1 (key int PRIMARY KEY) WITH speculative_retry = 'NONE' AND dclocal_read_repair_chance = 0.0")
        read_stmt = session.prepare("SELECT * FROM snitchtestks.tbl1 where key = ?")
        read_stmt.consistency_level = ConsistencyLevel.LOCAL_QUORUM
        insert_stmt = session.prepare("INSERT INTO snitchtestks.tbl1 (key) VALUES (?)")
        insert_stmt.consistency_level = ConsistencyLevel.ALL
        with JolokiaAgent(coordinator_node) as jmx:
            with JolokiaAgent(degraded_node) as bad_jmx:
                for x in range(0, 300):
                    session.execute(insert_stmt, [x])

                cleared = False
                # Wait for a snitch reset in case any earlier
                # startup process populated cross-DC read timings
                while not cleared:
                    scores = jmx.read_attribute(des, 'Scores')
                    cleared = ('/127.0.0.1' in scores and (len(scores) == 1)) or not scores

                snitchable_count = 0

                for x in range(0, 300):
                    degraded_reads_before = bad_jmx.read_attribute(read_stage, 'Value')
                    scores_before = jmx.read_attribute(des, 'Scores')
                    assert no_cross_dc(scores_before, [node4, node5, node6]), "Cross DC scores were present: " + str(scores_before)
                    future = session.execute_async(read_stmt, [x])
                    future.result()
                    scores_after = jmx.read_attribute(des, 'Scores')
                    assert no_cross_dc(scores_after, [node4, node5, node6]), "Cross DC scores were present: " + str(scores_after)

                    if snitchable(scores_before, scores_after,
                                  [coordinator_node, healthy_node, degraded_node]):
                        snitchable_count = snitchable_count + 1
                        # If the DES correctly routed the read around the degraded node,
                        # it shouldn't have another completed read request in metrics
                        assert (degraded_reads_before ==
                                     bad_jmx.read_attribute(read_stage, 'Value'))
                    else:
                        # sleep to give dynamic snitch time to recalculate scores
                        time.sleep(.1)

                # check that most reads were snitchable, with some
                # room allowed in case score recalculation is slow
                assert snitchable_count >= 250
    def test_compactionstats(self):
        """
        @jira_ticket CASSANDRA-10504
        @jira_ticket CASSANDRA-10427

        Test that the JMX MBean used by nodetool compactionstats
        properly updates the progress of a compaction
        """

        cluster = self.cluster
        cluster.populate(1)
        node = cluster.nodelist()[0]
        remove_perf_disable_shared_mem(node)
        cluster.start(wait_for_binary_proto=True)

        # Run a quick stress command to create the keyspace and table
        node.stress(['write', 'n=1', 'no-warmup'])
        # Disable compaction on the table
        node.nodetool('disableautocompaction keyspace1 standard1')
        node.nodetool('setcompactionthroughput 1')
        node.stress(['write', 'n=150K', 'no-warmup'])
        node.flush()
        # Run a major compaction. This will be the compaction whose
        # progress we track.
        node.nodetool_process('compact')
        # We need to sleep here to give compaction time to start
        # Why not do something smarter? Because if the bug regresses,
        # we can't rely on jmx to tell us that compaction started.
        time.sleep(5)

        compaction_manager = make_mbean('db', type='CompactionManager')
        with JolokiaAgent(node) as jmx:
            progress_string = jmx.read_attribute(compaction_manager, 'CompactionSummary')[0]

            # Pause in between reads
            # to allow compaction to move forward
            time.sleep(2)

            updated_progress_string = jmx.read_attribute(compaction_manager, 'CompactionSummary')[0]
            var = 'Compaction@{uuid}(keyspace1, standard1, {progress}/{total})bytes'
            progress = int(parse.search(var, progress_string).named['progress'])
            updated_progress = int(parse.search(var, updated_progress_string).named['progress'])

            logger.debug(progress_string)
            logger.debug(updated_progress_string)

            # We want to make sure that the progress is increasing,
            # and that values other than zero are displayed.
            assert updated_progress > progress
            assert progress >= 0
            assert updated_progress > 0

            # Block until the major compaction is complete
            # Otherwise nodetool will throw an exception
            # Give a timeout, in case compaction is broken
            # and never ends.
            start = time.time()
            max_query_timeout = 600
            logger.debug("Waiting for compaction to finish:")
            while (len(jmx.read_attribute(compaction_manager, 'CompactionSummary')) > 0) and (
                    time.time() - start < max_query_timeout):
                logger.debug(jmx.read_attribute(compaction_manager, 'CompactionSummary'))
                time.sleep(2)
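# A minimal sketch (not part of the original test) of the polling loop above
# factored into a reusable module-level helper; it assumes the same
# make_mbean/JolokiaAgent API used throughout these examples.
def wait_for_compactions(node, timeout=600, poll_interval=2):
    """Poll the CompactionManager MBean until no compactions remain or the timeout expires."""
    compaction_manager = make_mbean('db', type='CompactionManager')
    start = time.time()
    with JolokiaAgent(node) as jmx:
        while time.time() - start < timeout:
            if not jmx.read_attribute(compaction_manager, 'CompactionSummary'):
                return True
            time.sleep(poll_interval)
    return False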
    def _get_metric(self, metric):
        mbean = make_mbean("metrics", type="ReadRepair", name=metric)
        return self.jmx.read_attribute(mbean, "Count")
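    # Hypothetical usage sketch (not from the original fragment), assuming
    # self.jmx is an already started JolokiaAgent; the metric names below are
    # illustrative only:
    #     blocking_repairs = self._get_metric("RepairedBlocking")
    #     speculated_reads = self._get_metric("SpeculatedRead")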
def commitlog_size(node):
    commitlog_size_mbean = make_mbean('metrics',
                                      type='CommitLog',
                                      name='TotalCommitLogSize')
    with JolokiaAgent(node) as jmx:
        return jmx.read_attribute(commitlog_size_mbean, 'Value')
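# A hedged usage sketch for the helper above (the workload and assertion are
# illustrative, not from the original example): sample the metric before and
# after a burst of writes and check that it reports sane values.
def sample_commitlog_size(node):
    size_before = commitlog_size(node)
    node.stress(['write', 'n=10K', 'no-warmup'])
    size_after = commitlog_size(node)
    assert size_before >= 0 and size_after >= 0
    return size_before, size_after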
    def test_mv_metric_mbeans_release(self):
        """
        Test that the right mbeans are created and released when creating mvs
        """
        cluster = self.cluster
        cluster.set_configuration_options({'enable_materialized_views': 'true'})
        cluster.populate(1)
        node = cluster.nodelist()[0]
        remove_perf_disable_shared_mem(node)
        cluster.start(wait_for_binary_proto=True)

        node.run_cqlsh(cmds="""
            CREATE KEYSPACE mvtest WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor': 1 };
            CREATE TABLE mvtest.testtable (
                foo int,
                bar text,
                baz text,
                PRIMARY KEY (foo, bar)
            );

            CREATE MATERIALIZED VIEW mvtest.testmv AS
                SELECT foo, bar, baz FROM mvtest.testtable WHERE
                foo IS NOT NULL AND bar IS NOT NULL AND baz IS NOT NULL
            PRIMARY KEY (foo, bar, baz);""")

        table_memtable_size = make_mbean('metrics', type='Table', keyspace='mvtest', scope='testtable',
                                         name='AllMemtablesHeapSize')
        table_view_read_time = make_mbean('metrics', type='Table', keyspace='mvtest', scope='testtable',
                                          name='ViewReadTime')
        table_view_lock_time = make_mbean('metrics', type='Table', keyspace='mvtest', scope='testtable',
                                          name='ViewLockAcquireTime')
        mv_memtable_size = make_mbean('metrics', type='Table', keyspace='mvtest', scope='testmv',
                                      name='AllMemtablesHeapSize')
        mv_view_read_time = make_mbean('metrics', type='Table', keyspace='mvtest', scope='testmv',
                                       name='ViewReadTime')
        mv_view_lock_time = make_mbean('metrics', type='Table', keyspace='mvtest', scope='testmv',
                                       name='ViewLockAcquireTime')

        missing_metric_message = "Table metric {} should have been registered after creating table {} " \
                                 "but wasn't!"

        with JolokiaAgent(node) as jmx:
            assert jmx.read_attribute(table_memtable_size, "Value") is not None, \
                missing_metric_message.format("AllMemtablesHeapSize", "testtable")
            assert jmx.read_attribute(table_view_read_time, "Count") is not None, \
                missing_metric_message.format("ViewReadTime", "testtable")
            assert jmx.read_attribute(table_view_lock_time, "Count") is not None, \
                missing_metric_message.format("ViewLockAcquireTime", "testtable")
            assert jmx.read_attribute(mv_memtable_size, "Value") is not None, \
                missing_metric_message.format("AllMemtablesHeapSize", "testmv")
            with pytest.raises(Exception, match=".*InstanceNotFoundException.*"):
                jmx.read_attribute(mbean=mv_view_read_time, attribute="Count", verbose=False)
            with pytest.raises(Exception, match=".*InstanceNotFoundException.*"):
                jmx.read_attribute(mbean=mv_view_lock_time, attribute="Count", verbose=False)

        node.run_cqlsh(cmds="DROP KEYSPACE mvtest;")
        with JolokiaAgent(node) as jmx:
            with pytest.raises(Exception, match=".*InstanceNotFoundException.*"):
                jmx.read_attribute(mbean=table_memtable_size, attribute="Value", verbose=False)
            with pytest.raises(Exception, match=".*InstanceNotFoundException.*"):
                jmx.read_attribute(mbean=table_view_lock_time, attribute="Count", verbose=False)
            with pytest.raises(Exception, match=".*InstanceNotFoundException.*"):
                jmx.read_attribute(mbean=table_view_read_time, attribute="Count", verbose=False)
            with pytest.raises(Exception, match=".*InstanceNotFoundException.*"):
                jmx.read_attribute(mbean=mv_memtable_size, attribute="Value", verbose=False)
            with pytest.raises(Exception, match=".*InstanceNotFoundException.*"):
                jmx.read_attribute(mbean=mv_view_lock_time, attribute="Count", verbose=False)
            with pytest.raises(Exception, match=".*InstanceNotFoundException.*"):
                jmx.read_attribute(mbean=mv_view_read_time, attribute="Count", verbose=False)
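# A minimal sketch (not from the original test) wrapping the repeated
# pytest.raises pattern above in one reusable helper; it assumes the same
# JolokiaAgent.read_attribute signature used in these examples.
def assert_mbean_unregistered(jmx, mbean, attribute):
    with pytest.raises(Exception, match=".*InstanceNotFoundException.*"):
        jmx.read_attribute(mbean=mbean, attribute=attribute, verbose=False)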
Example #41
    def test_multidatacenter_local_quorum(self):
        """
        @jira_ticket CASSANDRA-13074

        If we only do local-datacenter reads in a multi-datacenter DES setup,
        DES should take effect and route around a degraded node.
        """
        def no_cross_dc(scores, cross_dc_nodes):
            return all('/' + k.address() not in scores for k in cross_dc_nodes)

        def snitchable(scores_before, scores_after, needed_nodes):
            return all('/' + k.address() in scores_before and '/' +
                       k.address() in scores_after for k in needed_nodes)

        cluster = self.cluster
        cluster.populate([3, 3])
        coordinator_node, healthy_node, degraded_node, node4, node5, node6 = cluster.nodelist()
        # increase DES reset/update interval so we clear any cross-DC startup reads faster
        cluster.set_configuration_options(
            values={
                'dynamic_snitch_reset_interval_in_ms': 10000,
                'dynamic_snitch_update_interval_in_ms': 50,
                'phi_convict_threshold': 12
            })
        remove_perf_disable_shared_mem(coordinator_node)
        remove_perf_disable_shared_mem(degraded_node)
        # Delay reads on the degraded node by 50 milliseconds
        degraded_node.start(jvm_args=[
            '-Dcassandra.test.read_iteration_delay_ms=50',
            '-Dcassandra.allow_unsafe_join=true'
        ])
        cluster.start(wait_for_binary_proto=30, wait_other_notice=True)

        des = make_mbean('db', type='DynamicEndpointSnitch')
        read_stage = make_mbean('metrics',
                                type='ThreadPools',
                                path='request',
                                scope='ReadStage',
                                name='CompletedTasks')
        session = self.patient_exclusive_cql_connection(coordinator_node)
        session.execute(
            "CREATE KEYSPACE snitchtestks WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': 3, 'dc2': 3}"
        )
        session.execute(
            "CREATE TABLE snitchtestks.tbl1 (key int PRIMARY KEY) WITH speculative_retry = 'NONE' AND dclocal_read_repair_chance = 0.0"
        )
        read_stmt = session.prepare(
            "SELECT * FROM snitchtestks.tbl1 where key = ?")
        read_stmt.consistency_level = ConsistencyLevel.LOCAL_QUORUM
        insert_stmt = session.prepare(
            "INSERT INTO snitchtestks.tbl1 (key) VALUES (?)")
        insert_stmt.consistency_level = ConsistencyLevel.ALL
        with JolokiaAgent(coordinator_node) as jmx:
            with JolokiaAgent(degraded_node) as bad_jmx:
                for x in range(0, 300):
                    session.execute(insert_stmt, [x])

                cleared = False
                # Wait for a snitch reset in case any earlier
                # startup process populated cross-DC read timings
                while not cleared:
                    scores = jmx.read_attribute(des, 'Scores')
                    cleared = ('/127.0.0.1' in scores and
                               (len(scores) == 1)) or not scores

                snitchable_count = 0

                for x in range(0, 300):
                    degraded_reads_before = bad_jmx.read_attribute(
                        read_stage, 'Value')
                    scores_before = jmx.read_attribute(des, 'Scores')
                    assert_true(
                        no_cross_dc(scores_before, [node4, node5, node6]),
                        "Cross DC scores were present: " + str(scores_before))
                    future = session.execute_async(read_stmt, [x])
                    future.result()
                    scores_after = jmx.read_attribute(des, 'Scores')
                    assert_true(
                        no_cross_dc(scores_after, [node4, node5, node6]),
                        "Cross DC scores were present: " + str(scores_after))

                    if snitchable(
                            scores_before, scores_after,
                        [coordinator_node, healthy_node, degraded_node]):
                        snitchable_count = snitchable_count + 1
                        # If the DES correctly routed the read around the degraded node,
                        # it shouldn't have another completed read request in metrics
                        assert_equal(
                            degraded_reads_before,
                            bad_jmx.read_attribute(read_stage, 'Value'))
                    else:
                        # sleep to give dynamic snitch time to recalculate scores
                        time.sleep(.1)

                # check that most reads were snitchable, with some
                # room allowed in case score recalculation is slow
                assert_greater_equal(snitchable_count, 250)
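                # Note: assert_true, assert_equal and assert_greater_equal are the
                # nose-style assertion helpers used by older versions of these tests;
                # the exact import (e.g. `from nose.tools import assert_true,
                # assert_equal, assert_greater_equal`) is an assumption and not part
                # of the original snippet.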
    def __init__(self, node):
        assert isinstance(node, Node)
        self.node = node
        self.jmx = JolokiaAgent(node)
        self.mbean = make_mbean("db", type="StorageProxy")
    def compact_sstable(self, node, sstable):
        mbean = make_mbean('db', type='CompactionManager')
        with JolokiaAgent(node) as jmx:
            jmx.execute_method(mbean, 'forceUserDefinedCompaction', [sstable])
def commitlog_size(node):
    commitlog_size_mbean = make_mbean('metrics', type='CommitLog', name='TotalCommitLogSize')
    with JolokiaAgent(node) as jmx:
        return jmx.read_attribute(commitlog_size_mbean, 'Value')
    def get_table_metric(self, keyspace, table, metric, attr="Count"):
        mbean = make_mbean("metrics", keyspace=keyspace, scope=table, type="Table", name=metric)
        return self.jmx.read_attribute(mbean, attr)
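    # Hypothetical usage sketch (not from the original fragment); the metric and
    # attribute names are illustrative and self.jmx is assumed to be started:
    #     sstables = self.get_table_metric("keyspace1", "standard1",
    #                                      "LiveSSTableCount", attr="Value")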
Example #46
    def test_compactionstats(self):
        """
        @jira_ticket CASSANDRA-10504
        @jira_ticket CASSANDRA-10427

        Test that the JMX MBean used by nodetool compactionstats
        properly updates the progress of a compaction
        """

        cluster = self.cluster
        cluster.populate(1)
        node = cluster.nodelist()[0]
        remove_perf_disable_shared_mem(node)
        cluster.start(wait_for_binary_proto=True)

        # Run a quick stress command to create the keyspace and table
        node.stress(['write', 'n=1', 'no-warmup'])
        # Disable compaction on the table
        node.nodetool('disableautocompaction keyspace1 standard1')
        node.nodetool('setcompactionthroughput 1')
        node.stress(['write', 'n=150K', 'no-warmup'])
        node.flush()
        # Run a major compaction. This will be the compaction whose
        # progress we track.
        node.nodetool_process('compact')
        # We need to sleep here to give compaction time to start
        # Why not do something smarter? Because if the bug regresses,
        # we can't rely on jmx to tell us that compaction started.
        time.sleep(5)

        compaction_manager = make_mbean('db', type='CompactionManager')
        with JolokiaAgent(node) as jmx:
            progress_string = jmx.read_attribute(compaction_manager,
                                                 'CompactionSummary')[0]

            # Pause in between reads
            # to allow compaction to move forward
            time.sleep(2)

            updated_progress_string = jmx.read_attribute(
                compaction_manager, 'CompactionSummary')[0]
            var = 'Compaction@{uuid}(keyspace1, standard1, {progress}/{total})bytes'
            progress = int(
                parse.search(var, progress_string).named['progress'])
            updated_progress = int(
                parse.search(var, updated_progress_string).named['progress'])

            debug(progress_string)
            debug(updated_progress_string)

            # We want to make sure that the progress is increasing,
            # and that values other than zero are displayed.
            self.assertGreater(updated_progress, progress)
            self.assertGreaterEqual(progress, 0)
            self.assertGreater(updated_progress, 0)

            # Block until the major compaction is complete
            # Otherwise nodetool will throw an exception
            # Give a timeout, in case compaction is broken
            # and never ends.
            start = time.time()
            max_query_timeout = 600
            debug("Waiting for compaction to finish:")
            while (len(
                    jmx.read_attribute(compaction_manager,
                                       'CompactionSummary')) >
                   0) and (time.time() - start < max_query_timeout):
                debug(
                    jmx.read_attribute(compaction_manager,
                                       'CompactionSummary'))
                time.sleep(2)
Example #47
    def mv_metric_mbeans_release_test(self):
        """
        Test that the right mbeans are created and released when creating mvs
        """
        cluster = self.cluster
        cluster.populate(1)
        node = cluster.nodelist()[0]
        remove_perf_disable_shared_mem(node)
        cluster.start(wait_for_binary_proto=True)

        node.run_cqlsh(cmds="""
            CREATE KEYSPACE mvtest WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor': 1 };
            CREATE TABLE mvtest.testtable (
                foo int,
                bar text,
                baz text,
                PRIMARY KEY (foo, bar)
            );

            CREATE MATERIALIZED VIEW mvtest.testmv AS
                SELECT foo, bar, baz FROM mvtest.testtable WHERE
                foo IS NOT NULL AND bar IS NOT NULL AND baz IS NOT NULL
            PRIMARY KEY (foo, bar, baz);""")

        table_memtable_size = make_mbean('metrics',
                                         type='Table',
                                         keyspace='mvtest',
                                         scope='testtable',
                                         name='AllMemtablesHeapSize')
        table_view_read_time = make_mbean('metrics',
                                          type='Table',
                                          keyspace='mvtest',
                                          scope='testtable',
                                          name='ViewReadTime')
        table_view_lock_time = make_mbean('metrics',
                                          type='Table',
                                          keyspace='mvtest',
                                          scope='testtable',
                                          name='ViewLockAcquireTime')
        mv_memtable_size = make_mbean('metrics',
                                      type='Table',
                                      keyspace='mvtest',
                                      scope='testmv',
                                      name='AllMemtablesHeapSize')
        mv_view_read_time = make_mbean('metrics',
                                       type='Table',
                                       keyspace='mvtest',
                                       scope='testmv',
                                       name='ViewReadTime')
        mv_view_lock_time = make_mbean('metrics',
                                       type='Table',
                                       keyspace='mvtest',
                                       scope='testmv',
                                       name='ViewLockAcquireTime')

        missing_metric_message = "Table metric {} should have been registered after creating table {} " \
                                 "but wasn't!"

        with JolokiaAgent(node) as jmx:
            self.assertIsNotNone(
                jmx.read_attribute(table_memtable_size, "Value"),
                missing_metric_message.format("AllMemtablesHeapSize",
                                              "testtable"))
            self.assertIsNotNone(
                jmx.read_attribute(table_view_read_time, "Count"),
                missing_metric_message.format("ViewReadTime", "testtable"))
            self.assertIsNotNone(
                jmx.read_attribute(table_view_lock_time, "Count"),
                missing_metric_message.format("ViewLockAcquireTime",
                                              "testtable"))
            self.assertIsNotNone(
                jmx.read_attribute(mv_memtable_size, "Value"),
                missing_metric_message.format("AllMemtablesHeapSize",
                                              "testmv"))
            self.assertRaisesRegexp(Exception,
                                    ".*InstanceNotFoundException.*",
                                    jmx.read_attribute,
                                    mbean=mv_view_read_time,
                                    attribute="Count",
                                    verbose=False)
            self.assertRaisesRegexp(Exception,
                                    ".*InstanceNotFoundException.*",
                                    jmx.read_attribute,
                                    mbean=mv_view_lock_time,
                                    attribute="Count",
                                    verbose=False)

        node.run_cqlsh(cmds="DROP KEYSPACE mvtest;")
        with JolokiaAgent(node) as jmx:
            self.assertRaisesRegexp(Exception,
                                    ".*InstanceNotFoundException.*",
                                    jmx.read_attribute,
                                    mbean=table_memtable_size,
                                    attribute="Value",
                                    verbose=False)
            self.assertRaisesRegexp(Exception,
                                    ".*InstanceNotFoundException.*",
                                    jmx.read_attribute,
                                    mbean=table_view_lock_time,
                                    attribute="Count",
                                    verbose=False)
            self.assertRaisesRegexp(Exception,
                                    ".*InstanceNotFoundException.*",
                                    jmx.read_attribute,
                                    mbean=table_view_read_time,
                                    attribute="Count",
                                    verbose=False)
            self.assertRaisesRegexp(Exception,
                                    ".*InstanceNotFoundException.*",
                                    jmx.read_attribute,
                                    mbean=mv_memtable_size,
                                    attribute="Value",
                                    verbose=False)
            self.assertRaisesRegexp(Exception,
                                    ".*InstanceNotFoundException.*",
                                    jmx.read_attribute,
                                    mbean=mv_view_lock_time,
                                    attribute="Count",
                                    verbose=False)
            self.assertRaisesRegexp(Exception,
                                    ".*InstanceNotFoundException.*",
                                    jmx.read_attribute,
                                    mbean=mv_view_read_time,
                                    attribute="Count",
                                    verbose=False)
Example #48
    def __init__(self, node):
        assert isinstance(node, Node)
        self.node = node
        self.jmx = JolokiaAgent(node)
        self.mbean = make_mbean("db", type="StorageProxy")
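    # A hedged sketch (not part of the original fragment) of how this wrapper
    # might read a StorageProxy attribute; the attribute name below is an
    # assumption, and self.jmx is assumed to have been started elsewhere.
    def get_total_hints(self):
        return self.jmx.read_attribute(self.mbean, 'TotalHints')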
Example #49
    def test_mv_metric_mbeans_release(self):
        """
        Test that the right mbeans are created and released when creating mvs
        """
        cluster = self.cluster
        cluster.set_configuration_options({'enable_materialized_views': 'true'})
        cluster.populate(1)
        node = cluster.nodelist()[0]
        cluster.start()

        node.run_cqlsh(cmds="""
            CREATE KEYSPACE mvtest WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor': 1 };
            CREATE TABLE mvtest.testtable (
                foo int,
                bar text,
                baz text,
                PRIMARY KEY (foo, bar)
            );

            CREATE MATERIALIZED VIEW mvtest.testmv AS
                SELECT foo, bar, baz FROM mvtest.testtable WHERE
                foo IS NOT NULL AND bar IS NOT NULL AND baz IS NOT NULL
            PRIMARY KEY (foo, bar, baz);""")

        table_memtable_size = make_mbean('metrics', type='Table', keyspace='mvtest', scope='testtable',
                                         name='AllMemtablesHeapSize')
        table_view_read_time = make_mbean('metrics', type='Table', keyspace='mvtest', scope='testtable',
                                          name='ViewReadTime')
        table_view_lock_time = make_mbean('metrics', type='Table', keyspace='mvtest', scope='testtable',
                                          name='ViewLockAcquireTime')
        mv_memtable_size = make_mbean('metrics', type='Table', keyspace='mvtest', scope='testmv',
                                      name='AllMemtablesHeapSize')
        mv_view_read_time = make_mbean('metrics', type='Table', keyspace='mvtest', scope='testmv',
                                       name='ViewReadTime')
        mv_view_lock_time = make_mbean('metrics', type='Table', keyspace='mvtest', scope='testmv',
                                       name='ViewLockAcquireTime')

        missing_metric_message = "Table metric {} should have been registered after creating table {} " \
                                 "but wasn't!"

        with JolokiaAgent(node) as jmx:
            assert jmx.read_attribute(table_memtable_size, "Value") is not None, \
                missing_metric_message.format("AllMemtablesHeapSize", "testtable")
            assert jmx.read_attribute(table_view_read_time, "Count") is not None, \
                missing_metric_message.format("ViewReadTime", "testtable")
            assert jmx.read_attribute(table_view_lock_time, "Count") is not None, \
                missing_metric_message.format("ViewLockAcquireTime", "testtable")
            assert jmx.read_attribute(mv_memtable_size, "Value") is not None, \
                missing_metric_message.format("AllMemtablesHeapSize", "testmv")
            with pytest.raises(Exception, match=".*InstanceNotFoundException.*"):
                jmx.read_attribute(mbean=mv_view_read_time, attribute="Count", verbose=False)
            with pytest.raises(Exception, match=".*InstanceNotFoundException.*"):
                jmx.read_attribute(mbean=mv_view_lock_time, attribute="Count", verbose=False)

        node.run_cqlsh(cmds="DROP KEYSPACE mvtest;")
        with JolokiaAgent(node) as jmx:
            with pytest.raises(Exception, match=".*InstanceNotFoundException.*"):
                jmx.read_attribute(mbean=table_memtable_size, attribute="Value", verbose=False)
            with pytest.raises(Exception, match=".*InstanceNotFoundException.*"):
                jmx.read_attribute(mbean=table_view_lock_time, attribute="Count", verbose=False)
            with pytest.raises(Exception, match=".*InstanceNotFoundException.*"):
                jmx.read_attribute(mbean=table_view_read_time, attribute="Count", verbose=False)
            with pytest.raises(Exception, match=".*InstanceNotFoundException.*"):
                jmx.read_attribute(mbean=mv_memtable_size, attribute="Value", verbose=False)
            with pytest.raises(Exception, match=".*InstanceNotFoundException.*"):
                jmx.read_attribute(mbean=mv_view_lock_time, attribute="Count", verbose=False)
            with pytest.raises(Exception, match=".*InstanceNotFoundException.*"):
                jmx.read_attribute(mbean=mv_view_read_time, attribute="Count", verbose=False)
    def _get_metric(self, metric):
        mbean = make_mbean("metrics", type="ReadRepair", name=metric)
        return self.jmx.read_attribute(mbean, "Count")
    def compact_sstable(self, node, sstable):
        mbean = make_mbean('db', type='CompactionManager')
        with JolokiaAgent(node) as jmx:
            jmx.execute_method(mbean, 'forceUserDefinedCompaction', [sstable])
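    # Hedged usage sketch for the helper above (not from the original fragment):
    # force a user-defined compaction on the first SSTable of stress's standard1
    # table. ccm's Node.get_sstables(keyspace, table) is assumed to be available,
    # as elsewhere in these dtests.
    #     sstables = node.get_sstables('keyspace1', 'standard1')
    #     self.compact_sstable(node, sstables[0])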