def query_users(self, session):
    ret = list(session.execute("SELECT * FROM users"))
    ret.extend(list(session.execute("SELECT * FROM users WHERE state='TX'")))
    ret.extend(list(session.execute("SELECT * FROM users WHERE gender='f'")))
    ret.extend(list(session.execute("SELECT * FROM users WHERE birth_year=1978")))
    assert_length_equal(ret, 8)
    return ret
Example #2
    def simple_increment_test(self):
        """ Simple incrementation test (Created for #3465, that wasn't a bug) """
        cluster = self.cluster

        cluster.populate(3).start()
        nodes = cluster.nodelist()

        session = self.patient_cql_connection(nodes[0])
        self.create_ks(session, 'ks', 3)
        self.create_cf(session, 'cf', validation="CounterColumnType", columns={'c': 'counter'})

        sessions = [self.patient_cql_connection(node, 'ks') for node in nodes]
        nb_increment = 50
        nb_counter = 10

        for i in xrange(0, nb_increment):
            for c in xrange(0, nb_counter):
                session = sessions[(i + c) % len(nodes)]
                query = SimpleStatement("UPDATE cf SET c = c + 1 WHERE key = 'counter%i'" % c, consistency_level=ConsistencyLevel.QUORUM)
                session.execute(query)

            session = sessions[i % len(nodes)]
            keys = ",".join(["'counter%i'" % c for c in xrange(0, nb_counter)])
            query = SimpleStatement("SELECT key, c FROM cf WHERE key IN (%s)" % keys, consistency_level=ConsistencyLevel.QUORUM)
            res = list(session.execute(query))

            assert_length_equal(res, nb_counter)
            for c in xrange(0, nb_counter):
                self.assertEqual(len(res[c]), 2, "Expecting key and counter for counter {}, got {}".format(c, str(res[c])))
                self.assertEqual(res[c][1], i + 1, "Expecting counter {} = {}, got {}".format(c, i + 1, res[c][1]))
    def test_upgrade_with_range_tombstone_eoc_0(self):
        """
        Check sstable upgrading when the sstable contains a range tombstone with EOC=0.

        @jira_ticket CASSANDRA-12423
        """
        session = self._setup_cluster(cluster_options={'start_rpc': 'true'})

        session.execute("CREATE TABLE rt (id INT, c1 TEXT, c2 TEXT, v INT, PRIMARY KEY (id, c1, c2)) "
                        "with compact storage and compression = {'sstable_compression': ''};")

        range_delete = {
            i32(1): {
                'rt': [Mutation(deletion=Deletion(2470761440040513,
                                                  predicate=SlicePredicate(slice_range=SliceRange(
                                                      start=composite('a', eoc='\x00'),
                                                      finish=composite('asd', eoc='\x00')))))]
            }
        }

        client = get_thrift_client()
        client.transport.open()
        client.set_keyspace('ks')
        client.batch_mutate(range_delete, ConsistencyLevel.ONE)
        client.transport.close()

        session.execute("INSERT INTO rt (id, c1, c2, v) VALUES (1, 'asd', '', 0) USING TIMESTAMP 1470761451368658")
        session.execute("INSERT INTO rt (id, c1, c2, v) VALUES (1, 'asd', 'asd', 0) USING TIMESTAMP 1470761449416613")

        session = self._do_upgrade()

        ret = list(session.execute('SELECT * FROM rt'))
        assert_length_equal(ret, 2)
Example #4
    def test_upgrade_with_range_tombstone_eoc_0(self):
        """
        Check sstable upgrading when the sstable contains a range tombstone with EOC=0.

        @jira_ticket CASSANDRA-12423
        """
        session = self._setup_cluster(cluster_options={'start_rpc': 'true'})

        session.execute("CREATE TABLE rt (id INT, c1 TEXT, c2 TEXT, v INT, PRIMARY KEY (id, c1, c2)) "
                        "with compact storage and compression = {'sstable_compression': ''};")

        range_delete = {
            i32(1): {
                'rt': [Mutation(deletion=Deletion(2470761440040513,
                                                  predicate=SlicePredicate(slice_range=SliceRange(
                                                      start=composite('a', eoc=b'\x00'),
                                                      finish=composite('asd', eoc=b'\x00')))))]
            }
        }

        client = get_thrift_client()
        client.transport.open()
        client.set_keyspace('ks')
        client.batch_mutate(range_delete, ConsistencyLevel.ONE)
        client.transport.close()

        session.execute("INSERT INTO rt (id, c1, c2, v) VALUES (1, 'asd', '', 0) USING TIMESTAMP 1470761451368658")
        session.execute("INSERT INTO rt (id, c1, c2, v) VALUES (1, 'asd', 'asd', 0) USING TIMESTAMP 1470761449416613")

        session = self._do_upgrade()

        ret = list(session.execute('SELECT * FROM rt'))
        assert_length_equal(ret, 2)
    def large_compaction_warning_test(self):
        """
        @jira_ticket CASSANDRA-9643
        Check that we log a warning when the partition size is bigger than compaction_large_partition_warning_threshold_mb
        """
        cluster = self.cluster
        cluster.set_configuration_options({'compaction_large_partition_warning_threshold_mb': 1})
        cluster.populate(1).start(wait_for_binary_proto=True)
        [node] = cluster.nodelist()

        session = self.patient_cql_connection(node)
        self.create_ks(session, 'ks', 1)

        mark = node.mark_log()
        strlen = (1024 * 1024) // 100  # integer division so strlen is an int
        session.execute("CREATE TABLE large(userid text PRIMARY KEY, properties map<int, text>) with compression = {}")
        for i in range(200):  # ensures partition size larger than compaction_large_partition_warning_threshold_mb
            session.execute("UPDATE ks.large SET properties[%i] = '%s' WHERE userid = 'user'" % (i, get_random_word(strlen)))

        ret = list(session.execute("SELECT properties from ks.large where userid = 'user'"))
        assert_length_equal(ret, 1)
        self.assertEqual(200, len(ret[0][0].keys()))

        node.flush()

        node.nodetool('compact ks large')
        verb = 'Writing' if self.cluster.version() > '2.2' else 'Compacting'
        sizematcher = r'\d+ bytes' if LooseVersion(self.cluster.version()) < LooseVersion('3.6') else r'\d+\.\d{3}(K|M|G)iB'
        node.watch_log_for(r'{} large partition ks/large:user \({}\)'.format(verb, sizematcher), from_mark=mark, timeout=180)

        ret = list(session.execute("SELECT properties from ks.large where userid = 'user'"))
        assert_length_equal(ret, 1)
        self.assertEqual(200, len(ret[0][0].keys()))
    def simple_increment_test(self):
        """ Simple incrementation test (Created for #3465, that wasn't a bug) """
        cluster = self.cluster

        cluster.populate(3).start()
        nodes = cluster.nodelist()

        session = self.patient_cql_connection(nodes[0])
        create_ks(session, 'ks', 3)
        create_cf(session, 'cf', validation="CounterColumnType", columns={'c': 'counter'})

        sessions = [self.patient_cql_connection(node, 'ks') for node in nodes]
        nb_increment = 50
        nb_counter = 10

        for i in xrange(0, nb_increment):
            for c in xrange(0, nb_counter):
                session = sessions[(i + c) % len(nodes)]
                query = SimpleStatement("UPDATE cf SET c = c + 1 WHERE key = 'counter%i'" % c, consistency_level=ConsistencyLevel.QUORUM)
                session.execute(query)

            session = sessions[i % len(nodes)]
            keys = ",".join(["'counter%i'" % c for c in xrange(0, nb_counter)])
            query = SimpleStatement("SELECT key, c FROM cf WHERE key IN (%s)" % keys, consistency_level=ConsistencyLevel.QUORUM)
            res = list(session.execute(query))

            assert_length_equal(res, nb_counter)
            for c in xrange(0, nb_counter):
                self.assertEqual(len(res[c]), 2, "Expecting key and counter for counter {}, got {}".format(c, str(res[c])))
                self.assertEqual(res[c][1], i + 1, "Expecting counter {} = {}, got {}".format(c, i + 1, res[c][1]))
    def large_compaction_warning_test(self):
        """
        @jira_ticket CASSANDRA-9643
        Check that we log a warning when the partition size is bigger than compaction_large_partition_warning_threshold_mb
        """
        cluster = self.cluster
        cluster.set_configuration_options({'compaction_large_partition_warning_threshold_mb': 1})
        cluster.populate(1).start(wait_for_binary_proto=True)
        [node] = cluster.nodelist()

        session = self.patient_cql_connection(node)
        create_ks(session, 'ks', 1)

        mark = node.mark_log()
        strlen = (1024 * 1024) // 100  # integer division so strlen is an int
        session.execute("CREATE TABLE large(userid text PRIMARY KEY, properties map<int, text>) with compression = {}")
        for i in range(200):  # ensures partition size larger than compaction_large_partition_warning_threshold_mb
            session.execute("UPDATE ks.large SET properties[%i] = '%s' WHERE userid = 'user'" % (i, get_random_word(strlen)))

        ret = list(session.execute("SELECT properties from ks.large where userid = 'user'"))
        assert_length_equal(ret, 1)
        self.assertEqual(200, len(ret[0][0].keys()))

        node.flush()

        node.nodetool('compact ks large')
        verb = 'Writing' if self.cluster.version() > '2.2' else 'Compacting'
        sizematcher = r'\d+ bytes' if self.cluster.version() < LooseVersion('3.6') else r'\d+\.\d{3}(K|M|G)iB'
        node.watch_log_for(r'{} large partition ks/large:user \({}'.format(verb, sizematcher), from_mark=mark, timeout=180)

        ret = list(session.execute("SELECT properties from ks.large where userid = 'user'"))
        assert_length_equal(ret, 1)
        self.assertEqual(200, len(ret[0][0].keys()))
def launch_nodetool_cmd(self, cmd):
    """
    Launch a nodetool command and check the result is empty (no error)
    """
    node1 = self.cluster.nodelist()[0]
    response = node1.nodetool(cmd).stdout
    if not common.is_win():  # nodetool always prints out on windows
        assert_length_equal(response, 0)  # nodetool does not print anything unless there is an error
    def test_assertions(self):
        # assert_exception_test
        mock_session = Mock(
            **
            {'execute.side_effect': AlreadyExists("Dummy exception message.")})
        assert_exception(mock_session, "DUMMY QUERY", expected=AlreadyExists)

        # assert_unavailable_test
        mock_session = Mock(**{
            'execute.side_effect':
            Unavailable("Dummy Unavailabile message.")
        })
        assert_unavailable(mock_session.execute)

        # assert_invalid_test
        mock_session = Mock(**{
            'execute.side_effect':
            InvalidRequest("Dummy InvalidRequest message.")
        })
        assert_invalid(mock_session, "DUMMY QUERY")

        # assert_unauthorized_test
        mock_session = Mock(**{
            'execute.side_effect':
            Unauthorized("Dummy Unauthorized message.")
        })
        assert_unauthorized(mock_session, "DUMMY QUERY", None)

        # assert_one_test
        mock_session = Mock()
        mock_session.execute = Mock(return_value=[[1, 1]])
        assert_one(mock_session, "SELECT * FROM test", [1, 1])

        # assert_none_test
        mock_session = Mock()
        mock_session.execute = Mock(return_value=[])
        assert_none(mock_session, "SELECT * FROM test")

        # assert_all_test
        mock_session = Mock()
        mock_session.execute = Mock(return_value=[[i, i]
                                                  for i in range(0, 10)])
        assert_all(mock_session,
                   "SELECT k, v FROM test", [[i, i] for i in range(0, 10)],
                   ignore_order=True)

        # assert_almost_equal_test
        assert_almost_equal(1, 1.1, 1.2, 1.9, error=1.0)

        # assert_row_count_test
        mock_session = Mock()
        mock_session.execute = Mock(return_value=[[1]])
        assert_row_count(mock_session, 'test', 1)

        # assert_length_equal_test
        check = [1, 2, 3, 4]
        assert_length_equal(check, 4)
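
For reference, every example on this page funnels through the same `assert_length_equal` helper from the dtest assertion utilities. The sketch below shows roughly what such a helper looks like; it is a minimal illustration assuming the conventional "compare len() against the expected value" behavior, and the real cassandra-dtest helper may format its failure message differently.

def assert_length_equal(object_with_length, expected_length):
    """
    Assert that len(object_with_length) == expected_length.
    Minimal sketch of the helper used throughout these examples; the failure
    message wording is illustrative, not the canonical dtest implementation.
    """
    assert len(object_with_length) == expected_length, \
        "Expected {} to have length {}, but instead it has length {}".format(
            object_with_length, expected_length, len(object_with_length))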
Example #10
    def test_13747(self):
        """
        @jira_ticket CASSANDRA-13747
        """
        cluster = self.cluster

        # disable hinted handoff and set batch commit log so this doesn't interfere with the test
        cluster.set_configuration_options(
            values={'hinted_handoff_enabled': False})
        cluster.set_batch_commitlog(enabled=True)

        cluster.populate(2).start(wait_other_notice=True)
        node1, node2 = cluster.nodelist()

        session = self.patient_cql_connection(node1)

        query = "CREATE KEYSPACE IF NOT EXISTS test WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': 2};"
        session.execute(query)

        query = "CREATE TABLE IF NOT EXISTS test.test (id int PRIMARY KEY);"
        session.execute(query)

        #
        # populate the table with 10 rows:
        #

        # -7509452495886106294 |  5
        # -4069959284402364209 |  1 x
        # -3799847372828181882 |  8
        # -3485513579396041028 |  0 x
        # -3248873570005575792 |  2
        # -2729420104000364805 |  4 x
        #  1634052884888577606 |  7
        #  2705480034054113608 |  6 x
        #  3728482343045213994 |  9
        #  9010454139840013625 |  3 x

        stmt = session.prepare("INSERT INTO test.test (id) VALUES (?);")
        for id in range(0, 10):
            session.execute(stmt, [id], ConsistencyLevel.ALL)

        # with node2 down and hints disabled, delete every other row on node1
        node2.stop(wait_other_notice=True)
        session.execute("DELETE FROM test.test WHERE id IN (1, 0, 4, 6, 3);")

        # with both nodes up, do a DISTINCT range query with CL.ALL;
        # prior to CASSANDRA-13747 this would cause an assertion in short read protection code
        node2.start(wait_other_notice=True)
        stmt = SimpleStatement("SELECT DISTINCT token(id), id FROM test.test;",
                               consistency_level=ConsistencyLevel.ALL)
        result = list(session.execute(stmt))
        assert_length_equal(result, 5)
Example #11
def _validate_dense_thrift(client, cf='dense_super_1'):
    client.transport.open()
    client.set_keyspace('ks')
    result = client.get_slice('k1', ColumnParent(cf), SlicePredicate(slice_range=SliceRange('', '', False, 5)), ConsistencyLevel.ONE)
    assert_length_equal(result, 2)
    assert result[0].super_column.name == 'key1'
    assert result[1].super_column.name == 'key2'

    print(result[0])
    print(result[1])
    for cosc in result:
        assert cosc.super_column.columns[0].name == _i64(100)
        assert cosc.super_column.columns[0].value == 'value1'
    def test_13747(self):
        """
        @jira_ticket CASSANDRA-13747
        """
        cluster = self.cluster

        # disable hinted handoff and set batch commit log so this doesn't interfere with the test
        cluster.set_configuration_options(values={'hinted_handoff_enabled': False})
        cluster.set_batch_commitlog(enabled=True)

        cluster.populate(2).start(wait_other_notice=True)
        node1, node2 = cluster.nodelist()

        session = self.patient_cql_connection(node1)

        query = "CREATE KEYSPACE IF NOT EXISTS test WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': 2};"
        session.execute(query)

        query = "CREATE TABLE IF NOT EXISTS test.test (id int PRIMARY KEY);"
        session.execute(query)

        #
        # populate the table with 10 rows:
        #

        # -7509452495886106294 |  5
        # -4069959284402364209 |  1 x
        # -3799847372828181882 |  8
        # -3485513579396041028 |  0 x
        # -3248873570005575792 |  2
        # -2729420104000364805 |  4 x
        #  1634052884888577606 |  7
        #  2705480034054113608 |  6 x
        #  3728482343045213994 |  9
        #  9010454139840013625 |  3 x

        stmt = session.prepare("INSERT INTO test.test (id) VALUES (?);")
        for id in range(0, 10):
            session.execute(stmt, [id], ConsistencyLevel.ALL)

        # with node2 down and hints disabled, delete every other row on node1
        node2.stop(wait_other_notice=True)
        session.execute("DELETE FROM test.test WHERE id IN (1, 0, 4, 6, 3);")

        # with both nodes up, do a DISTINCT range query with CL.ALL;
        # prior to CASSANDRA-13747 this would cause an assertion in short read protection code
        node2.start(wait_other_notice=True)
        stmt = SimpleStatement("SELECT DISTINCT token(id), id FROM test.test;",
                               consistency_level=ConsistencyLevel.ALL)
        result = list(session.execute(stmt))
        assert_length_equal(result, 5)
    def _deprecated_repair_jmx(self, method, arguments):
        """
        * Launch a two node, two DC cluster
        * Create a keyspace and table
        * Insert some data
        * Call the deprecated repair JMX API based on the arguments passed into this method
        * Check the node log to see if the correct repair was performed based on the jmx args
        """
        cluster = self.cluster

        logger.debug("Starting cluster..")
        cluster.populate([1, 1])
        node1, node2 = cluster.nodelist()
        remove_perf_disable_shared_mem(node1)
        cluster.start()
        supports_pull_repair = cluster.version() >= LooseVersion('3.10')

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 2)
        create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=1000, consistency=ConsistencyLevel.ALL)

        # Run repair
        mbean = make_mbean('db', 'StorageService')
        with JolokiaAgent(node1) as jmx:
            # assert repair runs and returns valid cmd number
            assert jmx.execute_method(mbean, method, arguments) == 1
        # wait for log to start
        node1.watch_log_for("Starting repair command")
        # get repair parameters from the log
        line = node1.grep_log(("Starting repair command #1" + (" \([^\)]+\)" if cluster.version() >= LooseVersion("3.10") else "") +
                               ", repairing keyspace ks with repair options \(parallelism: (?P<parallelism>\w+), primary range: (?P<pr>\w+), "
                               "incremental: (?P<incremental>\w+), job threads: (?P<jobs>\d+), ColumnFamilies: (?P<cfs>.+), dataCenters: (?P<dc>.+), "
                               "hosts: (?P<hosts>.+), # of ranges: (?P<ranges>\d+)(, pull repair: (?P<pullrepair>true|false))?\)"))

        assert_length_equal(line, 1)
        line, m = line[0]

        if supports_pull_repair:
            assert m.group("pullrepair"), "false" == "Pull repair cannot be enabled through the deprecated API so the pull repair option should always be false."

        return {"parallelism": m.group("parallelism"),
                "primary_range": m.group("pr"),
                "incremental": m.group("incremental"),
                "job_threads": m.group("jobs"),
                "column_families": m.group("cfs"),
                "data_centers": m.group("dc"),
                "hosts": m.group("hosts"),
                "ranges": m.group("ranges")}
    def _deprecated_repair_jmx(self, method, arguments):
        """
        * Launch a two node, two DC cluster
        * Create a keyspace and table
        * Insert some data
        * Call the deprecated repair JMX API based on the arguments passed into this method
        * Check the node log to see if the correct repair was performed based on the jmx args
        """
        cluster = self.cluster

        logger.debug("Starting cluster..")
        cluster.populate([1, 1])
        node1, node2 = cluster.nodelist()
        cluster.start()
        supports_pull_repair = cluster.version() >= LooseVersion('3.10')

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 2)
        create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=1000, consistency=ConsistencyLevel.ALL)

        # Run repair
        mbean = make_mbean('db', 'StorageService')
        with JolokiaAgent(node1) as jmx:
            # assert repair runs and returns valid cmd number
            assert jmx.execute_method(mbean, method, arguments) == 1
        # wait for log to start
        node1.watch_log_for("Starting repair command")
        # get repair parameters from the log
        line = node1.grep_log((r"Starting repair command #1" + (r" \([^\)]+\)" if cluster.version() >= LooseVersion("3.10") else "") +
                               r", repairing keyspace ks with repair options \(parallelism: (?P<parallelism>\w+), primary range: (?P<pr>\w+), "
                               r"incremental: (?P<incremental>\w+), job threads: (?P<jobs>\d+), ColumnFamilies: (?P<cfs>.+), dataCenters: (?P<dc>.+), "
                               r"hosts: (?P<hosts>.+), # of ranges: (?P<ranges>\d+)(, pull repair: (?P<pullrepair>true|false))?(, ignore unreplicated keyspaces: (?P<ignoreunrepl>true|false))?\)"))

        assert_length_equal(line, 1)
        line, m = line[0]

        if supports_pull_repair:
            assert m.group("pullrepair"), "false" == "Pull repair cannot be enabled through the deprecated API so the pull repair option should always be false."

        return {"parallelism": m.group("parallelism"),
                "primary_range": m.group("pr"),
                "incremental": m.group("incremental"),
                "job_threads": m.group("jobs"),
                "column_families": m.group("cfs"),
                "data_centers": m.group("dc"),
                "hosts": m.group("hosts"),
                "ranges": m.group("ranges")}
def _validate_sparse_thrift(client, cf='sparse_super_1'):
    client.transport.open()
    client.set_keyspace('ks')
    result = client.get_slice('k1', ColumnParent(cf), SlicePredicate(slice_range=SliceRange('', '', False, 5)), ConsistencyLevel.ONE)
    assert_length_equal(result, 2)
    assert_equal(result[0].super_column.name, 'key1')
    assert_equal(result[1].super_column.name, 'key2')

    for cosc in result:
        assert_equal(cosc.super_column.columns[0].name, 'col1')
        assert_equal(cosc.super_column.columns[0].value, _i64(200))
        assert_equal(cosc.super_column.columns[1].name, 'col2')
        assert_equal(cosc.super_column.columns[1].value, _i64(300))
        assert_equal(cosc.super_column.columns[2].name, 'value1')
        assert_equal(cosc.super_column.columns[2].value, _i64(100))
def _validate_sparse_thrift(client, cf='sparse_super_1'):
    try:
        client.transport.open()
    except:
        pass
    client.set_keyspace('ks')
    result = client.get_slice('k1'.encode(), ColumnParent(cf), SlicePredicate(slice_range=SliceRange(''.encode(), ''.encode(), False, 5)), ConsistencyLevel.ONE)
    assert_length_equal(result, 2)
    assert result[0].super_column.name == 'key1'.encode()
    assert result[1].super_column.name == 'key2'.encode()

    for cosc in result:
        assert cosc.super_column.columns[0].name == 'col1'.encode()
        assert cosc.super_column.columns[0].value == _i64(200)
        assert cosc.super_column.columns[1].name == 'col2'.encode()
        assert cosc.super_column.columns[1].value == _i64(300)
        assert cosc.super_column.columns[2].name == 'value1'.encode()
        assert cosc.super_column.columns[2].value == _i64(100)
    def assertions_test(self):
        # assert_exception_test
        mock_session = Mock(**{'execute.side_effect': AlreadyExists("Dummy exception message.")})
        assert_exception(mock_session, "DUMMY QUERY", expected=AlreadyExists)

        # assert_unavailable_test
        mock_session = Mock(**{'execute.side_effect': Unavailable("Dummy Unavailabile message.")})
        assert_unavailable(mock_session.execute)

        # assert_invalid_test
        mock_session = Mock(**{'execute.side_effect': InvalidRequest("Dummy InvalidRequest message.")})
        assert_invalid(mock_session, "DUMMY QUERY")

        # assert_unauthorized_test
        mock_session = Mock(**{'execute.side_effect': Unauthorized("Dummy Unauthorized message.")})
        assert_unauthorized(mock_session, "DUMMY QUERY", None)

        # assert_one_test
        mock_session = Mock()
        mock_session.execute = Mock(return_value=[[1, 1]])
        assert_one(mock_session, "SELECT * FROM test", [1, 1])

        # assert_none_test
        mock_session = Mock()
        mock_session.execute = Mock(return_value=[])
        assert_none(mock_session, "SELECT * FROM test")

        # assert_all_test
        mock_session = Mock()
        mock_session.execute = Mock(return_value=[[i, i] for i in range(0, 10)])
        assert_all(mock_session, "SELECT k, v FROM test", [[i, i] for i in range(0, 10)], ignore_order=True)

        # assert_almost_equal_test
        assert_almost_equal(1, 1.1, 1.2, 1.9, error=1.0)

        # assert_row_count_test
        mock_session = Mock()
        mock_session.execute = Mock(return_value=[[1]])
        assert_row_count(mock_session, 'test', 1)

        # assert_length_equal_test
        check = [1, 2, 3, 4]
        assert_length_equal(check, 4)
    def test_column_index_stress(self):
        """Write a large number of columns to a single row and set
        'column_index_size_in_kb' to a sufficiently low value to force
        the creation of a column index. The test will then randomly
        read columns from that row and ensure that all data is
        returned. See CASSANDRA-5225.
        """
        cluster = self.cluster
        cluster.populate(1).start()
        (node1, ) = cluster.nodelist()
        cluster.set_configuration_options(
            values={'column_index_size_in_kb':
                    1})  # reduce this value to force column index creation
        session = self.patient_cql_connection(node1)
        create_ks(session, 'wide_rows', 1)

        create_table_query = 'CREATE TABLE test_table (row varchar, name varchar, value int, PRIMARY KEY (row, name));'
        session.execute(create_table_query)

        # Now insert 100,000 columns to row 'row0'
        insert_column_query = "UPDATE test_table SET value = {value} WHERE row = '{row}' AND name = '{name}';"
        for i in range(100000):
            row = 'row0'
            name = 'val' + str(i)
            session.execute(
                insert_column_query.format(value=i, row=row, name=name))

        # now randomly fetch columns: 1 to 3 at a time
        for i in range(10000):
            select_column_query = "SELECT value FROM test_table WHERE row='row0' AND name in ('{name1}', '{name2}', '{name3}');"
            values2fetch = [str(random.randint(0, 99999)) for i in range(3)]
            # values2fetch is a list of random values.  Because they are random, they will not be unique necessarily.
            # To simplify the template logic in the select_column_query I will not expect the query to
            # necessarily return 3 values.  Hence I am computing the number of unique values in values2fetch
            # and using that in the assert at the end.
            expected_rows = len(set(values2fetch))
            rows = list(
                session.execute(
                    select_column_query.format(name1="val" + values2fetch[0],
                                               name2="val" + values2fetch[1],
                                               name3="val" + values2fetch[2])))
            assert_length_equal(rows, expected_rows)
    def ghosts_test(self):
        """ Check range ghost are correctly removed by the system """
        cluster = self.cluster
        cluster.populate(1).start()
        [node1] = cluster.nodelist()

        time.sleep(.5)
        session = self.cql_connection(node1)
        self.create_ks(session, 'ks', 1)
        self.create_cf(session, 'cf', gc_grace=0, columns={'c': 'text'})

        rows = 1000

        for i in xrange(0, rows):
            session.execute("UPDATE cf SET c = 'value' WHERE key = 'k%i'" % i)

        res = list(session.execute("SELECT * FROM cf LIMIT 10000"))
        assert_length_equal(res, rows)

        node1.flush()

        for i in xrange(0, rows / 2):
            session.execute("DELETE FROM cf WHERE key = 'k%i'" % i)

        res = list(session.execute("SELECT * FROM cf LIMIT 10000"))
        # no ghosts in 1.2+
        assert_length_equal(res, rows / 2)

        node1.flush()
        time.sleep(1)  # make sure tombstones are collected
        node1.compact()

        res = list(session.execute("SELECT * FROM cf LIMIT 10000"))
        assert_length_equal(res, rows / 2)
    def test_ghosts(self):
        """ Check range ghost are correctly removed by the system """
        cluster = self.cluster
        cluster.populate(1).start()
        [node1] = cluster.nodelist()

        time.sleep(.5)
        session = self.cql_connection(node1)
        create_ks(session, 'ks', 1)
        create_cf(session, 'cf', gc_grace=0, columns={'c': 'text'})

        rows = 1000

        for i in range(0, rows):
            session.execute("UPDATE cf SET c = 'value' WHERE key = 'k%i'" % i)

        res = list(session.execute("SELECT * FROM cf LIMIT 10000"))
        assert_length_equal(res, rows)

        node1.flush()

        for i in range(0, rows // 2):
            session.execute("DELETE FROM cf WHERE key = 'k%i'" % i)

        res = list(session.execute("SELECT * FROM cf LIMIT 10000"))
        # no ghosts in 1.2+
        assert_length_equal(res, rows // 2)

        node1.flush()
        time.sleep(1)  # make sure tombstones are collected
        node1.compact()

        res = list(session.execute("SELECT * FROM cf LIMIT 10000"))
        assert_length_equal(res, rows // 2)
Example #22
    def test_column_index_stress(self):
        """Write a large number of columns to a single row and set
        'column_index_size_in_kb' to a sufficiently low value to force
        the creation of a column index. The test will then randomly
        read columns from that row and ensure that all data is
        returned. See CASSANDRA-5225.
        """
        cluster = self.cluster
        cluster.populate(1).start()
        (node1,) = cluster.nodelist()
        cluster.set_configuration_options(values={'column_index_size_in_kb': 1})  # reduce this value to force column index creation
        session = self.patient_cql_connection(node1)
        create_ks(session, 'wide_rows', 1)

        create_table_query = 'CREATE TABLE test_table (row varchar, name varchar, value int, PRIMARY KEY (row, name));'
        session.execute(create_table_query)

        # Now insert 100,000 columns to row 'row0'
        insert_column_query = "UPDATE test_table SET value = {value} WHERE row = '{row}' AND name = '{name}';"
        for i in range(100000):
            row = 'row0'
            name = 'val' + str(i)
            session.execute(insert_column_query.format(value=i, row=row, name=name))

        # now randomly fetch columns: 1 to 3 at a time
        for i in range(10000):
            select_column_query = "SELECT value FROM test_table WHERE row='row0' AND name in ('{name1}', '{name2}', '{name3}');"
            values2fetch = [str(random.randint(0, 99999)) for i in range(3)]
            # values2fetch is a list of random values.  Because they are random, they will not be unique necessarily.
            # To simplify the template logic in the select_column_query I will not expect the query to
            # necessarily return 3 values.  Hence I am computing the number of unique values in values2fetch
            # and using that in the assert at the end.
            expected_rows = len(set(values2fetch))
            rows = list(session.execute(select_column_query.format(name1="val" + values2fetch[0],
                                                                   name2="val" + values2fetch[1],
                                                                   name3="val" + values2fetch[2])))
            assert_length_equal(rows, expected_rows)
Example #23
    def test_cdc_data_available_in_cdc_raw(self):
        ks_name = 'ks'
        # First, create a new node just for data generation.
        generation_node, generation_session = self.prepare(ks_name=ks_name)

        cdc_table_info = TableInfo(
            ks_name=ks_name,
            table_name='cdc_tab',
            column_spec=_16_uuid_column_spec,
            insert_stmt=_get_16_uuid_insert_stmt(ks_name, 'cdc_tab'),
            options={
                'cdc': 'true',
                # give table an explicit id so when we create it again it's the
                # same table and we can replay into it
                'id': uuid.uuid4()
            })

        # Write until we get a new CL segment to avoid replaying initialization
        # mutations from this node's startup into system tables in the other
        # node. See CASSANDRA-11811.
        advance_to_next_cl_segment(session=generation_session,
                                   commitlog_dir=os.path.join(
                                       generation_node.get_path(),
                                       'commitlogs'))

        generation_session.execute(cdc_table_info.create_stmt)

        # insert 10000 rows
        inserted_rows = _insert_rows(generation_session, cdc_table_info.name,
                                     cdc_table_info.insert_stmt,
                                     repeat((), 10000))

        # drain the node to guarantee all cl segments will be recycled
        logger.debug('draining')
        generation_node.drain()
        logger.debug('stopping')
        # stop the node and clean up all sessions attached to it
        generation_session.cluster.shutdown()
        generation_node.stop()

        # We can rely on the existing _cdc.idx files to determine which .log files contain cdc data.
        source_path = os.path.join(generation_node.get_path(), 'cdc_raw')
        source_cdc_indexes = {
            ReplayData.load(source_path, name)
            for name in os.listdir(source_path) if name.endswith('_cdc.idx')
        }
        # assertNotEqual(source_cdc_indexes, {})
        assert source_cdc_indexes != {}

        # create a new node to use for cdc_raw cl segment replay
        loading_node = self._init_new_loading_node(
            ks_name, cdc_table_info.create_stmt,
            self.cluster.version() < '4')

        # move cdc_raw contents to commitlog directories, then start the
        # node again to trigger commitlog replay, which should replay the
        # cdc_raw files we moved to commitlogs into memtables.
        logger.debug('moving cdc_raw and restarting node')
        _move_commitlog_segments(
            os.path.join(generation_node.get_path(), 'cdc_raw'),
            os.path.join(loading_node.get_path(), 'commitlogs'))
        loading_node.start(wait_for_binary_proto=True)
        logger.debug('node successfully started; waiting on log replay')
        loading_node.grep_log('Log replay complete')
        logger.debug('log replay complete')

        # final assertions
        validation_session = self.patient_exclusive_cql_connection(
            loading_node)
        data_in_cdc_table_after_restart = rows_to_list(
            validation_session.execute('SELECT * FROM ' + cdc_table_info.name))
        logger.debug('found {cdc} values in CDC table'.format(
            cdc=len(data_in_cdc_table_after_restart)))

        # Then we assert that the CDC data that we expect to be there is there.
        # All data that was in CDC tables should have been copied to cdc_raw,
        # then used in commitlog replay, so it should be back in the cluster.
        assert (inserted_rows == data_in_cdc_table_after_restart
                ), 'not all expected data selected'

        if self.cluster.version() >= '4.0':
            # Create ReplayData objects for each index file found in loading cluster
            loading_path = os.path.join(loading_node.get_path(), 'cdc_raw')
            dest_cdc_indexes = [
                ReplayData.load(loading_path, name)
                for name in os.listdir(loading_path)
                if name.endswith('_cdc.idx')
            ]

            # Compare source replay data to dest to ensure replay process created both hard links and index files.
            for srd in source_cdc_indexes:
                # Confirm both log and index are in dest
                assert os.path.isfile(os.path.join(loading_path, srd.idx_name))
                assert os.path.isfile(os.path.join(loading_path, srd.log_name))

                # Find dest ReplayData that corresponds to the source (should be exactly 1)
                corresponding_dest_replay_datae = [
                    x for x in dest_cdc_indexes if srd.idx_name == x.idx_name
                ]
                assert_length_equal(corresponding_dest_replay_datae, 1)
                drd = corresponding_dest_replay_datae[0]

                # We can't compare equality on offsets since replay uses the raw file length as the written
                # cdc offset. We *can*, however, confirm that the offset in the replayed file is >=
                # the source file, ensuring clients are signaled to replay at least all the data in the
                # log.
                assert drd.offset >= srd.offset

                # Confirm completed flag is the same in both
                assert srd.completed == drd.completed

            # Confirm that the relationship between index files on the source
            # and destination looks like we expect.
            # First, grab the mapping between the two, make sure it's a 1-1
            # mapping, and transform the dict to reflect that:
            src_to_dest_idx_map = {
                src_rd: [
                    dest_rd for dest_rd in dest_cdc_indexes
                    if dest_rd.idx_name == src_rd.idx_name
                ]
                for src_rd in source_cdc_indexes
            }
            for src_rd, dest_rds in src_to_dest_idx_map.items():
                assert_length_equal(dest_rds, 1)
                src_to_dest_idx_map[src_rd] = dest_rds[0]
            # All offsets in idx files that were copied should be >0 on the
            # destination node.
            assert (
                0 not in {i.offset for i in src_to_dest_idx_map.values()}),\
                ('Found index offsets == 0 in an index file on the '
                 'destination node that corresponds to an index file on the '
                 'source node:\n'
                 '{}').format(pformat(src_to_dest_idx_map))
            # Offsets of all shared indexes should be >= on the destination
            # than on the source.
            for src_rd, dest_rd in src_to_dest_idx_map.items():
                assert dest_rd.offset >= src_rd.offset

            src_to_dest_idx_map = {
                src_rd: [
                    dest_rd for dest_rd in dest_cdc_indexes
                    if dest_rd.idx_name == src_rd.idx_name
                ]
                for src_rd in source_cdc_indexes
            }
            for k, v in src_to_dest_idx_map.items():
                assert_length_equal(v, 1)
                assert v[0].offset >= k.offset  # destination offset should be >= source offset
Example #24
    def test_cdc_data_available_in_cdc_raw(self):
        ks_name = 'ks'
        # First, create a new node just for data generation.
        generation_node, generation_session = self.prepare(ks_name=ks_name)

        cdc_table_info = TableInfo(
            ks_name=ks_name, table_name='cdc_tab',
            column_spec=_16_uuid_column_spec,
            insert_stmt=_get_16_uuid_insert_stmt(ks_name, 'cdc_tab'),
            options={
                'cdc': 'true',
                # give table an explicit id so when we create it again it's the
                # same table and we can replay into it
                'id': uuid.uuid4()
            }
        )

        # Write until we get a new CL segment to avoid replaying initialization
        # mutations from this node's startup into system tables in the other
        # node. See CASSANDRA-11811.
        advance_to_next_cl_segment(
            session=generation_session,
            commitlog_dir=os.path.join(generation_node.get_path(), 'commitlogs')
        )

        generation_session.execute(cdc_table_info.create_stmt)

        # insert 10000 rows
        inserted_rows = _insert_rows(generation_session, cdc_table_info.name, cdc_table_info.insert_stmt,
                                     repeat((), 10000))

        # drain the node to guarantee all cl segments will be recycled
        logger.debug('draining')
        generation_node.drain()
        logger.debug('stopping')
        # stop the node and clean up all sessions attached to it
        generation_session.cluster.shutdown()
        generation_node.stop()

        # We can rely on the existing _cdc.idx files to determine which .log files contain cdc data.
        source_path = os.path.join(generation_node.get_path(), 'cdc_raw')
        source_cdc_indexes = {ReplayData.load(source_path, name)
                              for name in os.listdir(source_path) if name.endswith('_cdc.idx')}
        # assertNotEqual(source_cdc_indexes, {})
        assert source_cdc_indexes != {}

        # create a new node to use for cdc_raw cl segment replay
        loading_node = self._init_new_loading_node(ks_name, cdc_table_info.create_stmt, self.cluster.version() < '4')

        # move cdc_raw contents to commitlog directories, then start the
        # node again to trigger commitlog replay, which should replay the
        # cdc_raw files we moved to commitlogs into memtables.
        logger.debug('moving cdc_raw and restarting node')
        _move_commitlog_segments(
            os.path.join(generation_node.get_path(), 'cdc_raw'),
            os.path.join(loading_node.get_path(), 'commitlogs')
        )
        loading_node.start(wait_for_binary_proto=True)
        logger.debug('node successfully started; waiting on log replay')
        loading_node.grep_log('Log replay complete')
        logger.debug('log replay complete')

        # final assertions
        validation_session = self.patient_exclusive_cql_connection(loading_node)
        data_in_cdc_table_after_restart = rows_to_list(
            validation_session.execute('SELECT * FROM ' + cdc_table_info.name)
        )
        logger.debug('found {cdc} values in CDC table'.format(
            cdc=len(data_in_cdc_table_after_restart)
        ))

        # Then we assert that the CDC data that we expect to be there is there.
        # All data that was in CDC tables should have been copied to cdc_raw,
        # then used in commitlog replay, so it should be back in the cluster.
        assert (inserted_rows == data_in_cdc_table_after_restart), 'not all expected data selected'

        if self.cluster.version() >= '4.0':
            # Create ReplayData objects for each index file found in loading cluster
            loading_path = os.path.join(loading_node.get_path(), 'cdc_raw')
            dest_cdc_indexes = [ReplayData.load(loading_path, name)
                                for name in os.listdir(loading_path) if name.endswith('_cdc.idx')]

            # Compare source replay data to dest to ensure replay process created both hard links and index files.
            for srd in source_cdc_indexes:
                # Confirm both log and index are in dest
                assert os.path.isfile(os.path.join(loading_path, srd.idx_name))
                assert os.path.isfile(os.path.join(loading_path, srd.log_name))

                # Find dest ReplayData that corresponds to the source (should be exactly 1)
                corresponding_dest_replay_datae = [x for x in dest_cdc_indexes
                                                   if srd.idx_name == x.idx_name]
                assert_length_equal(corresponding_dest_replay_datae, 1)
                drd = corresponding_dest_replay_datae[0]

                # We can't compare equality on offsets since replay uses the raw file length as the written
                # cdc offset. We *can*, however, confirm that the offset in the replayed file is >=
                # the source file, ensuring clients are signaled to replay at least all the data in the
                # log.
                assert drd.offset >= srd.offset

                # Confirm completed flag is the same in both
                assert srd.completed == drd.completed

            # Confirm that the relationship between index files on the source
            # and destination looks like we expect.
            # First, grab the mapping between the two, make sure it's a 1-1
            # mapping, and transform the dict to reflect that:
            src_to_dest_idx_map = {
                src_rd: [dest_rd for dest_rd in dest_cdc_indexes
                         if dest_rd.idx_name == src_rd.idx_name]
                for src_rd in source_cdc_indexes
            }
            for src_rd, dest_rds in src_to_dest_idx_map.items():
                assert_length_equal(dest_rds, 1)
                src_to_dest_idx_map[src_rd] = dest_rds[0]
            # All offsets in idx files that were copied should be >0 on the
            # destination node.
            assert (
                0 not in {i.offset for i in src_to_dest_idx_map.values()}),\
                ('Found index offsets == 0 in an index file on the '
                 'destination node that corresponds to an index file on the '
                 'source node:\n'
                 '{}').format(pformat(src_to_dest_idx_map))
            # Offsets of all shared indexes should be >= on the destination
            # than on the source.
            for src_rd, dest_rd in src_to_dest_idx_map.items():
                assert dest_rd.offset >= src_rd.offset

            src_to_dest_idx_map = {
                src_rd: [dest_rd for dest_rd in dest_cdc_indexes
                         if dest_rd.idx_name == src_rd.idx_name]
                for src_rd in source_cdc_indexes
            }
            for k, v in src_to_dest_idx_map.items():
                assert_length_equal(v, 1)
                assert v[0].offset >= k.offset  # destination offset should be >= source offset
    def short_read_test(self):
        """
        @jira_ticket CASSANDRA-9460
        """
        cluster = self.cluster

        # Disable hinted handoff and set batch commit log so this doesn't
        # interfer with the test
        cluster.set_configuration_options(values={'hinted_handoff_enabled': False})
        cluster.set_batch_commitlog(enabled=True)

        cluster.populate(3).start(wait_other_notice=True)
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 3)
        create_cf(session, 'cf', read_repair=0.0)

        normal_key = 'normal'
        reversed_key = 'reversed'

        # Repeat this test 10 times to make it more easy to spot a null pointer exception caused by a race, see CASSANDRA-9460
        for k in xrange(10):
            # insert 9 columns in two rows
            insert_columns(self, session, normal_key, 9)
            insert_columns(self, session, reversed_key, 9)

            # Delete 3 first columns (and 3 last columns, for the reversed version) with a different node dead each time
            for node, column_number_to_delete in zip(range(1, 4), range(3)):
                self.stop_node(node)
                self.delete(node, normal_key, column_number_to_delete)
                self.delete(node, reversed_key, 8 - column_number_to_delete)
                self.restart_node(node)

            # Query 3 firsts columns in normal order
            session = self.patient_cql_connection(node1, 'ks')
            query = SimpleStatement(
                'SELECT c, v FROM cf WHERE key=\'k{}\' LIMIT 3'.format(normal_key),
                consistency_level=ConsistencyLevel.QUORUM)
            rows = list(session.execute(query))
            res = rows
            assert_length_equal(res, 3)

            # value 0, 1 and 2 have been deleted
            for i in xrange(1, 4):
                self.assertEqual('value{}'.format(i + 2), res[i - 1][1])

            # Query 3 firsts columns in reverse order
            session = self.patient_cql_connection(node1, 'ks')
            query = SimpleStatement(
                'SELECT c, v FROM cf WHERE key=\'k{}\' ORDER BY c DESC LIMIT 3'.format(reversed_key),
                consistency_level=ConsistencyLevel.QUORUM)
            rows = list(session.execute(query))
            res = rows
            assert_length_equal(res, 3)

            # value 6, 7 and 8 have been deleted
            for i in xrange(0, 3):
                self.assertEqual('value{}'.format(5 - i), res[i][1])

            session.execute('TRUNCATE cf')
def query_users(self, session):
    ret = list(session.execute("SELECT * FROM users"))
    assert_length_equal(ret, 5)
    return ret
Example #27
    def short_read_test(self):
        """
        @jira_ticket CASSANDRA-9460
        """
        cluster = self.cluster

        # Disable hinted handoff and set batch commit log so this doesn't
        # interfer with the test
        cluster.set_configuration_options(
            values={'hinted_handoff_enabled': False})
        cluster.set_batch_commitlog(enabled=True)

        cluster.populate(3).start(wait_other_notice=True)
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 3)
        self.create_cf(session, 'cf', read_repair=0.0)

        normal_key = 'normal'
        reversed_key = 'reversed'

        # Repeat this test 10 times to make it more easy to spot a null pointer exception caused by a race, see CASSANDRA-9460
        for k in xrange(10):
            # insert 9 columns in two rows
            insert_columns(self, session, normal_key, 9)
            insert_columns(self, session, reversed_key, 9)

            # Delete 3 first columns (and 3 last columns, for the reversed version) with a different node dead each time
            for node, column_number_to_delete in zip(range(1, 4), range(3)):
                self.stop_node(node)
                self.delete(node, normal_key, column_number_to_delete)
                self.delete(node, reversed_key, 8 - column_number_to_delete)
                self.restart_node(node)

            # Query 3 firsts columns in normal order
            session = self.patient_cql_connection(node1, 'ks')
            query = SimpleStatement(
                'SELECT c, v FROM cf WHERE key=\'k{}\' LIMIT 3'.format(
                    normal_key),
                consistency_level=ConsistencyLevel.QUORUM)
            rows = list(session.execute(query))
            res = rows
            assert_length_equal(res, 3)

            # value 0, 1 and 2 have been deleted
            for i in xrange(1, 4):
                self.assertEqual('value{}'.format(i + 2), res[i - 1][1])

            # Query 3 firsts columns in reverse order
            session = self.patient_cql_connection(node1, 'ks')
            query = SimpleStatement(
                'SELECT c, v FROM cf WHERE key=\'k{}\' ORDER BY c DESC LIMIT 3'
                .format(reversed_key),
                consistency_level=ConsistencyLevel.QUORUM)
            rows = list(session.execute(query))
            res = rows
            assert_length_equal(res, 3)

            # value 6, 7 and 8 have been deleted
            for i in xrange(0, 3):
                self.assertEqual('value{}'.format(5 - i), res[i][1])

            session.execute('TRUNCATE cf')