Example #1
0
def test_cdc_alter_table_drop_column(scylla_only, cql, test_keyspace):
    """Drop a column from a CDC-enabled table and then scan its CDC log.

    Two rows are written (one of them with a null ``v``), the base table and
    its ``_scylla_cdc_log`` table are both flushed, column ``v`` is dropped,
    and finally the log table is read. The test has no assertions; it passes
    if none of the statements raises.
    """
    cdc_options = " with cdc = {'enabled': true}"
    with new_test_table(cql, test_keyspace, "pk int primary key, v int",
                        cdc_options) as table:
        cdc_log = table + "_scylla_cdc_log"
        for statement in (
                f"insert into {table} (pk, v) values (0, 0)",
                f"insert into {table} (pk, v) values (1, null)"):
            cql.execute(statement)
        # Flush the base table first, then its CDC log table.
        for name in (table, cdc_log):
            flush(cql, name)
        cql.execute(f"alter table {table} drop v")
        cql.execute(f"select * from {cdc_log}")
Example #2
0
def simple_no_clustering_table(cql, keyspace):
    """Create, populate and flush a table that has only a partition key.

    Ten rows are inserted (pk 0..9, each with v = 0) before the flush.

    Returns a ``(table, schema)`` tuple: the generated table name (without
    the keyspace prefix) and the CREATE TABLE statement that was executed.
    """
    table = util.unique_name()
    qualified_name = f"{keyspace}.{table}"
    schema = f"CREATE TABLE {keyspace}.{table} (pk int PRIMARY KEY , v int)"
    cql.execute(schema)

    for key in range(10):
        cql.execute(f"INSERT INTO {qualified_name} (pk, v) VALUES ({key}, 0)")

    nodetool.flush(cql, qualified_name)
    return table, schema
Example #3
0
def table_with_counters(cql, keyspace):
    """Create, populate and flush a table with a counter column.

    Each of the partitions pk 0..9 has its counter ``v`` incremented by one,
    four times, and the table is flushed afterwards.

    Returns a ``(table, schema)`` tuple: the generated table name (without
    the keyspace prefix) and the CREATE TABLE statement that was executed.
    """
    table = util.unique_name()
    qualified_name = f"{keyspace}.{table}"
    schema = f"CREATE TABLE {keyspace}.{table} (pk int PRIMARY KEY, v counter)"
    cql.execute(schema)

    increments_per_key = 4
    for key in range(10):
        bump = f"UPDATE {qualified_name} SET v = v + 1 WHERE pk = {key};"
        for _ in range(increments_per_key):
            cql.execute(bump)

    nodetool.flush(cql, qualified_name)
    return table, schema
Example #4
0
def clustering_table_with_collection(cql, keyspace):
    """Create, populate and flush a clustering table with a map column.

    For every (pk, ck) pair in 0..9 x 0..9 one row is inserted whose ``v``
    column is a ``map<int, text>`` literal built from the entries
    ``p: 'c'`` for p in 0..pk-1 and c in 0..ck-1 (empty when pk or ck is 0).

    Returns a ``(table, schema)`` tuple: the generated table name (without
    the keyspace prefix) and the CREATE TABLE statement that was executed.
    """
    table = util.unique_name()
    schema = f"CREATE TABLE {keyspace}.{table} (pk int, ck int, v map<int, text>, PRIMARY KEY (pk, ck))"

    cql.execute(schema)

    for pk in range(10):
        for ck in range(10):
            # Build the entries as a list, not a set: a set of strings is
            # iterated in hash order, which changes between interpreter runs
            # (string hash randomization). The literal repeats each key with
            # several values, so the entry order may decide which value ends
            # up stored; a list keeps the generated statement reproducible.
            map_entries = [f"{p}: '{c}'" for p in range(pk) for c in range(ck)]
            map_str = ", ".join(map_entries)
            cql.execute(f"INSERT INTO {keyspace}.{table} (pk, ck, v) VALUES ({pk}, {ck}, {{{map_str}}})")

    nodetool.flush(cql, f"{keyspace}.{table}")

    return table, schema
Example #5
0
def clustering_table_with_udt(cql, keyspace):
    """Create, populate and flush a clustering table with a UDT column.

    The user-defined type ``type1`` is created in ``keyspace`` first, then
    the table. One row is inserted for every (pk, ck) pair in 0..9 x 0..9,
    all carrying the same UDT value, and the table is flushed.

    Returns a ``(table, schema)`` tuple where ``schema`` is the CREATE TYPE
    and CREATE TABLE statements joined with ``"; "``.
    """
    table = util.unique_name()
    create_type_schema = f"CREATE TYPE {keyspace}.type1 (f1 int, f2 text)"
    create_table_schema = f" CREATE TABLE {keyspace}.{table} (pk int, ck int, v type1, PRIMARY KEY (pk, ck))"

    # The type must exist before the table that refers to it.
    for ddl in (create_type_schema, create_table_schema):
        cql.execute(ddl)

    qualified_name = f"{keyspace}.{table}"
    for pk in range(10):
        for ck in range(10):
            cql.execute(f"INSERT INTO {qualified_name} (pk, ck, v) VALUES ({pk}, {ck}, {{f1: 100, f2: 'asd'}})")

    nodetool.flush(cql, qualified_name)

    return table, f"{create_type_schema}; {create_table_schema}"
Example #6
0
def test_partitions_estimate_only_deletions(cassandra_bug, cql, test_keyspace):
    """Check the partition estimate of a table that only received deletions.

    N partition deletions are issued against an empty table, the table is
    flushed, and the summed ``partitions_count`` from
    ``system.size_estimates`` is expected to be well below N.
    """
    N = 1000
    with new_test_table(cql, test_keyspace, 'k int PRIMARY KEY') as table:
        delete = cql.prepare(f"DELETE FROM {table} WHERE k=?")
        for key in range(N):
            cql.execute(delete, [key])
        nodetool.flush(cql, table)
        nodetool.refreshsizeestimates(cql)
        # Drop the leading "<keyspace>." from the qualified table name.
        table_name = table[len(test_keyspace) + 1:]
        query = f"SELECT partitions_count FROM system.size_estimates WHERE keyspace_name = '{test_keyspace}' AND table_name = '{table_name}'"
        counts = []
        for row in cql.execute(query):
            counts.append(row.partitions_count)
        count = sum(counts)
        print(counts)
        print(count)
        # The estimate should be near 0 rather than near N.
        assert count < N/1.25
Example #7
0
def test_create_large_static_cells_and_rows(cql, test_keyspace):
    '''Write and flush an oversized static cell / static row.

    Checks that `large_data_handler` reports large static cells and static
    rows without crashing the Scylla server.

    Regression test for https://github.com/scylladb/scylla/issues/6780'''
    # The default large-data threshold is 1 mb for cells and 10 mb for rows,
    # so a single 10 mb cell exercises the reporting code for static cells
    # and static rows at the same time.
    ten_mb = 10 * 1024 * 1024
    with new_test_table(
            cql, test_keyspace,
            "pk int, ck int, user_ids set<text> static, PRIMARY KEY (pk, ck)"
    ) as table:
        insert_stmt = cql.prepare(
            f"INSERT INTO {table} (pk, ck, user_ids) VALUES (?, ?, ?)")
        oversized_member = 'x' * ten_mb
        cql.execute(insert_stmt, [1, 1, {oversized_member}])

        nodetool.flush(cql, table)
Example #8
0
def test_mv_empty_string_partition_key(cql, test_keyspace):
    """Check that a view row with an empty-string partition key is visible
    in the view and that the view can then be flushed."""
    base_schema = 'p int, v text, primary key (p)'
    view_filter = 'v is not null and p is not null'
    with new_test_table(cql, test_keyspace, base_schema) as table, \
            new_materialized_view(cql, table, '*', 'v, p', view_filter) as mv:
        cql.execute(f"INSERT INTO {table} (p,v) VALUES (123, '')")
        # cql-pytest runs against a single node, so view updates are
        # synchronous and the view can be read right away. On a general
        # setup this read would need retries.
        view_rows = list(cql.execute(f"SELECT * FROM {mv}"))
        # The row keyed by the empty string must be present in the view.
        # This is the check that failed in Scylla in #9375:
        assert view_rows == [('', 123)]
        # Flushing an sstable holding just one partition with an
        # empty-string key must work too (a summary-file sanity check
        # used to prevent this).
        nodetool.flush(cql, mv)
Example #9
0
def test_mv_empty_string_partition_key_individual(cassandra_bug, cql,
                                                  test_keyspace):
    """Read the view partition with an empty-string key individually, both
    before and after flushing the view, and compare with a full scan."""
    base_schema = 'p int, v text, primary key (p)'
    view_filter = 'v is not null and p is not null'
    with new_test_table(cql, test_keyspace, base_schema) as table, \
            new_materialized_view(cql, table, '*', 'v, p', view_filter) as mv:
        # Several (p, v) rows are written. Only the one with the empty
        # string v is the real subject of the test; the others make it more
        # likely that different token orderings are exercised (see #9352).
        rows = [[123, ''], [1, 'dog'], [2, 'cat'], [700, 'hello'],
                [3, 'horse']]
        for p, v in rows:
            cql.execute(f"INSERT INTO {table} (p,v) VALUES ({p}, '{v}')")
        # cql-pytest runs against a single node, so view updates are
        # synchronous and the view can be read right away. On a general
        # setup these reads would need retries.
        expected_empty_key = [('', 123)]
        # Read the single partition whose key is the empty string:
        before_flush = list(cql.execute(f"SELECT * FROM {mv} WHERE v=''"))
        assert before_flush == expected_empty_key
        # The SELECT above is served from cache. Empty partition keys were
        # also special-cased (and buggy) in the sstable read and write
        # paths, so force a memtable flush and bypass the cache on the
        # following reads to prove the key survives a trip to disk.
        # Scylla used to fail this flush because the sstable layer refused
        # to write empty partition keys:
        nodetool.flush(cql, mv)
        # Full-table scan first, then the individual empty-key partition:
        scanned = set(cql.execute(f"SELECT * FROM {mv} BYPASS CACHE"))
        assert scanned == {(v, p) for p, v in rows}
        # Issue #9352 used to make the WHERE v='' lookup below come back
        # empty even though the data exists (the full scan above saw it)
        # and even though WHERE v='' is parsed correctly (it worked above
        # when served from memtables).
        after_flush = list(
            cql.execute(f"SELECT * FROM {mv} WHERE v='' BYPASS CACHE"))
        assert after_flush == expected_empty_key
Example #10
0
def write_table_and_estimate_partitions(cql, test_keyspace, N):
    """Write N partitions to a fresh table and return its partition estimate.

    Keys 0..N-1 are inserted into a temporary table, the table is flushed,
    and the ``partitions_count`` values that ``system.size_estimates``
    reports for it are summed.

    Returns the summed estimate; the per-range counts and the sum are also
    printed.
    """
    with new_test_table(cql, test_keyspace, 'k int PRIMARY KEY') as table:
        write = cql.prepare(f"INSERT INTO {table} (k) VALUES (?)")
        for key in range(N):
            cql.execute(write, [key])
        # Neither Cassandra nor Scylla counts memtable data in the
        # estimates, so the data has to be flushed before asking.
        nodetool.flush(cql, table)
        # Cassandra may not publish the estimates until they are refreshed
        # explicitly; Scylla does not need this step.
        nodetool.refreshsizeestimates(cql)
        # size_estimates keeps one row per token range for a given
        # keyspace/table, so the per-range counts are added together.
        table_name = table[len(test_keyspace) + 1:]
        query = f"SELECT partitions_count FROM system.size_estimates WHERE keyspace_name = '{test_keyspace}' AND table_name = '{table_name}'"
        counts = []
        for row in cql.execute(query):
            counts.append(row.partitions_count)
        count = sum(counts)
        print(counts)
        print(count)
        return count
Example #11
0
def test_partitions_estimate_full_overlap(cassandra_bug, cql, test_keyspace):
    """Check the partition estimate when the same keys are flushed twice.

    The same N keys are written and flushed two times, and the summed
    ``partitions_count`` from ``system.size_estimates`` is expected to stay
    within a factor of 1.5 of N.
    """
    N = 500
    with new_test_table(cql, test_keyspace, 'k int PRIMARY KEY') as table:
        write = cql.prepare(f"INSERT INTO {table} (k) VALUES (?)")
        # Two identical rounds: the second copy of the *same* data is meant
        # to land in a second sstable.
        for _ in range(2):
            for key in range(N):
                cql.execute(write, [key])
            nodetool.flush(cql, table)
        # TODO: In Scylla we should use NullCompactionStrategy to avoid the two
        # sstables from immediately being compacted together.
        nodetool.refreshsizeestimates(cql)
        table_name = table[len(test_keyspace) + 1:]
        query = f"SELECT partitions_count FROM system.size_estimates WHERE keyspace_name = '{test_keyspace}' AND table_name = '{table_name}'"
        counts = []
        for row in cql.execute(query):
            counts.append(row.partitions_count)
        count = sum(counts)
        print(counts)
        print(count)
        assert N/1.5 < count < N*1.5
Example #12
0
def test_twcs_optimal_query_path(cql, test_keyspace, scylla_only):
    """Read a single row from a TimeWindowCompactionStrategy table, before
    and after flushing it, expecting exactly one result both times."""
    schema = "pk int, ck int, v int, PRIMARY KEY (pk, ck)"
    twcs_options = (
        " WITH COMPACTION = {"
        " 'compaction_window_size': '1',"
        " 'compaction_window_unit': 'MINUTES',"
        " 'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy' }"
    )
    with new_test_table(cql, test_keyspace, schema, twcs_options) as table:
        cql.execute(f"INSERT INTO {table} (pk, ck, v) VALUES (0, 0, 0)")
        # Reading the partition now must yield exactly one row:
        before_flush = list(cql.execute(f"SELECT * FROM {table} WHERE pk = 0"))
        assert 1 == len(before_flush)
        # Flush the memtable to disk and repeat the query with BYPASS CACHE
        # so that the sstable read path is exercised. The same single row
        # is expected. With issue #8138 this returned zero results, and
        # crashed the debug build.
        nodetool.flush(cql, table)
        after_flush = list(
            cql.execute(f"SELECT * FROM {table} WHERE pk = 0 BYPASS CACHE"))
        assert 1 == len(after_flush)
Example #13
0
def flush(cql, table):
    """Flush ``table`` by delegating to ``nodetool.flush(cql, table)``.

    Nothing is returned; the result of the ``nodetool`` call is discarded.
    """
    nodetool.flush(cql, table)