def custom_null_indicator_template(self, indicator):
        """
        @param indicator the null indicator to be used in COPY

        A parametrized test that tests COPY with a given null indicator.
        """
        self.all_datatypes_prepare()
        self.session.execute("""
            CREATE TABLE testnullindicator (
                a int primary key,
                b text
            )""")
        insert_non_null = self.session.prepare("INSERT INTO testnullindicator (a, b) VALUES (?, ?)")
        execute_concurrent_with_args(self.session, insert_non_null,
                                     [(1, 'eggs'), (100, 'sausage')])
        insert_null = self.session.prepare("INSERT INTO testnullindicator (a) VALUES (?)")
        execute_concurrent_with_args(self.session, insert_null, [(2,), (200,)])

        self.tempfile = NamedTemporaryFile(delete=False)
        debug('Exporting to csv file: {name}'.format(name=self.tempfile.name))
        cmds = "COPY ks.testnullindicator TO '{name}'".format(name=self.tempfile.name)
        cmds += " WITH NULL = '{d}'".format(d=indicator)
        self.node1.run_cqlsh(cmds=cmds)

        results = list(self.session.execute("SELECT a, b FROM ks.testnullindicator"))
        results = [[indicator if value is None else value for value in row]
                   for row in results]

        self.assertCsvResultEqual(self.tempfile.name, results)
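
    # assertCsvResultEqual is used throughout these COPY tests but not shown
    # in this excerpt. A minimal sketch of what it could look like, assuming
    # rows compare as lists of strings (hypothetical, not the actual dtest
    # implementation):
    def assertCsvResultEqual(self, csv_filename, results):
        import csv
        with open(csv_filename, 'r') as csvfile:
            csv_rows = list(csv.reader(csvfile))
        # Normalize the SELECTed rows to strings so they compare with CSV cells.
        self.assertItemsEqual(csv_rows,
                              [[str(value) for value in row] for row in results])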
    def simple_test(self):
        """Test the SimpleStrategy on a 3 node cluster"""
        self.cluster.populate(3).start()
        time.sleep(5)
        node1 = self.cluster.nodelist()[0]
        self.conn = self.patient_cql_connection(node1)
        
        # Install a tracing cursor so we can get info about who the
        # coordinator is contacting: 
        self.conn.cursorclass = TracingCursor
        cursor = self.conn.cursor()

        replication_factor = 3
        self.create_ks(cursor, 'test', replication_factor)
        cursor.execute('CREATE TABLE test.test (id int PRIMARY KEY, value text)', trace=False)
        # Wait for table creation, otherwise trace times out -
        # CASSANDRA-5658
        time.sleep(5)

        for key, token in murmur3_hashes.items():
            cursor.execute("INSERT INTO test (id, value) VALUES (%s, 'asdf')" % key)
            time.sleep(5)
            trace = cursor.get_last_trace()
            stats = self.get_replicas_from_trace(trace)
            replicas_should_be = set(self.get_replicas_for_token(
                token, replication_factor))
            debug('\nreplicas should be: %s' % replicas_should_be)
            debug('replicas were: %s' % stats['replicas'])
            self.pprint_trace(trace)

            #Make sure the correct nodes are replicas:
            self.assertEqual(stats['replicas'], replicas_should_be)
            #Make sure that each replica node was contacted and
            #acknowledged the write:
            self.assertEqual(stats['nodes_sent_write'], stats['nodes_responded_write'])
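
    # get_replicas_for_token is not part of this excerpt. A minimal sketch of
    # how SimpleStrategy replicas can be computed (a hypothetical variant that
    # takes an explicit node_tokens mapping of node address -> owned token):
    # the replica set is the node owning the first ring token >= the key's
    # token, plus the next replication_factor - 1 nodes clockwise.
    def get_replicas_for_token(self, token, replication_factor, node_tokens):
        ring = sorted(node_tokens.items(), key=lambda kv: kv[1])
        start = 0
        for i, (_, node_token) in enumerate(ring):
            if token <= node_token:
                start = i
                break
        return [ring[(start + i) % len(ring)][0]
                for i in range(replication_factor)]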
    def setUp(self):
        self.validate_class_config()
        debug("Upgrade test beginning, setting CASSANDRA_VERSION to {}, and jdk to {}. (Prior values will be restored after test)."
              .format(self.UPGRADE_PATH.starting_version, self.UPGRADE_PATH.starting_meta.java_version))
        switch_jdks(self.UPGRADE_PATH.starting_meta.java_version)
        os.environ['CASSANDRA_VERSION'] = self.UPGRADE_PATH.starting_version
        super(UpgradeTester, self).setUp()
    def test_writing_use_header(self):
        """
        Test that COPY can write a CSV with a header by:

        - creating and populating a table,
        - exporting the contents of the table to a CSV file using COPY WITH
        HEADER = true
        - checking that the contents of the CSV file are the written values plus
        the header.
        """
        self.prepare()
        self.session.execute("""
            CREATE TABLE testheader (
                a int primary key,
                b int
            )""")
        insert_statement = self.session.prepare("INSERT INTO testheader (a, b) VALUES (?, ?)")
        args = [(1, 10), (2, 20), (3, 30)]
        execute_concurrent_with_args(self.session, insert_statement, args)

        self.tempfile = NamedTemporaryFile(delete=False)
        debug('Exporting to csv file: {name}'.format(name=self.tempfile.name))
        cmds = "COPY ks.testheader TO '{name}'".format(name=self.tempfile.name)
        cmds += " WITH HEADER = true"
        self.node1.run_cqlsh(cmds=cmds)

        with open(self.tempfile.name, 'r') as csvfile:
            csv_values = list(csv.reader(csvfile))

        self.assertItemsEqual(csv_values,
                              [['a', 'b'], ['1', '10'], ['2', '20'], ['3', '30']])
    def test_basic_snapshot_and_restore(self):
        cluster = self.cluster
        cluster.populate(1).start()
        (node1,) = cluster.nodelist()
        cursor = self.patient_cql_connection(node1)
        self.create_ks(cursor, 'ks', 1)
        cursor.execute('CREATE TABLE ks.cf ( key int PRIMARY KEY, val text);')

        self.insert_rows(cursor, 0, 100)
        snapshot_dir = self.make_snapshot(node1, 'ks', 'cf', 'basic')

        # Write more data after the snapshot; this will get thrown
        # away when we restore:
        self.insert_rows(cursor, 100, 200)
        rows = cursor.execute('SELECT count(*) from ks.cf')
        self.assertEqual(rows[0][0], 200)

        # Drop the keyspace, make sure we have no data:
        cursor.execute('DROP KEYSPACE ks')
        self.create_ks(cursor, 'ks', 1)
        cursor.execute('CREATE TABLE ks.cf ( key int PRIMARY KEY, val text);')
        rows = cursor.execute('SELECT count(*) from ks.cf')
        self.assertEqual(rows[0][0], 0)

        # Restore data from snapshot:
        self.restore_snapshot(snapshot_dir, node1, 'ks', 'cf')
        node1.nodetool('refresh ks cf')
        rows = cursor.execute('SELECT count(*) from ks.cf')

        # clean up
        debug("removing snapshot_dir: " + snapshot_dir)
        shutil.rmtree(snapshot_dir)

        self.assertEqual(rows[0][0], 100)
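
    # make_snapshot and restore_snapshot are not shown in this excerpt. A
    # hedged sketch of how they could work (assumptions: ccm's nodetool
    # wrapper, the standard <data_dir>/<ks>/<cf>-<id>/snapshots/<tag> layout,
    # and glob/os/shutil/tempfile imported; the real helpers are likely more
    # careful about multiple data directories):
    def make_snapshot(self, node, ks, cf, name):
        node.nodetool('snapshot -t {} -- {}'.format(name, ks))
        snapshot_dir = tempfile.mkdtemp()
        for data_dir in node.data_directories():
            for table_dir in glob.glob(os.path.join(data_dir, ks, cf + '*')):
                src = os.path.join(table_dir, 'snapshots', name)
                if os.path.isdir(src):
                    for f in os.listdir(src):
                        shutil.copy(os.path.join(src, f), snapshot_dir)
        return snapshot_dir

    def restore_snapshot(self, snapshot_dir, node, ks, cf):
        # Copy the snapshotted sstables back into the live table directory;
        # `nodetool refresh` (run by the test above) then picks them up.
        [table_dir] = glob.glob(
            os.path.join(node.data_directories()[0], ks, cf + '*'))
        for f in os.listdir(snapshot_dir):
            shutil.copy(os.path.join(snapshot_dir, f), table_dir)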
    def table_metric_mbeans_test(self):
        """
        Test some basic table metric mbeans with simple writes.
        """
        cluster = self.cluster
        cluster.populate(3)
        node1, node2, node3 = cluster.nodelist()
        remove_perf_disable_shared_mem(node1)
        cluster.start(wait_for_binary_proto=True)

        version = cluster.version()
        node1.stress(['write', 'n=10K', '-schema', 'replication(factor=3)'])

        typeName = "ColumnFamily" if version <= '2.2.X' else 'Table'
        debug('Version {} typeName {}'.format(version, typeName))

        # TODO the keyspace and table name are capitalized in 2.0
        memtable_size = make_mbean('metrics', type=typeName, keyspace='keyspace1', scope='standard1', name='AllMemtablesHeapSize')
        disk_size = make_mbean('metrics', type=typeName, keyspace='keyspace1', scope='standard1', name='LiveDiskSpaceUsed')
        sstable_count = make_mbean('metrics', type=typeName, keyspace='keyspace1', scope='standard1', name='LiveSSTableCount')

        with JolokiaAgent(node1) as jmx:
            mem_size = jmx.read_attribute(memtable_size, "Value")
            self.assertGreater(int(mem_size), 10000)

            on_disk_size = jmx.read_attribute(disk_size, "Count")
            self.assertEqual(int(on_disk_size), 0)

            node1.flush()

            on_disk_size = jmx.read_attribute(disk_size, "Count")
            self.assertGreater(int(on_disk_size), 10000)

            sstables = jmx.read_attribute(sstable_count, "Value")
            self.assertGreaterEqual(int(sstables), 1)
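
# make_mbean (from the dtest jmx utilities) builds a JMX ObjectName string.
# A sketch consistent with the calls in this excerpt; the exact formatting is
# an assumption:
def make_mbean(package, type, **kwargs):
    # e.g. make_mbean('metrics', type='Table', keyspace='keyspace1',
    # scope='standard1', name='LiveSSTableCount') ->
    # 'org.apache.cassandra.metrics:type=Table,keyspace=keyspace1,scope=standard1,name=LiveSSTableCount'
    mbean = 'org.apache.cassandra.{0}:type={1}'.format(package, type)
    if kwargs:
        mbean += ',' + ','.join('{0}={1}'.format(k, v) for k, v in kwargs.items())
    return mbean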
    def _deprecated_repair_jmx(self, method, arguments):
        cluster = self.cluster

        debug("Starting cluster..")
        cluster.populate([1, 1])
        node1, node2 = cluster.nodelist()
        remove_perf_disable_shared_mem(node1)
        cluster.start()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 2)
        self.create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=1000, consistency=ConsistencyLevel.ALL)

        # Run repair
        mbean = make_mbean('db', 'StorageService')
        with JolokiaAgent(node1) as jmx:
            # assert repair runs and returns valid cmd number
            self.assertEqual(jmx.execute_method(mbean, method, arguments), 1)
        # wait for log to start
        node1.watch_log_for("Starting repair command")
        # get repair parameters from the log
        l = node1.grep_log(r"Starting repair command #1, repairing keyspace ks with repair options \(parallelism: (?P<parallelism>\w+), primary range: (?P<pr>\w+), incremental: (?P<incremental>\w+), job threads: (?P<jobs>\d+), ColumnFamilies: (?P<cfs>.+), dataCenters: (?P<dc>.+), hosts: (?P<hosts>.+), # of ranges: (?P<ranges>\d+)\)")
        self.assertEqual(len(l), 1)
        line, m = l[0]
        return {"parallelism": m.group("parallelism"),
                "primary_range": m.group("pr"),
                "incremental": m.group("incremental"),
                "job_threads": m.group("jobs"),
                "column_families": m.group("cfs"),
                "data_centers": m.group("dc"),
                "hosts": m.group("hosts"),
                "ranges": m.group("ranges")}
    def decommission_node_test(self):
        debug("decommission_node_test()")
        cluster = self.cluster

        cluster.populate(1)
        # Create and add a new node; it must not be a seed, otherwise
        # we get schema disagreement issues for a while after decommissioning it.
        node2 = Node("node2", cluster, True, ("127.0.0.2", 9160), ("127.0.0.2", 7000), "7200", None)
        cluster.add(node2, False)

        [node1, node2] = cluster.nodelist()
        node1.start()
        node2.start()
        wait(2)

        cursor = self.cql_connection(node1).cursor()
        self.prepare_for_changes(cursor)

        node2.decommission()
        wait(30)

        self.validate_schema_consistent(node1)
        self.make_schema_changes(cursor, namespace="ns1")

        # create and add a new node
        node3 = Node("node3", cluster, True, ("127.0.0.3", 9160), ("127.0.0.3", 7000), "7300", None)

        cluster.add(node3, True)
        node3.start()

        wait(30)
        self.validate_schema_consistent(node1)
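
# The wait() helper used by these schema tests is not shown; most likely it
# is a thin wrapper around time.sleep (a sketch, assumption):
import time

def wait(seconds):
    time.sleep(seconds)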
    def test_all_datatypes_read(self):
        """
        Test that, after COPYing a CSV file to a table containing all CQL
        datatypes, that the table contains the same values as the CSV by:

        - creating a table containing all datatypes,
        - writing a corresponding CSV file containing each datatype,
        - COPYing the CSV file into the table, and
        - asserting that the CSV file contains the same data as the table.
        """
        self.all_datatypes_prepare()

        self.tempfile = NamedTemporaryFile(delete=False)

        with open(self.tempfile.name, 'w') as csvfile:
            writer = csv.writer(csvfile)
            # serialize the blob bytearray in a friendly hex format
            data_set = list(self.data)
            data_set[2] = '0x{}'.format(''.join('%02x' % c for c in self.data[2]))
            writer.writerow(data_set)

        debug('Importing from csv file: {name}'.format(name=self.tempfile.name))
        self.node1.run_cqlsh(cmds="COPY ks.testdatatype FROM '{name}'".format(name=self.tempfile.name))

        results = list(self.session.execute("SELECT * FROM testdatatype"))

        self.assertCsvResultEqual(self.tempfile.name, results)
    def test_all_datatypes_round_trip(self):
        """
        Test that a table containing all CQL datatypes successfully round-trips
        to and from a CSV file via COPY by:

        - creating and populating a table containing every datatype,
        - COPYing that table to a CSV file,
        - SELECTing the contents of the table,
        - TRUNCATEing the table,
        - COPYing the written CSV file back into the table, and
        - asserting that the previously-SELECTed contents of the table match the
        current contents of the table.
        """
        self.all_datatypes_prepare()

        insert_statement = self.session.prepare(
            """INSERT INTO testdatatype (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""")
        self.session.execute(insert_statement, self.data)

        self.tempfile = NamedTemporaryFile(delete=False)
        debug('Exporting to csv file: {name}'.format(name=self.tempfile.name))
        self.node1.run_cqlsh(cmds="COPY ks.testdatatype TO '{name}'".format(name=self.tempfile.name))

        exported_results = list(self.session.execute("SELECT * FROM testdatatype"))

        self.session.execute('TRUNCATE ks.testdatatype')

        self.node1.run_cqlsh(cmds="COPY ks.testdatatype FROM '{name}'".format(name=self.tempfile.name))

        imported_results = list(self.session.execute("SELECT * FROM testdatatype"))

        assert len(imported_results) == 1

        self.assertEqual(exported_results, imported_results)
    def changes_while_node_toggle_test(self):
        """
        makes schema changes while a node is down.

        Bring down 1 and change 2. 
        Bring down 2, bring up 1, and finally bring up 2. 
        1 should get the changes. 
        """
        debug("changes_while_node_toggle_test()")
        cluster = self.cluster
        cluster.populate(2).start()
        [node1, node2] = cluster.nodelist()
        wait(2)
        cursor = self.cql_connection(node1).cursor()

        cursor = self.cql_connection(node2).cursor()
        self.prepare_for_changes(cursor, namespace="ns2")
        node1.stop()
        wait(2)
        self.make_schema_changes(cursor, namespace="ns2")
        wait(2)
        node2.stop()
        wait(2)
        node1.start()
        node2.start()
        wait(20)
        self.validate_schema_consistent(node1)
    def stop_commit_failure_policy_test(self):
        """ Test the stop_commit commitlog failure policy """
        self.prepare(configuration={
            'commit_failure_policy': 'stop_commit'
        })

        self.session1.execute("""
            INSERT INTO test (key, col1) VALUES (2, 2);
        """)

        self._provoke_commitlog_failure()
        failure = self.node1.grep_log("Failed .+ commit log segments. Commit disk failure policy is stop_commit; terminating thread")
        debug(failure)
        self.assertTrue(failure, "Cannot find the commitlog failure message in logs")
        self.assertTrue(self.node1.is_running(), "Node1 should still be running")

        # Cannot write anymore after the failure
        with self.assertRaises((OperationTimedOut, WriteTimeout)):
            self.session1.execute("""
              INSERT INTO test (key, col1) VALUES (2, 2);
            """)

        # Should be able to read
        assert_one(
            self.session1,
            "SELECT * FROM test where key=2;",
            [2, 2]
        )
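
# A hedged sketch of the assert_one helper used above: run the query and
# assert it returns exactly the one expected row (the real dtest helper may
# differ in details such as error formatting):
def assert_one(session, query, expected):
    rows = list(session.execute(query))
    assert len(rows) == 1, 'Expected 1 row, got {}'.format(rows)
    assert list(rows[0]) == expected, \
        'Expected {}, got {}'.format(expected, list(rows[0]))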
    def _increment_counters(self, opcount=25000):
        debug("performing {opcount} counter increments".format(opcount=opcount))
        session = self.patient_cql_connection(self.node2, protocol_version=self.protocol_version)
        session.execute("use upgrade;")

        update_counter_query = ("UPDATE countertable SET c = c + 1 WHERE k1='{key1}' and k2={key2}")

        self.expected_counts = {}
        for i in range(10):
            self.expected_counts[uuid.uuid4()] = defaultdict(int)

        fail_count = 0

        for i in range(opcount):
            key1 = random.choice(self.expected_counts.keys())
            key2 = random.randint(1, 10)
            try:
                query = SimpleStatement(update_counter_query.format(key1=key1, key2=key2), consistency_level=ConsistencyLevel.ALL)
                session.execute(query)
            except WriteTimeout:
                fail_count += 1
            else:
                self.expected_counts[key1][key2] += 1
            if fail_count > 100:
                break

        self.assertLess(fail_count, 100, "Too many counter increment failures")
    def test_correct_dc_rack_in_nodetool_info(self):
        """
        @jira_ticket CASSANDRA-10382

        Test that nodetool info returns the correct rack and dc
        """

        cluster = self.cluster
        cluster.populate([2, 2])
        cluster.set_configuration_options(values={'endpoint_snitch': 'org.apache.cassandra.locator.GossipingPropertyFileSnitch'})

        for i, node in enumerate(cluster.nodelist()):
            with open(os.path.join(node.get_conf_dir(), 'cassandra-rackdc.properties'), 'w') as snitch_file:
                for line in ["dc={}".format(node.data_center), "rack=rack{}".format(i % 2)]:
                    snitch_file.write(line + os.linesep)

        cluster.start(wait_for_binary_proto=True)

        for i, node in enumerate(cluster.nodelist()):
            out, err = node.nodetool('info')
            self.assertEqual(0, len(err), err)
            debug(out)
            for line in out.split(os.linesep):
                if line.startswith('Data Center'):
                    self.assertTrue(line.endswith(node.data_center),
                                    "Expected dc {} for {} but got {}".format(node.data_center, node.address(), line.rsplit(None, 1)[-1]))
                elif line.startswith('Rack'):
                    rack = "rack{}".format(i % 2)
                    self.assertTrue(line.endswith(rack),
                                    "Expected rack {} for {} but got {}".format(rack, node.address(), line.rsplit(None, 1)[-1]))
    def create_lots_of_alters_concurrently_test(self):
        """
        create alters across multiple threads concurrently
        """
        cluster = self.cluster
        cluster.populate(3).start()

        node1, node2, node3 = cluster.nodelist()
        session = self.cql_connection(node1)
        session.execute("create keyspace lots_o_alters WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};")
        session.execute("use lots_o_alters")
        for n in range(10):
            session.execute("create table base_{0} (id uuid primary key)".format(n))
        wait(5)

        cmds = [("alter table base_{0} add c_{1} int".format(randrange(0, 10), n), ()) for n in range(500)]

        debug("executing 500 alters")
        results = execute_concurrent(session, cmds, raise_on_first_error=True, concurrency=150)

        for (success, result) in results:
            self.assertTrue(success, "didn't get success on alter: {}".format(result))

        debug("waiting for alters to propagate")
        wait(30)

        session.cluster.refresh_schema_metadata()
        table_meta = session.cluster.metadata.keyspaces["lots_o_alters"].tables
        column_ct = sum([len(table.columns) for table in table_meta.values()])

        # primary key + alters
        self.assertEqual(510, column_ct)
        self.validate_schema_consistent(node1)
        self.validate_schema_consistent(node2)
        self.validate_schema_consistent(node3)
    def test_read_old_sstables_after_upgrade(self):
        """ from 2.1 the location of sstables changed (CASSANDRA-5202), but existing sstables continue
        to be read from the old location. Verify that this works for index sstables as well as regular
        data column families (CASSANDRA-9116)
        """
        cluster = self.cluster

        # Forcing cluster version on purpose
        cluster.set_install_dir(version="2.0.12")
        if "memtable_allocation_type" in cluster._config_options:
            cluster._config_options.__delitem__("memtable_allocation_type")
        cluster.populate(1).start()

        [node1] = cluster.nodelist()
        session = self.patient_cql_connection(node1)
        self.create_ks(session, "index_upgrade", 1)
        session.execute("CREATE TABLE index_upgrade.table1 (k int PRIMARY KEY, v int)")
        session.execute("CREATE INDEX ON index_upgrade.table1(v)")
        session.execute("INSERT INTO index_upgrade.table1 (k,v) VALUES (0,0)")

        query = "SELECT * FROM index_upgrade.table1 WHERE v=0"
        assert_one(session, query, [0, 0])

        # Upgrade to the 2.1.x version
        node1.drain()
        node1.watch_log_for("DRAINED")
        node1.stop(wait_other_notice=False)
        debug("Upgrading to current version")
        self.set_node_to_current_version(node1)
        node1.start(wait_other_notice=True)

        [node1] = cluster.nodelist()
        session = self.patient_cql_connection(node1)
        debug(cluster.cassandra_version())
        assert_one(session, query, [0, 0])
    def _check_counter_values(self):
        debug("Checking counter values...")
        cursor = self.patient_cql_connection(self.node2).cursor()
        cursor.execute("use upgrade;")
        cursor.execute("SELECT c from countertable;")
        res = cursor.fetchall()[0][0]
        assert res == self.counter_val, "Counter not at expected value."
    def compaction_throughput_test(self):
        """
        Test setting compaction throughput.
        Set throughput, insert data and ensure compaction performance corresponds.
        """
        cluster = self.cluster
        cluster.populate(1).start(wait_for_binary_proto=True)
        [node1] = cluster.nodelist()

        # disableautocompaction only disables compaction for existing tables,
        # so initialize stress tables with stress first
        stress_write(node1, keycount=1)
        node1.nodetool('disableautocompaction')

        stress_write(node1, keycount=200000*cluster.data_dir_count)

        threshold = "5"
        node1.nodetool('setcompactionthroughput -- ' + threshold)

        matches = block_on_compaction_log(node1)
        stringline = matches[0]
        throughput_pattern = re.compile(r'''.*          # it doesn't matter what the line starts with
                                            =           # wait for an equals sign
                                            ([\s\d\.]*) # capture a decimal number, possibly surrounded by whitespace
                                            MB/s.*      # followed by 'MB/s'
                                         ''', re.X)

        avgthroughput = re.match(throughput_pattern, stringline).group(1).strip()
        debug(avgthroughput)

        self.assertGreaterEqual(float(threshold), float(avgthroughput))
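
# The two helpers this test leans on are not shown. Hedged sketches, assuming
# ccm's stress and watch_log_for APIs (the real helpers may pass different
# stress options and watch a more specific pattern):
def stress_write(node, keycount=100000):
    node.stress(['write', 'n={}'.format(keycount), 'no-warmup',
                 '-rate', 'threads=8'])

def block_on_compaction_log(node, timeout=300):
    # Block until a compaction-completion line appears in the log, returning
    # the matched line(s); the "Compacted ..." line carries the throughput
    # figure parsed above.
    return node.watch_log_for('Compacted', timeout=timeout)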
    def _increment_counter_value(self):
        debug("incrementing counter...")
        cursor = self.patient_cql_connection(self.node2).cursor()
        cursor.execute("use upgrade;")
        update_counter_query = "UPDATE countertable SET c = c + 1 WHERE k='www.datastax.com'"
        cursor.execute(update_counter_query)
        self.counter_val += 1
    def compaction_throughput_test(self):
        """
        Test setting compaction throughput.
        Set throughput, insert data and ensure compaction performance corresponds.
        """
        cluster = self.cluster
        cluster.populate(1).start(wait_for_binary_proto=True)
        [node1] = cluster.nodelist()

        # disableautocompaction only disables compaction for existing tables,
        # so initialize stress tables with stress first
        stress_write(node1, keycount=1)
        node1.nodetool('disableautocompaction')

        stress_write(node1, keycount=200000 * cluster.data_dir_count)

        threshold = "5"
        node1.nodetool('setcompactionthroughput -- ' + threshold)

        matches = block_on_compaction_log(node1)
        stringline = matches[0]
        units = 'MB/s' if cluster.version() < '3.6' else '(K|M|G)iB/s'
        throughput_pattern = re.compile(r'''.*           # it doesn't matter what the line starts with
                                            =            # wait for an equals sign
                                            ([\s\d\.]*)  # capture a decimal number, possibly surrounded by whitespace
                                            {}.*         # followed by units
                                         '''.format(units), re.X)

        avgthroughput = re.match(throughput_pattern, stringline).group(1).strip()
        debug(avgthroughput)

        # The throughput in the log is computed independently from the throttling, and it is measured
        # on the output files while throttling applies to the input files. So while that throughput
        # shouldn't in principle be higher than the configured limit, a bit of wiggle room is expected
        self.assertGreaterEqual(float(threshold) + 0.5, float(avgthroughput))
    def user_defined_compaction_test(self):
        """
        Test a user defined compaction task by generating a few sstables with cassandra stress
        and autocompaction disabled, and then passing a list of sstable data files directly to nodetool compact.
        Check that after compaction there is only one sstable per node directory. Make sure to use sstableutil
        to list only final files, because after a compaction some temporary files may not yet have been deleted.

        @jira_ticket CASSANDRA-11765
        """
        cluster = self.cluster
        cluster.populate(1).start(wait_for_binary_proto=True)
        [node1] = cluster.nodelist()

        # disableautocompaction only disables compaction for existing tables,
        # so initialize stress tables with stress first
        stress_write(node1, keycount=1)
        node1.nodetool('disableautocompaction')

        stress_write(node1, keycount=500000)
        node1.nodetool('flush keyspace1 standard1')

        sstable_files = ' '.join(get_sstable_data_files(node1, 'keyspace1', 'standard1'))
        debug('Compacting {}'.format(sstable_files))
        node1.nodetool('compact --user-defined {}'.format(sstable_files))

        sstable_files = get_sstable_data_files(node1, 'keyspace1', 'standard1')
        self.assertEqual(len(node1.data_directories()), len(sstable_files),
                         'Expected one sstable data file per node directory but got {}'.format(sstable_files))
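
# get_sstable_data_files is not shown here. A simplified sketch that globs
# the node's data directories for -Data.db components; per the docstring
# above, the real helper goes through sstableutil so that only final files
# are listed:
import glob
import os

def get_sstable_data_files(node, keyspace, table):
    files = []
    for data_dir in node.data_directories():
        files.extend(glob.glob(
            os.path.join(data_dir, keyspace, table + '-*', '*-Data.db')))
    return sorted(files)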
    def test_wrong_number_of_columns(self):
        """
        Test that a COPY statement will fail when trying to import from a CSV
        file with the wrong number of columns by:

        - creating a table with a single column,
        - writing a CSV file with two columns,
        - attempting to COPY the CSV file into the table, and
        - asserting that the COPY operation failed.
        """
        self.prepare()
        self.session.execute("""
            CREATE TABLE testcolumns (
                a int PRIMARY KEY,
                b int
            )""")

        data = [[1, 2, 3]]
        self.tempfile = NamedTemporaryFile(delete=False)
        write_rows_to_csv(self.tempfile.name, data)

        debug('Importing from csv file: {name}'.format(name=self.tempfile.name))
        out, err = self.node1.run_cqlsh("COPY ks.testcolumns FROM '{name}'".format(name=self.tempfile.name),
                                        return_output=True)

        self.assertFalse(self.session.execute("SELECT * FROM testcolumns"))
        self.assertIn('Aborting import', err)
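
# A hedged sketch of write_rows_to_csv as used above (assumption: it simply
# dumps the rows through csv.writer):
import csv

def write_rows_to_csv(filename, data):
    with open(filename, 'wb') as csvfile:  # binary mode for the py2 csv module
        writer = csv.writer(csvfile)
        for row in data:
            writer.writerow(row)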
    def create_lots_of_mv_concurrently_test(self):
        """
        create materialized views across multiple threads concurrently
        """
        cluster = self.cluster
        cluster.populate(3).start()
        node1, node2, node3 = cluster.nodelist()
        session = self.cql_connection(node1)
        session.execute("create keyspace lots_o_views WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};")
        session.execute("use lots_o_views")
        wait(10)
        session.execute("create table source_data (id uuid primary key, c1 int, c2 int, c3 int, c4 int, c5 int, c6 int, c7 int, c8 int, c9 int, c10 int);")
        insert_stmt = session.prepare("insert into source_data (id, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10) values (uuid(), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);")
        wait(10)
        for n in range(4000):
            session.execute(insert_stmt, [n] * 10)

        wait(10)
        for n in range(1, 11):
            session.execute(("CREATE MATERIALIZED VIEW src_by_c{0} AS SELECT * FROM source_data "
                             "WHERE c{0} IS NOT NULL AND id IS NOT NULL PRIMARY KEY (c{0}, id)".format(n)))
            session.cluster.control_connection.wait_for_schema_agreement()

        debug("waiting for indexes to fill in")
        wait(60)
        result = list(session.execute(("SELECT * FROM system_schema.views "
                                       "WHERE keyspace_name='lots_o_views' AND base_table_name='source_data' ALLOW FILTERING")))
        self.assertEqual(10, len(result), "missing some mv from source_data table")

        for n in range(1, 11):
            result = list(session.execute("select * from src_by_c{0}".format(n)))
            self.assertEqual(4000, len(result))
    def test_6924_dropping_cf(self):
        """Tests CASSANDRA-6924

        Data inserted immediately after dropping and recreating an
        indexed column family is not included in the index.
        """
        # Reproducing requires at least 3 nodes:
        cluster = self.cluster
        cluster.populate(3).start()
        node1, node2, node3 = cluster.nodelist()
        conn = self.patient_cql_connection(node1)
        session = conn
        self.create_ks(session, "ks", 1)

        # This only occurs when dropping and recreating with
        # the same name, so loop through this test a few times:
        for i in range(10):
            debug("round %s" % i)
            try:
                session.execute("DROP COLUMNFAMILY ks.cf")
            except InvalidRequest:
                pass

            session.execute("CREATE TABLE ks.cf (key text PRIMARY KEY, col1 text);")
            session.execute("CREATE INDEX on ks.cf (col1);")

            for r in range(10):
                stmt = "INSERT INTO ks.cf (key, col1) VALUES ('%s','asdf');" % r
                session.execute(stmt)

            self.wait_for_schema_agreement(session)

            rows = session.execute("select count(*) from ks.cf WHERE col1='asdf'")
            count = rows[0][0]
            self.assertEqual(count, 10)
    def test_tuple_data(self):
        """
        Tests the COPY TO command with the tuple datatype by:

        - populating a table with tuples of uuids,
        - exporting the table to a CSV file with COPY TO,
        - comparing the CSV file to the SELECTed contents of the table.
        """
        self.prepare()
        self.session.execute("""
            CREATE TABLE testtuple (
                a int primary key,
                b tuple<uuid, uuid, uuid>
            )""")

        insert_statement = self.session.prepare("INSERT INTO testtuple (a, b) VALUES (?, ?)")
        args = [(i, random_list(gen=uuid4, n=3)) for i in range(1000)]
        execute_concurrent_with_args(self.session, insert_statement, args)

        results = list(self.session.execute("SELECT * FROM testtuple"))

        self.tempfile = NamedTemporaryFile(delete=False)
        debug('Exporting to csv file: {name}'.format(name=self.tempfile.name))
        self.node1.run_cqlsh(cmds="COPY ks.testtuple TO '{name}'".format(name=self.tempfile.name))

        self.assertCsvResultEqual(self.tempfile.name, results)
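
# random_list is not defined in this excerpt. A minimal sketch matching the
# call above (assumption: it returns a list of n values from the generator):
def random_list(gen, n):
    return [gen() for _ in range(n)]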
    def upgrade_to_version(self, version_meta, partial=False, nodes=None):
        """
        Upgrade nodes. If *partial* is True, upgrade only the nodes
        specified by *nodes*; otherwise ignore *nodes* and upgrade all nodes.
        """
        debug('Upgrading {nodes} to {version}'.format(nodes=[n.name for n in nodes] if nodes is not None else 'all nodes', version=version_meta.version))
        switch_jdks(version_meta.java_version)
        debug("JAVA_HOME: " + os.environ.get('JAVA_HOME'))
        if not partial:
            nodes = self.cluster.nodelist()

        for node in nodes:
            debug('Shutting down node: ' + node.name)
            node.drain()
            node.watch_log_for("DRAINED")
            node.stop(wait_other_notice=False)

        for node in nodes:
            node.set_install_dir(version=version_meta.version)
            debug("Set new cassandra dir for %s: %s" % (node.name, node.get_install_dir()))

        # hacky? yes. We could probably extend ccm to allow this publicly.
        # the topology file needs to be written before any nodes are started
        # otherwise they won't be grouped into dc's properly for multi-dc tests
        self.cluster._Cluster__update_topology_files()

        # Restart nodes on new version
        for node in nodes:
            debug('Starting %s on new version (%s)' % (node.name, version_meta.version))
            # Setup log4j / logback again (necessary moving from 2.0 -> 2.1):
            node.set_log_level("INFO")
            node.start(wait_other_notice=True, wait_for_binary_proto=True)
            node.nodetool('upgradesstables -a')
    def non_default_delimiter_template(self, delimiter):
        """
        @param delimiter the delimiter to use for the CSV file.

        Test exporting to CSV files using delimiters other than ',' by:

        - populating a table with integers,
        - exporting to a CSV file, specifying a delimiter, then
        - comparing the contents of the csv file to the SELECTed contents of the table.
        """

        self.prepare()
        self.session.execute("""
            CREATE TABLE testdelimiter (
                a int primary key
            )""")
        insert_statement = self.session.prepare("INSERT INTO testdelimiter (a) VALUES (?)")
        args = [(i,) for i in range(10000)]
        execute_concurrent_with_args(self.session, insert_statement, args)

        results = list(self.session.execute("SELECT * FROM testdelimiter"))

        self.tempfile = NamedTemporaryFile(delete=False)
        debug('Exporting to csv file: {name}'.format(name=self.tempfile.name))
        cmds = "COPY ks.testdelimiter TO '{name}'".format(name=self.tempfile.name)
        cmds += " WITH DELIMITER = '{d}'".format(d=delimiter)
        self.node1.run_cqlsh(cmds=cmds)

        self.assertCsvResultEqual(self.tempfile.name, results)
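
    # Hypothetical concrete tests driving the template above (the names and
    # delimiters are assumptions; any delimiter COPY accepts should work):
    def test_colon_delimiter(self):
        self.non_default_delimiter_template(':')

    def test_letter_delimiter(self):
        self.non_default_delimiter_template('a')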
    def quorum_available_during_failure_test(self):
        CL = ConsistencyLevel.QUORUM
        RF = 3

        debug("Creating a ring")
        cluster = self.cluster
        if DISABLE_VNODES:
            cluster.populate(3).start()
        else:
            tokens = cluster.balanced_tokens(3)
            cluster.populate(3, tokens=tokens).start()
        node1, node2, node3 = cluster.nodelist()

        debug("Set to talk to node 2")
        session = self.patient_cql_connection(node2)
        self.create_ks(session, "ks", RF)
        create_c1c2_table(self, session)

        debug("Generating some data")
        insert_c1c2(session, n=100, consistency=CL)

        debug("Taking down node1")
        node1.stop(wait_other_notice=True)

        debug("Reading back data.")
        for n in xrange(100):
            query_c1c2(session, n, CL)
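
# Hedged sketches of the c1c2 helpers used by several tests in this excerpt
# (assumptions: a cf(key, c1, c2) table created by create_c1c2_table and
# 'k<i>'/'value1'/'value2' style data; details may differ):
from cassandra import ConsistencyLevel
from cassandra.query import SimpleStatement

def insert_c1c2(session, n=None, keys=None, consistency=ConsistencyLevel.QUORUM):
    if keys is None:
        keys = range(n)
    for i in keys:
        session.execute(SimpleStatement(
            "INSERT INTO cf (key, c1, c2) VALUES ('k%d', 'value1', 'value2')" % i,
            consistency_level=consistency))

def query_c1c2(session, n, consistency=ConsistencyLevel.QUORUM):
    rows = list(session.execute(SimpleStatement(
        "SELECT c1, c2 FROM cf WHERE key='k%d'" % n,
        consistency_level=consistency)))
    assert len(rows) == 1 and list(rows[0]) == ['value1', 'value2'], rows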
    def make_schema_changes(self, cursor, namespace="ns1"):
        """
        makes a heap of changes.

        create keyspace
        drop keyspace
        create column family
        drop column family
        update column family
        drop index
        create index (modify column family and add a key)
        rebuild index (via jmx)
        set default_validation_class
        """
        debug("make_schema_changes() " + str(namespace))
        cursor.execute("USE ks_%s" % namespace)
        # drop keyspace
        cursor.execute("DROP KEYSPACE ks2_%s" % namespace)
        wait(2)

        # create keyspace
        query = (
            """CREATE KEYSPACE ks3_%s WITH strategy_class=SimpleStrategy AND
                strategy_options:replication_factor=2"""
            % namespace
        )
        cursor.execute(query)

        wait(2)
        # drop column family
        cursor.execute("DROP COLUMNFAMILY cf2_%s" % namespace)

        # create column family
        query = """
            CREATE TABLE cf3_%s (
                col1 uuid PRIMARY KEY,
                col2 text,
                col3 text,
                col4 text
            );
        """ % (
            namespace
        )
        cursor.execute(query)

        # alter column family
        query = (
            """
            ALTER COLUMNFAMILY cf_%s
            ADD col4 text;
        """
            % namespace
        )
        cursor.execute(query)

        # add index
        cursor.execute("CREATE INDEX index2_%s ON cf_%s(col3)" % (namespace, namespace))

        # remove an index
        cursor.execute("DROP INDEX index_%s" % namespace)
    def changes_while_node_down_test(self):
        """
        Makes schema changes while a node is down.

        Make schema changes via node 2 while node 1 is down, then bring the
        nodes back up and make sure node 1 gets the changes.
        """
        debug("changes_while_node_down_test()")
        cluster = self.cluster
        cluster.populate(2).start()
        [node1, node2] = cluster.nodelist()
        wait(2)
        cursor = self.cql_connection(node1).cursor()

        cursor = self.cql_connection(node2).cursor()
        self.prepare_for_changes(cursor, namespace="ns2")
        node1.stop()
        wait(2)
        self.make_schema_changes(cursor, namespace="ns2")
        wait(2)
        node2.stop()
        wait(2)
        node1.start()
        node2.start()
        wait(20)
        self.validate_schema_consistent(node1)
    def sstable_repairedset_test(self):
        """
        * Launch a two node cluster
        * Insert data with stress
        * Stop node2
        * Run sstablerepairedset against node2
        * Start node2
        * Run sstablemetadata on both nodes, pipe to a file
        * Verify the output of sstablemetadata shows no repairs have occurred
        * Stop node1
        * Insert more data with stress
        * Start node1
        * Issue an incremental repair
        * Run sstablemetadata on both nodes again, pipe to a new file
        * Verify repairs occurred and repairedAt was updated
        """
        cluster = self.cluster
        cluster.set_configuration_options(
            values={'hinted_handoff_enabled': False})
        cluster.populate(2).start()
        node1, node2 = cluster.nodelist()
        node1.stress([
            'write', 'n=10K', 'no-warmup', '-schema', 'replication(factor=2)',
            'compaction(strategy=SizeTieredCompactionStrategy,enabled=false)',
            '-rate', 'threads=50'
        ])

        node1.flush()
        node2.flush()

        node2.stop(gently=False)

        node2.run_sstablerepairedset(keyspace='keyspace1')
        node2.start(wait_for_binary_proto=True)

        initialOut1 = node1.run_sstablemetadata(keyspace='keyspace1').stdout
        initialOut2 = node2.run_sstablemetadata(keyspace='keyspace1').stdout

        matches = findall('(?<=Repaired at:).*',
                          '\n'.join([initialOut1, initialOut2]))
        debug("Repair timestamps are: {}".format(matches))

        uniquematches = set(matches)
        matchcount = Counter(matches)

        self.assertGreaterEqual(len(uniquematches), 2, uniquematches)

        self.assertGreaterEqual(max(matchcount.values()), 1, matchcount)

        self.assertIn('Repaired at: 0', '\n'.join([initialOut1, initialOut2]))

        node1.stop()
        node2.stress([
            'write', 'n=15K', 'no-warmup', '-schema', 'replication(factor=2)'
        ])
        node2.flush()
        node1.start(wait_for_binary_proto=True)

        if cluster.version() >= "2.2":
            node1.repair()
        else:
            node1.nodetool("repair -par -inc")

        if cluster.version() >= '4.0':
            # sstables are compacted out of pending repair by a compaction
            for node in cluster.nodelist():
                node.nodetool('compact keyspace1 standard1')

        finalOut1 = node1.run_sstablemetadata(keyspace='keyspace1').stdout
        finalOut2 = node2.run_sstablemetadata(keyspace='keyspace1').stdout

        matches = findall('(?<=Repaired at:).*',
                          '\n'.join([finalOut1, finalOut2]))

        debug(matches)

        uniquematches = set(matches)
        matchcount = Counter(matches)

        self.assertGreaterEqual(len(uniquematches), 2)

        self.assertGreaterEqual(max(matchcount.values()), 2)

        self.assertNotIn('Repaired at: 0', '\n'.join([finalOut1, finalOut2]))
    def schema_changes_test(self):
        """
        @jira_ticket CASSANDRA-10328
        Creating, updating and dropping a keyspace, a table and a materialized view
        will generate the correct schema change notifications.
        """

        self.cluster.populate(2).start(wait_for_binary_proto=True)
        node1, node2 = self.cluster.nodelist()

        session = self.patient_cql_connection(node1)
        waiter = NotificationWaiter(self,
                                    node2, ["SCHEMA_CHANGE"],
                                    keyspace='ks')

        self.create_ks(session, 'ks', 3)
        session.execute("create TABLE t (k int PRIMARY KEY , v int)")
        session.execute("alter TABLE t add v1 int;")

        session.execute(
            "create MATERIALIZED VIEW mv as select * from t WHERE v IS NOT NULL AND t IS NOT NULL PRIMARY KEY (v, k)"
        )
        session.execute(
            " alter materialized view mv with min_index_interval = 100")

        session.execute("drop MATERIALIZED VIEW mv")
        session.execute("drop TABLE t")
        session.execute("drop KEYSPACE ks")

        debug("Waiting for notifications from {}".format(waiter.address, ))
        notifications = waiter.wait_for_notifications(timeout=60.0,
                                                      num_notifications=14)
        self.assertEqual(14, len(notifications))
        self.assertDictContainsSubset(
            {
                'change_type': u'CREATED',
                'target_type': u'KEYSPACE'
            }, notifications[0])
        self.assertDictContainsSubset(
            {
                'change_type': u'UPDATED',
                'target_type': u'KEYSPACE'
            }, notifications[1])
        self.assertDictContainsSubset(
            {
                'change_type': u'CREATED',
                'target_type': u'TABLE',
                u'table': u't'
            }, notifications[2])
        self.assertDictContainsSubset(
            {
                'change_type': u'UPDATED',
                'target_type': u'KEYSPACE'
            }, notifications[3])
        self.assertDictContainsSubset(
            {
                'change_type': u'UPDATED',
                'target_type': u'TABLE',
                u'table': u't'
            }, notifications[4])
        self.assertDictContainsSubset(
            {
                'change_type': u'UPDATED',
                'target_type': u'KEYSPACE'
            }, notifications[5])
        self.assertDictContainsSubset(
            {
                'change_type': u'CREATED',
                'target_type': u'TABLE',
                u'table': u'mv'
            }, notifications[6])
        self.assertDictContainsSubset(
            {
                'change_type': u'UPDATED',
                'target_type': u'KEYSPACE'
            }, notifications[7])
        self.assertDictContainsSubset(
            {
                'change_type': u'UPDATED',
                'target_type': u'TABLE',
                u'table': u'mv'
            }, notifications[8])
        self.assertDictContainsSubset(
            {
                'change_type': u'UPDATED',
                'target_type': u'KEYSPACE'
            }, notifications[9])
        self.assertDictContainsSubset(
            {
                'change_type': u'DROPPED',
                'target_type': u'TABLE',
                u'table': u'mv'
            }, notifications[10])
        self.assertDictContainsSubset(
            {
                'change_type': u'UPDATED',
                'target_type': u'KEYSPACE'
            }, notifications[11])
        self.assertDictContainsSubset(
            {
                'change_type': u'DROPPED',
                'target_type': u'TABLE',
                u'table': u't'
            }, notifications[12])
        self.assertDictContainsSubset(
            {
                'change_type': u'DROPPED',
                'target_type': u'KEYSPACE'
            }, notifications[13])
    def multiple_subsequent_repair_test(self):
        """
        @jira_ticket CASSANDRA-8366

        There is an issue with subsequent inc repairs increasing load size.
        So we perform several repairs and check that the expected amount of data exists.
        * Launch a three node cluster
        * Write 5M rows with stress
        * Wait for minor compactions to finish
        * Issue an incremental repair on each node, sequentially
        * Issue major compactions on each node
        * Sleep for a while so load size can be propagated between nodes
        * Verify the correct amount of data is on each node
        """
        cluster = self.cluster
        cluster.populate(3).start()
        node1, node2, node3 = cluster.nodelist()

        debug("Inserting data with stress")
        node1.stress([
            'write', 'n=5M', 'no-warmup', '-rate', 'threads=10', '-schema',
            'replication(factor=3)'
        ])

        debug("Flushing nodes")
        cluster.flush()

        debug("Waiting compactions to finish")
        cluster.wait_for_compactions()

        if self.cluster.version() >= '2.2':
            debug("Repairing node1")
            node1.nodetool("repair")
            debug("Repairing node2")
            node2.nodetool("repair")
            debug("Repairing node3")
            node3.nodetool("repair")
        else:
            debug("Repairing node1")
            node1.nodetool("repair -par -inc")
            debug("Repairing node2")
            node2.nodetool("repair -par -inc")
            debug("Repairing node3")
            node3.nodetool("repair -par -inc")

        # Using "print" instead of debug() here is on purpose.  The compactions
        # take a long time and don't print anything by default, which can result
        # in the test being timed out after 20 minutes.  These print statements
        # prevent it from being timed out.
        print "compacting node1"
        node1.compact()
        print "compacting node2"
        node2.compact()
        print "compacting node3"
        node3.compact()

        # wait some time to be sure the load size is propagated between nodes
        debug("Waiting for load size info to be propagated between nodes")
        time.sleep(45)

        load_size_in_kb = float(
            sum(map(lambda n: n.data_size(), [node1, node2, node3])))
        load_size = load_size_in_kb / 1024 / 1024
        debug("Total Load size: {}GB".format(load_size))

        # There is still some overhead, but it's a lot better. We tolerate 25%.
        expected_load_size = 4.5  # In GB
        assert_almost_equal(load_size, expected_load_size, error=0.25)
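
# A hedged sketch of assert_almost_equal with a relative error tolerance, as
# used above (assumption: the smallest value must be within `error` of the
# largest):
def assert_almost_equal(*args, **kwargs):
    error = kwargs.get('error', 0.16)
    vmax, vmin = max(args), min(args)
    assert vmin > vmax * (1.0 - error), \
        'values not within {:.0f}% of the max: {}'.format(error * 100, args)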
    def test_archive_and_restore_commitlog_repeatedly(self):
        """
        @jira_ticket CASSANDRA-10593
        Run archive commit log restoration test repeatedly to make sure it is idempotent
        and doesn't fail if done repeatedly
        """

        cluster = self.cluster
        cluster.populate(1)
        node1 = cluster.nodelist()[0]

        # Create a temp directory for storing commitlog archives:
        tmp_commitlog = safe_mkdtemp()
        debug("tmp_commitlog: {}".format(tmp_commitlog))

        # Edit commitlog_archiving.properties and set an archive
        # command:
        replace_in_file(
            os.path.join(node1.get_path(), 'conf',
                         'commitlog_archiving.properties'),
            [(r'^archive_command=.*$',
              'archive_command=ln %path {tmp_commitlog}/%name'.format(
                  tmp_commitlog=tmp_commitlog)),
             (r'^restore_command=.*$', 'restore_command=cp -f %from %to'),
             (r'^restore_directories=.*$',
              'restore_directories={tmp_commitlog}'.format(
                  tmp_commitlog=tmp_commitlog))])

        cluster.start(wait_for_binary_proto=True)

        debug("Creating initial connection")
        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 1)
        session.execute(
            'CREATE TABLE ks.cf ( key bigint PRIMARY KEY, val text);')
        debug("Writing 30,000 rows...")
        self.insert_rows(session, 0, 60000)

        try:
            # Check that there is at least one commit log backed up that
            # is not one of the active commit logs:
            commitlog_dir = os.path.join(node1.get_path(), 'commitlogs')
            debug("node1 commitlog dir: " + commitlog_dir)

            cluster.flush()

            self.assertGreater(
                len(
                    set(os.listdir(tmp_commitlog)) -
                    set(os.listdir(commitlog_dir))), 0)

            debug("Flushing and doing first restart")
            cluster.compact()
            node1.drain()
            # restart the node which causes the active commitlogs to be archived
            node1.stop()
            node1.start(wait_for_binary_proto=True)

            debug("Stopping and second restart")
            node1.stop()
            node1.start(wait_for_binary_proto=True)

            # Shouldn't be any additional data since it's replaying the same stuff repeatedly
            session = self.patient_cql_connection(node1)

            rows = session.execute('SELECT count(*) from ks.cf')
            self.assertEqual(rows[0][0], 60000)
        finally:
            debug("removing tmp_commitlog: " + tmp_commitlog)
            shutil.rmtree(tmp_commitlog)
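
# A hedged sketch of the replace_in_file helper used above: rewrite the file
# line by line, applying each (pattern, replacement) regex pair (assumption):
import re

def replace_in_file(filepath, replacements):
    with open(filepath) as f:
        lines = f.readlines()
    with open(filepath, 'w') as f:
        for line in lines:
            for pattern, replacement in replacements:
                line = re.sub(pattern, replacement, line)
            f.write(line)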
    def multiple_repair_test(self):
        """
        * Launch a three node cluster
        * Create a keyspace with RF 3 and a table
        * Insert 49 rows
        * Stop node3
        * Insert 50 more rows
        * Restart node3
        * Issue an incremental repair on node3
        * Stop node2
        * Insert a final 50 rows
        * Restart node2
        * Issue an incremental repair on node2
        * Replace node3 with a new node
        * Verify data integrity
        # TODO: Several more verifications of data need to be interspersed throughout the test. The final assertion is insufficient.
        @jira_ticket CASSANDRA-10644
        """
        cluster = self.cluster
        cluster.populate(3).start()
        node1, node2, node3 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 3)
        create_cf(session,
                  'cf',
                  read_repair=0.0,
                  columns={
                      'c1': 'text',
                      'c2': 'text'
                  })

        debug("insert data")

        insert_c1c2(session,
                    keys=range(1, 50),
                    consistency=ConsistencyLevel.ALL)
        node1.flush()

        debug("bringing down node 3")
        node3.flush()
        node3.stop(gently=False)

        debug("inserting additional data into node 1 and 2")
        insert_c1c2(session,
                    keys=range(50, 100),
                    consistency=ConsistencyLevel.TWO)
        node1.flush()
        node2.flush()

        debug("restarting and repairing node 3")
        node3.start(wait_for_binary_proto=True)

        if cluster.version() >= "2.2":
            node3.repair()
        else:
            node3.nodetool("repair -par -inc")

        # wait stream handlers to be closed on windows
        # after session is finished (See CASSANDRA-10644)
        if is_win:
            time.sleep(2)

        debug("stopping node 2")
        node2.stop(gently=False)

        debug("inserting data in nodes 1 and 3")
        insert_c1c2(session,
                    keys=range(100, 150),
                    consistency=ConsistencyLevel.TWO)
        node1.flush()
        node3.flush()

        debug("start and repair node 2")
        node2.start(wait_for_binary_proto=True)

        if cluster.version() >= "2.2":
            node2.repair()
        else:
            node2.nodetool("repair -par -inc")

        debug("replace node and check data integrity")
        node3.stop(gently=False)
        node5 = Node('node5', cluster, True, ('127.0.0.5', 9160),
                     ('127.0.0.5', 7000), '7500', '0', None,
                     ('127.0.0.5', 9042))
        cluster.add(node5, False)
        node5.start(replace_address='127.0.0.3', wait_other_notice=True)

        assert_one(session, "SELECT COUNT(*) FROM ks.cf LIMIT 200", [149])
    def test_friendly_unrecognized_table_handling(self):
        """
        After upgrading one of two nodes, create a new table (which will
        not be propagated to the old node) and check that queries against
        that table result in user-friendly warning logs.
        """
        cluster = self.cluster
        cluster.populate(2)
        cluster.start()

        node1, node2 = cluster.nodelist()
        original_version = node1.get_cassandra_version()
        if original_version.vstring.startswith('2.0'):
            upgraded_version = 'git:cassandra-2.1'
        elif original_version.vstring.startswith('2.1'):
            upgraded_version = 'git:cassandra-2.2'
        else:
            self.skip(
                "This test is only designed to work with 2.0 and 2.1 right now"
            )

        # start out one major version behind the version we upgrade to

        # upgrade node1
        node1.stop()
        node1.set_install_dir(version=upgraded_version)
        debug("Set new cassandra dir for %s: %s" %
              (node1.name, node1.get_install_dir()))

        node1.set_log_level("INFO")
        node1.start()

        session = self.patient_exclusive_cql_connection(node1)
        session.cluster.max_schema_agreement_wait = -1  # don't wait for schema agreement

        debug("Creating keyspace and table")
        session.execute(
            "CREATE KEYSPACE test_upgrades WITH replication={'class': 'SimpleStrategy', 'replication_factor': '2'}"
        )
        session.execute(
            "CREATE TABLE test_upgrades.foo (a int primary key, b int)")

        pattern = r".*Got .* command for nonexistent table test_upgrades.foo.*"

        try:
            session.execute(
                SimpleStatement("SELECT * FROM test_upgrades.foo",
                                consistency_level=ConsistencyLevel.ALL))
            self.fail("expected failure")
        except (ReadTimeout, OperationTimedOut):
            debug("Checking node2 for warning in log")
            node2.watch_log_for(pattern, timeout=10)

        # non-paged range slice
        try:
            session.execute(
                SimpleStatement("SELECT * FROM test_upgrades.foo",
                                consistency_level=ConsistencyLevel.ALL,
                                fetch_size=None))
            self.fail("expected failure")
        except (ReadTimeout, OperationTimedOut):
            debug("Checking node2 for warning in log")
            pattern = r".*Got .* command for nonexistent table test_upgrades.foo.*"
            node2.watch_log_for(pattern, timeout=10)

        # single-partition slice
        try:
            for i in range(20):
                session.execute(
                    SimpleStatement(
                        "SELECT * FROM test_upgrades.foo WHERE a = %d" % (i, ),
                        consistency_level=ConsistencyLevel.ALL,
                        fetch_size=None))
            self.fail("expected failure")
        except (ReadTimeout, OperationTimedOut):
            debug("Checking node2 for warning in log")
            pattern = r".*Got .* command for nonexistent table test_upgrades.foo.*"
            node2.watch_log_for(pattern, timeout=10)
    def simple_bootstrap_test(self):
        cluster = self.cluster
        tokens = cluster.balanced_tokens(2)
        cluster.set_configuration_options(values={'num_tokens': 1})

        debug("[node1, node2] tokens: %r" % (tokens,))

        keys = 10000

        # Create a single node cluster
        cluster.populate(1)
        node1 = cluster.nodelist()[0]
        node1.set_configuration_options(values={'initial_token': tokens[0]})
        cluster.start(wait_other_notice=True)

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 1)
        self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

        # record the size before inserting any of our own data
        empty_size = node1.data_size()
        debug("node1 empty size : %s" % float(empty_size))

        insert_statement = session.prepare("INSERT INTO ks.cf (key, c1, c2) VALUES (?, 'value1', 'value2')")
        execute_concurrent_with_args(session, insert_statement, [['k%d' % k] for k in range(keys)])

        node1.flush()
        node1.compact()
        initial_size = node1.data_size()
        debug("node1 size before bootstrapping node2: %s" % float(initial_size))

        # Read the inserted data throughout the bootstrap process; we
        # shouldn't see any errors.
        reader = self.go(lambda _: query_c1c2(session, random.randint(0, keys - 1), ConsistencyLevel.ONE))

        # Bootstrapping a new node
        node2 = new_node(cluster)
        node2.set_configuration_options(values={'initial_token': tokens[1]})
        node2.start(wait_for_binary_proto=True)
        node2.compact()

        reader.check()
        node1.cleanup()
        debug("node1 size after cleanup: %s" % float(node1.data_size()))
        node1.compact()
        debug("node1 size after compacting: %s" % float(node1.data_size()))
        time.sleep(.5)
        reader.check()

        debug("node2 size after compacting: %s" % float(node2.data_size()))

        size1 = float(node1.data_size())
        size2 = float(node2.data_size())
        assert_almost_equal(size1, size2, error=0.3)
        assert_almost_equal(float(initial_size - empty_size), 2 * (size1 - float(empty_size)))
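
# assert_almost_equal() above comes from the dtest assertion helpers. A
# minimal sketch with the assumed semantics (all values must fall within a
# relative error of the largest one) looks like this:
def assert_almost_equal_sketch(*args, **kwargs):
    # assumed default tolerance; the test above passes error=0.3 explicitly
    error = kwargs.get('error', 0.16)
    vmax, vmin = max(args), min(args)
    assert vmin >= vmax * (1.0 - error), \
        "values not within %d%% of the max: %s" % (error * 100, args)

assert_almost_equal_sketch(100.0, 75.0, error=0.3)  # within 30 percent: passes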
    def test_compactionstats(self):
        """
        @jira_ticket CASSANDRA-10504
        @jira_ticket CASSANDRA-10427

        Test that jmx MBean used by nodetool compactionstats
        properly updates the progress of a compaction
        """

        cluster = self.cluster
        cluster.populate(1)
        node = cluster.nodelist()[0]
        remove_perf_disable_shared_mem(node)
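        # Drop -XX:+PerfDisableSharedMem from the JVM options so the
        # Jolokia agent used below can attach over JMX.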
        cluster.start(wait_for_binary_proto=True)

        # Run a quick stress command to create the keyspace and table
        node.stress(['write', 'n=1', 'no-warmup'])
        # Disable compaction on the table
        node.nodetool('disableautocompaction keyspace1 standard1')
        node.nodetool('setcompactionthroughput 1')
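        # Throttle compaction to 1 MB/s so the major compaction below runs
        # long enough for us to sample its progress twice.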
        node.stress(['write', 'n=150K', 'no-warmup'])
        node.flush()
        # Run a major compaction. This will be the compaction whose
        # progress we track.
        node.nodetool_process('compact')
        # We need to sleep here to give compaction time to start
        # Why not do something smarter? Because if the bug regresses,
        # we can't rely on jmx to tell us that compaction started.
        time.sleep(5)

        compaction_manager = make_mbean('db', type='CompactionManager')
        with JolokiaAgent(node) as jmx:
            progress_string = jmx.read_attribute(compaction_manager,
                                                 'CompactionSummary')[0]

            # Pause in between reads
            # to allow compaction to move forward
            time.sleep(2)

            updated_progress_string = jmx.read_attribute(
                compaction_manager, 'CompactionSummary')[0]
            summary_template = 'Compaction@{uuid}(keyspace1, standard1, {progress}/{total})bytes'
            progress = int(
                parse.search(summary_template, progress_string).named['progress'])
            updated_progress = int(
                parse.search(summary_template, updated_progress_string).named['progress'])

            debug(progress_string)
            debug(updated_progress_string)

            # We want to make sure that the progress is increasing,
            # and that values other than zero are displayed.
            self.assertGreater(updated_progress, progress)
            self.assertGreaterEqual(progress, 0)
            self.assertGreater(updated_progress, 0)

            # Block until the major compaction is complete
            # Otherwise nodetool will throw an exception
            # Give a timeout, in case compaction is broken
            # and never ends.
            start = time.time()
            max_query_timeout = 600
            debug("Waiting for compaction to finish:")
            while (jmx.read_attribute(compaction_manager, 'CompactionSummary')
                   and time.time() - start < max_query_timeout):
                debug(jmx.read_attribute(compaction_manager,
                                         'CompactionSummary'))
                time.sleep(2)
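
# Standalone illustration of the parse.search() calls above: parse (the PyPI
# "parse" library) extracts the named template fields from a summary string.
# The sample string is fabricated to match the template's shape.
import parse

_template = 'Compaction@{uuid}(keyspace1, standard1, {progress}/{total})bytes'
_sample = 'Compaction@f1f97f10(keyspace1, standard1, 1048576/4194304)bytes'
_result = parse.search(_template, _sample)
assert int(_result.named['progress']) == 1048576
assert int(_result.named['total']) == 4194304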
    def run_archive_commitlog(self,
                              restore_point_in_time=False,
                              restore_archived_commitlog=True,
                              archive_active_commitlogs=False,
                              archive_command='cp'):
        """Run archive commit log restoration test"""

        cluster = self.cluster
        cluster.populate(1)
        (node1, ) = cluster.nodelist()

        # Create a temp directory for storing commitlog archives:
        tmp_commitlog = safe_mkdtemp()
        debug("tmp_commitlog: " + tmp_commitlog)

        # Edit commitlog_archiving.properties and set an archive
        # command:
        replace_in_file(
            os.path.join(node1.get_path(), 'conf',
                         'commitlog_archiving.properties'),
            [(r'^archive_command=.*$',
              'archive_command={archive_command} %path {tmp_commitlog}/%name'.
              format(tmp_commitlog=tmp_commitlog,
                     archive_command=archive_command))])
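        # Cassandra expands %path (the fully qualified path of the segment
        # being archived) and %name (the segment's file name) in
        # archive_command before invoking it, so each flushed segment is
        # copied into tmp_commitlog.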

        cluster.start()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 1)
        session.execute(
            'CREATE TABLE ks.cf ( key bigint PRIMARY KEY, val text);')
        debug("Writing first 30,000 rows...")
        self.insert_rows(session, 0, 30000)
        # Record when this first set of inserts finished:
        insert_cutoff_times = [time.gmtime()]

        # Delete all commitlog backups so far:
        for f in glob.glob(tmp_commitlog + "/*"):
            os.remove(f)

        snapshot_dirs = self.make_snapshot(node1, 'ks', 'cf', 'basic')

        if self.cluster.version() >= '3.0':
            system_ks_snapshot_dirs = self.make_snapshot(
                node1, 'system_schema', 'keyspaces', 'keyspaces')
        else:
            system_ks_snapshot_dirs = self.make_snapshot(
                node1, 'system', 'schema_keyspaces', 'keyspaces')

        if self.cluster.version() >= '3.0':
            system_col_snapshot_dirs = self.make_snapshot(
                node1, 'system_schema', 'columns', 'columns')
        else:
            system_col_snapshot_dirs = self.make_snapshot(
                node1, 'system', 'schema_columns', 'columns')

        if self.cluster.version() >= '3.0':
            system_ut_snapshot_dirs = self.make_snapshot(
                node1, 'system_schema', 'types', 'usertypes')
        else:
            system_ut_snapshot_dirs = self.make_snapshot(
                node1, 'system', 'schema_usertypes', 'usertypes')

        if self.cluster.version() >= '3.0':
            system_cfs_snapshot_dirs = self.make_snapshot(
                node1, 'system_schema', 'tables', 'cfs')
        else:
            system_cfs_snapshot_dirs = self.make_snapshot(
                node1, 'system', 'schema_columnfamilies', 'cfs')
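        # For reference, the four version-dependent blocks above encode the
        # 2.x -> 3.0 schema-table renames:
        #   system.schema_keyspaces       -> system_schema.keyspaces
        #   system.schema_columns         -> system_schema.columns
        #   system.schema_usertypes       -> system_schema.types
        #   system.schema_columnfamilies  -> system_schema.tables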

        try:
            # Write more data:
            debug("Writing second 30,000 rows...")
            self.insert_rows(session, 30000, 60000)
            node1.flush()
            time.sleep(10)
            # Record when this second set of inserts finished:
            insert_cutoff_times.append(time.gmtime())

            debug("Writing final 5,000 rows...")
            self.insert_rows(session, 60000, 65000)
            # Record when the third set of inserts finished:
            insert_cutoff_times.append(time.gmtime())

            rows = session.execute('SELECT count(*) from ks.cf')
            # Make sure we have the same amount of rows as when we snapshotted:
            self.assertEqual(rows[0][0], 65000)

            # Check that at least one commit log has been backed up and is
            # not one of the active commit logs:
            commitlog_dir = os.path.join(node1.get_path(), 'commitlogs')
            debug("node1 commitlog dir: " + commitlog_dir)

            self.assertTrue(
                len(
                    set(os.listdir(tmp_commitlog)) -
                    set(os.listdir(commitlog_dir))) > 0)

            cluster.flush()
            cluster.compact()
            node1.drain()
            if archive_active_commitlogs:
                # restart the node, which causes the active commitlogs to be archived
                node1.stop()
                node1.start(wait_for_binary_proto=True)

            # Destroy the cluster
            cluster.stop()
            self.copy_logs(name=self.id().split(".")[0] + "_pre-restore")
            self._cleanup_cluster()
            cluster = self.cluster = self._get_cluster()
            cluster.populate(1)
            node1, = cluster.nodelist()

            # Restore schema from snapshots:
            for system_ks_snapshot_dir in system_ks_snapshot_dirs:
                if self.cluster.version() >= '3.0':
                    self.restore_snapshot(system_ks_snapshot_dir, node1,
                                          'system_schema', 'keyspaces',
                                          'keyspaces')
                else:
                    self.restore_snapshot(system_ks_snapshot_dir, node1,
                                          'system', 'schema_keyspaces',
                                          'keyspaces')
            for system_col_snapshot_dir in system_col_snapshot_dirs:
                if self.cluster.version() >= '3.0':
                    self.restore_snapshot(system_col_snapshot_dir, node1,
                                          'system_schema', 'columns',
                                          'columns')
                else:
                    self.restore_snapshot(system_col_snapshot_dir, node1,
                                          'system', 'schema_columns',
                                          'columns')
            for system_ut_snapshot_dir in system_ut_snapshot_dirs:
                if self.cluster.version() >= '3.0':
                    self.restore_snapshot(system_ut_snapshot_dir, node1,
                                          'system_schema', 'types',
                                          'usertypes')
                else:
                    self.restore_snapshot(system_ut_snapshot_dir, node1,
                                          'system', 'schema_usertypes',
                                          'usertypes')

            for system_cfs_snapshot_dir in system_cfs_snapshot_dirs:
                if self.cluster.version() >= '3.0':
                    self.restore_snapshot(system_cfs_snapshot_dir, node1,
                                          'system_schema', 'tables', 'cfs')
                else:
                    self.restore_snapshot(system_cfs_snapshot_dir, node1,
                                          'system', 'schema_columnfamilies',
                                          'cfs')
            for snapshot_dir in snapshot_dirs:
                self.restore_snapshot(snapshot_dir, node1, 'ks', 'cf', 'basic')

            cluster.start(wait_for_binary_proto=True)

            session = self.patient_cql_connection(node1)
            node1.nodetool('refresh ks cf')

            rows = session.execute('SELECT count(*) from ks.cf')
            # Make sure we have the same amount of rows as when we snapshotted:
            self.assertEqual(rows[0][0], 30000)

            # Edit commitlog_archiving.properties. Remove the archive
            # command and set a restore command and restore_directories:
            if restore_archived_commitlog:
                replace_in_file(
                    os.path.join(node1.get_path(), 'conf',
                                 'commitlog_archiving.properties'),
                    [(r'^archive_command=.*$', 'archive_command='),
                     (r'^restore_command=.*$',
                      'restore_command=cp -f %from %to'),
                     (r'^restore_directories=.*$',
                      'restore_directories={tmp_commitlog}'.format(
                          tmp_commitlog=tmp_commitlog))])

                if restore_point_in_time:
                    restore_time = time.strftime("%Y:%m:%d %H:%M:%S",
                                                 insert_cutoff_times[1])
                    replace_in_file(
                        os.path.join(node1.get_path(), 'conf',
                                     'commitlog_archiving.properties'),
                        [(r'^restore_point_in_time=.*$',
                          'restore_point_in_time={restore_time}'.format(
                              **locals()))])
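                    # Cassandra parses restore_point_in_time using the
                    # yyyy:MM:dd HH:mm:ss format, which the strftime
                    # pattern above produces.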

            debug("Restarting node1..")
            node1.stop()
            node1.start(wait_for_binary_proto=True)

            node1.nodetool('flush')
            node1.nodetool('compact')

            session = self.patient_cql_connection(node1)
            rows = session.execute('SELECT count(*) from ks.cf')
            # Now we should have 30000 rows from the snapshot + 30000 rows
            # from the commitlog backups:
            if not restore_archived_commitlog:
                self.assertEqual(rows[0][0], 30000)
            elif restore_point_in_time:
                self.assertEqual(rows[0][0], 60000)
            else:
                self.assertEqual(rows[0][0], 65000)

        finally:
            # clean up
            debug("removing snapshot_dir: " + ",".join(snapshot_dirs))
            for snapshot_dir in snapshot_dirs:
                shutil.rmtree(snapshot_dir)
            debug("removing snapshot_dir: " +
                  ",".join(system_ks_snapshot_dirs))
            for system_ks_snapshot_dir in system_ks_snapshot_dirs:
                shutil.rmtree(system_ks_snapshot_dir)
            debug("removing snapshot_dir: " +
                  ",".join(system_cfs_snapshot_dirs))
            for system_cfs_snapshot_dir in system_cfs_snapshot_dirs:
                shutil.rmtree(system_cfs_snapshot_dir)
            debug("removing snapshot_dir: " +
                  ",".join(system_ut_snapshot_dirs))
            for system_ut_snapshot_dir in system_ut_snapshot_dirs:
                shutil.rmtree(system_ut_snapshot_dir)
            debug("removing snapshot_dir: " +
                  ",".join(system_col_snapshot_dirs))
            for system_col_snapshot_dir in system_col_snapshot_dirs:
                shutil.rmtree(system_col_snapshot_dir)

            debug("removing tmp_commitlog: " + tmp_commitlog)
            shutil.rmtree(tmp_commitlog)
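
# Hypothetical sketch of the insert_rows() helper used above; the real one
# lives on the test's base class, and the exact CQL here is an assumption
# based on the ks.cf schema (key bigint PRIMARY KEY, val text):
def insert_rows(session, start, end):
    # key is a bigint, so sequential integers work directly
    for i in range(start, end):
        session.execute(
            "INSERT INTO ks.cf (key, val) VALUES (%d, 'value %d')" % (i, i))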