def test_init_with_positional_args(self):
     """
     Passing the function first and the limit second, both positionally,
     yields a correctly initialized rate-limited function.
     """
     rate_limited = funcutils.get_rate_limited_function(self.mock_func, self.mock_limit)
     self._assert_initialized_correctly_with_mocks(rate_limited)
 def test_init_with_keyword_args(self):
     """
     Passing ``limit`` and ``func`` by keyword yields a correctly
     initialized rate-limited function.
     """
     rate_limited = funcutils.get_rate_limited_function(limit=self.mock_limit, func=self.mock_func)
     self._assert_initialized_correctly_with_mocks(rate_limited)
Esempio n. 3
0
 def test_init_with_keyword_args(self):
     """
     The ``limit`` and ``func`` keyword arguments are enough to build a
     correctly initialized rate-limited function.
     """
     built_from_keywords = funcutils.get_rate_limited_function(
         limit=self.mock_limit,
         func=self.mock_func,
     )
     self._assert_initialized_correctly_with_mocks(built_from_keywords)
Esempio n. 4
0
 def test_init_with_positional_args(self):
     """
     The function followed by the limit, given positionally, is enough to
     build a correctly initialized rate-limited function.
     """
     built_from_positionals = funcutils.get_rate_limited_function(
         self.mock_func,
         self.mock_limit,
     )
     self._assert_initialized_correctly_with_mocks(built_from_positionals)
Esempio n. 5
0
def advance_to_next_cl_segment(session, commitlog_dir,
                               keyspace_name='ks', table_name='junk_table',
                               timeout=60, debug=True):
    """
    Write junk rows until the node starts using a new commitlog segment.

    This is a hack to work around problems like CASSANDRA-11811. In
    commitlog-replaying tests (such as the snapshot and CDC tests), replaying
    the first commitlog that was created also replays mutations that
    initialize system tables. Filling that first segment moves the node on to
    the next one.

    The table ``keyspace_name.table_name`` is created on ``session`` and
    loaded in batches of 1000 inserts for as long as
    ``_files_in(commitlog_dir) <= <files present at the start>`` holds. An
    assertion fails once more than ``timeout`` seconds have passed. Progress
    messages go to ``dtest.debug`` when ``debug`` is truthy and are discarded
    otherwise.
    """
    if not debug:
        def _debug(*args, **kwargs):
            """
            noop debug method
            """
    else:
        _debug = dtest.debug

    create_table_cql = (
        'CREATE TABLE {ks}.{tab} ('
        'a uuid PRIMARY KEY, b uuid, c uuid, d uuid, '
        'e uuid, f uuid, g uuid, h uuid'
        ')'
    ).format(ks=keyspace_name, tab=table_name)
    session.execute(create_table_cql)

    insert_cql = (
        'INSERT INTO {ks}.{tab} '
        '(a, b, c, d, e, f, g, h) '
        'VALUES ('
        'uuid(), uuid(), uuid(), uuid(), '
        'uuid(), uuid(), uuid(), uuid()'
        ')'
    ).format(ks=keyspace_name, tab=table_name)
    junk_insert = session.prepare(insert_cql)

    # These are the segments we need to get past.
    segments_at_start = _files_in(commitlog_dir)

    began = time.time()
    deadline = began + timeout
    throttled_debug = get_rate_limited_function(_debug, 5)
    _debug('attempting to write until we start writing to new CL segments: {}'.format(segments_at_start))

    while _files_in(commitlog_dir) <= segments_at_start:
        seconds_so_far = time.time() - began
        throttled_debug('  commitlog-advancing load step has lasted {s:.2f}s'.format(s=seconds_so_far))
        too_slow_message = (
            "It's been over a {s}s and we haven't written a new "
            "commitlog segment. Something is wrong."
        ).format(s=timeout)
        assert_less_equal(time.time(), deadline, too_slow_message)
        batch = ((junk_insert, ()) for _ in range(1000))
        execute_concurrent(
            session,
            batch,
            concurrency=500,
            raise_on_first_error=True,
        )

    _debug('present commitlog segments: {}'.format(_files_in(commitlog_dir)))
Esempio n. 6
0
def block_until_index_is_built(node, session, keyspace, table_name, idx_name):
    """
    Poll, sleeping one second between checks, until ``index_is_built``
    reports the secondary index as built.

    Returns as soon as the index is built. Raises DtestTimeoutError if about
    30 seconds pass without that happening.
    """
    deadline = time.time() + 30
    log_waiting = get_rate_limited_function(debug, 5)
    while time.time() < deadline:
        log_waiting("waiting for index to build")
        time.sleep(1)
        if index_is_built(node, session, keyspace, table_name, idx_name):
            return
    # The deadline passed without the index ever being reported as built.
    raise DtestTimeoutError()
Esempio n. 7
0
def block_until_index_is_built(node, session, keyspace, table_name, idx_name):
    """
    Wait for a secondary index to finish building, checking once per second.

    Returns once ``index_is_built`` is truthy. Raises DtestTimeoutError when
    the 30 second window closes first.
    """
    give_up_at = time.time() + 30
    throttled_log = get_rate_limited_function(logger.debug, 5)
    while time.time() < give_up_at:
        throttled_log("waiting for index to build")
        time.sleep(1)
        if index_is_built(node, session, keyspace, table_name, idx_name):
            return
    # Window elapsed with no successful check.
    raise DtestTimeoutError()
Esempio n. 8
0
def _write_to_cdc_WriteFailure(session, insert_stmt):
    """
    Run ``insert_stmt`` repeatedly until a batch reports a WriteFailure.

    The statement is prepared once and executed in concurrent batches of
    1000. An assertion fails if any statement fails with something other
    than a WriteFailure, or if more than 600 seconds go by.

    Returns the number of statements that completed successfully.
    """
    prepared = session.prepare(insert_stmt)
    began = time.time()
    rows_loaded = 0
    throttled_debug = get_rate_limited_function(debug, 5)
    while True:
        # Bail out if loading is taking far too long. Locally this needs
        # roughly 10s, so the limit is deliberately generous.
        assert_less_equal(
            (time.time() - began), 600,
            "It's taken more than 10 minutes to reach a WriteFailure trying "
            'to overrun the space designated for CDC commitlogs. This could '
            "be because data isn't being written quickly enough in this "
            'environment, or because C* is failing to reject writes when '
            'it should.')

        # Progress report, emitted at most once every 5s.
        progress = ('  data load step has lasted {s:.2f}s, '
                    'loaded {r} rows'.format(s=(time.time() - began),
                                             r=rows_loaded))
        throttled_debug(progress)

        # Errors are collected rather than raised: at least one WriteFailure
        # is expected, and it is inspected in the results below.
        statements = ((prepared, ()) for _ in range(1000))
        outcomes = list(
            execute_concurrent(
                session,
                statements,
                concurrency=500,
                raise_on_first_error=False))

        # Count the statements that completed successfully...
        rows_loaded += sum(1 for outcome in outcomes if outcome[0])
        # ...and insist that every failure is a WriteFailure.
        unexpected = [
            result for (success, result) in outcomes
            if not success and not isinstance(result, WriteFailure)
        ]
        assert_equal([], unexpected)
        # A WriteFailure means no more CDC data can be inserted: stop.
        if any(isinstance(result, WriteFailure) for (_, result) in outcomes):
            debug("write failed (presumably because we've overrun "
                  'designated CDC commitlog space) after '
                  'loading {r} rows in {s:.2f}s'.format(r=rows_loaded,
                                                        s=time.time() - began))
            return rows_loaded
Esempio n. 9
0
def _write_to_cdc_WriteFailure(session, insert_stmt):
    """
    Keep inserting with ``insert_stmt`` until the first WriteFailure shows up.

    Inserts run as concurrent batches of 1000 executions of the prepared
    statement. Failures of any other kind, or a run longer than 600 seconds,
    fail an assertion.

    Returns how many statements succeeded in total.
    """
    prepared = session.prepare(insert_stmt)
    t0 = time.time()
    loaded = 0
    log_progress = get_rate_limited_function(debug, 5)
    hit_write_failure = False
    while not hit_write_failure:
        # Generous upper bound; a local run takes around 10s.
        assert_less_equal(
            (time.time() - t0), 600,
            "It's taken more than 10 minutes to reach a WriteFailure trying "
            'to overrun the space designated for CDC commitlogs. This could '
            "be because data isn't being written quickly enough in this "
            'environment, or because C* is failing to reject writes when '
            'it should.'
        )

        # Rate limited to one message per 5s.
        log_progress(
            '  data load step has lasted {s:.2f}s, '
            'loaded {r} rows'.format(s=(time.time() - t0), r=loaded))

        # raise_on_first_error=False: the expected WriteFailure must come
        # back as a result instead of being raised here.
        batch = list(execute_concurrent(
            session,
            ((prepared, ()) for _ in range(1000)),
            concurrency=500,
            raise_on_first_error=False
        ))

        # Tally the successes in this batch.
        succeeded = [entry for entry in batch if entry[0]]
        loaded += len(succeeded)
        # Only WriteFailures are acceptable as errors.
        non_write_failures = [
            result for (success, result) in batch
            if not success and not isinstance(result, WriteFailure)
        ]
        assert_equal([], non_write_failures)
        # Seeing a WriteFailure ends the load.
        if any(isinstance(result, WriteFailure) for (_, result) in batch):
            debug("write failed (presumably because we've overrun "
                  'designated CDC commitlog space) after '
                  'loading {r} rows in {s:.2f}s'.format(
                      r=loaded,
                      s=time.time() - t0))
            hit_write_failure = True
    return loaded
Esempio n. 10
0
    def test_insertion_and_commitlog_behavior_after_reaching_cdc_total_space(
            self):
        """
        Test that C* behaves correctly when CDC tables have consumed all the
        space available to them. In particular: after writing
        cdc_total_space_in_mb MB into CDC commitlogs:

        - CDC writes are rejected
        - non-CDC writes are accepted
        - on flush, CDC commitlogs are copied to cdc_raw
        - on flush, non-CDC commitlogs are not copied to cdc_raw

        This is a lot of behavior to validate in one test, but we do so to
        avoid running multiple tests that each have to write enough data to
        fill cdc_total_space_in_mb (overridden to 4 in this test).
        """
        ks_name = 'ks'
        # The CDC table that will be written to until its writes are rejected.
        full_cdc_table_info = TableInfo(ks_name=ks_name,
                                        table_name='full_cdc_tab',
                                        column_spec=_16_uuid_column_spec,
                                        insert_stmt=_get_16_uuid_insert_stmt(
                                            ks_name, 'full_cdc_tab'),
                                        options={'cdc': 'true'})

        configuration_overrides = {
            # Make CDC space as small as possible so we can fill it quickly.
            'cdc_total_space_in_mb': 4,
        }
        node, session = self.prepare(
            ks_name=ks_name, configuration_overrides=configuration_overrides)
        session.execute(full_cdc_table_info.create_stmt)

        # Later, we'll also make assertions about the behavior of non-CDC
        # tables, so we create one here.
        non_cdc_table_info = TableInfo(ks_name=ks_name,
                                       table_name='non_cdc_tab',
                                       column_spec=_16_uuid_column_spec,
                                       insert_stmt=_get_16_uuid_insert_stmt(
                                           ks_name, 'non_cdc_tab'))
        session.execute(non_cdc_table_info.create_stmt)
        # We'll also make assertions about the behavior of CDC tables when
        # other CDC tables have already filled the designated space for CDC
        # commitlogs, so we create the second CDC table here.
        empty_cdc_table_info = TableInfo(ks_name=ks_name,
                                         table_name='empty_cdc_tab',
                                         column_spec=_16_uuid_column_spec,
                                         insert_stmt=_get_16_uuid_insert_stmt(
                                             ks_name, 'empty_cdc_tab'),
                                         options={'cdc': 'true'})
        session.execute(empty_cdc_table_info.create_stmt)

        # Flush the node before starting the CDC load.
        debug('flushing non-CDC commitlogs')
        node.flush()
        # Then, we insert rows into the first CDC table until we get a
        # WriteFailure. This should happen once the CDC commitlogs take up
        # cdc_total_space_in_mb (4 here) or more.
        debug('beginning data insert to fill CDC commitlogs')
        rows_loaded = _write_to_cdc_WriteFailure(
            session, full_cdc_table_info.insert_stmt)

        # At least one row must have gone in before the first rejection.
        self.assertLess(
            0, rows_loaded, 'No CDC rows inserted. This may happen when '
            'cdc_total_space_in_mb > commitlog_segment_size_in_mb')

        # Log the commitlog directory size for diagnostics only; nothing is
        # asserted about it.
        commitlog_dir = os.path.join(node.get_path(), 'commitlogs')
        commitlogs_size = size_of_files_in_dir(commitlog_dir)
        debug('Commitlog dir ({d}) is {b}B'.format(d=commitlog_dir,
                                                   b=commitlogs_size))

        # We should get a WriteFailure when trying to write to the CDC table
        # that's filled the designated CDC space...
        with self.assertRaises(WriteFailure):
            session.execute(full_cdc_table_info.insert_stmt)
        # or any CDC table.
        with self.assertRaises(WriteFailure):
            session.execute(empty_cdc_table_info.insert_stmt)

        # Now we test for behaviors of non-CDC tables when we've exceeded
        # cdc_total_space_in_mb.
        #
        # First, we drain and restart the node, reconnect, and save the names
        # of the files currently in cdc_raw.
        node.drain()
        session.cluster.shutdown()
        node.stop()
        node.start(wait_for_binary_proto=True)
        session = self.patient_cql_connection(node)
        pre_non_cdc_write_cdc_raw_segments = _get_cdc_raw_files(
            node.get_path())
        # save the names of all the commitlog segments written up to this
        # point:
        pre_non_cdc_write_segments = _get_commitlog_files(node.get_path())

        # Check that writing to non-CDC tables succeeds even when writes to CDC
        # tables are rejected:
        non_cdc_prepared_insert = session.prepare(
            non_cdc_table_info.insert_stmt)
        session.execute(non_cdc_prepared_insert,
                        ())  # should not raise an exception

        # Check the following property: any new commitlog segments written to
        # after cdc_raw has reached its maximum configured size should not be
        # moved to cdc_raw, on commitlog discard, because any such commitlog
        # segments are written to non-CDC tables.
        #
        # First, write to non-cdc tables.
        start, time_limit = time.time(), 600
        rate_limited_debug = get_rate_limited_function(debug, 5)
        debug('writing to non-cdc table')
        # We write until we get a new commitlog segment, i.e. until the
        # current commitlog files are no longer `<=` the ones recorded above.
        while _get_commitlog_files(
                node.get_path()) <= pre_non_cdc_write_segments:
            elapsed = time.time() - start
            rate_limited_debug(
                '  non-cdc load step has lasted {s:.2f}s'.format(s=elapsed))
            # Fail the test rather than loop forever if no new segment
            # appears within time_limit seconds.
            self.assertLessEqual(
                elapsed, time_limit,
                "It's been over a {s}s and we haven't written a new "
                "commitlog segment. Something is wrong.".format(s=time_limit))
            execute_concurrent(
                session,
                ((non_cdc_prepared_insert, ()) for _ in range(1000)),
                concurrency=500,
                raise_on_first_error=True,
            )

        # Finally, we check that draining doesn't move any new segments to cdc_raw:
        node.drain()
        session.cluster.shutdown()
        self.assertEqual(pre_non_cdc_write_cdc_raw_segments,
                         _get_cdc_raw_files(node.get_path()))
Esempio n. 11
0
    def test_insertion_and_commitlog_behavior_after_reaching_cdc_total_space(
            self):
        """
        Test that C* behaves correctly when CDC tables have consumed all the
        space available to them. In particular: after writing
        cdc_total_space_in_mb MB into CDC commitlogs:

        - CDC writes are rejected
        - non-CDC writes are accepted
        - on flush, CDC commitlogs are copied to cdc_raw
        - on flush, non-CDC commitlogs are not copied to cdc_raw

        This is a lot of behavior to validate in one test, but we do so to
        avoid running multiple tests that each have to write enough data to
        fill cdc_total_space_in_mb (overridden to 4 in this test).

        For cluster versions below 4.0 the final check compares the file
        names in cdc_raw before and after the non-CDC writes. From 4.0 on it
        compares the ReplayData loaded from each ``*_cdc.idx`` file instead.
        """
        ks_name = 'ks'
        # The CDC table that will be written to until its writes are rejected.
        full_cdc_table_info = TableInfo(ks_name=ks_name,
                                        table_name='full_cdc_tab',
                                        column_spec=_16_uuid_column_spec,
                                        insert_stmt=_get_16_uuid_insert_stmt(
                                            ks_name, 'full_cdc_tab'),
                                        options={'cdc': 'true'})

        configuration_overrides = {
            # Make CDC space as small as possible so we can fill it quickly.
            'cdc_total_space_in_mb': 4,
        }
        node, session = self.prepare(
            ks_name=ks_name, configuration_overrides=configuration_overrides)
        session.execute(full_cdc_table_info.create_stmt)

        # Later, we'll also make assertions about the behavior of non-CDC
        # tables, so we create one here.
        non_cdc_table_info = TableInfo(ks_name=ks_name,
                                       table_name='non_cdc_tab',
                                       column_spec=_16_uuid_column_spec,
                                       insert_stmt=_get_16_uuid_insert_stmt(
                                           ks_name, 'non_cdc_tab'))
        session.execute(non_cdc_table_info.create_stmt)
        # We'll also make assertions about the behavior of CDC tables when
        # other CDC tables have already filled the designated space for CDC
        # commitlogs, so we create the second CDC table here.
        empty_cdc_table_info = TableInfo(ks_name=ks_name,
                                         table_name='empty_cdc_tab',
                                         column_spec=_16_uuid_column_spec,
                                         insert_stmt=_get_16_uuid_insert_stmt(
                                             ks_name, 'empty_cdc_tab'),
                                         options={'cdc': 'true'})
        session.execute(empty_cdc_table_info.create_stmt)

        # Flush the node before starting the CDC load.
        logger.debug('flushing non-CDC commitlogs')
        node.flush()
        # Then, we insert rows into the first CDC table until we get a
        # WriteFailure. This should happen once the CDC commitlogs take up
        # cdc_total_space_in_mb (4 here) or more.
        logger.debug('beginning data insert to fill CDC commitlogs')
        rows_loaded = _write_to_cdc_write_failure(
            session, full_cdc_table_info.insert_stmt)

        assert 0 < rows_loaded, (
            'No CDC rows inserted. This may happen when '
            'cdc_total_space_in_mb > commitlog_segment_size_in_mb')

        # Logged for diagnostics only; nothing is asserted about the size.
        commitlog_dir = os.path.join(node.get_path(), 'commitlogs')
        commitlogs_size = size_of_files_in_dir(commitlog_dir)
        logger.debug('Commitlog dir ({d}) is {b}B'.format(d=commitlog_dir,
                                                          b=commitlogs_size))

        # We should get a WriteFailure when trying to write to the CDC table
        # that's filled the designated CDC space...
        try:
            session.execute(full_cdc_table_info.insert_stmt)
            raise Exception("WriteFailure expected")
        except WriteFailure:
            pass
        # or any CDC table.
        try:
            session.execute(empty_cdc_table_info.insert_stmt)
            raise Exception("WriteFailure expected")
        except WriteFailure:
            pass

        # Now we test for behaviors of non-CDC tables when we've exceeded
        # cdc_total_space_in_mb.
        #
        # First, we drain and restart the node, reconnect, and save the names
        # of the files currently in cdc_raw.
        node.drain()
        session.cluster.shutdown()
        node.stop()
        node.start(wait_for_binary_proto=True)
        session = self.patient_cql_connection(node)
        pre_non_cdc_write_cdc_raw_segments = _get_cdc_raw_files(
            node.get_path())

        # Snapshot the _cdc.idx files if >= 4.0 for comparison at end
        before_cdc_state = []  # init empty here to quiet PEP
        if self.cluster.version() >= '4.0':
            # Create ReplayData objects for each index file found in loading cluster
            node1_path = os.path.join(node.get_path(), 'cdc_raw')
            before_cdc_state = [
                ReplayData.load(node1_path, name)
                for name in os.listdir(node1_path) if name.endswith('_cdc.idx')
            ]

        # save the names of all the commitlog segments written up to this
        # point:
        pre_non_cdc_write_segments = _get_commitlog_files(node.get_path())

        # Check that writing to non-CDC tables succeeds even when writes to CDC
        # tables are rejected:
        non_cdc_prepared_insert = session.prepare(
            non_cdc_table_info.insert_stmt)
        session.execute(non_cdc_prepared_insert,
                        ())  # should not raise an exception

        # Check the following property: any new commitlog segments written to
        # after cdc_raw has reached its maximum configured size should not be
        # moved to cdc_raw, on commitlog discard, because any such commitlog
        # segments are written to non-CDC tables.
        #
        # First, write to non-cdc tables.
        start, time_limit = time.time(), 600
        rate_limited_debug = get_rate_limited_function(logger.debug, 5)
        logger.debug('writing to non-cdc table')
        # We write until we get a new commitlog segment.
        while _get_commitlog_files(
                node.get_path()) <= pre_non_cdc_write_segments:
            elapsed = time.time() - start
            rate_limited_debug(
                '  non-cdc load step has lasted {s:.2f}s'.format(s=elapsed))
            # The condition and the message must NOT share one pair of
            # parentheses: that would assert a non-empty tuple, which is
            # always truthy, so the timeout could never fire.
            assert elapsed <= time_limit, (
                "It's been over a {s}s and we haven't written a new "
                "commitlog segment. Something is wrong.".format(
                    s=time_limit))
            execute_concurrent(
                session,
                ((non_cdc_prepared_insert, ()) for _ in range(1000)),
                concurrency=500,
                raise_on_first_error=True,
            )

        # Finally, we check that draining doesn't move any new segments to cdc_raw:
        node.drain()
        session.cluster.shutdown()

        if self.cluster.version() < '4.0':
            assert pre_non_cdc_write_cdc_raw_segments == _get_cdc_raw_files(
                node.get_path())
        else:
            # Create ReplayData objects for each index file found in loading cluster
            node2_path = os.path.join(node.get_path(), 'cdc_raw')
            after_cdc_state = [
                ReplayData.load(node2_path, name)
                for name in os.listdir(node2_path) if name.endswith('_cdc.idx')
            ]

            # Confirm all indexes in 1st are accounted for and match corresponding entry in 2nd.
            all_before_matched = all(
                any(compare_replay_data(idx, idx_two)
                    for idx_two in after_cdc_state)
                for idx in before_cdc_state
            )
            if not all_before_matched:
                self._fail_and_print_sets(
                    before_cdc_state, after_cdc_state,
                    'Found CDC index in before not matched in after (non-CDC write test)'
                )

            # Now we confirm we don't have anything that showed up in 2nd not accounted for in 1st
            no_orphans = all(
                any(compare_replay_data(idx_two, idx)
                    for idx in before_cdc_state)
                for idx_two in after_cdc_state
            )
            if not no_orphans:
                self._fail_and_print_sets(
                    before_cdc_state, after_cdc_state,
                    'Found orphaned index file in after CDC state not in former.'
                )
Esempio n. 12
0
 def setUp(self):
     """
     Create a mock function and a mock limit, and wrap them in a
     rate-limited function for the tests to use.
     """
     self.mock_func = Mock(name='func')
     self.mock_limit = Mock(name='limit')
     self.rate_limited_func = funcutils.get_rate_limited_function(
         self.mock_func,
         self.mock_limit,
     )
Esempio n. 13
0
    def test_insertion_and_commitlog_behavior_after_reaching_cdc_total_space(self):
        """
        Test that C* behaves correctly when CDC tables have consumed all the
        space available to them. In particular: after writing
        cdc_total_space_in_mb MB into CDC commitlogs:

        - CDC writes are rejected
        - non-CDC writes are accepted
        - on flush, CDC commitlogs are copied to cdc_raw
        - on flush, non-CDC commitlogs are not copied to cdc_raw

        This is a lot of behavior to validate in one test, but we do so to
        avoid running multiple tests that each have to write enough data to
        fill cdc_total_space_in_mb (overridden to 4 in this test).

        For cluster versions below 4.0 the final check compares the file
        names in cdc_raw before and after the non-CDC writes. From 4.0 on it
        compares the ReplayData loaded from each ``*_cdc.idx`` file instead.
        """
        ks_name = 'ks'
        # The CDC table that will be written to until its writes are rejected.
        full_cdc_table_info = TableInfo(
            ks_name=ks_name, table_name='full_cdc_tab',
            column_spec=_16_uuid_column_spec,
            insert_stmt=_get_16_uuid_insert_stmt(ks_name, 'full_cdc_tab'),
            options={'cdc': 'true'}
        )

        configuration_overrides = {
            # Make CDC space as small as possible so we can fill it quickly.
            'cdc_total_space_in_mb': 4,
        }
        node, session = self.prepare(
            ks_name=ks_name,
            configuration_overrides=configuration_overrides
        )
        session.execute(full_cdc_table_info.create_stmt)

        # Later, we'll also make assertions about the behavior of non-CDC
        # tables, so we create one here.
        non_cdc_table_info = TableInfo(
            ks_name=ks_name, table_name='non_cdc_tab',
            column_spec=_16_uuid_column_spec,
            insert_stmt=_get_16_uuid_insert_stmt(ks_name, 'non_cdc_tab')
        )
        session.execute(non_cdc_table_info.create_stmt)
        # We'll also make assertions about the behavior of CDC tables when
        # other CDC tables have already filled the designated space for CDC
        # commitlogs, so we create the second CDC table here.
        empty_cdc_table_info = TableInfo(
            ks_name=ks_name, table_name='empty_cdc_tab',
            column_spec=_16_uuid_column_spec,
            insert_stmt=_get_16_uuid_insert_stmt(ks_name, 'empty_cdc_tab'),
            options={'cdc': 'true'}
        )
        session.execute(empty_cdc_table_info.create_stmt)

        # Flush the node before starting the CDC load.
        logger.debug('flushing non-CDC commitlogs')
        node.flush()
        # Then, we insert rows into the first CDC table until we get a
        # WriteFailure. This should happen once the CDC commitlogs take up
        # cdc_total_space_in_mb (4 here) or more.
        logger.debug('beginning data insert to fill CDC commitlogs')
        rows_loaded = _write_to_cdc_write_failure(session, full_cdc_table_info.insert_stmt)

        assert 0 < rows_loaded, ('No CDC rows inserted. This may happen when '
                                 'cdc_total_space_in_mb > commitlog_segment_size_in_mb')

        # Logged for diagnostics only; nothing is asserted about the size.
        commitlog_dir = os.path.join(node.get_path(), 'commitlogs')
        commitlogs_size = size_of_files_in_dir(commitlog_dir)
        logger.debug('Commitlog dir ({d}) is {b}B'.format(d=commitlog_dir, b=commitlogs_size))

        # We should get a WriteFailure when trying to write to the CDC table
        # that's filled the designated CDC space...
        try:
            session.execute(full_cdc_table_info.insert_stmt)
            raise Exception("WriteFailure expected")
        except WriteFailure:
            pass
        # or any CDC table.
        try:
            session.execute(empty_cdc_table_info.insert_stmt)
            raise Exception("WriteFailure expected")
        except WriteFailure:
            pass

        # Now we test for behaviors of non-CDC tables when we've exceeded
        # cdc_total_space_in_mb.
        #
        # First, we drain and restart the node, reconnect, and save the names
        # of the files currently in cdc_raw.
        node.drain()
        session.cluster.shutdown()
        node.stop()
        node.start(wait_for_binary_proto=True)
        session = self.patient_cql_connection(node)
        pre_non_cdc_write_cdc_raw_segments = _get_cdc_raw_files(node.get_path())

        # Snapshot the _cdc.idx files if >= 4.0 for comparison at end
        before_cdc_state = []  # init empty here to quiet PEP
        if self.cluster.version() >= '4.0':
            # Create ReplayData objects for each index file found in loading cluster
            node1_path = os.path.join(node.get_path(), 'cdc_raw')
            before_cdc_state = [ReplayData.load(node1_path, name)
                                for name in os.listdir(node1_path) if name.endswith('_cdc.idx')]

        # save the names of all the commitlog segments written up to this
        # point:
        pre_non_cdc_write_segments = _get_commitlog_files(node.get_path())

        # Check that writing to non-CDC tables succeeds even when writes to CDC
        # tables are rejected:
        non_cdc_prepared_insert = session.prepare(non_cdc_table_info.insert_stmt)
        session.execute(non_cdc_prepared_insert, ())  # should not raise an exception

        # Check the following property: any new commitlog segments written to
        # after cdc_raw has reached its maximum configured size should not be
        # moved to cdc_raw, on commitlog discard, because any such commitlog
        # segments are written to non-CDC tables.
        #
        # First, write to non-cdc tables.
        start, time_limit = time.time(), 600
        rate_limited_debug = get_rate_limited_function(logger.debug, 5)
        logger.debug('writing to non-cdc table')
        # We write until we get a new commitlog segment.
        while _get_commitlog_files(node.get_path()) <= pre_non_cdc_write_segments:
            elapsed = time.time() - start
            rate_limited_debug('  non-cdc load step has lasted {s:.2f}s'.format(s=elapsed))
            # The condition and the message must NOT share one pair of
            # parentheses: that would assert a non-empty tuple, which is
            # always truthy, so the timeout could never fire.
            assert elapsed <= time_limit, (
                "It's been over a {s}s and we haven't written a new "
                "commitlog segment. Something is wrong.".format(s=time_limit))
            execute_concurrent(
                session,
                ((non_cdc_prepared_insert, ()) for _ in range(1000)),
                concurrency=500,
                raise_on_first_error=True,
            )

        # Finally, we check that draining doesn't move any new segments to cdc_raw:
        node.drain()
        session.cluster.shutdown()

        if self.cluster.version() < '4.0':
            assert pre_non_cdc_write_cdc_raw_segments == _get_cdc_raw_files(node.get_path())
        else:
            # Create ReplayData objects for each index file found in loading cluster
            node2_path = os.path.join(node.get_path(), 'cdc_raw')
            after_cdc_state = [ReplayData.load(node2_path, name)
                               for name in os.listdir(node2_path) if name.endswith('_cdc.idx')]

            # Confirm all indexes in 1st are accounted for and match corresponding entry in 2nd.
            all_before_matched = all(
                any(compare_replay_data(idx, idx_two) for idx_two in after_cdc_state)
                for idx in before_cdc_state
            )
            if not all_before_matched:
                self._fail_and_print_sets(before_cdc_state, after_cdc_state,
                                          'Found CDC index in before not matched in after (non-CDC write test)')

            # Now we confirm we don't have anything that showed up in 2nd not accounted for in 1st
            no_orphans = all(
                any(compare_replay_data(idx_two, idx) for idx in before_cdc_state)
                for idx_two in after_cdc_state
            )
            if not no_orphans:
                self._fail_and_print_sets(before_cdc_state, after_cdc_state,
                                          'Found orphaned index file in after CDC state not in former.')
Esempio n. 14
0
    def test_insertion_and_commitlog_behavior_after_reaching_cdc_total_space(self):
        """
        Exercise node behavior once CDC tables have used up their space budget.

        With cdc_total_space_in_mb overridden to a small value and the CDC
        commitlogs filled, this test checks in a single pass that:

        - writes to CDC tables raise WriteFailure, both for the table that
          filled the space and for a second, still-empty CDC table
        - writes to a non-CDC table are still accepted
        - draining after non-CDC-only writes leaves the set of files reported
          for cdc_raw unchanged

        All of this is validated in one test so that the CDC space only has
        to be filled once.
        """
        keyspace = 'ks'

        def build_table_info(table_name, **extra):
            # Every table in this test shares the keyspace, the column spec
            # and the insert-statement builder; only name and options vary.
            return TableInfo(
                ks_name=keyspace, table_name=table_name,
                column_spec=_16_uuid_column_spec,
                insert_stmt=_get_16_uuid_insert_stmt(keyspace, table_name),
                **extra
            )

        # The CDC table we will write to until the node refuses more data.
        filled_cdc_table = build_table_info('full_cdc_tab', options={'cdc': 'true'})

        # Keep the CDC space small so that it can be filled quickly.
        node, session = self.prepare(
            ks_name=keyspace,
            configuration_overrides={'cdc_total_space_in_mb': 4}
        )
        session.execute(filled_cdc_table.create_stmt)

        # A non-CDC table, used further down to show that ordinary writes
        # keep working.
        plain_table = build_table_info('non_cdc_tab')
        session.execute(plain_table.create_stmt)

        # A second CDC table that never receives data itself; it lets us
        # check that the space limit applies to every CDC table, not only
        # the one that filled it.
        untouched_cdc_table = build_table_info('empty_cdc_tab', options={'cdc': 'true'})
        session.execute(untouched_cdc_table.create_stmt)

        # Flush first, then load the CDC table until writes start failing.
        debug('flushing non-CDC commitlogs')
        node.flush()
        debug('beginning data insert to fill CDC commitlogs')
        rows_loaded = _write_to_cdc_WriteFailure(session, filled_cdc_table.insert_stmt)

        no_rows_message = ('No CDC rows inserted. This may happen when '
                           'cdc_total_space_in_mb > commitlog_segment_size_in_mb')
        self.assertLess(0, rows_loaded, no_rows_message)

        commitlog_dir = os.path.join(node.get_path(), 'commitlogs')
        commitlogs_size = size_of_files_in_dir(commitlog_dir)
        debug('Commitlog dir ({d}) is {b}B'.format(d=commitlog_dir, b=commitlogs_size))

        # With the CDC space exhausted, a write to the table that filled it
        # must fail, and so must a write to any other CDC table.
        for rejected_insert in (filled_cdc_table.insert_stmt,
                                untouched_cdc_table.insert_stmt):
            with self.assertRaises(WriteFailure):
                session.execute(rejected_insert)

        # Next: non-CDC behavior while the CDC space is exhausted.
        #
        # Drain and restart the node, reconnect, and record what is in
        # cdc_raw and which commitlog segments exist before any non-CDC
        # write happens.
        node.drain()
        session.cluster.shutdown()
        node.stop()
        node.start(wait_for_binary_proto=True)
        session = self.patient_cql_connection(node)
        cdc_raw_before = _get_cdc_raw_files(node.get_path())
        segments_before = _get_commitlog_files(node.get_path())

        # A single non-CDC write must go through even though CDC writes are
        # being rejected; no exception is expected here.
        plain_insert = session.prepare(plain_table.insert_stmt)
        session.execute(plain_insert, ())

        # Segments written after the CDC limit was hit only hold non-CDC
        # data, so discarding them should not move anything into cdc_raw.
        # To check that, keep writing to the non-CDC table until a new
        # commitlog segment shows up, giving up after time_limit seconds.
        time_limit = 600
        start = time.time()
        rate_limited_debug = get_rate_limited_function(debug, 5)
        debug('writing to non-cdc table')
        timeout_message = ("It's been over a {s}s and we haven't written a new "
                           "commitlog segment. Something is wrong.".format(s=time_limit))
        # NOTE(review): `<=` presumably is a set-subset test here, i.e. the
        # loop runs while no new segment file has appeared -- confirm that
        # _get_commitlog_files returns a set.
        while _get_commitlog_files(node.get_path()) <= segments_before:
            elapsed = time.time() - start
            rate_limited_debug('  non-cdc load step has lasted {s:.2f}s'.format(s=elapsed))
            self.assertLessEqual(elapsed, time_limit, timeout_message)
            execute_concurrent(
                session,
                ((plain_insert, ()) for _ in range(1000)),
                concurrency=500,
                raise_on_first_error=True,
            )

        # Last step: a drain must not have added or removed cdc_raw files.
        node.drain()
        session.cluster.shutdown()
        self.assertEqual(cdc_raw_before, _get_cdc_raw_files(node.get_path()))
Esempio n. 15
0
 def setUp(self):
     """
     Create the `func` and `limit` mocks and a rate-limited function built from them.
     """
     func_mock = Mock(name='func')
     limit_mock = Mock(name='limit')
     self.mock_func = func_mock
     self.mock_limit = limit_mock
     # Positional order is (func, limit).
     self.rate_limited_func = funcutils.get_rate_limited_function(func_mock, limit_mock)