def run_archive_commitlog(self, restore_point_in_time=False, restore_archived_commitlog=True,
                          archive_active_commitlogs=False, archive_command='cp'):
    """
    Run archive commit log restoration test
    """
    cluster = self.cluster
    cluster.populate(1)
    (node1,) = cluster.nodelist()

    # Create a temp directory for storing commitlog archives:
    tmp_commitlog = safe_mkdtemp()
    logger.debug("tmp_commitlog: " + tmp_commitlog)

    # Edit commitlog_archiving.properties and set an archive
    # command:
    replace_in_file(os.path.join(node1.get_path(), 'conf', 'commitlog_archiving.properties'),
                    [(r'^archive_command=.*$',
                      'archive_command={archive_command} %path {tmp_commitlog}/%name'.format(
                          tmp_commitlog=tmp_commitlog, archive_command=archive_command))])

    cluster.start()

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 1)

    # Write until we get a new CL segment. This avoids replaying
    # initialization mutations from startup into system tables when
    # restoring snapshots. See CASSANDRA-11811.
    advance_to_next_cl_segment(
        session=session,
        commitlog_dir=os.path.join(node1.get_path(), 'commitlogs')
    )

    session.execute('CREATE TABLE ks.cf ( key bigint PRIMARY KEY, val text);')

    logger.debug("Writing first 30,000 rows...")
    self.insert_rows(session, 0, 30000)
    # Record when this first set of inserts finished:
    insert_cutoff_times = [time.gmtime()]

    # Delete all commitlog backups so far:
    for f in glob.glob(tmp_commitlog + "/*"):
        logger.debug('Removing {}'.format(f))
        os.remove(f)

    snapshot_dirs = self.make_snapshot(node1, 'ks', 'cf', 'basic')

    if self.cluster.version() >= '3.0':
        system_ks_snapshot_dirs = self.make_snapshot(node1, 'system_schema', 'keyspaces', 'keyspaces')
    else:
        system_ks_snapshot_dirs = self.make_snapshot(node1, 'system', 'schema_keyspaces', 'keyspaces')

    if self.cluster.version() >= '3.0':
        system_col_snapshot_dirs = self.make_snapshot(node1, 'system_schema', 'columns', 'columns')
    else:
        system_col_snapshot_dirs = self.make_snapshot(node1, 'system', 'schema_columns', 'columns')

    if self.cluster.version() >= '3.0':
        system_ut_snapshot_dirs = self.make_snapshot(node1, 'system_schema', 'types', 'usertypes')
    else:
        system_ut_snapshot_dirs = self.make_snapshot(node1, 'system', 'schema_usertypes', 'usertypes')

    if self.cluster.version() >= '3.0':
        system_cfs_snapshot_dirs = self.make_snapshot(node1, 'system_schema', 'tables', 'cfs')
    else:
        system_cfs_snapshot_dirs = self.make_snapshot(node1, 'system', 'schema_columnfamilies', 'cfs')

    try:
        # Write more data:
        logger.debug("Writing second 30,000 rows...")
        self.insert_rows(session, 30000, 60000)
        node1.flush()
        time.sleep(10)
        # Record when this second set of inserts finished:
        insert_cutoff_times.append(time.gmtime())

        logger.debug("Writing final 5,000 rows...")
        self.insert_rows(session, 60000, 65000)
        # Record when the third set of inserts finished:
        insert_cutoff_times.append(time.gmtime())

        # Flush so we get an accurate view of commitlogs
        node1.flush()

        rows = session.execute('SELECT count(*) from ks.cf')
        # Make sure all 65,000 inserted rows are visible:
        assert rows[0][0] == 65000

        # Check that there is at least one commit log backed up that
        # is not one of the active commit logs:
        commitlog_dir = os.path.join(node1.get_path(), 'commitlogs')
        logger.debug("node1 commitlog dir: " + commitlog_dir)
        logger.debug("node1 commitlog dir contents: " + str(os.listdir(commitlog_dir)))
        logger.debug("tmp_commitlog contents: " + str(os.listdir(tmp_commitlog)))

        assert_directory_not_empty(tmp_commitlog, commitlog_dir)

        cluster.flush()
        cluster.compact()
        node1.drain()

        # Destroy the cluster
        cluster.stop()
        logger.debug("node1 commitlog dir contents after stopping: " + str(os.listdir(commitlog_dir)))
        logger.debug("tmp_commitlog contents after stopping: " + str(os.listdir(tmp_commitlog)))

        self.copy_logs(name=get_current_test_name() + "_pre-restore")
        self.fixture_dtest_setup.cleanup_and_replace_cluster()
        cluster = self.cluster
        cluster.populate(1)
        nodes = cluster.nodelist()
        assert len(nodes) == 1
        node1 = nodes[0]

        # Restore schema from snapshots:
        for system_ks_snapshot_dir in system_ks_snapshot_dirs:
            if self.cluster.version() >= '3.0':
                self.restore_snapshot(system_ks_snapshot_dir, node1, 'system_schema', 'keyspaces', 'keyspaces')
            else:
                self.restore_snapshot(system_ks_snapshot_dir, node1, 'system', 'schema_keyspaces', 'keyspaces')
        for system_col_snapshot_dir in system_col_snapshot_dirs:
            if self.cluster.version() >= '3.0':
                self.restore_snapshot(system_col_snapshot_dir, node1, 'system_schema', 'columns', 'columns')
            else:
                self.restore_snapshot(system_col_snapshot_dir, node1, 'system', 'schema_columns', 'columns')
        for system_ut_snapshot_dir in system_ut_snapshot_dirs:
            if self.cluster.version() >= '3.0':
                self.restore_snapshot(system_ut_snapshot_dir, node1, 'system_schema', 'types', 'usertypes')
            else:
                self.restore_snapshot(system_ut_snapshot_dir, node1, 'system', 'schema_usertypes', 'usertypes')
        for system_cfs_snapshot_dir in system_cfs_snapshot_dirs:
            if self.cluster.version() >= '3.0':
                self.restore_snapshot(system_cfs_snapshot_dir, node1, 'system_schema', 'tables', 'cfs')
            else:
                self.restore_snapshot(system_cfs_snapshot_dir, node1, 'system', 'schema_columnfamilies', 'cfs')
        for snapshot_dir in snapshot_dirs:
            self.restore_snapshot(snapshot_dir, node1, 'ks', 'cf', 'basic')

        cluster.start()

        session = self.patient_cql_connection(node1)
        node1.nodetool('refresh ks cf')

        rows = session.execute('SELECT count(*) from ks.cf')
        # Make sure we have the same amount of rows as when we snapshotted:
        assert rows[0][0] == 30000

        # Edit commitlog_archiving.properties. Remove the archive
        # command and set a restore command and restore_directories:
        if restore_archived_commitlog:
            replace_in_file(os.path.join(node1.get_path(), 'conf', 'commitlog_archiving.properties'),
                            [(r'^archive_command=.*$', 'archive_command='),
                             (r'^restore_command=.*$', 'restore_command=cp -f %from %to'),
                             (r'^restore_directories=.*$', 'restore_directories={tmp_commitlog}'.format(
                                 tmp_commitlog=tmp_commitlog))])

            if restore_point_in_time:
                restore_time = time.strftime("%Y:%m:%d %H:%M:%S", insert_cutoff_times[1])
                replace_in_file(os.path.join(node1.get_path(), 'conf', 'commitlog_archiving.properties'),
                                [(r'^restore_point_in_time=.*$',
                                  'restore_point_in_time={restore_time}'.format(restore_time=restore_time))])

        logger.debug("Restarting node1..")
        node1.stop()
        node1.start(wait_for_binary_proto=True)

        node1.nodetool('flush')
        node1.nodetool('compact')

        session = self.patient_cql_connection(node1)
        rows = session.execute('SELECT count(*) from ks.cf')
        # Now we should have 30000 rows from the snapshot + 30000 rows
        # from the commitlog backups:
        if not restore_archived_commitlog:
            assert rows[0][0] == 30000
        elif restore_point_in_time:
            assert rows[0][0] == 60000
        else:
            assert rows[0][0] == 65000
    finally:
        # clean up
        logger.debug("removing snapshot_dir: " + ",".join(snapshot_dirs))
        for snapshot_dir in snapshot_dirs:
            shutil.rmtree(snapshot_dir)
        logger.debug("removing snapshot_dir: " + ",".join(system_ks_snapshot_dirs))
        for system_ks_snapshot_dir in system_ks_snapshot_dirs:
            shutil.rmtree(system_ks_snapshot_dir)
        logger.debug("removing snapshot_dir: " + ",".join(system_cfs_snapshot_dirs))
        for system_cfs_snapshot_dir in system_cfs_snapshot_dirs:
            shutil.rmtree(system_cfs_snapshot_dir)
        logger.debug("removing snapshot_dir: " + ",".join(system_ut_snapshot_dirs))
        for system_ut_snapshot_dir in system_ut_snapshot_dirs:
            shutil.rmtree(system_ut_snapshot_dir)
        logger.debug("removing snapshot_dir: " + ",".join(system_col_snapshot_dirs))
        for system_col_snapshot_dir in system_col_snapshot_dirs:
            shutil.rmtree(system_col_snapshot_dir)

        logger.debug("removing tmp_commitlog: " + tmp_commitlog)
        shutil.rmtree(tmp_commitlog)
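# --------------------------------------------------------------------------
# Hedged sketch: the test above relies on module-level helpers that are not
# shown here (safe_mkdtemp, replace_in_file, advance_to_next_cl_segment,
# assert_directory_not_empty, ...). As one illustration, the call
# assert_directory_not_empty(tmp_commitlog, commitlog_dir) is described only
# by the comment above it ("at least one commit log backed up that is not one
# of the active commit logs"); a minimal implementation consistent with that
# comment could look like the sketch below (hypothetical name, assumes the
# module-level `os` import used by the tests above). The real helper may
# differ.
def assert_directory_not_empty_sketch(tmp_commitlog, commitlog_dir):
    archived_segments = set(os.listdir(tmp_commitlog))
    active_segments = set(os.listdir(commitlog_dir))
    # There must be at least one archived segment that is not an active one.
    assert archived_segments - active_segments, \
        'expected at least one archived commitlog segment not present in {}'.format(commitlog_dir)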
def test_cdc_data_available_in_cdc_raw(self):
    ks_name = 'ks'
    # First, create a new node just for data generation.
    generation_node, generation_session = self.prepare(ks_name=ks_name)

    cdc_table_info = TableInfo(
        ks_name=ks_name, table_name='cdc_tab',
        column_spec=_16_uuid_column_spec,
        insert_stmt=_get_16_uuid_insert_stmt(ks_name, 'cdc_tab'),
        options={
            'cdc': 'true',
            # give table an explicit id so when we create it again it's the
            # same table and we can replay into it
            'id': uuid.uuid4()
        }
    )

    # Write until we get a new CL segment to avoid replaying initialization
    # mutations from this node's startup into system tables in the other
    # node. See CASSANDRA-11811.
    advance_to_next_cl_segment(
        session=generation_session,
        commitlog_dir=os.path.join(generation_node.get_path(), 'commitlogs')
    )

    generation_session.execute(cdc_table_info.create_stmt)

    # insert 10000 rows
    inserted_rows = _insert_rows(generation_session, cdc_table_info.name,
                                 cdc_table_info.insert_stmt, repeat((), 10000))

    # drain the node to guarantee all cl segments will be recycled
    debug('draining')
    generation_node.drain()
    debug('stopping')
    # stop the node and clean up all sessions attached to it
    generation_node.stop()
    generation_session.cluster.shutdown()

    # create a new node to use for cdc_raw cl segment replay
    loading_node = self._init_new_loading_node(ks_name, cdc_table_info.create_stmt,
                                               self.cluster.version() < '4')

    # move cdc_raw contents to commitlog directories, then start the
    # node again to trigger commitlog replay, which should replay the
    # cdc_raw files we moved to commitlogs into memtables.
    debug('moving cdc_raw and restarting node')
    _move_contents(
        os.path.join(generation_node.get_path(), 'cdc_raw'),
        os.path.join(loading_node.get_path(), 'commitlogs')
    )
    loading_node.start(wait_for_binary_proto=True)
    debug('node successfully started; waiting on log replay')
    loading_node.grep_log('Log replay complete')
    debug('log replay complete')

    # final assertions
    validation_session = self.patient_exclusive_cql_connection(loading_node)
    data_in_cdc_table_after_restart = rows_to_list(
        validation_session.execute('SELECT * FROM ' + cdc_table_info.name)
    )
    debug('found {cdc} values in CDC table'.format(
        cdc=len(data_in_cdc_table_after_restart)
    ))
    # Then we assert that the CDC data that we expect to be there is there.
    # All data that was in CDC tables should have been copied to cdc_raw,
    # then used in commitlog replay, so it should be back in the cluster.
    self.assertEqual(
        inserted_rows,
        data_in_cdc_table_after_restart,
        # The message on failure is too long, since cdc_data is thousands
        # of items, so we print something else here
        msg='not all expected data selected'
    )
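# --------------------------------------------------------------------------
# Hedged sketch: _insert_rows is defined elsewhere in this module. From its
# use above (called with a session, a table name, an insert statement and an
# iterable of bind-value tuples, with the return value later compared against
# a SELECT *), it is assumed to run the insert once per value tuple and to
# return the rows now readable from the table. The name below is hypothetical
# so it does not shadow the real helper, which may batch or parallelize the
# inserts; rows_to_list is the same module-level utility used by the tests.
def _insert_rows_sketch(session, table_name, insert_stmt, values):
    prepared = session.prepare(insert_stmt)
    values = list(values)  # values may be a generator, e.g. repeat((), 10000)
    for value_tuple in values:
        session.execute(prepared, value_tuple)
    data_loaded = rows_to_list(session.execute('SELECT * FROM ' + table_name))
    # Sanity check: everything we wrote should be readable back.
    assert len(values) == len(data_loaded)
    return data_loaded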
def test_cdc_data_available_in_cdc_raw(self):
    ks_name = 'ks'
    # First, create a new node just for data generation.
    generation_node, generation_session = self.prepare(ks_name=ks_name)

    cdc_table_info = TableInfo(
        ks_name=ks_name, table_name='cdc_tab',
        column_spec=_16_uuid_column_spec,
        insert_stmt=_get_16_uuid_insert_stmt(ks_name, 'cdc_tab'),
        options={
            'cdc': 'true',
            # give table an explicit id so when we create it again it's the
            # same table and we can replay into it
            'id': uuid.uuid4()
        }
    )

    # Write until we get a new CL segment to avoid replaying initialization
    # mutations from this node's startup into system tables in the other
    # node. See CASSANDRA-11811.
    advance_to_next_cl_segment(
        session=generation_session,
        commitlog_dir=os.path.join(generation_node.get_path(), 'commitlogs')
    )

    generation_session.execute(cdc_table_info.create_stmt)

    # insert 10000 rows
    inserted_rows = _insert_rows(generation_session, cdc_table_info.name,
                                 cdc_table_info.insert_stmt, repeat((), 10000))

    # drain the node to guarantee all cl segments will be recycled
    logger.debug('draining')
    generation_node.drain()
    logger.debug('stopping')
    # stop the node and clean up all sessions attached to it
    generation_session.cluster.shutdown()
    generation_node.stop()

    # We can rely on the existing _cdc.idx files to determine which .log files contain cdc data.
    source_path = os.path.join(generation_node.get_path(), 'cdc_raw')
    source_cdc_indexes = {ReplayData.load(source_path, name)
                          for name in os.listdir(source_path) if name.endswith('_cdc.idx')}
    assert source_cdc_indexes != set()

    # create a new node to use for cdc_raw cl segment replay
    loading_node = self._init_new_loading_node(ks_name, cdc_table_info.create_stmt,
                                               self.cluster.version() < '4')

    # move cdc_raw contents to commitlog directories, then start the
    # node again to trigger commitlog replay, which should replay the
    # cdc_raw files we moved to commitlogs into memtables.
    logger.debug('moving cdc_raw and restarting node')
    _move_commitlog_segments(
        os.path.join(generation_node.get_path(), 'cdc_raw'),
        os.path.join(loading_node.get_path(), 'commitlogs')
    )
    loading_node.start(wait_for_binary_proto=True)
    logger.debug('node successfully started; waiting on log replay')
    loading_node.grep_log('Log replay complete')
    logger.debug('log replay complete')

    # final assertions
    validation_session = self.patient_exclusive_cql_connection(loading_node)
    data_in_cdc_table_after_restart = rows_to_list(
        validation_session.execute('SELECT * FROM ' + cdc_table_info.name)
    )
    logger.debug('found {cdc} values in CDC table'.format(
        cdc=len(data_in_cdc_table_after_restart)
    ))

    # Then we assert that the CDC data that we expect to be there is there.
    # All data that was in CDC tables should have been copied to cdc_raw,
    # then used in commitlog replay, so it should be back in the cluster.
    assert inserted_rows == data_in_cdc_table_after_restart, 'not all expected data selected'

    if self.cluster.version() >= '4.0':
        # Create ReplayData objects for each index file found in loading cluster
        loading_path = os.path.join(loading_node.get_path(), 'cdc_raw')
        dest_cdc_indexes = [ReplayData.load(loading_path, name)
                            for name in os.listdir(loading_path) if name.endswith('_cdc.idx')]

        # Compare source replay data to dest to ensure replay process created both hard links and index files.
        for srd in source_cdc_indexes:
            # Confirm both log and index are in dest
            assert os.path.isfile(os.path.join(loading_path, srd.idx_name))
            assert os.path.isfile(os.path.join(loading_path, srd.log_name))

            # Find dest ReplayData that corresponds to the source (should be exactly 1)
            corresponding_dest_replay_datae = [x for x in dest_cdc_indexes
                                               if srd.idx_name == x.idx_name]
            assert_length_equal(corresponding_dest_replay_datae, 1)
            drd = corresponding_dest_replay_datae[0]

            # We can't compare equality on offsets since replay uses the raw file length as the written
            # cdc offset. We *can*, however, confirm that the offset in the replayed file is >=
            # the source file, ensuring clients are signaled to replay at least all the data in the
            # log.
            assert drd.offset >= srd.offset

            # Confirm completed flag is the same in both
            assert srd.completed == drd.completed

        # Confirm that the relationship between index files on the source
        # and destination looks like we expect.
        # First, grab the mapping between the two, make sure it's a 1-1
        # mapping, and transform the dict to reflect that:
        src_to_dest_idx_map = {
            src_rd: [dest_rd for dest_rd in dest_cdc_indexes
                     if dest_rd.idx_name == src_rd.idx_name]
            for src_rd in source_cdc_indexes
        }
        for src_rd, dest_rds in src_to_dest_idx_map.items():
            assert_length_equal(dest_rds, 1)
            src_to_dest_idx_map[src_rd] = dest_rds[0]

        # All offsets in idx files that were copied should be >0 on the
        # destination node.
        assert 0 not in {i.offset for i in src_to_dest_idx_map.values()}, \
            ('Found index offsets == 0 in an index file on the '
             'destination node that corresponds to an index file on the '
             'source node:\n'
             '{}').format(pformat(src_to_dest_idx_map))

        # Offsets of all shared indexes should be >= on the destination
        # than on the source.
        for src_rd, dest_rd in src_to_dest_idx_map.items():
            assert dest_rd.offset >= src_rd.offset
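# --------------------------------------------------------------------------
# Hedged sketches: the 4.0+ assertions above depend on two helpers defined
# elsewhere in this module, _move_commitlog_segments and ReplayData. The
# versions below are assumptions inferred from how they are called:
# _move_commitlog_segments moves cdc_raw segments into the commitlog
# directory (leaving the _cdc.idx files behind so they can still be compared
# after replay), and ReplayData.load(directory, idx_name) exposes idx_name,
# log_name, offset and completed. The _cdc.idx layout assumed here (an
# integer offset on the first line, the word COMPLETED on a second line once
# the segment is closed) follows the Cassandra CDC documentation and should
# be checked against the real helpers; the names are hypothetical stand-ins.
import os
import shutil
from collections import namedtuple


def _move_commitlog_segments_sketch(source_dir, dest_dir):
    # Move the commitlog segments, but not the _cdc.idx index files.
    for name in os.listdir(source_dir):
        if not name.endswith('_cdc.idx'):
            shutil.move(os.path.join(source_dir, name), os.path.join(dest_dir, name))


class ReplayDataSketch(namedtuple('ReplayDataSketch', ['idx_name', 'log_name', 'offset', 'completed'])):
    """Parsed view of a single _cdc.idx file (hypothetical stand-in for ReplayData)."""

    @classmethod
    def load(cls, directory, name):
        with open(os.path.join(directory, name)) as f:
            lines = f.read().splitlines()
        return cls(idx_name=name,
                   log_name=name.replace('_cdc.idx', '.log'),
                   offset=int(lines[0]),
                   completed=(len(lines) > 1 and lines[1] == 'COMPLETED'))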