def list_keys(interaction_pool,
              collection_id,
              versioned,
              prefix=None,
              max_keys=1000,
              delimiter="",
              marker=None):
    """
    retrieve information about keys which are visible: not deleted, etc

    interaction_pool
        object whose ``run(interaction=, interaction_args=, pool=)`` returns
        an async result; calling ``get()`` on that yields the result rows
    collection_id
        handed to the SQL factory and bound as a query argument
    versioned
        passed through to ``sql_factory.list_keys``
    prefix
        key prefix; ``None`` is bound as ``""``
    max_keys
        largest number of rows to report (coerced with ``int()``)
    delimiter
        when not ``""``, report rolled-up prefixes instead of key data
    marker
        key marker; ``None`` is bound as ``""``

    With an empty delimiter returns
    ``{"key_data": [...], "truncated": bool}`` where each entry has the
    items ``"key"``, ``"version_identifier"`` and ``"timestamp"``.

    With a delimiter returns ``{"prefixes": [...], "truncated": bool}``.
    Each prefix is the part of a key up to and including the first
    occurrence of the delimiter found at or after ``len(prefix)``. Keys that
    do not contain the delimiter there contribute nothing. Prefixes are
    returned sorted so the result is deterministic.
    """
    # ask for one more than max_keys, so we can tell if we are truncated
    max_keys = int(max_keys)
    request_count = max_keys + 1
    sql_text = sql_factory.list_keys(collection_id,
                                     versioned=versioned,
                                     prefix=prefix,
                                     key_marker=marker,
                                     limit=request_count)
    args = {"collection_id": collection_id,
            "prefix": (prefix if prefix is not None else ""),
            "key_marker": (marker if marker is not None else ""), }
    async_result = interaction_pool.run(interaction=sql_text.encode("utf-8"),
                                        interaction_args=args,
                                        pool=_local_node_name)
    result = async_result.get()
    truncated = len(result) == request_count

    # the extra row only exists to detect truncation, never report it
    key_list = [{"key": row["key"],
                 "version_identifier": row["unified_id"],
                 "timestamp": http_timestamp_str(row["timestamp"])}
                for row in result[:max_keys]]

    if delimiter == "":
        return {"key_data": key_list, "truncated": truncated}

    # XXX: there may be some SQL way to do this efficiently
    prefix_set = set()
    offset = (len(prefix) if prefix is not None else 0)
    delimiter_size = len(delimiter)
    for key_entry in key_list:
        key = key_entry["key"]
        delimiter_pos = key.find(delimiter, offset)
        # str.find reports "not found" as -1; index 0 is a real match (a key
        # that starts with the delimiter when there is no prefix)
        if delimiter_pos != -1:
            # keep the whole delimiter, which may be longer than one character
            prefix_set.add(key[:delimiter_pos + delimiter_size])

    return {"prefixes": sorted(prefix_set), "truncated": truncated}
def test_version_for_key_find_all_same_rows(self):
    """
    every row list_keys returns must also be found by version_for_key
    """
    # XXX: this looks like it tests the same stuff as the previous
    # entry?
    log = logging.getLogger("test_version_for_key_find_all_same_rows")

    def _fetch_all(statement, parameters):
        # run one statement on a fresh cursor and hand back all of its rows
        cursor = self._connection.cursor()
        cursor.execute(statement, parameters)
        rows = cursor.fetchall()
        cursor.close()
        return rows

    listed_rows = _fetch_all(
        list_keys(_test_collection_id,
                  versioned=False,
                  prefix=_test_prefix),
        {"collection_id": _test_collection_id,
         "prefix": _test_prefix, })

    for listed_row in listed_rows:
        found_rows = _fetch_all(
            version_for_key(_test_collection_id,
                            versioned=False,
                            key=listed_row["key"]),
            {"collection_id": _test_collection_id,
             "key": listed_row["key"], })

        # there may be more than one row, but never none
        self.assertTrue(len(found_rows) > 0)
        for found_row in found_rows:
            self.assertEqual(found_row["key"], listed_row["key"])
            self.assertEqual(found_row["unified_id"],
                             listed_row["unified_id"])
def test_version_for_key_find_all_same_rows(self):
    """
    version_for_key must locate each row that list_keys reports
    """
    # XXX: this looks like it tests the same stuff as the previous
    # entry?
    log = logging.getLogger("test_version_for_key_find_all_same_rows")

    def _rows_for(sql_text, args):
        # execute on a short-lived cursor, returning everything it fetched
        cursor = self._connection.cursor()
        cursor.execute(sql_text, args)
        fetched = cursor.fetchall()
        cursor.close()
        return fetched

    list_keys_sql = list_keys(_test_collection_id,
                              versioned=False,
                              prefix=_test_prefix)
    list_keys_args = {"collection_id": _test_collection_id,
                      "prefix": _test_prefix, }

    for expected in _rows_for(list_keys_sql, list_keys_args):
        lookup_sql = version_for_key(_test_collection_id,
                                     versioned=False,
                                     key=expected["key"])
        lookup_args = {"collection_id": _test_collection_id,
                       "key": expected["key"], }
        matches = _rows_for(lookup_sql, lookup_args)

        # at least one row has to come back, and all of them must agree
        # with the list_keys row on key and unified_id
        self.assertTrue(len(matches) > 0)
        for match in matches:
            self.assertEqual(match["key"], expected["key"])
            self.assertEqual(match["unified_id"], expected["unified_id"])
def test_list_keys_vs_list_versions(self):
    """
    check that version_for_key can ONLY find the rows list_versions returns
    IF they are also in the result that list_keys returns
    (i.e. some of them should be findable, some not.)
    """
    # Background: list_keys returns the newest version of every key.
    # list_versions returns every version of every key.
    # If a collection is unversioned, list_keys and list_versions should
    # find the same rows (although list_keys has an extra column.)
    # In other words, without versioning any version of a key that isn't
    # the newest version should be unreachable.
    #
    # The plan:
    # 1. get the full output of list_versions(versioned=True)
    # 2. get the full output of list_keys(versioned=True)
    # 3. compare the two to determine which rows are older versions
    # 4. call version_for_key for each row with a specific unified_id,
    #    versioned and unversioned, and verify that the row is found
    #    (or correctly not found).

    def _fetch_all(sql_text, args):
        # run one statement on a fresh cursor and return all of its rows
        cursor = self._connection.cursor()
        cursor.execute(sql_text, args)
        rows = cursor.fetchall()
        cursor.close()
        return rows

    def _identities(rows):
        # reduce rows to (key, unified_id) so the extra list_keys column
        # does not get in the way of comparisons
        return set((r["key"], r["unified_id"], ) for r in rows)

    # 1. every version of every key
    list_versions_rows = _fetch_all(
        list_versions(_test_collection_id,
                      versioned=True,
                      prefix=_test_prefix),
        {"collection_id": _test_collection_id,
         "prefix": _test_prefix, })
    list_versions_set = _identities(list_versions_rows)

    # 2. what list_keys reports with versioning on
    list_keys_rows = _fetch_all(
        list_keys(_test_collection_id,
                  versioned=True,
                  prefix=_test_prefix),
        {"collection_id": _test_collection_id,
         "prefix": _test_prefix, })
    list_keys_set = _identities(list_keys_rows)

    # rows that list_keys only reports when versioned=True
    # we need this below.
    unversioned_list_keys_rows = _fetch_all(
        list_keys(_test_collection_id,
                  versioned=False,
                  prefix=_test_prefix),
        {"collection_id": _test_collection_id,
         "prefix": _test_prefix, })
    unversioned_list_keys_set = _identities(unversioned_list_keys_rows)
    versioned_only_reachable_set = list_keys_set - unversioned_list_keys_set

    # 3. whatever list_versions has that list_keys lacks is an older version
    older_version_set = list_versions_set - list_keys_set

    # verify that assumption: each list_versions row lands in exactly one
    # of list_keys_set / older_version_set
    for list_versions_row in list_versions_rows:
        identity = (list_versions_row["key"],
                    list_versions_row["unified_id"], )
        self.assertIn(identity, list_versions_set)
        if identity in list_keys_set:
            self.assertNotIn(identity, older_version_set)
        else:
            self.assertIn(identity, older_version_set)

    # 4. older versions, asked for by unified_id: not reachable with
    # versioned=False, reachable with versioned=True
    for key, unified_id in older_version_set:
        for versioned in [False, True, ]:
            sql_text = version_for_key(_test_collection_id,
                                       versioned=versioned,
                                       key=key,
                                       unified_id=unified_id)
            args = {"collection_id": _test_collection_id,
                    "key": key,
                    "unified_id": unified_id}
            test_rows = _fetch_all(sql_text, args)
            if versioned:
                self.assertTrue(len(test_rows) > 0)
            else:
                self.assertEqual(len(test_rows), 0)

    # The rows that were in both list_versions output and list_keys output
    # should be reachable with versioned=True, but only reachable
    # with versioned=False if they are not in versioned_only_reachable_set.
    for key, unified_id in list_keys_set:
        for versioned in [False, True, ]:
            sql_text = version_for_key(_test_collection_id,
                                       versioned=versioned,
                                       key=key,
                                       unified_id=unified_id)
            args = {"collection_id": _test_collection_id,
                    "key": key,
                    "unified_id": unified_id}
            test_rows = _fetch_all(sql_text, args)
            failure_message = "versioned={0} {1}".format(versioned, args)
            hidden_when_unversioned = (
                versioned is False and
                (key, unified_id, ) in versioned_only_reachable_set)
            if hidden_when_unversioned:
                self.assertTrue(len(test_rows) == 0, failure_message)
            else:
                self.assertTrue(len(test_rows) > 0, failure_message)
def test_version_for_key(self):
    """
    version_for_key

    For every row list_keys returns (versioned and unversioned), calling
    version_for_key without a unified_id must return at least one row, and
    every returned row must carry the same key and unified_id.

    Calling version_for_key with a unified_id that does not exist must
    return no rows.
    """
    # check that for every row in list_keys, calling version_for_key with
    # unified_id=None should return the same row, regardless of it being
    # versioned or not.
    for versioned in [True, False]:
        sql_text = list_keys(_test_collection_id,
                             versioned=versioned,
                             prefix=_test_prefix)
        args = {"collection_id": _test_collection_id,
                "prefix": _test_prefix, }

        cursor = self._connection.cursor()
        cursor.execute(sql_text, args)
        baseline_rows = cursor.fetchall()
        cursor.close()

        for row in baseline_rows:
            sql_text = version_for_key(_test_collection_id,
                                       versioned=versioned,
                                       key=row["key"])
            args = {"collection_id": _test_collection_id,
                    "key": row["key"]}

            cursor = self._connection.cursor()
            if _write_debug_sql:
                with open("/tmp/debug.sql", "w") as debug_sql_file:
                    debug_sql_file.write(mogrify(sql_text, args))
            cursor.execute(sql_text, args)
            test_rows = cursor.fetchall()
            cursor.close()

            # 2012-12-20 dougfort -- list_keys and list_versions only
            # retrieve one conjoined part, but version_for_key retrieves
            # all conjoined parts. So we may have more than one row here.
            self.assertTrue(len(test_rows) > 0)
            for test_row in test_rows:
                self.assertEqual(test_row["key"], row["key"],
                                 (test_row["key"], row["key"]))
                self.assertEqual(test_row["unified_id"], row["unified_id"],
                                 (test_row["unified_id"], row["unified_id"]))

    # check that these return empty
    for versioned in [True, False]:
        sql_text = version_for_key(_test_collection_id,
                                   versioned=versioned,
                                   key=_test_key,
                                   unified_id=_test_no_such_unified_id)
        # bind _test_key, the key the statement was generated for. Relying
        # on a loop variable left over from the section above would raise
        # NameError whenever list_keys returned no rows.
        args = {"collection_id": _test_collection_id,
                "key": _test_key,
                "unified_id": _test_no_such_unified_id}

        cursor = self._connection.cursor()
        cursor.execute(sql_text, args)
        test_rows = cursor.fetchall()
        cursor.close()

        self.assertEqual(len(test_rows), 0, test_rows)
def test_limits_and_markers(self):
    """
    check that the limits and markers work correctly.

    Take the result fetched without limit=1 as a baseline, then walk it
    with a series of limit=1 queries, feeding the previously returned row
    back in as the marker, and check that each query returns the next
    baseline row.
    """
    log = logging.getLogger("test_limits_and_markers")

    # list_keys: page through using key_marker alone
    for versioned in [True, False]:
        sql_text = list_keys(_test_collection_id,
                             versioned=versioned,
                             prefix=_test_prefix)
        args = {"collection_id": _test_collection_id,
                "prefix": _test_prefix, }

        cursor = self._connection.cursor()
        cursor.execute(sql_text, args)
        baseline_rows = cursor.fetchall()
        cursor.close()

        key_marker = None
        for row in baseline_rows:
            sql_text = list_keys(_test_collection_id,
                                 versioned=versioned,
                                 prefix=_test_prefix,
                                 key_marker=key_marker,
                                 limit=1)
            args = {"collection_id": _test_collection_id,
                    "prefix": _test_prefix,
                    "key_marker": key_marker,
                    "limit": 1}

            cursor = self._connection.cursor()
            cursor.execute(sql_text, args)
            test_row = cursor.fetchone()
            cursor.close()

            # fail cleanly (instead of a TypeError on subscripting None)
            # when the page comes back empty
            self.assertIsNotNone(test_row, (versioned, key_marker))
            self.assertEqual(test_row["key"], row["key"],
                             (test_row["key"], row["key"]))
            self.assertEqual(test_row["unified_id"], row["unified_id"],
                             (test_row["unified_id"], row["unified_id"]))

            key_marker = test_row["key"]

    # list_versions: page through using key_marker and version_marker
    for versioned in [True, False]:
        sql_text = list_versions(_test_collection_id,
                                 versioned=versioned,
                                 prefix=_test_prefix,
                                 limit=None)
        args = {"collection_id": _test_collection_id,
                "prefix": _test_prefix, }

        if _write_debug_sql:
            with open("/tmp/debug_all.sql", "w") as debug_sql_file:
                debug_sql_file.write(mogrify(sql_text, args))

        cursor = self._connection.cursor()
        cursor.execute(sql_text, args)
        baseline_rows = cursor.fetchall()
        cursor.close()

        baseline_set = set((r["key"], r["unified_id"], )
                           for r in baseline_rows)

        key_marker = None
        version_marker = None
        for row_idx, row in enumerate(baseline_rows):
            sql_text = list_versions(_test_collection_id,
                                     versioned=versioned,
                                     prefix=_test_prefix,
                                     key_marker=key_marker,
                                     version_marker=version_marker,
                                     limit=1)
            args = {"collection_id": _test_collection_id,
                    "prefix": _test_prefix,
                    "limit": 1}
            # markers are only bound once there is a previous row
            if key_marker is not None:
                args["key_marker"] = key_marker
            if version_marker is not None:
                args["version_marker"] = version_marker

            if _write_debug_sql:
                debug_filename = "/tmp/debug_%s.sql" % (row_idx, )
                with open(debug_filename, "w") as debug_sql_file:
                    debug_sql_file.write(mogrify(sql_text, args))

            # this result should always be stable. is it?
            last_time = None
            for _ in range(5):
                cursor = self._connection.cursor()
                cursor.execute(sql_text, args)
                test_row = cursor.fetchone()
                cursor.close()
                if last_time is not None:
                    # a unittest assertion, so the check survives python -O
                    self.assertEqual(test_row, last_time,
                                     (row_idx, versioned))
                last_time = test_row

            self.assertIsNotNone(test_row, (row_idx, versioned))

            # make sure it's in the result somewhere. below we test if it's
            # in the right order.
            self.assertIn((test_row["key"], test_row["unified_id"]),
                          baseline_set)

            log.info("{0}, {1}".format(test_row["key"], row["key"]))
            log.debug(sql_text)

            self.assertEqual(test_row["key"], row["key"],
                             (row_idx, versioned,
                              test_row["key"], row["key"]))
            self.assertEqual(test_row["unified_id"], row["unified_id"],
                             (row_idx, versioned,
                              test_row["unified_id"], row["unified_id"]))

            key_marker = test_row["key"]
            version_marker = test_row["unified_id"]
def test_list(self):
    """
    test listing keys and versions of keys

    Checks, for the test prefix:
    - no row returned by list_versions or list_keys is also in the set
      returned by self._retrieve_collectables
    - every returned key starts with the prefix
    - unversioned list_versions and both flavours of list_keys return at
      most one row per key
    - list_keys rows line up, pairwise, with the matching list_versions
      rows on key and unified_id
    """
    def _fetch_all(sql_text, args, debug_filename):
        # optionally dump the statement, then run it and return all rows
        if _write_debug_sql:
            with open(debug_filename, "w") as debug_sql_file:
                debug_sql_file.write(mogrify(sql_text, args))
        cursor = self._connection.cursor()
        cursor.execute(sql_text, args)
        rows = cursor.fetchall()
        cursor.close()
        return rows

    def _assert_none_collectable(rows, versioned):
        # none of the listed rows may be among the collectable ones
        collectable_set = self._retrieve_collectables(versioned)
        test_set = set((r["key"], r["unified_id"], ) for r in rows)
        collectable_intersection = test_set & collectable_set
        self.assertEqual(len(collectable_intersection), 0,
                         collectable_intersection)

    def _count_keys(rows):
        # count rows per key, checking that every key begins with prefix
        counts = Counter()
        for row in rows:
            counts[row["key"]] += 1
            self.assertTrue(row["key"].startswith(_test_prefix))
        return counts

    versioned = False
    unversioned_rows = _fetch_all(
        list_versions(_test_collection_id,
                      versioned=versioned,
                      prefix=_test_prefix,
                      limit=None),
        {"collection_id": _test_collection_id,
         "prefix": _test_prefix, },
        "/tmp/debug_unversioned_rows.sql")
    _assert_none_collectable(unversioned_rows, versioned)

    # check that there's no more than one row per key for a non-versioned
    # collection
    unversioned_key_counts = _count_keys(unversioned_rows)
    for key, value in unversioned_key_counts.items():
        self.assertEqual(value, 1, (key, value))

    versioned = True
    versioned_rows = _fetch_all(
        list_versions(_test_collection_id,
                      versioned=versioned,
                      prefix=_test_prefix),
        {"collection_id": _test_collection_id,
         "prefix": _test_prefix, },
        "/tmp/debug_versioned_rows.sql")
    _assert_none_collectable(versioned_rows, versioned)

    # keep, for each key, the last row list_versions returned for it
    # (setdefault over the reversed rows), then restore the original
    # relative order
    last_row_by_key = OrderedDict()
    for row in reversed(versioned_rows):
        last_row_by_key.setdefault(row["key"], row)
    # list() so that reverse() also works where values() is a view
    latest_versioned_rows = list(last_row_by_key.values())
    latest_versioned_rows.reverse()
    self.assertLessEqual(len(latest_versioned_rows), len(versioned_rows))

    # check that there's >= as many rows now as above.
    versioned_key_counts = _count_keys(versioned_rows)
    for key, value in versioned_key_counts.items():
        self.assertTrue(value >= unversioned_key_counts[key], (key, value))

    # check that the list keys result is consistent with list_versions in
    # above (although there could be extra columns.) Note that
    # list_keys(versioned=True) may have records that
    # list_versions(versioned=False) does not have, because there are more
    # ways for a segment to become eligible for garbage collection in an
    # unversioned collection.
    for versioned in [False, True, ]:
        key_rows = _fetch_all(
            list_keys(_test_collection_id,
                      versioned=versioned,
                      prefix=_test_prefix,
                      limit=None),
            {"collection_id": _test_collection_id,
             "prefix": _test_prefix, },
            "/tmp/debug_key_rows_versioned_%r.sql" % (versioned, ))
        _assert_none_collectable(key_rows, versioned)

        row_counts = (len(key_rows), len(unversioned_rows), versioned, )
        if versioned:
            # a list of keys with versioning on may have keys that don't
            # show up in the list of unversioned rows. That's because in
            # an unversioned collection, keys end when another key is
            # added. So it's possible for that plus a tombstone to cause a
            # situation where an archive is not eligible for garbage
            # collection in a versioned collection, but it is eligible for
            # garbage collection in an unversioned collection.
            self.assertGreaterEqual(len(key_rows), len(unversioned_rows),
                                    row_counts)
        else:
            self.assertEqual(len(key_rows), len(unversioned_rows),
                             row_counts)

        # list_keys must never repeat a key
        key_counts = _count_keys(key_rows)
        for key, value in key_counts.items():
            self.assertEqual(value, 1, (key, value))

        # zip stops at the shorter list, so only the common leading rows
        # are compared
        if versioned:
            expected_rows = latest_versioned_rows
        else:
            expected_rows = unversioned_rows
        for key_row, version_row in zip(key_rows, expected_rows):
            self.assertEqual(key_row["key"], version_row["key"])
            self.assertEqual(key_row["unified_id"],
                             version_row["unified_id"])
def test_list_keys_vs_list_versions(self):
    """
    version_for_key should find a row that list_versions returns ONLY IF
    that row is also in the result list_keys returns
    (i.e. some rows should be findable, some not.)
    """
    # Background: list_keys returns the newest version of every key;
    # list_versions returns every version of every key.
    # For an unversioned collection both should find the same rows
    # (list_keys just has an extra column), which means any version of a
    # key that isn't the newest one should be unreachable.
    #
    # Steps:
    # 1. save the full output of list_versions(versioned=True)
    # 2. save the full output of list_keys(versioned=True)
    # 3. compare them to work out which rows are older versions
    # 4. for each row call version_for_key with its unified_id, both
    #    versioned and unversioned, and verify finding (or correctly not
    #    finding) the result.

    def _run(statement, parameters):
        # one statement, one cursor, all rows
        cursor = self._connection.cursor()
        cursor.execute(statement, parameters)
        fetched = cursor.fetchall()
        cursor.close()
        return fetched

    def _as_pairs(rows):
        # (key, unified_id) pairs; drops any extra columns before comparing
        return set((r["key"], r["unified_id"], ) for r in rows)

    def _reachable(key, unified_id, versioned):
        # rows version_for_key finds for exactly this version, plus the
        # arguments used (needed for failure messages)
        sql_text = version_for_key(_test_collection_id,
                                   versioned=versioned,
                                   key=key,
                                   unified_id=unified_id)
        args = {"collection_id": _test_collection_id,
                "key": key,
                "unified_id": unified_id}
        return _run(sql_text, args), args

    # 1. list_versions, versioned
    sql_text = list_versions(_test_collection_id,
                             versioned=True,
                             prefix=_test_prefix)
    list_versions_rows = _run(sql_text,
                              {"collection_id": _test_collection_id,
                               "prefix": _test_prefix, })
    list_versions_set = _as_pairs(list_versions_rows)

    # 2. list_keys, versioned
    sql_text = list_keys(_test_collection_id,
                         versioned=True,
                         prefix=_test_prefix)
    list_keys_set = _as_pairs(_run(sql_text,
                                   {"collection_id": _test_collection_id,
                                    "prefix": _test_prefix, }))

    # find keys that are only reachable by list_keys when versioned=True
    # we need this below.
    sql_text = list_keys(_test_collection_id,
                         versioned=False,
                         prefix=_test_prefix)
    unversioned_list_keys_set = _as_pairs(
        _run(sql_text,
             {"collection_id": _test_collection_id,
              "prefix": _test_prefix, }))
    versioned_only_reachable_set = list_keys_set - unversioned_list_keys_set

    # 3. rows in list_versions but NOT in list_keys are older versions
    older_version_set = list_versions_set - list_keys_set

    # worth an assert or two to verify that split
    for list_versions_row in list_versions_rows:
        pair = (list_versions_row["key"],
                list_versions_row["unified_id"], )
        self.assertIn(pair, list_versions_set)
        if pair in list_keys_set:
            self.assertNotIn(pair, older_version_set)
        else:
            self.assertIn(pair, older_version_set)

    # 4. rows that are only in list_versions should not be reachable with
    # versioned=False. With versioned=True they should be reachable.
    for key, unified_id in older_version_set:
        for versioned in [False, True, ]:
            test_rows, _ = _reachable(key, unified_id, versioned)
            if versioned:
                self.assertTrue(len(test_rows) > 0)
            else:
                self.assertEqual(len(test_rows), 0)

    # Rows in both outputs should be reachable with versioned=True, but
    # only reachable with versioned=False if they are not in
    # versioned_only_reachable_set.
    for key, unified_id in list_keys_set:
        for versioned in [False, True, ]:
            test_rows, args = _reachable(key, unified_id, versioned)
            message = "versioned={0} {1}".format(versioned, args)
            if (versioned is False and
                    (key, unified_id, ) in versioned_only_reachable_set):
                self.assertTrue(len(test_rows) == 0, message)
            else:
                self.assertTrue(len(test_rows) > 0, message)
def test_version_for_key(self):
    """
    version_for_key

    For every row list_keys returns (versioned and unversioned), calling
    version_for_key without a unified_id must return at least one row, and
    every returned row must carry the same key and unified_id.

    Calling version_for_key with a unified_id that does not exist must
    return no rows.
    """
    # check that for every row in list_keys, calling version_for_key with
    # unified_id=None should return the same row, regardless of it being
    # versioned or not.
    for versioned in [True, False]:
        sql_text = list_keys(_test_collection_id,
                             versioned=versioned,
                             prefix=_test_prefix)
        args = {"collection_id": _test_collection_id,
                "prefix": _test_prefix, }

        cursor = self._connection.cursor()
        cursor.execute(sql_text, args)
        baseline_rows = cursor.fetchall()
        cursor.close()

        for row in baseline_rows:
            sql_text = version_for_key(_test_collection_id,
                                       versioned=versioned,
                                       key=row["key"])
            args = {"collection_id": _test_collection_id,
                    "key": row["key"]}

            cursor = self._connection.cursor()
            if _write_debug_sql:
                with open("/tmp/debug.sql", "w") as debug_sql_file:
                    debug_sql_file.write(mogrify(sql_text, args))
            cursor.execute(sql_text, args)
            test_rows = cursor.fetchall()
            cursor.close()

            # 2012-12-20 dougfort -- list_keys and list_versions only
            # retrieve one conjoined part, but version_for_key retrieves
            # all conjoined parts. So we may have more than one row here.
            self.assertTrue(len(test_rows) > 0)
            for test_row in test_rows:
                self.assertEqual(test_row["key"], row["key"],
                                 (test_row["key"], row["key"]))
                self.assertEqual(test_row["unified_id"], row["unified_id"],
                                 (test_row["unified_id"], row["unified_id"]))

    # check that these return empty
    for versioned in [True, False]:
        sql_text = version_for_key(_test_collection_id,
                                   versioned=versioned,
                                   key=_test_key,
                                   unified_id=_test_no_such_unified_id)
        # bind _test_key, the key the statement was generated for. Relying
        # on a loop variable left over from the section above would raise
        # NameError whenever list_keys returned no rows.
        args = {"collection_id": _test_collection_id,
                "key": _test_key,
                "unified_id": _test_no_such_unified_id}

        cursor = self._connection.cursor()
        cursor.execute(sql_text, args)
        test_rows = cursor.fetchall()
        cursor.close()

        self.assertEqual(len(test_rows), 0, test_rows)
def test_limits_and_markers(self):
    """
    check that the limits and markers work correctly.

    Take the result fetched without limit=1 as a baseline, then walk it
    with a series of limit=1 queries, feeding the previously returned row
    back in as the marker, and check that each query returns the next
    baseline row.
    """
    log = logging.getLogger("test_limits_and_markers")

    # list_keys: page through using key_marker alone
    for versioned in [True, False]:
        sql_text = list_keys(_test_collection_id,
                             versioned=versioned,
                             prefix=_test_prefix)
        args = {"collection_id": _test_collection_id,
                "prefix": _test_prefix, }

        cursor = self._connection.cursor()
        cursor.execute(sql_text, args)
        baseline_rows = cursor.fetchall()
        cursor.close()

        key_marker = None
        for row in baseline_rows:
            sql_text = list_keys(_test_collection_id,
                                 versioned=versioned,
                                 prefix=_test_prefix,
                                 key_marker=key_marker,
                                 limit=1)
            args = {"collection_id": _test_collection_id,
                    "prefix": _test_prefix,
                    "key_marker": key_marker,
                    "limit": 1}

            cursor = self._connection.cursor()
            cursor.execute(sql_text, args)
            test_row = cursor.fetchone()
            cursor.close()

            # fail cleanly (instead of a TypeError on subscripting None)
            # when the page comes back empty
            self.assertIsNotNone(test_row, (versioned, key_marker))
            self.assertEqual(test_row["key"], row["key"],
                             (test_row["key"], row["key"]))
            self.assertEqual(test_row["unified_id"], row["unified_id"],
                             (test_row["unified_id"], row["unified_id"]))

            key_marker = test_row["key"]

    # list_versions: page through using key_marker and version_marker
    for versioned in [True, False]:
        sql_text = list_versions(_test_collection_id,
                                 versioned=versioned,
                                 prefix=_test_prefix,
                                 limit=None)
        args = {"collection_id": _test_collection_id,
                "prefix": _test_prefix, }

        if _write_debug_sql:
            with open("/tmp/debug_all.sql", "w") as debug_sql_file:
                debug_sql_file.write(mogrify(sql_text, args))

        cursor = self._connection.cursor()
        cursor.execute(sql_text, args)
        baseline_rows = cursor.fetchall()
        cursor.close()

        baseline_set = set((r["key"], r["unified_id"], )
                           for r in baseline_rows)

        key_marker = None
        version_marker = None
        for row_idx, row in enumerate(baseline_rows):
            sql_text = list_versions(_test_collection_id,
                                     versioned=versioned,
                                     prefix=_test_prefix,
                                     key_marker=key_marker,
                                     version_marker=version_marker,
                                     limit=1)
            args = {"collection_id": _test_collection_id,
                    "prefix": _test_prefix,
                    "limit": 1}
            # markers are only bound once there is a previous row
            if key_marker is not None:
                args["key_marker"] = key_marker
            if version_marker is not None:
                args["version_marker"] = version_marker

            if _write_debug_sql:
                debug_filename = "/tmp/debug_%s.sql" % (row_idx, )
                with open(debug_filename, "w") as debug_sql_file:
                    debug_sql_file.write(mogrify(sql_text, args))

            # this result should always be stable. is it?
            last_time = None
            for _ in range(5):
                cursor = self._connection.cursor()
                cursor.execute(sql_text, args)
                test_row = cursor.fetchone()
                cursor.close()
                if last_time is not None:
                    # a unittest assertion, so the check survives python -O
                    self.assertEqual(test_row, last_time,
                                     (row_idx, versioned))
                last_time = test_row

            self.assertIsNotNone(test_row, (row_idx, versioned))

            # make sure it's in the result somewhere. below we test if it's
            # in the right order.
            self.assertIn((test_row["key"], test_row["unified_id"]),
                          baseline_set)

            log.info("{0}, {1}".format(test_row["key"], row["key"]))
            log.debug(sql_text)

            self.assertEqual(test_row["key"], row["key"],
                             (row_idx, versioned,
                              test_row["key"], row["key"]))
            self.assertEqual(test_row["unified_id"], row["unified_id"],
                             (row_idx, versioned,
                              test_row["unified_id"], row["unified_id"]))

            key_marker = test_row["key"]
            version_marker = test_row["unified_id"]
def test_list(self):
    """
    test listing keys and versions of keys

    Checks, for the test prefix:
    - no row returned by list_versions or list_keys is also in the set
      returned by self._retrieve_collectables
    - every returned key starts with the prefix
    - unversioned list_versions and both flavours of list_keys return at
      most one row per key
    - list_keys rows line up, pairwise, with the matching list_versions
      rows on key and unified_id
    """
    def _fetch_all(sql_text, args, debug_filename):
        # optionally dump the statement, then run it and return all rows
        if _write_debug_sql:
            with open(debug_filename, "w") as debug_sql_file:
                debug_sql_file.write(mogrify(sql_text, args))
        cursor = self._connection.cursor()
        cursor.execute(sql_text, args)
        rows = cursor.fetchall()
        cursor.close()
        return rows

    def _assert_none_collectable(rows, versioned):
        # none of the listed rows may be among the collectable ones
        collectable_set = self._retrieve_collectables(versioned)
        test_set = set((r["key"], r["unified_id"], ) for r in rows)
        collectable_intersection = test_set & collectable_set
        self.assertEqual(len(collectable_intersection), 0,
                         collectable_intersection)

    def _count_keys(rows):
        # count rows per key, checking that every key begins with prefix
        counts = Counter()
        for row in rows:
            counts[row["key"]] += 1
            self.assertTrue(row["key"].startswith(_test_prefix))
        return counts

    versioned = False
    unversioned_rows = _fetch_all(
        list_versions(_test_collection_id,
                      versioned=versioned,
                      prefix=_test_prefix,
                      limit=None),
        {"collection_id": _test_collection_id,
         "prefix": _test_prefix, },
        "/tmp/debug_unversioned_rows.sql")
    _assert_none_collectable(unversioned_rows, versioned)

    # check that there's no more than one row per key for a non-versioned
    # collection
    unversioned_key_counts = _count_keys(unversioned_rows)
    for key, value in unversioned_key_counts.items():
        self.assertEqual(value, 1, (key, value))

    versioned = True
    versioned_rows = _fetch_all(
        list_versions(_test_collection_id,
                      versioned=versioned,
                      prefix=_test_prefix),
        {"collection_id": _test_collection_id,
         "prefix": _test_prefix, },
        "/tmp/debug_versioned_rows.sql")
    _assert_none_collectable(versioned_rows, versioned)

    # keep, for each key, the last row list_versions returned for it
    # (setdefault over the reversed rows), then restore the original
    # relative order
    last_row_by_key = OrderedDict()
    for row in reversed(versioned_rows):
        last_row_by_key.setdefault(row["key"], row)
    # list() so that reverse() also works where values() is a view
    latest_versioned_rows = list(last_row_by_key.values())
    latest_versioned_rows.reverse()
    self.assertLessEqual(len(latest_versioned_rows), len(versioned_rows))

    # check that there's >= as many rows now as above.
    versioned_key_counts = _count_keys(versioned_rows)
    for key, value in versioned_key_counts.items():
        self.assertTrue(value >= unversioned_key_counts[key], (key, value))

    # check that the list keys result is consistent with list_versions in
    # above (although there could be extra columns.) Note that
    # list_keys(versioned=True) may have records that
    # list_versions(versioned=False) does not have, because there are more
    # ways for a segment to become eligible for garbage collection in an
    # unversioned collection.
    for versioned in [False, True, ]:
        key_rows = _fetch_all(
            list_keys(_test_collection_id,
                      versioned=versioned,
                      prefix=_test_prefix,
                      limit=None),
            {"collection_id": _test_collection_id,
             "prefix": _test_prefix, },
            "/tmp/debug_key_rows_versioned_%r.sql" % (versioned, ))
        _assert_none_collectable(key_rows, versioned)

        row_counts = (len(key_rows), len(unversioned_rows), versioned, )
        if versioned:
            # a list of keys with versioning on may have keys that don't
            # show up in the list of unversioned rows. That's because in
            # an unversioned collection, keys end when another key is
            # added. So it's possible for that plus a tombstone to cause a
            # situation where an archive is not eligible for garbage
            # collection in a versioned collection, but it is eligible for
            # garbage collection in an unversioned collection.
            self.assertGreaterEqual(len(key_rows), len(unversioned_rows),
                                    row_counts)
        else:
            self.assertEqual(len(key_rows), len(unversioned_rows),
                             row_counts)

        # list_keys must never repeat a key
        key_counts = _count_keys(key_rows)
        for key, value in key_counts.items():
            self.assertEqual(value, 1, (key, value))

        # zip stops at the shorter list, so only the common leading rows
        # are compared
        if versioned:
            expected_rows = latest_versioned_rows
        else:
            expected_rows = unversioned_rows
        for key_row, version_row in zip(key_rows, expected_rows):
            self.assertEqual(key_row["key"], version_row["key"])
            self.assertEqual(key_row["unified_id"],
                             version_row["unified_id"])