def test_paginated_fetches_next_page(self):
    """Consuming the paginated generator issues one ``get_all`` call per page."""
    remaining = self.sample_records
    remaining.reverse()

    def serve_next_page(*args, **kwargs):
        # Hand out three records at a time until the pool is empty.
        page = remaining[:3]
        del remaining[:3]
        return page, len(page)

    self.storage.get_all.side_effect = serve_next_page

    # Drain the generator entirely so every page gets fetched.
    list(paginated(self.storage, sorting=[Sort('id', -1)]))

    expected_calls = [
        mock.call(sorting=[Sort('id', -1)], limit=25, pagination_rules=None),
        mock.call(sorting=[Sort('id', -1)], limit=25,
                  pagination_rules=[[Filter('id', 'record-03', COMPARISON.LT)]]),
        mock.call(sorting=[Sort('id', -1)], limit=25,
                  pagination_rules=[[Filter('id', 'record-01', COMPARISON.LT)]]),
    ]
    assert self.storage.get_all.call_args_list == expected_calls
def test_paginated_fetches_next_page(self):
    """Consuming the paginated generator issues one ``list_all`` call per page."""
    remaining = self.sample_objects
    remaining.reverse()

    def serve_next_page(*args, **kwargs):
        # Hand out three objects at a time until the pool is empty.
        page = remaining[:3]
        del remaining[:3]
        return page

    self.storage.list_all.side_effect = serve_next_page

    # Drain the generator entirely so every page gets fetched.
    list(paginated(self.storage, sorting=[Sort("id", -1)]))

    expected_calls = [
        mock.call(sorting=[Sort("id", -1)], limit=25, pagination_rules=None),
        mock.call(
            sorting=[Sort("id", -1)],
            limit=25,
            pagination_rules=[[Filter("id", "object-03", COMPARISON.LT)]],
        ),
        mock.call(
            sorting=[Sort("id", -1)],
            limit=25,
            pagination_rules=[[Filter("id", "object-01", COMPARISON.LT)]],
        ),
    ]
    assert self.storage.list_all.call_args_list == expected_calls
def _extract_sorting(self, limit):
    """Extract the sort criteria from the ``_sort`` querystring parameter.

    Fixes the previous docstring, which wrongly described this as
    extracting *filters*.

    :param limit: pagination limit (unused here; part of the signature
        shared with subclass overrides).
    :returns: a list of ``Sort`` tuples. A descending sort on the
        ``modified_field`` is appended when not explicitly requested,
        to guarantee a stable order for pagination.
    :raises: a 400 invalid-parameter error for unknown sort fields.
    """
    specified = self.request.validated['querystring'].get('_sort', [])
    sorting = []
    # NOTE(review): this membership test compares raw specs, so a prefixed
    # spec like '-last_modified' does not count as "used" and a duplicate
    # sort on the modified field may be appended — confirm intended.
    modified_field_used = self.model.modified_field in specified
    for field in specified:
        field = field.strip()
        # Optional '-'/'+' prefix followed by a (possibly dotted) field name.
        m = re.match(r'^([\-+]?)([\w\.]+)$', field)
        if m:
            order, field = m.groups()
            if not self.is_known_field(field):
                error_details = {
                    'location': 'querystring',
                    'description': "Unknown sort field '{}'".format(field)
                }
                raise_invalid(self.request, **error_details)
            direction = -1 if order == '-' else 1
            sorting.append(Sort(field, direction))

    if not modified_field_used:
        # Add a sort by the ``modified_field`` in descending order
        # useful for pagination
        sorting.append(Sort(self.model.modified_field, -1))
    return sorting
def test_get_all_handle_sorting_on_id(self):
    """Ascending sort on ``id`` orders records by identifier."""
    for _ in range(3):
        self.create_record()
    results, _ = self.storage.get_all(sorting=[Sort('id', 1)],
                                      **self.storage_kw)
    self.assertTrue(results[0]['id'] < results[-1]['id'])
def _get_records(self, rc, last_modified=None):
    """Fetch a collection's records (tombstones included) plus its timestamp.

    When ``last_modified`` is given, only items modified after it are
    returned, oldest first. The timestamp is ``None`` for a collection
    that has no records and no tombstones at all.
    """
    query_kwargs = {}
    if last_modified is not None:
        # Restrict to items strictly newer than the given timestamp.
        newer_than = Filter(FIELD_LAST_MODIFIED, last_modified, COMPARISON.GT)
        query_kwargs['filters'] = [newer_than]
        query_kwargs['sorting'] = [Sort(FIELD_LAST_MODIFIED, 1)]

    parent_id = "/buckets/{bucket}/collections/{collection}".format(**rc)
    records, count = self.storage.get_all(parent_id=parent_id,
                                          collection_id='record',
                                          include_deleted=True,
                                          **query_kwargs)

    if len(records) == 0 and count == 0:
        # Completely empty collection (no records, no tombstones).
        collection_timestamp = None
    else:
        collection_timestamp = self.storage.collection_timestamp(
            parent_id=parent_id, collection_id='record')
    return records, collection_timestamp
def test_delete_all_supports_sorting(self):
    """``delete_all`` honours sorting when a limit restricts the deletion."""
    for value in range(5):
        self.create_record({'foo': value})
    by_foo_desc = [Sort('foo', -1)]
    # Deleting the top two by descending 'foo' must remove 4 and 3.
    self.storage.delete_all(limit=2, sorting=by_foo_desc, **self.storage_kw)
    remaining, count = self.storage.get_all(sorting=by_foo_desc,
                                            **self.storage_kw)
    self.assertEqual(count, 3)
    self.assertEqual(remaining[0]['foo'], 2)
def test_get_all_can_filter_with_strings(self):
    """String filters compare values lexicographically."""
    for name in ("Rémy", "Alexis", "Marie"):
        self.create_record({'name': name})
    records, _ = self.storage.get_all(
        sorting=[Sort('name', 1)],
        filters=[Filter('name', "Mathieu", utils.COMPARISON.LT)],
        **self.storage_kw)
    # "Rémy" sorts after "Mathieu" and must be excluded.
    self.assertEqual(len(records), 2)
    self.assertEqual(records[0]['name'], "Alexis")
    self.assertEqual(records[1]['name'], "Marie")
def test_get_all_handle_sorting_on_subobject(self):
    """Dotted sort fields reach into nested sub-objects."""
    for age in range(10):
        payload = dict(self.record, person=dict(age=age))
        self.create_record(payload)
    records, _ = self.storage.get_all(sorting=[Sort('person.age', 1)],
                                      **self.storage_kw)
    self.assertLess(records[0]['person']['age'],
                    records[-1]['person']['age'])
def _extract_sorting(self, limit):
    """Build sort criteria suited to permission entries.

    Permission entries carry no timestamp, so the inherited default sort
    on the ``modified_field`` is stripped. Because pagination still needs
    at least one stable criterion, a sort on ``uri`` (whose values are
    unique) is appended when absent.
    """
    inherited = super()._extract_sorting(limit)
    criteria = [sort for sort in inherited
                if sort.field != self.model.modified_field]
    if all(sort.field != "uri" for sort in criteria):
        criteria.append(Sort("uri", -1))
    return criteria
def get_records(request, prefix, collection):
    """Return the enabled records of a configured resource together with
    the highest ``last_modified`` among them (0 when there are none).

    :param request: Pyramid request carrying the registry (storage + config).
    :param prefix: key into the configured ``amo_resources`` mapping.
    :param collection: sub-key naming the collection within the resource.
    """
    resources = request.registry.amo_resources
    parent_id = PARENT_PATTERN.format(**resources[prefix][collection])
    cid = "record"
    records, count = request.registry.storage.get_all(
        collection_id=cid,
        parent_id=parent_id,
        filters=[Filter('enabled', True, utils.COMPARISON.EQ)],
        sorting=[Sort('last_modified', 1)])
    # Ascending sort puts the newest record last. BUGFIX: the previous
    # `count > 1` condition reported 0 when exactly one record existed;
    # any non-empty result must yield its newest timestamp.
    last_modified = records[-1]['last_modified'] if count > 0 else 0
    return records, last_modified
def test_get_source_records_asks_storage_for_records(self):
    """``get_source_records`` delegates to storage with the source parent id."""
    self.storage.get_all.return_value = ([], mock.sentinel.count)
    self.updater.get_source_records(None)
    self.storage.get_all.assert_called_with(
        collection_id='record',
        parent_id='/buckets/sourcebucket/collections/sourcecollection',
        include_deleted=True,
        sorting=[Sort('last_modified', 1)])
def test_sorting_on_numeric_arbitrary_field(self):
    """Descending numeric sort orders values numerically, not textually."""
    filters = self._get_last_modified_filters()
    for status in (1, 10, 6, 46):
        self.create_record({'status': status})
    records, _ = self.storage.get_all(sorting=[Sort('status', -1)],
                                      filters=filters,
                                      include_deleted=True,
                                      **self.storage_kw)
    observed = [record['status'] for record in records[:4]]
    self.assertEqual(observed, [46, 10, 6, 1])
def test_sorting_on_arbitrary_field_groups_deleted_at_last(self):
    """Tombstones lack arbitrary fields, so they sort after live records."""
    filters = self._get_last_modified_filters()
    self.create_record({'status': 0})
    self.create_and_delete_record({'status': 1})
    self.create_and_delete_record({'status': 2})
    records, _ = self.storage.get_all(sorting=[Sort('status', 1)],
                                      filters=filters,
                                      include_deleted=True,
                                      **self.storage_kw)
    self.assertNotIn('deleted', records[0])
    for tombstone in records[1:3]:
        self.assertIn('deleted', tombstone)
def test_support_sorting_on_deleted_field_groups_deleted_at_first(self):
    """Ascending sort on ``deleted`` lists tombstones before live records."""
    filters = self._get_last_modified_filters()
    # Interleave creations and deletions; boolean sort should regroup them.
    self.create_and_delete_record()
    self.create_record()
    self.create_and_delete_record()
    records, _ = self.storage.get_all(sorting=[Sort('deleted', 1)],
                                      filters=filters,
                                      include_deleted=True,
                                      **self.storage_kw)
    for tombstone in records[:2]:
        self.assertIn('deleted', tombstone)
    self.assertNotIn('deleted', records[2])
def test_get_destination_records(self):
    """``get_destination_records`` queries storage and the collection timestamp."""
    sample = [{'id': idx, 'foo': 'bar %s' % idx} for idx in range(1, 4)]
    self.storage.get_all.return_value = (sample, mock.sentinel.count)
    self.updater.get_destination_records()
    dest_parent = '/buckets/destbucket/collections/destcollection'
    self.storage.collection_timestamp.assert_called_with(
        collection_id='record',
        parent_id=dest_parent)
    self.storage.get_all.assert_called_with(
        collection_id='record',
        parent_id=dest_parent,
        include_deleted=True,
        sorting=[Sort('last_modified', 1)])
def test_sorting_on_last_modified_mixes_deleted_records(self):
    """Sorting on ``last_modified`` interleaves tombstones with live records."""
    filters = self._get_last_modified_filters()
    self.create_and_delete_record()
    self.create_record()
    self.create_and_delete_record()
    records, _ = self.storage.get_all(sorting=[Sort('last_modified', 1)],
                                      filters=filters,
                                      include_deleted=True,
                                      **self.storage_kw)
    # Creation order must be preserved: tombstone, live, tombstone.
    deleted_flags = ['deleted' in record for record in records]
    self.assertEqual(deleted_flags, [True, False, True])
def test_sorting_on_last_modified_applies_to_deleted_items(self):
    """Descending ``last_modified`` sort also orders tombstones."""
    filters = self._get_last_modified_filters()
    newest = oldest = None
    for countdown in range(20, 0, -1):
        tombstone = self.create_and_delete_record()
        if countdown == 1:
            newest = tombstone  # Created last.
        if countdown == 20:
            oldest = tombstone  # Created first.
    records, _ = self.storage.get_all(sorting=[Sort('last_modified', -1)],
                                      filters=filters,
                                      include_deleted=True,
                                      **self.storage_kw)
    self.assertDictEqual(records[0], newest)
    self.assertDictEqual(records[-1], oldest)
def test_pagination_rules_on_last_modified_apply_to_deleted_records(self):
    """Pagination rules filter tombstones just like live records."""
    filters = self._get_last_modified_filters()
    # Alternate tombstones (even i) and live records (odd i).
    for i in range(15):
        if i % 2 == 0:
            self.create_and_delete_record()
        else:
            self.create_record()
    records, count = self.storage.get_all(
        sorting=[Sort('last_modified', 1)],
        pagination_rules=[[Filter('last_modified', 314,
                                  utils.COMPARISON.GT)]],
        limit=5,
        filters=filters,
        include_deleted=True,
        **self.storage_kw)
    self.assertEqual(len(records), 5)
    self.assertEqual(count, 7)
    self.assertIn('deleted', records[0])
    self.assertNotIn('deleted', records[1])
def test_get_all_can_filter_with_numeric_values(self):
    """Numeric filters: MAX is inclusive, LT is exclusive."""
    # A record without the filtered field must never match.
    self.create_record({'missing': 'code'})
    for code in (1, 10, 6, 46):
        self.create_record({'code': code})
    ascending = [Sort('code', 1)]

    records, _ = self.storage.get_all(
        sorting=ascending,
        filters=[Filter('code', 10, utils.COMPARISON.MAX)],
        **self.storage_kw)
    self.assertEqual([record['code'] for record in records], [1, 6, 10])

    records, _ = self.storage.get_all(
        sorting=ascending,
        filters=[Filter('code', 10, utils.COMPARISON.LT)],
        **self.storage_kw)
    self.assertEqual([record['code'] for record in records], [1, 6])
def get_paginated_records(storage, bucket_id, collection_id, limit=5000):
    """Yield pages of records (newest first), walking past the storage
    fetch limit by paginating on ``last_modified`` boundaries.

    :param storage: storage backend exposing ``get_all``.
    :param bucket_id: bucket holding the collection.
    :param collection_id: collection whose records are fetched.
    :param limit: page size (defaults to 5000).
    """
    parent_id = "/buckets/%s/collections/%s" % (bucket_id, collection_id)
    newest_first = [Sort('last_modified', -1)]
    rules = []
    while True:
        page, _ = storage.get_all(parent_id=parent_id,
                                  collection_id="record",
                                  pagination_rules=rules,
                                  sorting=newest_first,
                                  limit=limit)
        yield page
        if len(page) < limit:
            # A short page means there is nothing left to fetch.
            break
        # Continue strictly below the smallest timestamp seen so far.
        smallest_timestamp = page[-1]["last_modified"]
        rules = [[Filter("last_modified", smallest_timestamp, COMPARISON.LT)]]
""" kinto.plugins.quotas.scripts: scripts to maintain quotas and fix them when they're broken """ import logging from kinto.core.storage import Sort from kinto.core.storage.utils import paginated from .listener import BUCKET_QUOTA_OBJECT_ID, COLLECTION_QUOTA_OBJECT_ID from .utils import record_size logger = logging.getLogger(__name__) OLDEST_FIRST = Sort('last_modified', 1) def rebuild_quotas(storage, dry_run=False): for bucket in paginated(storage, collection_id='bucket', parent_id='', sorting=[OLDEST_FIRST]): bucket_id = bucket['id'] bucket_path = '/buckets/{}'.format(bucket['id']) bucket_collection_count = 0 bucket_record_count = 0 bucket_storage_size = record_size(bucket) for collection in paginated(storage, collection_id='collection', parent_id=bucket_path, sorting=[OLDEST_FIRST]): collection_info = rebuild_quotas_collection(storage, bucket_id, collection, dry_run) (collection_record_count, collection_storage_size) = collection_info bucket_collection_count += 1 bucket_record_count += collection_record_count bucket_storage_size += collection_storage_size
""" kinto.plugins.quotas.scripts: scripts to maintain quotas and fix them when they're broken """ import logging from kinto.core.storage import Sort from kinto.core.storage.utils import paginated from .listener import BUCKET_QUOTA_OBJECT_ID, COLLECTION_QUOTA_OBJECT_ID from .utils import record_size logger = logging.getLogger(__name__) OLDEST_FIRST = Sort("last_modified", 1) def rebuild_quotas(storage, dry_run=False): for bucket in paginated(storage, collection_id="bucket", parent_id="", sorting=[OLDEST_FIRST]): bucket_id = bucket["id"] bucket_path = "/buckets/{}".format(bucket["id"]) bucket_collection_count = 0 bucket_record_count = 0 bucket_storage_size = record_size(bucket) for collection in paginated(storage, collection_id="collection", parent_id=bucket_path, sorting=[OLDEST_FIRST]): collection_info = rebuild_quotas_collection(
def test_paginated_passes_batch_size(self):
    """The ``batch_size`` argument becomes the per-page ``limit``."""
    pages = paginated(self.storage, sorting=[Sort('id', -1)], batch_size=17)
    next(pages)  # Pull once so the first storage call is made.
    self.storage.get_all.assert_called_with(sorting=[Sort('id', -1)],
                                            limit=17,
                                            pagination_rules=None)
def test_paginated_yields_records(self):
    """``paginated`` yields individual records, not whole pages."""
    # Renamed the local from `iter`, which shadowed the builtin iter().
    record_iter = paginated(self.storage, sorting=[Sort('id', -1)])
    assert next(record_iter) == {"id": "record-01", "flavor": "strawberry"}
class QuotasScriptsTest(unittest.TestCase):
    """Tests for the ``rebuild_quotas`` maintenance script.

    ``paginated`` is patched to feed canned pages, so no real storage
    backend is involved.
    """

    # Sort criterion the script is expected to pass to ``paginated``.
    OLDEST_FIRST = Sort("last_modified", 1)
    BATCH_SIZE = 25

    def setUp(self):
        self.storage = mock.Mock()

    def test_rebuild_quotas_updates_records(self):
        """The script walks buckets/collections/records and persists totals."""
        # Pages returned by successive calls to the patched ``paginated``,
        # in call order.
        paginated_data = [
            # get buckets
            iter([{"id": "bucket-1", "last_modified": 10}]),
            # get collections for first bucket
            iter(
                [
                    {"id": "collection-1", "last_modified": 100},
                    {"id": "collection-2", "last_modified": 200},
                ]
            ),
            # get records for first collection
            iter([{"id": "record-1", "last_modified": 110}]),
            # get records for second collection
            iter([{"id": "record-1b", "last_modified": 210}]),
        ]

        def paginated_mock(*args, **kwargs):
            return paginated_data.pop(0)

        with mock.patch("kinto.plugins.quotas.scripts.logger") as mocked_logger:
            with mock.patch(
                "kinto.plugins.quotas.scripts.paginated", side_effect=paginated_mock
            ) as mocked_paginated:
                scripts.rebuild_quotas(self.storage)

        mocked_paginated.assert_any_call(
            self.storage, resource_name="bucket", parent_id="", sorting=[self.OLDEST_FIRST]
        )
        mocked_paginated.assert_any_call(
            self.storage,
            resource_name="collection",
            parent_id="/buckets/bucket-1",
            sorting=[self.OLDEST_FIRST],
        )
        mocked_paginated.assert_any_call(
            self.storage,
            resource_name="record",
            parent_id="/buckets/bucket-1/collections/collection-1",
            sorting=[self.OLDEST_FIRST],
        )
        mocked_paginated.assert_any_call(
            self.storage,
            resource_name="record",
            parent_id="/buckets/bucket-1/collections/collection-2",
            sorting=[self.OLDEST_FIRST],
        )
        self.storage.update.assert_any_call(
            resource_name="quota",
            parent_id="/buckets/bucket-1",
            object_id="bucket_info",
            obj={"record_count": 2, "storage_size": 193, "collection_count": 2},
        )
        self.storage.update.assert_any_call(
            resource_name="quota",
            parent_id="/buckets/bucket-1/collections/collection-1",
            object_id="collection_info",
            obj={"record_count": 1, "storage_size": 78},
        )
        self.storage.update.assert_any_call(
            resource_name="quota",
            parent_id="/buckets/bucket-1/collections/collection-2",
            object_id="collection_info",
            obj={"record_count": 1, "storage_size": 79},
        )
        mocked_logger.info.assert_any_call(
            "Bucket bucket-1, collection collection-1. " "Final size: 1 records, 78 bytes."
        )
        mocked_logger.info.assert_any_call(
            "Bucket bucket-1, collection collection-2. " "Final size: 1 records, 79 bytes."
        )
        mocked_logger.info.assert_any_call(
            "Bucket bucket-1. Final size: " "2 collections, 2 records, 193 bytes."
        )

    def test_rebuild_quotas_doesnt_update_if_dry_run(self):
        """With ``dry_run=True``, totals are logged but never written."""
        paginated_data = [
            # get buckets
            iter([{"id": "bucket-1", "last_modified": 10}]),
            # get collections for first bucket
            iter([{"id": "collection-1", "last_modified": 100}]),
            # get records for first collection
            iter([{"id": "record-1", "last_modified": 110}]),
        ]

        def paginated_mock(*args, **kwargs):
            return paginated_data.pop(0)

        with mock.patch("kinto.plugins.quotas.scripts.logger") as mocked:
            with mock.patch("kinto.plugins.quotas.scripts.paginated", side_effect=paginated_mock):
                scripts.rebuild_quotas(self.storage, dry_run=True)

        assert not self.storage.update.called
        mocked.info.assert_any_call(
            "Bucket bucket-1, collection collection-1. " "Final size: 1 records, 78 bytes."
        )
        mocked.info.assert_any_call(
            "Bucket bucket-1. Final size: 1 collections, " "1 records, 114 bytes."
        )
def get_changeset(request):
    """Return the changeset of a collection: metadata, timestamp, changes.

    The monitor/changes collection is served from its dedicated model;
    every other collection is read from storage with a consistency check
    so the returned timestamp matches the returned changes.
    """
    bid = request.matchdict["bid"]
    cid = request.matchdict["cid"]
    storage = request.registry.storage

    queryparams = request.validated["querystring"]

    limit = queryparams.get("_limit")
    filters = []
    include_deleted = False
    if "_since" in queryparams:
        filters = [Filter("last_modified", queryparams["_since"], COMPARISON.GT)]
        # Include tombstones when querying with _since
        include_deleted = True

    if (bid, cid) == (MONITOR_BUCKET, CHANGES_COLLECTION):
        # Redirect old since, on monitor/changes only.
        _handle_old_since_redirect(request)

        if "bucket" in queryparams:
            filters.append(Filter("bucket", queryparams["bucket"], COMPARISON.EQ))

        if "collection" in queryparams:
            filters.append(Filter("collection", queryparams["collection"], COMPARISON.EQ))

        model = ChangesModel(request)
        metadata = {}
        timestamp = model.timestamp()
        changes = model.get_objects(
            filters=filters, limit=limit, include_deleted=include_deleted
        )

    else:
        bucket_uri = instance_uri(request, "bucket", id=bid)
        collection_uri = instance_uri(request, "collection", bucket_id=bid, id=cid)

        try:
            # We'll make sure that data isn't changed while we read metadata, changes,
            # etc.
            before = storage.resource_timestamp(
                resource_name="record", parent_id=collection_uri
            )
            # Fetch collection metadata.
            metadata = storage.get(
                resource_name="collection", parent_id=bucket_uri, object_id=cid
            )

        except storage_exceptions.ObjectNotFoundError:
            raise httpexceptions.HTTPNotFound()

        except storage_exceptions.BackendError as e:
            # The call to `resource_timestamp()` on an empty collection will try
            # initialize it. If the instance is read-only, it fails with a backend
            # error. Raise 404 in this case otherwise raise the original backend error.
            if "when running in readonly" in str(e):
                raise httpexceptions.HTTPNotFound()
            raise

        # Fetch list of changes.
        changes = storage.list_all(
            resource_name="record",
            parent_id=collection_uri,
            filters=filters,
            limit=limit,
            id_field="id",
            modified_field="last_modified",
            deleted_field="deleted",
            sorting=[Sort("last_modified", -1)],
            include_deleted=include_deleted,
        )
        # Fetch current collection timestamp.
        timestamp = storage.resource_timestamp(
            resource_name="record", parent_id=collection_uri
        )

        # Do not serve inconsistent data.
        if before != timestamp:  # pragma: no cover
            raise storage_exceptions.IntegrityError(message="Inconsistent data. Retry.")

    # Cache control.
    _handle_cache_expires(request, bid, cid)

    # Set Last-Modified response header (Pyramid takes care of converting).
    request.response.last_modified = timestamp / 1000.0

    data = {
        "metadata": metadata,
        "timestamp": timestamp,
        "changes": changes,
    }
    return data
class QuotasScriptsTest(unittest.TestCase):
    """Tests for the ``rebuild_quotas`` maintenance script.

    ``paginated`` is patched to feed canned pages, so no real storage
    backend is involved.
    """

    # Sort criterion the script is expected to pass to ``paginated``.
    OLDEST_FIRST = Sort('last_modified', 1)
    BATCH_SIZE = 25

    def setUp(self):
        self.storage = mock.Mock()

    def test_rebuild_quotas_updates_records(self):
        """The script walks buckets/collections/records and persists totals."""
        # Pages returned by successive calls to the patched ``paginated``,
        # in call order.
        paginated_data = [
            # get buckets
            iter([{
                "id": "bucket-1",
                "last_modified": 10
            }]),
            # get collections for first bucket
            iter([{
                "id": "collection-1",
                "last_modified": 100
            }, {
                "id": "collection-2",
                "last_modified": 200
            }]),
            # get records for first collection
            iter([{
                "id": "record-1",
                "last_modified": 110
            }]),
            # get records for second collection
            iter([{
                "id": "record-1b",
                "last_modified": 210
            }]),
        ]

        def paginated_mock(*args, **kwargs):
            return paginated_data.pop(0)

        with mock.patch(
                'kinto.plugins.quotas.scripts.logger') as mocked_logger:
            with mock.patch('kinto.plugins.quotas.scripts.paginated',
                            side_effect=paginated_mock) as mocked_paginated:
                scripts.rebuild_quotas(self.storage)

        mocked_paginated.assert_any_call(
            self.storage,
            collection_id='bucket',
            parent_id='',
            sorting=[self.OLDEST_FIRST],
        )
        mocked_paginated.assert_any_call(
            self.storage,
            collection_id='collection',
            parent_id='/buckets/bucket-1',
            sorting=[self.OLDEST_FIRST],
        )
        mocked_paginated.assert_any_call(
            self.storage,
            collection_id='record',
            parent_id='/buckets/bucket-1/collections/collection-1',
            sorting=[self.OLDEST_FIRST],
        )
        mocked_paginated.assert_any_call(
            self.storage,
            collection_id='record',
            parent_id='/buckets/bucket-1/collections/collection-2',
            sorting=[self.OLDEST_FIRST],
        )
        self.storage.update.assert_any_call(collection_id='quota',
                                            parent_id='/buckets/bucket-1',
                                            object_id='bucket_info',
                                            record={
                                                'record_count': 2,
                                                'storage_size': 193,
                                                'collection_count': 2
                                            })
        self.storage.update.assert_any_call(
            collection_id='quota',
            parent_id='/buckets/bucket-1/collections/collection-1',
            object_id='collection_info',
            record={
                'record_count': 1,
                'storage_size': 78
            })
        self.storage.update.assert_any_call(
            collection_id='quota',
            parent_id='/buckets/bucket-1/collections/collection-2',
            object_id='collection_info',
            record={
                'record_count': 1,
                'storage_size': 79
            })
        mocked_logger.info.assert_any_call(
            'Bucket bucket-1, collection collection-1. '
            'Final size: 1 records, 78 bytes.')
        mocked_logger.info.assert_any_call(
            'Bucket bucket-1, collection collection-2. '
            'Final size: 1 records, 79 bytes.')
        mocked_logger.info.assert_any_call(
            'Bucket bucket-1. Final size: '
            '2 collections, 2 records, 193 bytes.')

    def test_rebuild_quotas_doesnt_update_if_dry_run(self):
        """With ``dry_run=True``, totals are logged but never written."""
        paginated_data = [
            # get buckets
            iter([{
                "id": "bucket-1",
                "last_modified": 10
            }]),
            # get collections for first bucket
            iter([{
                "id": "collection-1",
                "last_modified": 100
            }]),
            # get records for first collection
            iter([{
                "id": "record-1",
                "last_modified": 110
            }]),
        ]

        def paginated_mock(*args, **kwargs):
            return paginated_data.pop(0)

        with mock.patch('kinto.plugins.quotas.scripts.logger') as mocked:
            with mock.patch('kinto.plugins.quotas.scripts.paginated',
                            side_effect=paginated_mock):
                scripts.rebuild_quotas(self.storage, dry_run=True)

        assert not self.storage.update.called
        mocked.info.assert_any_call(
            'Bucket bucket-1, collection collection-1. '
            'Final size: 1 records, 78 bytes.')
        mocked.info.assert_any_call(
            'Bucket bucket-1. Final size: 1 collections, '
            '1 records, 114 bytes.')
def test_paginated_yields_objects(self):
    """``paginated`` yields individual objects, not whole pages."""
    # Renamed the local from `iter`, which shadowed the builtin iter().
    object_iter = paginated(self.storage, sorting=[Sort("id", -1)])
    assert next(object_iter) == {"id": "object-01", "flavor": "strawberry"}
def test_paginated_passes_sort(self):
    """The sorting argument is forwarded verbatim to ``list_all``."""
    pages = paginated(self.storage, sorting=[Sort("id", -1)])
    next(pages)  # Trigger the first storage fetch.
    self.storage.list_all.assert_called_with(sorting=[Sort("id", -1)],
                                             limit=25,
                                             pagination_rules=None)