def _register_object(
    self,
    object_id: str,
    namespace: str,
    insertion_hash: str,
    deletion_hash: str,
    table_schema: TableSchema,
    rows_inserted: int,
    rows_deleted: int,
    changeset: Optional[Changeset] = None,
    extra_indexes: Optional[ExtraIndexInfo] = None,
) -> None:
    """
    Register a single Splitgraph object in the object tree and index it.

    :param object_id: Object ID
    :param namespace: Namespace that owns the object. In registry mode, only
        namespace owners can alter or delete objects.
    :param insertion_hash: Homomorphic hash of all rows inserted by this fragment
    :param deletion_hash: Homomorphic hash of the old values of all rows deleted
        by this fragment
    :param table_schema: List of (ordinal, name, type, is_pk) with the schema of
        the table that this object belongs to.
    :param changeset: For patches, changeset that produced this object. Must be
        a dictionary of {PK: (True for upserted/False for deleted, old row (if
        updated or deleted))}. The old values are used to generate the min/max
        index for an object to know if it removes/updates some rows that might
        be pertinent to a query.
    :param extra_indexes: Dictionary of {index_type: column: index_specific_kwargs}.
    """
    # Measure the physical fragment and build its per-column index before
    # writing anything into the metadata tables.
    fragment_size = self.object_engine.get_object_size(object_id)
    fragment_index = self.generate_object_index(
        object_id, table_schema, changeset, extra_indexes
    )

    new_object = Object(
        object_id=object_id,
        format="FRAG",
        namespace=namespace,
        size=fragment_size,
        created=datetime.utcnow(),
        insertion_hash=insertion_hash,
        deletion_hash=deletion_hash,
        object_index=fragment_index,
        rows_inserted=rows_inserted,
        rows_deleted=rows_deleted,
    )
    self.register_objects([new_object])
def test_metadata_constraints_table_objects(local_engine_empty):
    """Tables pointing at unregistered objects are rejected by the metadata store."""
    repo = Repository("some", "repo")
    repo.images.add(parent_id="0" * 64, image="cafecafe" * 8)

    known_object = "o" + "a" * 62
    repo.objects.register_objects(
        [
            Object(
                object_id=known_object,
                format="FRAG",
                namespace="",
                size=42,
                created=datetime.utcnow(),
                insertion_hash="0" * 64,
                deletion_hash="0" * 64,
                object_index={},
                rows_inserted=10,
                rows_deleted=2,
            )
        ]
    )
    repo.commit_engines()

    # A table referencing a completely unknown object must violate the check.
    with pytest.raises(CheckViolation) as e:
        repo.objects.register_tables(
            repo,
            [
                (
                    "cafecafe" * 8,
                    "table",
                    [(1, "key", "integer", True)],
                    ["object_doesnt_exist"],
                )
            ],
        )
    assert "Some objects in the object_ids array aren''t registered!" in str(e)

    # Mixing one registered object with an unknown one must fail as well.
    with pytest.raises(CheckViolation) as e:
        repo.objects.register_tables(
            repo,
            [
                (
                    "cafecafe" * 8,
                    "table",
                    [(1, "key", "integer", True)],
                    [known_object, "previous_object_existed_but_this_one_doesnt"],
                )
            ],
        )
    assert "Some objects in the object_ids array aren''t registered!" in str(e)
def test_metadata_constraints_object_ids_hashes(local_engine_empty):
    """Malformed object IDs and content hashes are rejected by check constraints."""
    repo = Repository("some", "repo")
    repo.images.add(parent_id="0" * 64, image="cafecafe" * 8)
    repo.commit_engines()

    def _make_object(object_id, insertion_hash, deletion_hash):
        # All other fields are valid; only the fields under test vary.
        return Object(
            object_id=object_id,
            format="FRAG",
            namespace="",
            size=42,
            created=datetime.utcnow(),
            insertion_hash=insertion_hash,
            deletion_hash=deletion_hash,
            object_index={},
            rows_inserted=10,
            rows_deleted=2,
        )

    bad_objects = [
        # Object ID isn't a well-formed "o" + 62 hex chars identifier.
        _make_object("broken", "0" * 64, "0" * 64),
        _make_object("o12345", "0" * 64, "0" * 64),
        # Uppercase "Z" isn't a valid lowercase hex digit.
        _make_object("o" + "a" * 61 + "Z", "0" * 64, "0" * 64),
        # Malformed insertion / deletion hashes on an otherwise valid ID.
        _make_object("o" + "a" * 62, "broken", "0" * 64),
        _make_object("o" + "a" * 62, "0" * 64, "broken"),
    ]
    for bad_object in bad_objects:
        with pytest.raises(CheckViolation):
            repo.objects.register_objects([bad_object])
def test_large_api_calls(unprivileged_pg_repo):
    """API calls exceeding length/vararg limits get chunked up (or rejected outright)."""
    namespace = unprivileged_pg_repo.namespace

    # A single object whose bloom index alone is larger than the maximum query
    # size can't be chunked at all and must be rejected.
    oversized_object = Object(
        object_id="o%062d" % 0,
        format="FRAG",
        namespace=namespace,
        size=42,
        created=datetime.utcnow(),
        insertion_hash="0" * 64,
        deletion_hash="0" * 64,
        object_index={"bloom": [42, "A" * API_MAX_QUERY_LENGTH]},
        rows_inserted=10,
        rows_deleted=2,
    )
    with pytest.raises(ValueError) as e:
        unprivileged_pg_repo.objects.register_objects(
            [oversized_object], namespace=namespace
        )
    assert "exceeds maximum query size" in str(e.value)

    # 2000 objects with ~1KB of bloom index data each (plus a few bytes of misc
    # metadata): registration has to be split into multiple queries.
    objects = [
        Object(
            object_id="o%062d" % i,
            format="FRAG",
            namespace=namespace,
            size=42,
            created=datetime.utcnow(),
            insertion_hash="0" * 64,
            deletion_hash="0" * 64,
            object_index={"bloom": [42, "A" * 1024]},
            rows_inserted=42,
            rows_deleted=0,
        )
        for i in range(2000)
    ]
    all_ids = [o.object_id for o in objects]

    # None of them exist yet (the existence query should also get chunked up).
    assert unprivileged_pg_repo.objects.get_new_objects(all_ids) == all_ids
    unprivileged_pg_repo.objects.register_objects(objects, namespace=namespace)

    # Presigned upload URLs come back for every object.
    urls = get_object_upload_urls(unprivileged_pg_repo.engine, all_ids)
    assert len(urls) == 2000

    # Round-trip the metadata.
    assert len(unprivileged_pg_repo.objects.get_object_meta(all_ids)) == 2000

    # Build an image with a lot of objects; registering two tables at once
    # checks that register_tables chunks correctly across multiple tables.
    image_hash = "0" * 63 + "1"
    unprivileged_pg_repo.images.add(parent_id=None, image=image_hash)
    unprivileged_pg_repo.objects.register_tables(
        unprivileged_pg_repo,
        [
            (image_hash, "small_table", [(1, "key", "integer", True)], [all_ids[0]]),
            (image_hash, "table", [(1, "key", "integer", True)], all_ids),
        ],
    )

    # Multiple add_table calls append objects to a table, so verify each table
    # ended up with exactly the object list we registered.
    image = unprivileged_pg_repo.images[image_hash]
    assert image.get_table("table").objects == all_ids
    assert image.get_table("small_table").objects == [all_ids[0]]