Example #1
0
    def _register_object(
        self,
        object_id: str,
        namespace: str,
        insertion_hash: str,
        deletion_hash: str,
        table_schema: TableSchema,
        rows_inserted: int,
        rows_deleted: int,
        changeset: Optional[Changeset] = None,
        extra_indexes: Optional[ExtraIndexInfo] = None,
    ) -> None:
        """
        Build the metadata record for a single Splitgraph object and register it.

        The object's size is read from the object engine and its index is generated from
        the table schema, changeset and extra index settings. The resulting ``FRAG`` record
        (timestamped with ``datetime.utcnow()``) is passed to ``register_objects`` as a
        one-element list.

        :param object_id: Object ID
        :param namespace: Namespace that owns the object. In registry mode, only namespace owners can alter or delete
            objects.
        :param insertion_hash: Homomorphic hash of all rows inserted by this fragment
        :param deletion_hash: Homomorphic hash of the old values of all rows deleted by this fragment
        :param table_schema: List of (ordinal, name, type, is_pk) with the schema of the table that this object
            belongs to.
        :param rows_inserted: Stored as-is on the object record (presumably the number of rows this fragment
            inserts -- confirm against callers).
        :param rows_deleted: Stored as-is on the object record (presumably the number of rows this fragment
            deletes -- confirm against callers).
        :param changeset: For patches, changeset that produced this object. Must be a dictionary of
            {PK: (True for upserted/False for deleted, old row (if updated or deleted))}. The old values
            are used to generate the min/max index for an object to know if it removes/updates some rows
            that might be pertinent to a query.
        :param extra_indexes: Dictionary of {index_type: column: index_specific_kwargs}.
        """
        # Size first, then the index: same order of engine calls as before.
        size = self.object_engine.get_object_size(object_id)
        index = self.generate_object_index(object_id, table_schema, changeset, extra_indexes)

        fragment = Object(
            object_id=object_id,
            format="FRAG",
            namespace=namespace,
            size=size,
            created=datetime.utcnow(),
            insertion_hash=insertion_hash,
            deletion_hash=deletion_hash,
            object_index=index,
            rows_inserted=rows_inserted,
            rows_deleted=rows_deleted,
        )
        self.register_objects([fragment])
Example #2
0
def test_metadata_constraints_table_objects(local_engine_empty):
    """
    Check that ``register_tables`` raises ``CheckViolation`` when a table references
    object IDs that have not been registered.

    Two cases are covered: a table whose only object is unregistered, and a table whose
    first object is registered but whose second one is not.
    """
    R = Repository("some", "repo")
    R.images.add(parent_id="0" * 64, image="cafecafe" * 8)
    R.objects.register_objects([
        Object(
            object_id="o" + "a" * 62,
            format="FRAG",
            namespace="",
            size=42,
            created=datetime.utcnow(),
            insertion_hash="0" * 64,
            deletion_hash="0" * 64,
            object_index={},
            rows_inserted=10,
            rows_deleted=2,
        )
    ])
    R.commit_engines()

    # Fragment of the error text that both cases must contain.
    # NOTE(review): the previous literal ended in "aren''t registered!" (SQL-style doubled
    # quote). That assertion never executed, so it was never checked against the real
    # message, which presumably renders with a single apostrophe. Match on the
    # unambiguous part before the apostrophe instead -- confirm against the constraint.
    expected_fragment = "Some objects in the object_ids array"

    with pytest.raises(CheckViolation) as e:
        R.objects.register_tables(R, [("cafecafe" * 8, "table", [
            (1, "key", "integer", True)
        ], ["object_doesnt_exist"])])
    # The assertion has to live outside the `with` block: anything placed after the
    # raising call inside it is skipped. `e.value` is the actual exception instance.
    assert expected_fragment in str(e.value)

    with pytest.raises(CheckViolation) as e:
        R.objects.register_tables(
            R,
            [(
                "cafecafe" * 8,
                "table",
                [(1, "key", "integer", True)],
                [
                    "o" + "a" * 62,
                    "previous_object_existed_but_this_one_doesnt"
                ],
            )],
        )
    assert expected_fragment in str(e.value)
Example #3
0
def test_metadata_constraints_object_ids_hashes(local_engine_empty):
    """
    Check that ``register_objects`` raises ``CheckViolation`` for each of a series of
    objects that differ from one another only in their object ID, insertion hash or
    deletion hash.
    """
    R = Repository("some", "repo")
    R.images.add(parent_id="0" * 64, image="cafecafe" * 8)
    R.commit_engines()

    # Values used for whichever fields are not the one being varied in a given case.
    filler_id = "o" + "a" * 62
    filler_hash = "0" * 64

    # One row per attempted registration: (object_id, insertion_hash, deletion_hash).
    # The first three vary the object ID, the last two vary one hash each.
    rejected_cases = [
        ("broken", filler_hash, filler_hash),
        ("o12345", filler_hash, filler_hash),
        ("o" + "a" * 61 + "Z", filler_hash, filler_hash),
        (filler_id, "broken", filler_hash),
        (filler_id, filler_hash, "broken"),
    ]

    for candidate_id, ins_hash, del_hash in rejected_cases:
        # The record is built inside the context manager, as in the unrolled form of
        # this test, so every case is attempted the same way.
        with pytest.raises(CheckViolation):
            R.objects.register_objects(
                [
                    Object(
                        object_id=candidate_id,
                        format="FRAG",
                        namespace="",
                        size=42,
                        created=datetime.utcnow(),
                        insertion_hash=ins_hash,
                        deletion_hash=del_hash,
                        object_index={},
                        rows_inserted=10,
                        rows_deleted=2,
                    )
                ]
            )
Example #4
0
def test_large_api_calls(unprivileged_pg_repo):
    """
    Test query chunking for API calls that exceed length/vararg limits.

    A single oversized object must be rejected with a ``ValueError``; a large batch of
    smaller objects must go through registration, upload-URL lookup, metadata lookup and
    table registration and come back intact.
    """
    repo = unprivileged_pg_repo
    object_count = 2000

    def make_fake_object(index, bloom_payload, rows_inserted, rows_deleted):
        # Fake object record owned by the repository's namespace; only the ID, the
        # bloom index payload and the row counts differ between the objects made here.
        return Object(
            object_id="o%062d" % index,
            format="FRAG",
            namespace=repo.namespace,
            size=42,
            created=datetime.utcnow(),
            insertion_hash="0" * 64,
            deletion_hash="0" * 64,
            object_index={"bloom": [42, bloom_payload]},
            rows_inserted=rows_inserted,
            rows_deleted=rows_deleted,
        )

    # An object whose bloom index payload alone is API_MAX_QUERY_LENGTH characters long:
    # registering it is expected to fail outright rather than be chunked.
    oversized = make_fake_object(0, "A" * API_MAX_QUERY_LENGTH, 10, 2)
    with pytest.raises(ValueError) as e:
        repo.objects.register_objects([oversized], namespace=repo.namespace)
    assert "exceeds maximum query size" in str(e.value)

    # A batch of `object_count` fake objects, each carrying 1024 characters of bloom
    # index data (plus a little misc metadata) -- check that queries get chunked up.
    objects = [make_fake_object(i, "A" * 1024, 42, 0) for i in range(object_count)]
    all_ids = [fake.object_id for fake in objects]

    # None of the objects exist yet (this lookup should get chunked up too); register them.
    assert repo.objects.get_new_objects(all_ids) == all_ids
    repo.objects.register_objects(objects, namespace=repo.namespace)

    # Presigned upload URLs for every object
    upload_urls = get_object_upload_urls(repo.engine, all_ids)
    assert len(upload_urls) == object_count

    # Metadata for every object
    object_meta = repo.objects.get_object_meta(all_ids)
    assert len(object_meta) == object_count

    # Now make an image with a lot of objects
    image_hash = "0" * 63 + "1"
    repo.images.add(parent_id=None, image=image_hash)

    # Two tables to test that register_tables chunks correctly with multiple tables
    small_table_spec = (image_hash, "small_table", [(1, "key", "integer", True)], [all_ids[0]])
    big_table_spec = (image_hash, "table", [(1, "key", "integer", True)], all_ids)
    repo.objects.register_tables(repo, [small_table_spec, big_table_spec])

    # Read both tables back and check that each lists exactly the objects it was
    # registered with (multiple add_table calls add new objects to the table)
    image = repo.images[image_hash]
    assert image.get_table("table").objects == all_ids
    assert repo.images[image_hash].get_table("small_table").objects == [all_ids[0]]