コード例 #1
0
    def by_tag(self, tag: str, raise_on_none: bool = True) -> Optional[Image]:
        """
        Returns an image with a given tag

        :param tag: Tag. 'latest' is a special case: it returns the most recent image in the repository.
        :param raise_on_none: Whether to raise an error or return None if the tag doesn't exist.
            It has no effect on the 'latest' special case, which always raises when
            the repository has no images.
        :return: The matching Image, or None if the tag doesn't exist and
            `raise_on_none` is False.
        :raises RepositoryNotFoundError: if the repository doesn't exist.
        :raises ImageNotFoundError: if 'latest' is requested and the repository has no
            images, or if the tag doesn't exist and `raise_on_none` is True.
        """
        engine = self.engine
        if not repository_exists(self.repository):
            raise RepositoryNotFoundError("Unknown repository %s!" %
                                          str(self.repository))

        repo_args = (self.repository.namespace, self.repository.repository)

        if tag == "latest":
            # Special case, return the latest commit from the repository.
            latest = engine.run_sql(
                select(
                    "get_images",
                    ",".join(IMAGE_COLS),
                    schema=SPLITGRAPH_API_SCHEMA,
                    table_args="(%s,%s)",
                ) + SQL(" ORDER BY created DESC LIMIT 1"),
                repo_args,
                return_shape=ResultShape.ONE_MANY,
            )
            if latest is None:
                raise ImageNotFoundError("No images found in %s!" %
                                         self.repository.to_schema())
            return self._make_image(latest)

        # Regular tag: resolve it to an image hash first, then load the image by hash.
        image_hash = engine.run_sql(
            select(
                "get_tagged_images",
                "image_hash",
                "tag = %s",
                schema=SPLITGRAPH_API_SCHEMA,
                table_args="(%s,%s)",
            ),
            repo_args + (tag,),
            return_shape=ResultShape.ONE_ONE,
        )
        if image_hash is not None:
            return self.by_hash(image_hash)

        if not raise_on_none:
            return None

        schema = self.repository.to_schema()
        if tag == "HEAD":
            # A missing HEAD tag gets a dedicated message that suggests a checkout.
            raise ImageNotFoundError(
                'No current checked out revision found for %s. Check one out with "sgr '
                'checkout %s:image_hash".' % (schema, schema))
        raise ImageNotFoundError("Tag %s not found in repository %s" %
                                 (tag, schema))
コード例 #2
0
    def by_hash(self, image_hash: str) -> Image:
        """
        Returns the image corresponding to a given (possibly shortened) image hash.

        The hash is lowercased before it is passed to the lookup query.

        :param image_hash: Image hash (can be shortened).
        :return: Image
        :raises ImageNotFoundError: if no image matches the hash, or if the hash is
            ambiguous (more than one image matches). This method never returns None.
        """
        candidates = self.engine.run_sql(
            select(
                "get_image",
                ",".join(IMAGE_COLS),
                schema=SPLITGRAPH_API_SCHEMA,
                table_args="(%s, %s, %s)",
            ),
            (self.repository.namespace, self.repository.repository,
             image_hash.lower()),
            return_shape=ResultShape.MANY_MANY,
        )
        if not candidates:
            raise ImageNotFoundError("No images starting with %s found!" %
                                     image_hash)
        if len(candidates) > 1:
            # List the first column of every candidate row so the user can disambiguate.
            message = "Multiple suitable candidates found: \n * " + "\n * ".join(
                [row[0] for row in candidates])
            raise ImageNotFoundError(message)
        return self._make_image(candidates[0])
コード例 #3
0
    def get_length(self) -> int:
        """
        Return how many rows this table contains.

        The figure can be lower than the sum of the row counts of all objects that make
        up the table, since some objects might overwrite each other.

        :return: Number of rows in table (0 if the query yields a falsy result)
        """
        engine = self.repository.engine
        query = select(
            "get_table_length", table_args="(%s,%s,%s,%s)", schema=SPLITGRAPH_API_SCHEMA
        )
        table_coordinates = (
            self.repository.namespace,
            self.repository.repository,
            self.image.image_hash,
            self.table_name,
        )
        row_count = engine.run_sql(query, table_coordinates, return_shape=ResultShape.ONE_ONE)
        # A missing/empty result is reported as a zero-length table.
        return cast(int, row_count or 0)
コード例 #4
0
def alter_table(
    repository: "Repository", table_name: str, rows_added: int, rows_deleted: int, rows_updated: int
) -> None:
    """
    Alters the example table, adding/updating/deleting a certain number of rows.

    The first `rows_deleted` keys returned by the table are deleted, the next
    `rows_updated` keys after those are updated, and `rows_added` new rows are
    inserted with keys following the current maximum key.

    :param repository: Checked-out Repository object.
    :param table_name: Name of the table
    :param rows_added: Number of rows to add
    :param rows_deleted: Number of rows to remove
    :param rows_updated: Number of rows to update
    """
    from splitgraph.engine import ResultShape
    from splitgraph.core.sql import select
    from psycopg2.sql import Identifier, SQL

    schema = repository.to_schema()
    table = Identifier(table_name)

    # `or []` guards against the engine returning None for an empty table.
    keys = (
        repository.run_sql(
            select(table_name, "key", schema=schema), return_shape=ResultShape.MANY_ONE
        )
        or []
    )
    last = repository.run_sql(
        select(table_name, "MAX(key)", schema=schema),
        return_shape=ResultShape.ONE_ONE,
    )

    # Delete first N rows
    click.echo("Deleting %d rows..." % rows_deleted)
    repository.engine.run_sql_batch(
        SQL("DELETE FROM {} WHERE key = %s").format(table),
        [(k,) for k in keys[:rows_deleted]],
        schema=schema,
    )

    # Update next N rows. The window starts right after the deleted keys so that it
    # never targets a row that was just removed, whatever the two counts are.
    click.echo("Updating %d rows..." % rows_updated)
    repository.engine.run_sql_batch(
        SQL("UPDATE {} SET value = %s WHERE key = %s").format(table),
        [(_hash(k) + "_UPDATED", k) for k in keys[rows_deleted : rows_deleted + rows_updated]],
        schema=schema,
    )

    # Insert rows at the end. MAX(key) is NULL for an empty table; in that case any
    # key is free, so numbering starts at 0.
    first_new_key = 0 if last is None else last + 1
    click.echo("Adding %d rows..." % rows_added)
    repository.engine.run_sql_batch(
        SQL("INSERT INTO {} VALUES (%s, %s)").format(table),
        [(k, _hash(k)) for k in range(first_new_key, first_new_key + rows_added)],
        schema=schema,
    )
コード例 #5
0
def test_no_direct_table_access(unprivileged_pg_repo):
    """Canary to check users can't manipulate splitgraph_meta tables directly."""
    for table in META_TABLES:
        # A SELECT built with the select() helper must be rejected by the database.
        with pytest.raises(psycopg2.Error):
            unprivileged_pg_repo.engine.run_sql(select(table, "1"))

        # So must a DELETE, even though its WHERE clause (1 = 2) can never match a row.
        with pytest.raises(psycopg2.Error):
            unprivileged_pg_repo.engine.run_sql(
                SQL("DELETE FROM {}.{} WHERE 1 = 2").format(
                    Identifier(SPLITGRAPH_META_SCHEMA), Identifier(table)))
コード例 #6
0
def get_installed_version(
    engine: "PsycopgEngine", schema_name: str, version_table: str = "version"
) -> Optional[Tuple[str, datetime]]:
    """
    Return the most recently installed version recorded in a schema's version table.

    :param engine: Engine used to check for the table and to run the query.
    :param schema_name: Schema that holds the version table.
    :param version_table: Name of the version table.
    :return: The `(version, installed)` row with the greatest `installed` value, or
        None if the version table doesn't exist.
    """
    if not engine.table_exists(schema_name, version_table):
        return None

    # The leading space keeps ORDER BY from being glued onto the last token of the
    # SELECT statement produced by select().
    query = select(version_table, "version,installed", schema=schema_name) + SQL(
        " ORDER BY installed DESC LIMIT 1"
    )
    return cast(
        Optional[Tuple[str, datetime]],
        engine.run_sql(query, return_shape=ResultShape.ONE_MANY),
    )
コード例 #7
0
 def __call__(self) -> List[Image]:
     """
     Return every image in the repository as an Image object.

     NOTE(review): callers expect the images ordered by creation time (earliest
     first). The query built here adds no ORDER BY of its own, so that ordering
     presumably comes from the `get_images` API function -- confirm.
     """
     rows = self.engine.run_sql(
         select(
             "get_images",
             ",".join(IMAGE_COLS),
             schema=SPLITGRAPH_API_SCHEMA,
             table_args="(%s, %s)",
         ),
         (self.repository.namespace, self.repository.repository),
     )
     return [self._make_image(row) for row in rows]
コード例 #8
0
def filter_range_index(
    metadata_engine: "PsycopgEngine",
    object_ids: List[str],
    quals: Any,
    column_types: Dict[str, str],
) -> List[str]:
    """
    Return the IDs of the objects, among `object_ids`, whose metadata matches `quals`.

    :param metadata_engine: Engine on which the chunked metadata query is run.
    :param object_ids: Candidate object IDs, passed as the first query argument.
    :param quals: Qualifiers, converted into a WHERE clause by `_quals_to_clause`.
    :param column_types: Column types, forwarded to `_quals_to_clause`.
    :return: Object IDs returned by the query.
    """
    where_clause, where_args = _quals_to_clause(quals, column_types)

    base_query = select(
        "get_object_meta",
        "object_id",
        table_args="(%s)",
        schema=SPLITGRAPH_API_SCHEMA,
    )
    filtered_query = base_query + SQL(" WHERE ") + where_clause

    # The object ID list goes first; chunk_position=0 points run_chunked_sql at it
    # (presumably so that it is the argument that gets split into chunks).
    query_args = [object_ids, *where_args]
    matching_ids = metadata_engine.run_chunked_sql(
        filtered_query,
        query_args,
        return_shape=ResultShape.MANY_ONE,
        chunk_position=0,
    )
    return cast(List[str], matching_ids)
コード例 #9
0
    def get_size(self) -> int:
        """
        Return the physical size taken up by this table's objects, counting objects
        that are shared with other tables as well.

        The value comes from the metadata, so the actual on-disk footprint might be
        smaller if not all of the table's objects have been downloaded.

        :return: Size of the table in bytes (0 if the query yields a falsy result).
        """
        engine = self.repository.engine
        query = select("get_table_size", table_args="(%s,%s,%s,%s)", schema=SPLITGRAPH_API_SCHEMA)
        table_coordinates = (
            self.repository.namespace,
            self.repository.repository,
            self.image.image_hash,
            self.table_name,
        )
        size_in_bytes = engine.run_sql(query, table_coordinates, return_shape=ResultShape.ONE_ONE)
        # A missing/empty result is reported as a size of zero.
        return cast(int, size_in_bytes or 0)
コード例 #10
0
ファイル: common.py プロジェクト: splitgraph/splitgraph
def _gather_sync_metadata(target, source, overwrite_objects, overwrite_tags,
                          single_image) -> Any:
    """
    Work out which metadata has to be copied from `source` to `target`.

    :param target: Repository being synced to.
    :param source: Repository being synced from.
    :param overwrite_objects: If truthy, object metadata is gathered for every object
        of the new images (and of `single_image`, if given), not just for objects the
        target is missing.
    :param overwrite_tags: If truthy, source tags are included even when the target
        already has a tag with the same name.
    :param single_image: If truthy, a reference looked up in `source.images`; only that
        image is considered. Otherwise every source image missing from the target is.
    :return: Tuple of (new_images, table_meta, object_locations, object_meta, tags).
    """
    # Currently, images can't be altered once pushed out. We intend to relax this:
    # same image hash means same contents and same tables but the composition of an image
    # can change (if we refragment a table so that querying it is faster). But it's frowned
    # upon.
    single_image_hash: Optional[str] = None
    if single_image:
        image = source.images[single_image]
        single_image_hash = image.image_hash
        try:
            # If the image already exists on the target, we shouldn't overwrite it.
            # The user can get around this by deleting the image manually.
            _ = target.images[single_image_hash]
        except (ImageNotFoundError, RepositoryNotFoundError):
            # Either the image or the whole repository is missing on the target:
            # in both cases the image is new.
            new_images = [image]
            new_image_hashes = [single_image_hash]
        else:
            new_images = []
            new_image_hashes = []
    else:
        # If an image hasn't been specified, get/push all non-existing images.
        target_hashes = {i.image_hash for i in target.images()}
        source_images = {i.image_hash: i for i in source.images()}
        new_image_hashes = [h for h in source_images if h not in target_hashes]
        new_images = [source_images[h] for h in new_image_hashes]

    # Set copy so that the per-table membership test below is a constant-time lookup.
    new_image_hash_set = set(new_image_hashes)

    # Get the meta for all tables we'll need to fetch.
    table_meta = []

    # Also grab the list of all objects in this repository in case overwrite_objects=True
    all_objects: Set[str] = set()
    for t in source.engine.run_sql(
            select(
                "get_all_tables",
                "image_hash, table_name, table_schema, object_ids",
                schema=SPLITGRAPH_API_SCHEMA,
                table_args="(%s,%s)",
            ),
        (source.namespace, source.repository),
    ):
        if t[0] in new_image_hash_set:
            all_objects.update(t[-1])
            table_meta.append(t)
        elif t[0] == single_image_hash:
            # Overwrite objects from existing image if it was
            # passed as single_image (similar behaviour to tags).
            all_objects.update(t[-1])

    # Get the tags
    existing_tags = {t for s, t in target.get_all_hashes_tags()}
    tags = {
        t: s
        for s, t in source.get_all_hashes_tags() if (
            # Only get new tags (unless we're overwriting them)
            t not in existing_tags or overwrite_tags) and
        (
            # Only get tags for the new image (unless we're pulling the whole repo)
            not single_image or s == single_image_hash)
    }

    # Get objects that don't exist on the target
    table_objects = list({o for table in table_meta for o in table[3]})
    new_objects = list(set(target.objects.get_new_objects(table_objects)))

    # Ignore overwrite_objects when calculating which objects to upload (the flag
    # is only for overwriting metadata).
    if new_objects:
        object_locations = source.objects.get_external_object_locations(
            new_objects)
    else:
        object_locations = []

    if overwrite_objects:
        new_objects = list(all_objects)

    if new_objects:
        object_meta = source.objects.get_object_meta(new_objects)
    else:
        object_meta = {}
    return new_images, table_meta, object_locations, object_meta, tags
コード例 #11
0
def _get_last_used(object_manager, object_id):
    """Fetch the `last_used` value stored for `object_id` in `object_cache_status`."""
    engine = object_manager.object_engine
    query = select("object_cache_status", "last_used", "object_id = %s")
    return engine.run_sql(query, (object_id,), return_shape=ResultShape.ONE_ONE)
コード例 #12
0
def _get_refcount(object_manager, object_id):
    """Fetch the `refcount` value stored for `object_id` in `object_cache_status`."""
    engine = object_manager.object_engine
    query = select("object_cache_status", "refcount", "object_id = %s")
    return engine.run_sql(query, (object_id,), return_shape=ResultShape.ONE_ONE)