コード例 #1
0
    def _create_foreign_tables(self, schema: str, server_id: str,
                               tables: TableInfo) -> List[MountError]:
        """
        Look up Socrata dataset metadata and run the generated mount statements.

        :param schema: Forwarded to ``generate_socrata_mount_queries`` as the target schema.
        :param server_id: Forwarded to ``generate_socrata_mount_queries``.
        :param tables: Tables to mount; only used when ``self.tables`` is falsy. Either a list
            of Socrata IDs or a mapping whose values carry a ``socrata_id`` in their second item.

        :return: Always an empty list.
        :raises RepositoryNotFoundError: If Socrata serves an HTML page instead of metadata
            or returns no datasets at all.
        """
        from sodapy import Socrata
        from psycopg2.sql import SQL

        logging.info("Getting Socrata metadata")
        socrata = Socrata(
            domain=self.params["domain"],
            app_token=self.credentials.get("app_token"),
        )

        # Tables configured on the instance take priority over the argument.
        tables = self.tables or tables
        sought_ids = (
            tables
            if isinstance(tables, list)
            else [entry[1]["socrata_id"] for entry in tables.values()]
        )

        try:
            datasets = socrata.datasets(ids=sought_ids, only=["dataset"])
        except Exception as exc:
            if "Unknown response format: text/html" not in str(exc):
                raise
            # A missing Socrata dataset/domain isn't reported by sodapy directly: it trips
            # over the unexpected content type of the 404 page instead. Translate that
            # into a friendlier error.
            raise RepositoryNotFoundError(
                "Socrata domain or dataset not found!") from exc

        if not datasets:
            raise RepositoryNotFoundError(
                "Socrata domain or dataset not found!")

        statements, statement_args = generate_socrata_mount_queries(
            sought_ids, datasets, schema, server_id, tables)

        self.engine.run_sql(SQL(";").join(statements), statement_args)
        return []
コード例 #2
0
ファイル: engine.py プロジェクト: yanyu510/splitgraph
def lookup_repository(name: str, include_local: bool = False) -> "Repository":
    """
    Find the engine that hosts a repository by walking the lookup path.

    An entry for ``name`` in the lookup path override wins outright and is returned
    without checking that the repository exists there.

    :param name: Repository name
    :param include_local: If True, the local engine is checked before the lookup path

    :return: Local or remote Repository object
    :raises RepositoryNotFoundError: If no engine on the lookup path has the repository
    """
    from splitgraph.core.repository import Repository

    template = Repository.from_schema(name)

    def _hosted_on(engine_name):
        # Same namespace/repository as the template, bound to the named engine.
        return Repository(template.namespace, template.repository, get_engine(engine_name))

    if name in _LOOKUP_PATH_OVERRIDE:
        return _hosted_on(_LOOKUP_PATH_OVERRIDE[name])

    # Currently just check if the schema with that name exists on the remote.
    if include_local and repository_exists(template):
        return template

    for engine_name in _LOOKUP_PATH:
        remote_repo = _hosted_on(engine_name)
        if not repository_exists(remote_repo):
            # Not hosted here: close this engine before trying the next one.
            remote_repo.engine.close()
            continue
        return remote_repo

    raise RepositoryNotFoundError("Unknown repository %s!" % name)
コード例 #3
0
    def convert(self, value: str, param: Optional[Parameter],
                ctx: Optional[Context]) -> "Repository":
        """
        Turn a repository name into a Repository object.

        :param value: Repository name, parsed with ``Repository.from_schema``.
        :param param: Not used by this method.
        :param ctx: Not used by this method.

        :return: The parsed Repository.
        :raises RepositoryNotFoundError: If ``self.exists`` is set and the repository
            does not exist.
        """
        from splitgraph.core.repository import Repository

        repository = Repository.from_schema(value)
        if not self.exists:
            return repository

        from splitgraph.core.engine import repository_exists

        if repository_exists(repository):
            return repository
        raise RepositoryNotFoundError("Unknown repository %s" % repository)
コード例 #4
0
    def by_tag(self, tag: str, raise_on_none: bool = True) -> Optional[Image]:
        """
        Look up an image in this repository by its tag.

        :param tag: Tag. 'latest' is a special case: it returns the most recent image in the
            repository and raises if there are no images, regardless of ``raise_on_none``.
        :param raise_on_none: Whether to raise an error or return None if the tag doesn't exist.

        :return: The matching image, or None if the tag is missing and ``raise_on_none`` is False.
        :raises RepositoryNotFoundError: If the repository doesn't exist.
        :raises ImageNotFoundError: If no image matches and raising was requested.
        """
        engine = self.engine
        repo = self.repository
        if not repository_exists(repo):
            raise RepositoryNotFoundError("Unknown repository %s!" % str(repo))

        repo_args = (repo.namespace, repo.repository)

        if tag == "latest":
            # Special case: pick the most recently created image.
            latest_query = select(
                "get_images",
                ",".join(IMAGE_COLS),
                schema=SPLITGRAPH_API_SCHEMA,
                table_args="(%s,%s)",
            ) + SQL(" ORDER BY created DESC LIMIT 1")
            newest = engine.run_sql(
                latest_query, repo_args, return_shape=ResultShape.ONE_MANY
            )
            if newest is None:
                raise ImageNotFoundError("No images found in %s!" % repo.to_schema())
            return self._make_image(newest)

        tag_query = select(
            "get_tagged_images",
            "image_hash",
            "tag = %s",
            schema=SPLITGRAPH_API_SCHEMA,
            table_args="(%s,%s)",
        )
        image_hash = engine.run_sql(
            tag_query, repo_args + (tag,), return_shape=ResultShape.ONE_ONE
        )
        if image_hash is not None:
            return self.by_hash(image_hash)

        if not raise_on_none:
            return None

        schema = repo.to_schema()
        if tag == "HEAD":
            # A missing HEAD tag gets a hint on how to check an image out.
            raise ImageNotFoundError(
                'No current checked out revision found for %s. Check one out with "sgr '
                'checkout %s:image_hash".' % (schema, schema))
        raise ImageNotFoundError("Tag %s not found in repository %s" % (tag, schema))
コード例 #5
0
    def convert(
        self, value: str, param: Optional[Parameter], ctx: Optional[Context]
    ) -> Tuple["Repository", Optional[Union["Image", str]]]:
        """
        Parse an image specification of the form [NAMESPACE/]REPOSITORY[:HASH_OR_TAG].

        :param value: Image specification string.
        :param param: Not used by this method.
        :param ctx: Not used by this method.

        :return: Tuple of (repository object, tag or hash). When ``self.get_image`` is set
            and a tag or hash was parsed, the second item is the result of looking it up
            in ``repo.images`` instead of the raw string.
        :raises RepositoryNotFoundError: If an existence check is requested (or an image
            must be produced) and the repository doesn't exist.
        """
        from splitgraph.core.output import parse_repo_tag_or_hash

        repo, tag_or_hash = parse_repo_tag_or_hash(value, default=self.default)

        # Producing an Image implies the repository has to exist as well.
        must_exist = self.get_image or self.repository_exists
        if must_exist:
            from splitgraph.core.engine import repository_exists

            if not repository_exists(repo):
                raise RepositoryNotFoundError("Unknown repository %s" % repo)

        if tag_or_hash is None or not self.get_image:
            return repo, tag_or_hash
        return repo, repo.images[tag_or_hash]
コード例 #6
0
ファイル: mount.py プロジェクト: yanyu510/splitgraph
def mount_socrata(
    mountpoint: str,
    server,
    port,
    username,
    password,
    domain: str,
    tables: Optional[Dict[str, Any]] = None,
    app_token: Optional[str] = None,
    batch_size: Optional[int] = 10000,
) -> None:
    """
    Mount a Socrata dataset.

    Mounts a remote Socrata dataset and forwards queries to it
    \b

    :param domain: Socrata domain, for example, data.albanyny.gov. Required.
    :param tables: A dictionary mapping PostgreSQL table names to Socrata table IDs. For example,
        {"salaries": "xzkq-xp2w"}. If skipped, ALL tables in the Socrata endpoint will be mounted.
    :param app_token: Socrata app token. Optional.
    :param batch_size: Amount of rows to fetch from Socrata per request (limit parameter). Maximum 50000.
    """
    # NOTE(review): the docstring text above is left untouched on purpose -- the `\b`
    # marker suggests it is rendered as CLI help; confirm before rewording it.
    # `server`, `port`, `username` and `password` are not used in this body; presumably
    # they exist to match a common mount-handler signature (verify against callers).
    #
    # Raises RepositoryNotFoundError when Socrata serves an HTML page instead of metadata
    # or returns no datasets; any other metadata lookup error is propagated unchanged.
    from splitgraph.engine import get_engine
    from sodapy import Socrata
    from psycopg2.sql import Identifier, SQL

    engine = get_engine()
    logging.info("Mounting Socrata domain...")
    server_id = mountpoint + "_server"

    options: Dict[str, Optional[str]] = {
        "wrapper": "splitgraph.ingestion.socrata.fdw.SocrataForeignDataWrapper",
    }

    # Only pass through the options that were actually supplied.
    if domain:
        options["domain"] = domain
    if app_token:
        options["app_token"] = app_token
    if batch_size:
        options["batch_size"] = str(batch_size)

    init_fdw(
        engine, server_id=server_id, wrapper="multicorn", server_options=options,
    )

    engine.run_sql(SQL("CREATE SCHEMA IF NOT EXISTS {}").format(Identifier(mountpoint)))

    logging.info("Getting Socrata metadata")
    client = Socrata(domain=domain, app_token=app_token)
    # Materialise the IDs as a real list (not a dict view) so the same sequence can be
    # handed to both the Socrata client and the query generator.
    sought_ids = list(tables.values()) if tables else []

    try:
        datasets = client.datasets(ids=sought_ids, only=["dataset"])
    except Exception as e:
        if "Unknown response format: text/html" in str(e):
            # If the Socrata dataset/domain isn't found, sodapy doesn't catch it directly
            # and instead stumbles on an unexpected content-type of the 404 page it's served.
            # We catch that and reraise a more friendly message.
            raise RepositoryNotFoundError("Socrata domain or dataset not found!") from e
        # Anything else must propagate: swallowing it would leave `datasets` unbound and
        # surface as a confusing UnboundLocalError a few lines below.
        raise

    if not datasets:
        # Nothing came back for this domain / these IDs, so there is nothing to mount.
        raise RepositoryNotFoundError("Socrata domain or dataset not found!")

    mount_statements, mount_args = generate_socrata_mount_queries(
        sought_ids, datasets, mountpoint, server_id, tables
    )

    engine.run_sql(SQL(";").join(mount_statements), mount_args)