Example #1
0
def add_engine_c(image, port, username, no_init, no_sgconfig, inject_source,
                 no_pull, name, password, set_default):
    """
    Create and start a Splitgraph engine.

    This will pull the Splitgraph engine image, start it, create a Postgres user and initialize
    the engine.

    This also creates Docker volumes required to persist data/metadata.

    The engine Docker container by default will be named `splitgraph_engine_default` and
    its data and metadata volumes will have names `splitgraph_engine_default_data` and
    `splitgraph_engine_default_metadata`.
    """
    from splitgraph.engine.postgres.engine import PostgresEngine
    from splitgraph.config import CONFIG
    from docker.types import Mount

    docker_client = get_docker_client()

    if not no_pull:
        click.echo("Pulling image %s..." % image)
        _pretty_pull(docker_client, image)

    container_name = _get_container_name(name)
    data_volume_name = _get_data_volume_name(name)
    metadata_volume_name = _get_metadata_volume_name(name)

    # Volumes that persist the engine's object storage and Postgres data directory.
    mounts = [
        Mount(target="/var/lib/splitgraph/objects",
              source=data_volume_name,
              type="volume"),
        Mount(target="/var/lib/postgresql/data",
              source=metadata_volume_name,
              type="volume"),
    ]

    click.echo("Creating container %s." % container_name)
    click.echo("Data volume: %s." % data_volume_name)
    click.echo("Metadata volume: %s." % metadata_volume_name)

    if inject_source:
        # Bind-mount the Splitgraph source tree into the container
        # (defaults to the parent of this file unless SG_SOURCE_ROOT overrides it).
        default_source_root = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "../"))
        source_path = _convert_source_path(
            os.getenv("SG_SOURCE_ROOT", default_source_root))
        mounts.append(Mount(target="/splitgraph/splitgraph",
                            source=source_path,
                            type="bind"))
        click.echo("Source path: %s" % source_path)

    container = docker_client.containers.run(
        image=image,
        detach=True,
        name=container_name,
        ports={"5432/tcp": port},
        mounts=mounts,
        environment={
            "POSTGRES_USER": username,
            "POSTGRES_PASSWORD": password,
            "POSTGRES_DB": "splitgraph",
            # Actual config to be injected later
            "SG_CONFIG_FILE": "/.sgconfig",
        },
    )

    click.echo("Container created, ID %s" % container.short_id)

    # Extract the host that we can reach the container on
    # (might be different from localhost if docker-machine is used)
    hostname = urlparse(docker_client.api.base_url).hostname

    conn_params: Dict[str, str] = {
        "SG_ENGINE_HOST": hostname,
        "SG_ENGINE_PORT": str(port),
        # Even if the engine is exposed on a different port on the host,
        # need to make sure that it uses the default 5432 port to connect
        # to itself.
        "SG_ENGINE_FDW_HOST": "localhost",
        "SG_ENGINE_FDW_PORT": "5432",
        "SG_ENGINE_USER": username,
        "SG_ENGINE_PWD": password,
        "SG_ENGINE_DB_NAME": "splitgraph",
        "SG_ENGINE_POSTGRES_DB_NAME": "postgres",
        "SG_ENGINE_ADMIN_USER": username,
        "SG_ENGINE_ADMIN_PWD": password,
    }

    if no_sgconfig:
        config_path = CONFIG["SG_CONFIG_FILE"]
    else:
        # The default engine (or an explicit --set-default) patches the top-level
        # config; any other engine is registered as a named remote instead.
        if name == DEFAULT_ENGINE or set_default:
            config_patch = conn_params
        else:
            config_patch = {"remotes": {name: conn_params}}
        config_path = patch_and_save_config(CONFIG, config_patch)

    if not no_init:
        engine = PostgresEngine(name=name, conn_params=conn_params)
        engine.initialize()
        engine.commit()
        click.echo("Engine initialized successfully.")

    inject_config_into_engines(CONFIG["SG_ENGINE_PREFIX"], config_path)
    click.echo("Done.")
    def write(self, value_: Any, **kwargs: Any) -> Result:
        """
        Writes the result to a repository on Splitgraph.

        Args:
            - value_ (Any): the value to write; will then be stored as the `value` attribute
                of the returned `Result` instance
            - **kwargs (optional): if provided, will be used to format the `table`, `comment`, and `tag`

        Returns:
            - Result: returns a new `Result` with both `value`, `comment`, `table`, and `tag` attributes

        Raises:
            - TypeError: if `value_` is not a pandas DataFrame
            - RuntimeError: if the engine did not connect after initialization
        """
        # Validate input with real exceptions: `assert` is stripped under `python -O`
        # and would let a bad payload reach the upload path silently.
        if not isinstance(value_, pd.DataFrame):
            raise TypeError(
                "value_ must be a pandas DataFrame, got %s" % type(value_).__name__)

        cfg = patch_config(create_config_dict(), self.env or dict())
        engine = PostgresEngine(name='SplitgraphResult', conn_params=cfg)
        engine.initialize()
        try:
            if not engine.connected:
                raise RuntimeError("Failed to connect to the Splitgraph engine")

            repo = Repository(namespace=self.namespace, repository=self.repo_name, engine=engine)

            if not repository_exists(repo) and self.auto_init_repo:
                self.logger.info("Creating repo {}/{}...".format(repo.namespace, repo.repository))
                repo.init()

            # TODO: Retrieve the repo from bedrock first

            new = self.format(**kwargs)
            new.value = value_

            self.logger.info("Starting to upload result to {}...".format(new.table))

            with self.atomic(engine):
                self.logger.info("checkout")
                img = repo.head
                img.checkout(force=True)

                self.logger.info("df to table")
                df_to_table(new.value, repository=repo, table=new.table, if_exists='replace')

                self.logger.info("commit")
                new_img = repo.commit(comment=new.comment, chunk_size=10000)
                new_img.tag(new.tag)

            if self.auto_push:
                self.logger.info("push")
                repo.push(
                    self.get_upstream(repo),
                    handler="S3",
                    overwrite_objects=True,
                    overwrite_tags=True,
                    reupload_objects=True,
                )
        finally:
            # Always release the engine connection, even if checkout/commit/push fails —
            # previously a failure here leaked the connection.
            engine.close()

        self.logger.info("Finished uploading result to {}...".format(new.table))

        return new