def test_repo_lookup_override(remote_engine):
    test_repo = Repository("overridden", "repo", engine=remote_engine)
    try:
        test_repo.init()
        assert lookup_repository("overridden/repo") == test_repo
    finally:
        test_repo.delete(unregister=True, uncheckout=True)

def pg_repo_remote_registry(local_engine_empty, remote_engine_registry, clean_minio):
    staging = Repository("test", "pg_mount_staging")
    staging = make_pg_repo(get_engine(), staging)
    result = staging.push(
        Repository(REMOTE_NAMESPACE, "pg_mount", engine=remote_engine_registry),
        handler="S3",
        handler_options={},
    )
    staging.delete()
    staging.objects.cleanup()
    yield result

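# Judging by the yield and the fixture-style arguments, pg_repo_remote_registry
# is presumably registered as a pytest fixture (via @pytest.fixture in a
# conftest). A hypothetical consumer, not taken from the source, assuming
# push() returns the target Repository and repository.images() lists its images:
def test_pushed_repo_is_usable(pg_repo_remote_registry):
    assert pg_repo_remote_registry.namespace == REMOTE_NAMESPACE
    assert len(pg_repo_remote_registry.images()) > 0
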
def build_repo():
    repo = Repository(namespace="abc", repository="1234")
    repo.delete()
    repo.init()
    df_to_table(fake_data(8), repository=repo, table="unit_test", if_exists="replace")
    new_img = repo.commit()
    new_img.checkout()
    return repo

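# A minimal usage sketch for build_repo, not from the source: fake_data(8) is
# assumed to produce an 8-row DataFrame, and sql_to_df (the read-side
# counterpart of df_to_table in splitgraph.ingestion.pandas) reads the
# committed table back for a round-trip check.
from splitgraph.ingestion.pandas import sql_to_df


def example_build_repo_roundtrip():
    repo = build_repo()
    df = sql_to_df("SELECT * FROM unit_test", repository=repo)
    assert len(df) == 8
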
def _execute_from(
    node: Node, output: Repository
) -> Tuple[Repository, Optional[ProvenanceLine]]:
    interesting_nodes = extract_nodes(node, ["repo_source", "repository"])
    repo_source = get_first_or_none(interesting_nodes, "repo_source")
    output_node = get_first_or_none(interesting_nodes, "repository")

    provenance: Optional[ProvenanceLine] = None

    if output_node:
        # AS (output) detected, change the current output repository to it.
        output = Repository.from_schema(output_node.match.group(0))
        logging.info("Changed output repository to %s", output)

    # NB this destroys all data in the case where we ran some commands in the
    # Splitfile and then did FROM (...) without AS repository.
    if repository_exists(output):
        logging.info("Clearing all output from %s", output)
        output.delete()
    if not repository_exists(output):
        output.init()

    if repo_source:
        repository, tag_or_hash = parse_image_spec(repo_source)
        source_repo = lookup_repository(repository.to_schema(), include_local=True)

        if source_repo.engine.name == "LOCAL":
            # For local repositories, make sure to update them if they have an upstream.
            if source_repo.upstream:
                source_repo.pull()

        # Get the target image hash from the source repo: otherwise, if the tag is,
        # say, 'latest' and the output has just had the base commit (000...) created
        # in it, that commit will be the latest.
        clone(source_repo, local_repository=output, download_all=False)
        source_hash = source_repo.images[tag_or_hash].image_hash
        output.images.by_hash(source_hash).checkout()
        provenance = {
            "type": "FROM",
            "source_namespace": source_repo.namespace,
            "source": source_repo.repository,
            "source_hash": source_hash,
        }
    else:
        # FROM EMPTY AS repository -- initializes an empty repository (say, to create
        # a table or import the results of a previous stage in a multistage build).
        # In this case, if AS repository has been specified, it's already been
        # initialized; if not, this command literally does nothing.
        if not output_node:
            raise SplitfileError("FROM EMPTY without AS (repository) does nothing!")
    return output, provenance

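# For context, the two Splitfile FROM forms the function above handles
# (repository names are illustrative, not from the source):
#
#   FROM some_namespace/some_repo:latest AS my_namespace/output
#       - clones the tagged source image into the output repository and
#         records a {"type": "FROM", ...} provenance line
#
#   FROM EMPTY AS my_namespace/output
#       - initializes an empty output repository; FROM EMPTY without AS
#         raises SplitfileError, as in the else branch above
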
def test_mount_elasticsearch(local_engine_empty):
    # No ES running in this stack: this is just a test that we can instantiate the FDW.
    repo = Repository("test", "es_mount")
    try:
        mount(
            repo.to_schema(),
            "elasticsearch",
            dict(
                username=None,
                password=None,
                server="elasticsearch",
                port=9200,
                table_spec={
                    "table_1": {
                        "schema": {
                            "id": "text",
                            "@timestamp": "timestamp",
                            "query": "text",
                            "col_1": "text",
                            "col_2": "boolean",
                        },
                        "index": "index-pattern*",
                        "rowid_column": "id",
                        "query_column": "query",
                    }
                },
            ),
        )

        assert get_engine().get_full_table_schema(repo.to_schema(), "table_1") == [
            TableColumn(ordinal=1, name="id", pg_type="text", is_pk=False, comment=None),
            TableColumn(
                ordinal=2,
                name="@timestamp",
                pg_type="timestamp without time zone",
                is_pk=False,
                comment=None,
            ),
            TableColumn(ordinal=3, name="query", pg_type="text", is_pk=False, comment=None),
            TableColumn(ordinal=4, name="col_1", pg_type="text", is_pk=False, comment=None),
            TableColumn(ordinal=5, name="col_2", pg_type="boolean", is_pk=False, comment=None),
        ]
    finally:
        repo.delete()

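# Hypothetical follow-up, not from the source: with a live Elasticsearch
# behind the FDW (the test stack above deliberately has none), the mounted
# foreign table could presumably be queried like any other table. run_sql and
# the psycopg2 SQL/Identifier composition mirror their use elsewhere in
# Splitgraph; the column names are the ones mounted above.
from psycopg2.sql import SQL, Identifier


def example_query_mounted_es(repo):
    return get_engine().run_sql(
        SQL("SELECT id, col_1 FROM {}.table_1 LIMIT 5").format(
            Identifier(repo.to_schema())
        )
    )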