def materialize(
    self,
    destination: str,
    destination_schema: Optional[str] = None,
    lq_server: Optional[str] = None,
) -> None:
    """
    Materializes a Splitgraph table in the target schema as a normal Postgres table,
    potentially downloading all required objects and using them to reconstruct the table.

    :param destination: Name of the destination table.
    :param destination_schema: Name of the destination schema.
    :param lq_server: If set, sets up a layered querying FDW for the table using this
        foreign server instead of materializing it.
    """
    destination_schema = destination_schema or self.repository.to_schema()
    engine = self.repository.object_engine
    object_manager = self.repository.objects
    engine.delete_table(destination_schema, destination)

    if not lq_server:
        # Materialize by applying fragments to one another in their dependency order.
        with object_manager.ensure_objects(
            table=self, objects=self.objects
        ) as required_objects:
            engine.create_table(
                schema=destination_schema,
                table=destination,
                schema_spec=self.table_schema,
                include_comments=True,
                unlogged=True,
            )
            if required_objects:
                logging.debug(
                    "Applying %s...", pluralise("fragment", len(required_objects))
                )
                table_size = self.get_size()
                progress_every: Optional[int]
                if table_size > _PROGRESS_EVERY:
                    progress_every = int(
                        ceil(len(required_objects) * _PROGRESS_EVERY / float(table_size))
                    )
                else:
                    progress_every = None
                engine.apply_fragments(
                    [
                        (SPLITGRAPH_META_SCHEMA, d)
                        for d in cast(List[str], required_objects)
                    ],
                    destination_schema,
                    destination,
                    progress_every=progress_every,
                )
    else:
        # Layered querying: expose the table through a foreign table instead of
        # downloading and applying its fragments.
        query, args = create_foreign_table(
            destination_schema, lq_server, self.table_name, self.table_schema
        )
        engine.run_sql(query, args)
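# Illustrative usage sketch, not part of the original module: the repository name,
# table name and the "example_lq" foreign server below are assumptions, shown only
# to demonstrate how materialize() might be called from the Python API.
def _example_materialize_usage():
    from splitgraph.core.repository import Repository

    repo = Repository("example", "weather_data")
    table = repo.images["latest"].get_table("weather")

    # Fully materialize the table into the repository's checkout schema.
    table.materialize("weather_materialized")

    # Alternatively, set up a layered querying foreign table through an existing
    # foreign server instead of downloading and applying all fragments.
    table.materialize("weather_lq", lq_server="example_lq")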
def generate_socrata_mount_queries(sought_ids, datasets, mountpoint, server_id, tables):
    # Local imports since this module gets run from the commandline entrypoint on startup.
    from splitgraph.core.output import slugify
    from splitgraph.core.output import truncate_list
    from splitgraph.core.output import pluralise
    from splitgraph.core.table import create_foreign_table
    from splitgraph.ingestion.socrata.querying import socrata_to_sg_schema

    found_ids = set(d["resource"]["id"] for d in datasets)
    logging.info("Loaded metadata for %s", pluralise("Socrata table", len(found_ids)))

    if tables:
        # Report any requested Socrata IDs that the metadata query didn't return.
        missing_ids = [d for d in sought_ids if d not in found_ids]
        if missing_ids:
            raise ValueError(
                "Some Socrata tables couldn't be found! Missing tables: %s"
                % truncate_list(missing_ids)
            )
        tables_inv = {s: p for p, s in tables.items()}
    else:
        tables_inv = {}

    mount_statements = []
    mount_args = []
    for dataset in datasets:
        socrata_id = dataset["resource"]["id"]
        table_name = tables_inv.get(socrata_id) or slugify(
            dataset["resource"]["name"]
        ) + "_" + socrata_id.replace("-", "_")
        schema_spec, column_map = socrata_to_sg_schema(dataset)
        sql, args = create_foreign_table(
            schema=mountpoint,
            server=server_id,
            table_name=table_name,
            schema_spec=schema_spec,
            internal_table_name=socrata_id,
            extra_options={"column_map": json.dumps(column_map)},
        )
        description = dataset["resource"].get("description")
        if description:
            sql += SQL("COMMENT ON FOREIGN TABLE {}.{} IS %s").format(
                Identifier(mountpoint), Identifier(table_name)
            )
            args.append(description)
        mount_statements.append(sql)
        mount_args.extend(args)

    return mount_statements, mount_args
def generate_socrata_mount_queries(sought_ids, datasets, mountpoint, server_id, tables: TableInfo):
    # Local imports since this module gets run from the commandline entrypoint on startup.
    from splitgraph.core.output import slugify
    from splitgraph.core.output import pluralise
    from splitgraph.ingestion.socrata.querying import socrata_to_sg_schema

    found_ids = set(d["resource"]["id"] for d in datasets)
    logging.info("Loaded metadata for %s", pluralise("Socrata table", len(found_ids)))

    tables_inv = _get_table_map(found_ids, sought_ids, tables)

    mount_statements = []
    mount_args = []
    for dataset in datasets:
        socrata_id = dataset["resource"]["id"]
        table_name = tables_inv.get(socrata_id) or slugify(
            dataset["resource"]["name"]
        ) + "_" + socrata_id.replace("-", "_")
        schema_spec, column_map = socrata_to_sg_schema(dataset)
        sql, args = create_foreign_table(
            schema=mountpoint,
            server=server_id,
            table_name=table_name,
            schema_spec=schema_spec,
            extra_options={"column_map": json.dumps(column_map), "table": socrata_id},
        )
        description = dataset["resource"].get("description")
        if description:
            sql += SQL("COMMENT ON FOREIGN TABLE {}.{} IS %s").format(
                Identifier(mountpoint), Identifier(table_name)
            )
            args.append(description)
        mount_statements.append(sql)
        mount_args.extend(args)

    return mount_statements, mount_args
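# Illustrative sketch of consuming the generated queries, not part of the original
# module: the dataset metadata, mountpoint, foreign server name and engine argument
# are assumptions. The generated statements are joined into one batch and executed
# together with the collected query arguments.
def _example_mount_socrata(engine, datasets):
    from psycopg2.sql import SQL

    statements, args = generate_socrata_mount_queries(
        sought_ids={"abcd-1234"},
        datasets=datasets,
        mountpoint="socrata",
        server_id="socrata_server",
        tables={"my_table": "abcd-1234"},
    )
    engine.run_sql(SQL(";").join(statements), args)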
def reindex_c(image_spec, table_name, index_options, ignore_patch_objects):
    """
    Run extra indexes on a table.

    This will merge the indexing results for all objects that a table is formed from
    with the current object indexes. For an explanation of what indexes do, see the
    documentation for `sgr commit`.

    If the objects haven't been downloaded yet, this will download them.

    Currently, reindexing objects that change other objects is unsupported and will
    raise an error. Pass `-o` to ignore these objects and only reindex supported objects.

    Image spec must be of the format ``[NAMESPACE/]REPOSITORY[:HASH_OR_TAG]``. If no
    tag is specified, ``HEAD`` is used.
    """
    from splitgraph.core.output import pluralise

    repository, image = image_spec
    table = image.get_table(table_name)

    click.echo(
        "Reindexing table %s:%s/%s"
        % (repository.to_schema(), image.image_hash, table_name)
    )
    reindexed = table.reindex(
        extra_indexes=index_options, raise_on_patch_objects=not ignore_patch_objects
    )
    click.echo("Reindexed %s" % pluralise("object", len(reindexed)))
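# Hypothetical Python-level equivalent of the `sgr reindex` command above, not part
# of the original module: the repository/table names and the bloom index options
# structure are assumptions made for illustration only.
def _example_reindex_usage():
    from splitgraph.core.repository import Repository

    repo = Repository("example", "weather_data")
    table = repo.images["latest"].get_table("weather")

    # Request an extra bloom index on one column (assumed option format) and raise
    # instead of skipping objects that patch other objects.
    reindexed = table.reindex(
        extra_indexes={"bloom": {"some_column": {"probability": 0.01}}},
        raise_on_patch_objects=True,
    )
    print("Reindexed %d object(s)" % len(reindexed))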