def objects_c(local):
    """
    List objects known to this engine.
    """
    from splitgraph.core.object_manager import ObjectManager
    from splitgraph.engine import get_engine

    manager = ObjectManager(get_engine())
    # Pick the listing method up front: only downloaded objects when `local`
    # is truthy, otherwise every object the manager reports.
    list_objects = manager.get_downloaded_objects if local else manager.get_all_objects
    click.echo("\n".join(sorted(list_objects())))
def test_uninitialized_engine_error(local_engine_empty):
    """
    Check that a missing splitgraph_meta / splitgraph_api / splitgraph_audit
    schema surfaces as an EngineInitializationError rather than a generic
    SQL error.
    """
    engine = local_engine_empty

    # Each scenario: (statement that breaks the engine, call expected to fail,
    # text that must appear in the resulting error message).
    scenarios = (
        (
            "DROP SCHEMA splitgraph_meta CASCADE",
            lambda: lookup_repository("some/repo", include_local=True),
            "splitgraph_meta",
        ),
        (
            "DROP SCHEMA splitgraph_api CASCADE",
            lambda: ObjectManager(engine).get_downloaded_objects(),
            "splitgraph_api",
        ),
        (
            "DROP SCHEMA splitgraph_audit CASCADE",
            lambda: engine.discard_pending_changes("some/repo"),
            "Audit triggers",
        ),
    )

    try:
        for position, (drop_statement, failing_call, expected_text) in enumerate(scenarios):
            if position > 0:
                # Repair the damage done by the previous scenario before
                # breaking the next schema.
                engine.initialize()
                engine.commit()

            engine.run_sql(drop_statement)
            with pytest.raises(EngineInitializationError) as raised:
                failing_call()
            assert expected_text in str(raised.value)
    finally:
        # Always leave the engine initialized, whether or not a check failed.
        engine.initialize()
        engine.commit()
def remote_engine_registry(test_remote_engine):
    """
    Yield `test_remote_engine` after switching it through `setup_registry_mode`
    and removing all of its repositories; the same purge is repeated (after a
    rollback) once the consumer is done with the engine.
    """

    def _purge_and_close():
        # Delete every repository without unchecking it out, clean up the
        # object metadata, then commit and close the connection.
        for repository, _ in get_current_repositories(test_remote_engine):
            repository.delete(uncheckout=False)
        ObjectManager(test_remote_engine).cleanup_metadata()
        test_remote_engine.commit()
        test_remote_engine.close()

    # The flag is reset to False before setup_registry_mode runs.
    set_info_key(test_remote_engine, "registry_mode", False)
    setup_registry_mode(test_remote_engine)
    _purge_and_close()

    try:
        yield test_remote_engine
    finally:
        # Discard whatever the consumer left uncommitted before purging.
        test_remote_engine.rollback()
        _purge_and_close()
def clean_out_engine(engine):
    """
    Delete every repository on `engine`, drop the schemas of all
    TEST_MOUNTPOINTS, run the object manager's cleanup and commit.
    """
    logging.info("Cleaning out engine %r", engine)

    for repository, _ in get_current_repositories(engine):
        repository.delete()

    # One DROP per test mountpoint, sent to the engine as a single
    # semicolon-joined statement.
    drop_statements = [
        SQL("DROP SCHEMA IF EXISTS {} CASCADE").format(Identifier(test_mountpoint.to_schema()))
        for test_mountpoint in TEST_MOUNTPOINTS
    ]
    engine.run_sql(SQL(";").join(drop_statements))

    ObjectManager(engine).cleanup()
    engine.commit()
def cleanup_c():
    """
    Prune unneeded objects from the engine.

    This deletes all objects from the cache that aren't required by any local repository.
    """
    from splitgraph.core.object_manager import ObjectManager
    from splitgraph.engine import get_engine
    from ..core.output import pluralise

    manager = ObjectManager(get_engine())
    removed = manager.cleanup()

    # Report how many objects the cleanup returned.
    summary = pluralise("object", len(removed))
    click.echo("Deleted %s." % summary)
def _eval(command, args):
    """
    Execute `command` with `exec`, using the module globals and a local
    namespace pre-populated with the engine, an ObjectManager bound to it,
    the imported Splitgraph names and every (name, value) pair in `args`.

    :param command: Python source passed to `exec`.
    :param args: Iterable of (name, value) pairs exposed to the command as
        local variables; they override the pre-populated names on clash.
    """
    # appease PyCharm
    # noinspection PyUnresolvedReferences
    from splitgraph.core.repository import Repository
    from splitgraph.engine import get_engine
    from splitgraph.core.object_manager import ObjectManager

    engine = get_engine()
    object_manager = ObjectManager(object_engine=engine, metadata_engine=engine)

    # Everything the executed command can see as a local name.
    namespace = {
        "command": command,
        "args": args,
        "Repository": Repository,
        "get_engine": get_engine,
        "ObjectManager": ObjectManager,
        "engine": engine,
        "object_manager": object_manager,
    }
    for name, value in args:
        namespace[name] = value

    # NOTE(review): exec runs arbitrary code; presumably `command` only ever
    # comes from the invoking user -- confirm it is never fed untrusted input.
    exec(command, globals(), namespace)
def object_c(object_id):
    """
    Show information about a Splitgraph object.

    Objects, or fragments, are building blocks of Splitgraph tables: each table consists of multiple immutable fragments
    that can partially overwrite each other. Each fragment might have a parent that it depends on. In addition,
    the smallest and largest values for every column are stored in the fragment's metadata. This information is used
    to choose which objects to download in order to execute a query against a table.
    """
    from splitgraph.core.object_manager import ObjectManager
    from splitgraph.engine import get_engine, ResultShape
    from ..core.output import pretty_size
    from ..core.sql import select
    from splitgraph.core.indexing.bloom import describe

    manager = ObjectManager(get_engine())
    meta_by_id = manager.get_object_meta([object_id])
    if not meta_by_id:
        raise click.BadParameter("Object %s does not exist!" % object_id)
    meta = meta_by_id[object_id]

    # Basic metadata, one field per line.
    click.echo("Object ID: %s" % object_id)
    click.echo()
    click.echo("Namespace: %s" % meta.namespace)
    click.echo("Format: %s" % meta.format)
    click.echo("Size: %s" % pretty_size(meta.size))
    click.echo("Created: %s" % meta.created)
    click.echo("Rows inserted: %s" % meta.rows_inserted)
    click.echo("Insertion hash: %s" % meta.insertion_hash)
    click.echo("Rows deleted: %s" % meta.rows_deleted)
    click.echo("Deletion hash: %s" % meta.deletion_hash)

    # Per-column range index, followed by the bloom index when one is present.
    click.echo("Column index:")
    for column, bounds in meta.object_index["range"].items():
        click.echo(" %s: [%r, %r]" % (column, bounds[0], bounds[1]))
    if "bloom" in meta.object_index:
        click.echo("Bloom index: ")
        for column, bloom_data in meta.object_index["bloom"].items():
            click.echo(" %s: %s" % (column, describe(bloom_data)))

    if manager.object_engine.registry:
        # Don't try to figure out the object's location if we're talking
        # to the registry.
        return

    click.echo()
    cached = manager.object_engine.run_sql(
        select("object_cache_status", "1", "object_id = %s"),
        (object_id,),
        return_shape=ResultShape.ONE_ONE,
    )
    downloaded = object_id in manager.get_downloaded_objects(limit_to=[object_id])
    external = manager.get_external_object_locations([object_id])

    # Present on the engine but absent from the cache status table.
    if downloaded and not cached:
        click.echo("Location: created locally")
        return

    if external:
        origin = "%s (%s)" % (external[0][1], external[0][2])
    else:
        origin = "remote engine"

    if not cached:
        click.echo("Location: " + origin)
        return

    click.echo("Location: cached locally")
    click.echo("Original location: " + origin)