def import_data(self, **kwargs):
    """
        Imports data from an external source to create the test data
        :return a two item tuple containing the region that was imported and a list of the imported projects
    """
    # Calculate a sample lat/lon box of the config_entity
    config_entity = self.config_entity
    if self.test:
        bounds = chop_geom(config_entity.bounds, 0.90)
        logger.info(
            u"Creating subselection with extents: {0}. This will be used to crop any table "
            u"that doesn't have a sample version".format(bounds))

    conn = psycopg2.connect(**pg_connection_parameters(settings.DATABASES['default']))
    conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = conn.cursor()

    for db_entity in self.db_entities:
        # This is the index on wkb_geometry.
        spatial_index_name = '{schema}_{key}_geom_idx'.format(schema=db_entity.schema, key=db_entity.key)
        table = db_entity.table

        if db_entity.has_file_url:
            # Remove any table of the same name from the import schema. This is unlikely since imported
            # tables have timestamps
            drop_table('"%s"."%s"' % (settings.IMPORT_SCHEMA, db_entity.key))
            sql_file_path = file_url_to_path(db_entity.url)

            # Default the SRID to 4326 and verify it before importing
            db_entity.srid = db_entity.srid or '4326'
            logger.info("Verifying SRID {0}".format(db_entity.srid))
            verify_srid(db_entity.srid)

            # Create the import schema if needed
            PGNamespace.objects.create_schema(settings.IMPORT_SCHEMA)

            # Import the table by running the dumped sql file through psql
            import_sql_command = '/usr/bin/psql {0} -f {1}'.format(self.target_database_connection, sql_file_path)
            stdin = "{0}\n{1}".format(self.arguments.get('password', None), self.target_database.get('PASSWORD', None))
            results = self.command_execution.run(import_sql_command, stdin=stdin)
            if results.returncode:
                raise Exception(results.stderr.text)

            # We expect a table in the public schema with a name based on db_entity.key.
            # Move the table from the public schema to the db_entity schema
            move_to_schema = "alter table {0}.{1} set schema {2};".format(
                settings.IMPORT_SCHEMA, db_entity.key, db_entity.schema)
            logger.info("Moving import file table to schema: %s" % move_to_schema)
            cursor.execute(move_to_schema)

            # Drop the constraint that enforces the srid of the wkb_geometry if one exists
            drop_constraint = '''alter table {0}.{1} drop constraint if exists enforce_srid_wkb_geometry'''.format(
                db_entity.schema, db_entity.key)
            logger.info("Dropping constraint on wkb_geometry: %s" % drop_constraint)
            cursor.execute(drop_constraint)

            # Note we're not creating an index on wkb_geometry here
            # because imported files already have an index created.

        elif db_entity.has_db_url:
            # The import database currently stores tables as
            # public.[config_entity.key]_[feature_class._meta.db_table (with schema removed)][_sample (for samples)]
            #
            # We always use the table name without the word sample for the target table name
            if settings.USE_SAMPLE_DATA_SETS or self.test:
                source_table = "{0}_{1}_{2}".format(
                    config_entity.import_key or config_entity.key, db_entity.table, 'sample')
            else:
                source_table = "{0}_{1}".format(config_entity.import_key or config_entity.key, db_entity.table)
            connection_dict = postgres_url_to_connection_dict(db_entity.url)
            self._dump_tables_to_target(
                '-t %s' % source_table,
                source_schema='public',
                target_schema=db_entity.schema,
                source_table=source_table,
                target_table=table,
                connection_dict=connection_dict)

            # Create a spatial index
            spatial_index = '''create index {index_name} on {schema}.{key} using GIST (wkb_geometry);'''.format(
                index_name=spatial_index_name, schema=db_entity.schema, key=db_entity.key)
            cursor.execute(spatial_index)

        # Whether the table comes from our server or an upload, we want to transform the SRID to 4326
        transform_to_4326 = 'ALTER TABLE {schema}.{table} ALTER COLUMN wkb_geometry ' \
                            'TYPE Geometry(geometry, 4326) ' \
                            'USING ST_Transform(ST_Force_2d(wkb_geometry), 4326);'.format
        logger.info("Transforming to 4326: %s" % transform_to_4326(schema=db_entity.schema, table=db_entity.table))
        cursor.execute(transform_to_4326(schema=db_entity.schema, table=db_entity.table))

        # Now cluster the data and vacuum so that future joins are faster:
        # * CLUSTER rewrites the data on disk so that rows that are spatially near each
        #   other are also near each other on disk
        # * VACUUM cleans up disk space, removing sparse holes on disk.
        # * ANALYZE regenerates statistics about wkb_geometry so that the query planner can make
        #   better decisions.
        logger.info('Clustering %s.%s to optimize spatial joins', db_entity.schema, table)
        cluster = 'CLUSTER {index_name} ON {target_schema}.{target_table};'.format(
            index_name=spatial_index_name, target_schema=db_entity.schema, target_table=table)
        cursor.execute(cluster)

        logger.info('Vacuuming and analyzing %s.%s.', db_entity.schema, table)
        analyze = 'VACUUM ANALYZE {target_schema}.{target_table};'.format(
            target_schema=db_entity.schema, target_table=table)
        cursor.execute(analyze)

        logger.info("Finished importing data for DbEntity table {0}.{1}".format(db_entity.schema, db_entity.key))
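# --- Hedged sketch (not part of the original module) -----------------------
# import_data ends every table import with CLUSTER + VACUUM ANALYZE. The
# helper below isolates that sequence as a minimal, self-contained
# illustration. The function name and the example identifiers in the usage
# comment are hypothetical; only the SQL statements mirror the code above.
def optimize_spatial_table(cursor, schema, table, index_name):
    """Physically reorder a table by its GiST index, then refresh statistics."""
    # CLUSTER rewrites the table on disk in index order, so spatially nearby
    # rows end up on nearby pages and spatial joins touch fewer pages
    cursor.execute('CLUSTER "{0}" ON "{1}"."{2}";'.format(index_name, schema, table))
    # VACUUM reclaims dead space and ANALYZE regenerates planner statistics.
    # VACUUM cannot run inside a transaction block, which is why import_data
    # sets ISOLATION_LEVEL_AUTOCOMMIT on its connection
    cursor.execute('VACUUM ANALYZE "{0}"."{1}";'.format(schema, table))

# Example usage (hypothetical names):
#     optimize_spatial_table(cursor, 'my_schema', 'my_table', 'my_schema_my_table_geom_idx')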
def importer(self, config_entity, db_entity, **kwargs):
    """
        Replaces the normal ImportProcessor importer with one to import a sql file from disk
    """
    if InformationSchema.objects.table_exists(db_entity.schema, db_entity.table):
        # The table already exists. Skip the import and log a warning
        logger.warn("The target table for the feature table import already exists. Skipping table import.")
    else:
        # We don't store the upload_id alone, so pull it off the url
        upload_id = db_entity.url.replace('file:///tmp/', '').replace('.sql.zip', '')
        # Unpack the zipfile and return the path the sql file was placed at
        if db_entity.url.startswith('file://'):
            file_path = db_entity.url[len('file://'):]
        logger.warn(file_path)
        path = unpack_zipfile(file_path, upload_id)
        # The file is always named after the table defined therein
        table_name = path.split('/')[-1].split('.')[0].lower()
        # Update the db_entity.url from the zip file url to the file_path.
        # This lets ImportData find it.
        db_entity.url = 'file://%s' % path
        logger.info("Url of DbEntity is %s" % db_entity.url)
        db_entity.save()

        # Perform some sed-style updates to get the sql file ready for import
        regex_substitutions = []
        sql_file_path = file_url_to_path(db_entity.url)

        # Add IF EXISTS to the drop table to prevent an error if the table doesn't exist yet
        regex_substitutions.append((r'DROP TABLE (?!IF EXISTS)', r'DROP TABLE IF EXISTS'))

        # TODO temp, fix an AC bug. It seems that using a capitalized column is problematic (?)
        # The suggested solution is to double quote it, but quotes cause other problems, so we simply lowercase
        regex_substitutions.append((r' OGC_FID ', ' ogc_fid ', (4, 4)))  # only line 4
        regex_substitutions.append((r'PRIMARY KEY \(ogc_fid\)', 'PRIMARY KEY (ogc_fid)', (4, 4)))  # only line 4
        # TODO end temp fix

        # Update the index name to include the schema. This format matches that created for
        # preconfigured feature tables (see import_data.py)
        spatial_index_name = '{schema}_{key}_geom_idx'.format(schema=db_entity.schema, key=db_entity.key)
        regex_substitutions.append((r'CREATE INDEX ".*" ON', 'CREATE INDEX "%s" ON' % spatial_index_name, (6, 6)))  # only line 6

        # Remove the reference to the geometry_columns, since we use a materialized view
        regex_substitutions.append((r'^DELETE FROM geometry_columns', '--DELETE FROM geometry_columns', (2, 2)))

        # Update the sql to have a unique table name which matches the DbEntity key.
        # Also change public to our import schema to keep it from causing trouble in the public schema;
        # otherwise we run into all kinds of trouble trying to get the SQL into the system
        regex_substitutions.append((r'"public"."%s"' % table_name, '"import"."%s"' % db_entity.key))
        regex_substitutions.append((r"'%s'" % table_name, "'%s'" % db_entity.key, (2, 5)))
        regex_substitutions.append((r'"%s_pk"' % table_name, '"%s_pk"' % db_entity.key, (4, 4)))

        # Update public to the import schema
        regex_substitutions.append((r"AddGeometryColumn\('public'", "AddGeometryColumn('%s'" % settings.IMPORT_SCHEMA, (5, 5)))
        regex_substitutions.append((r'"%s_wkb_geometry_geom_idx"' % table_name, '"%s_wkb_geometry_geom_idx"' % db_entity.key, (6, 6)))

        for command in regex_substitutions:
            logger.info("Applying the following substitution %s" % ', '.join(command[0:2]))
        apply_regexes_to_file(sql_file_path, regex_substitutions)

        ImportData(config_entity=config_entity, db_entity_key=db_entity.key).run()

    # Add our normal primary key on the id column if needed
    add_primary_key_if_needed(db_entity)

    feature_class_creator = FeatureClassCreator(config_entity, db_entity)
    # Inspect the imported table to create the feature_class_configuration
    feature_class_configuration = feature_class_creator.feature_class_configuration_from_introspection()
    # Merge the created feature_class_configuration with the one already defined for the db_entity
    feature_class_creator.update_db_entity(feature_class_configuration)
    logger.info("Finished import for DbEntity: %s, feature_class_configuration: %s" % (
        db_entity, db_entity.feature_class_configuration))

    # Create association classes and tables and populate them with data
    create_and_populate_relations(config_entity, feature_class_creator.db_entity)
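# --- Hedged sketch (not part of the original module) -----------------------
# apply_regexes_to_file is defined elsewhere in the codebase; this sketch only
# illustrates the substitution-tuple convention importer builds up above:
# (pattern, replacement) applies to every line, while an optional third
# element (first_line, last_line) restricts it to a 1-based inclusive line
# range. The helper name and exact behavior here are assumptions.
import re

def apply_regexes_to_file_sketch(file_path, substitutions):
    with open(file_path) as f:
        lines = f.readlines()
    for substitution in substitutions:
        pattern, replacement = substitution[0], substitution[1]
        # Default to the whole file when no line range is given
        first, last = substitution[2] if len(substitution) > 2 else (1, len(lines))
        for i in range(first - 1, min(last, len(lines))):
            lines[i] = re.sub(pattern, replacement, lines[i])
    with open(file_path, 'w') as f:
        f.writelines(lines)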