def update_result_table_geometry_column(self, options):
    """
    Attach a wkb_geometry column to the VMT result table, populate it from the
    UF canvas table, and rebuild the VMT rel table to match the result rows.
    """
    # Add the geometry column so the VMT results can be rendered spatially.
    execute_sql(
        '''alter table {vmt_schema}.{vmt_result_table}
           add column wkb_geometry geometry (GEOMETRY, 4326); '''.format(**options))

    # Copy geometries over from the canvas table (joined on id) and GiST-index them.
    execute_sql(
        '''
        update {vmt_schema}.{vmt_result_table} b set
            wkb_geometry = a.wkb_geometry
        from (select id, wkb_geometry from {uf_canvas_schema}.{uf_canvas_table}) a
        where cast(a.id as int) = cast(b.id as int);
        CREATE INDEX ON {vmt_schema}.{vmt_result_table} USING gist (wkb_geometry);
        '''.format(**options))

    # Rebuild the rel table: one row per result row, stamped with the update time.
    timestamp = datetime.datetime.now()
    truncate_table('{0}.{1}'.format(options['vmt_schema'], options['vmt_rel_table']))
    execute_sql(
        '''
        insert into {vmt_schema}.{rel_table} ({rel_column}, updated)
        select id, '{updated}' from {vmt_schema}.{vmt_result_table};'''.format(
            vmt_schema=options['vmt_schema'],
            vmt_result_table=options['vmt_result_table'],
            rel_table=options['vmt_rel_table'],
            rel_column=options['vmt_rel_column'],
            updated=timestamp))

    # Regenerate the association classes/tables for the refreshed VMT DbEntity.
    from footprint.main.publishing.data_import_publishing import create_and_populate_relations
    create_and_populate_relations(
        options['config_entity'],
        options['config_entity'].computed_db_entities(key=DbEntityKey.VMT)[0])
def update_result_table_geometry_column(self, options):
    """Add, populate and index wkb_geometry on the VMT result table, then refresh its rel table."""
    alter_sql = '''alter table {vmt_schema}.{vmt_result_table} add column wkb_geometry geometry (GEOMETRY, 4326); '''.format(**options)
    execute_sql(alter_sql)

    # Pull geometries from the canvas table by id and build a spatial index in one batch.
    populate_sql = '''
    update {vmt_schema}.{vmt_result_table} b set
        wkb_geometry = a.wkb_geometry
    from (select id, wkb_geometry from {uf_canvas_schema}.{uf_canvas_table}) a
    where cast(a.id as int) = cast(b.id as int);
    CREATE INDEX ON {vmt_schema}.{vmt_result_table} USING gist (wkb_geometry);
    '''.format(**options)
    execute_sql(populate_sql)

    # The rel table must mirror the result table row-for-row, so wipe and refill it.
    updated = datetime.datetime.now()
    truncate_table(options['vmt_schema'] + '.' + options['vmt_rel_table'])
    rel_insert_sql = '''
    insert into {vmt_schema}.{rel_table} ({rel_column}, updated)
    select id, '{updated}' from {vmt_schema}.{vmt_result_table};'''.format(
        vmt_schema=options['vmt_schema'],
        vmt_result_table=options['vmt_result_table'],
        rel_table=options['vmt_rel_table'],
        rel_column=options['vmt_rel_column'],
        updated=updated)
    execute_sql(rel_insert_sql)

    # Recreate association classes/tables for the VMT DbEntity and populate them.
    from footprint.main.publishing.data_import_publishing import create_and_populate_relations
    vmt_db_entity = options['config_entity'].computed_db_entities(key=DbEntityKey.VMT)[0]
    create_and_populate_relations(options['config_entity'], vmt_db_entity)
def importer(self, config_entity, db_entity, **kwargs):
    """
    Creates various GeojsonFeature classes by importing geojson and saving it to the database via a
    dynamic subclass of GeojsonFeature

    :schema: The optional schema to use for the dynamic subclass's meta db_table attribute,
        which will allow the class's table to be saved in the specified schema. Defaults to public
    :data: Optional python dict data to use instead of loading from the db_entity.url
    :return: a list of lists. Each list is a list of features of distinct subclass of GeoJsonFeature
        that is created dynamically. To persist these features, you must first create the subclass's
        table in the database using create_table_for_dynamic_class(). You should also register the
        table as a DbEntity.
    """
    if self.seed_data:
        data = geojson.loads(jsonify(self.seed_data), object_hook=geojson.GeoJSON.to_instance)
    else:
        # Fix: the previous code opened the file without ever closing it, leaking the
        # handle; a context manager guarantees the file is closed even on parse errors.
        with open(db_entity.url.replace('file://', '')) as fp:
            data = geojson.load(fp, object_hook=geojson.GeoJSON.to_instance)

    feature_class_creator = FeatureClassCreator(config_entity, db_entity)
    # Find all unique properties in the geojson and derive a feature_class_configuration
    feature_class_configuration = feature_class_creator.feature_class_configuration_from_geojson_introspection(data)
    # Persist the derived configuration onto the db_entity
    feature_class_creator.update_db_entity(feature_class_configuration)
    # Resolve the dynamic model class for the base (non-rel) table
    feature_class = feature_class_creator.dynamic_model_class(base_only=True)

    # Create our base table. Normally this is done by the import, but we're just importing into memory
    create_tables_for_dynamic_classes(feature_class)

    # Now write each feature to our newly created table
    for geojson_feature in data.features:
        self.instantiate_sub_class(feature_class, geojson_feature).save()

    # Create the rel table too
    rel_feature_class = feature_class_creator.dynamic_model_class()
    create_tables_for_dynamic_classes(rel_feature_class)

    # PostGIS 2 registers the geometry column for us now, so the old
    # sync_geometry_columns(db_entity.schema, db_entity.table) call is no longer needed.

    # Create association classes and tables and populate them with data
    create_and_populate_relations(config_entity, db_entity)
def write_results_to_database(self, options, public_health_output_list):
    """
    Persist the public-health grid outcomes: recreate the outcome table, bulk-load the
    rows via a text COPY, attach geometry/indexes, and rebuild the rel table.
    """
    grid_table = '{grid_outcome_schema}.{grid_outcome_table}'.format(**options)

    # Recreate the outcome table with an id plus one numeric column per outcome field.
    drop_table(grid_table)
    non_id_fields = [field for field in self.outcome_fields if field != 'id']
    options['output_field_syntax'] = 'id int, ' + create_sql_calculations(non_id_fields, '{0} numeric(20,8)')
    execute_sql("create table {grid_outcome_schema}.{grid_outcome_table} ({output_field_syntax});".format(**options))

    # Serialize every result row into a tab-delimited buffer; ints verbatim,
    # everything else rounded to the table's numeric(20,8) precision.
    output_textfile = StringIO("")
    for row in public_health_output_list:
        cells = [str(value) if isinstance(value, int) else str(round(value, 8)) for value in row]
        output_textfile.write("\t".join(cells) + "\n")
    output_textfile.seek(os.SEEK_SET)
    # copy text file output back into Postgres
    copy_from_text_to_db(output_textfile, grid_table)
    output_textfile.close()

    # Attach a geometry column and fill it from the source grid, joined on id.
    execute_sql('''alter table {grid_outcome_schema}.{grid_outcome_table}
        add column wkb_geometry geometry (GEOMETRY, 4326);'''.format(**options))
    execute_sql('''update {grid_outcome_schema}.{grid_outcome_table} b set
        wkb_geometry = st_setSRID(a.wkb_geometry, 4326)
        from (select id, wkb_geometry from {source_grid_schema}.{source_grid_table}) a
        where cast(a.id as int) = cast(b.id as int);
    '''.format(**options))

    add_geom_idx(options['grid_outcome_schema'], options['grid_outcome_table'], 'wkb_geometry')
    add_primary_key(options['grid_outcome_schema'], options['grid_outcome_table'], 'id')

    # Since not every grid cell results in a grid_outcome, wipe out the rel table and
    # recreate it to match the base grid_outcome table. Otherwise there would be too
    # many rel table rows and cloning the DbEntity or ConfigEntity would fail.
    logger.info("Writing to relative table {grid_outcome_schema}.{grid_outcome_table}rel".format(**options))
    truncate_table("{grid_outcome_schema}.{grid_outcome_table}rel".format(**options))
    from footprint.main.publishing.data_import_publishing import create_and_populate_relations
    create_and_populate_relations(
        self.config_entity,
        self.config_entity.computed_db_entities(key=DbEntityKey.PH_GRID_OUTCOMES)[0])
def run_analysis(self, **kwargs):
    """Run the energy analysis for this config_entity and persist results plus the rel table."""
    started = time.time()
    self.report_progress(0.1, **kwargs)

    # Resolve the feature classes the calculations read from / write to.
    self.climate_zone_class = self.config_entity.db_entity_feature_class(DbEntityKey.CLIMATE_ZONES)
    self.energy_class = self.config_entity.db_entity_feature_class(DbEntityKey.ENERGY)
    self.base_class = self.config_entity.db_entity_feature_class(DbEntityKey.BASE_CANVAS)

    # Schema/table of the energy feature class; the rel column is the parent-link
    # column taken from the model's _meta.parents mapping.
    energy_schema_and_table = parse_schema_and_table(self.energy_class._meta.db_table)
    self.rel_table = energy_schema_and_table[1]
    self.rel_column = self.energy_class._meta.parents.values()[0].column

    options = dict(
        energy_result_table=self.energy_class.db_entity_key,
        energy_schema=energy_schema_and_table[0],
        base_table=self.base_class.db_entity_key,
        base_schema=parse_schema_and_table(self.base_class._meta.db_table)[0],
    )
    logger.info("Running Energy Updater Tool with options %s" % options)

    if isinstance(self.config_entity.subclassed, FutureScenario):
        self.end_state_class = self.config_entity.db_entity_feature_class(DbEntityKey.END_STATE)
        logger.info("Running Future Calculations")
        energy_output_list, options = self.run_future_calculations(options, **kwargs)
    else:
        logger.info("Running Base Calculations")
        energy_output_list, options = self.run_base_calculations(options, **kwargs)

    logger.info("Writing to feature table {energy_schema}.{energy_result_table}".format(**options))
    self.write_results_to_database(options, energy_output_list)

    logger.info("Writing to relative table {energy_schema}.{rel_table}".format(
        energy_schema=options['energy_schema'], rel_table=self.rel_table))

    # Rebuild the rel table: one row per result row, stamped with the update time.
    updated = datetime.datetime.now()
    truncate_table(options['energy_schema'] + '.' + self.rel_table)
    execute_sql('''
    insert into {energy_schema}.{rel_table} ({rel_column}, updated)
    select id, '{updated}' from {energy_schema}.{energy_result_table};'''.format(
        energy_schema=options['energy_schema'],
        energy_result_table=options['energy_result_table'],
        rel_table=self.rel_table,
        rel_column=self.rel_column,
        updated=updated))

    # Regenerate association classes/tables for the energy DbEntity.
    from footprint.main.publishing.data_import_publishing import create_and_populate_relations
    create_and_populate_relations(
        self.config_entity,
        self.config_entity.computed_db_entities(key=DbEntityKey.ENERGY)[0])

    self.report_progress(0.2, **kwargs)
    logger.info('Finished: ' + str(time.time() - started))
def run_analysis(self, **kwargs):
    """Entry point for the energy analysis: compute per-feature energy use and persist it."""
    analysis_start = time.time()
    self.report_progress(0.1, **kwargs)

    self.climate_zone_class = self.config_entity.db_entity_feature_class(DbEntityKey.CLIMATE_ZONES)
    self.energy_class = self.config_entity.db_entity_feature_class(DbEntityKey.ENERGY)
    self.base_class = self.config_entity.db_entity_feature_class(DbEntityKey.BASE_CANVAS)

    self.rel_table = parse_schema_and_table(self.energy_class._meta.db_table)[1]
    self.rel_column = self.energy_class._meta.parents.values()[0].column

    options = {
        'energy_result_table': self.energy_class.db_entity_key,
        'energy_schema': parse_schema_and_table(self.energy_class._meta.db_table)[0],
        'base_table': self.base_class.db_entity_key,
        'base_schema': parse_schema_and_table(self.base_class._meta.db_table)[0],
    }
    logger.info("Running Energy Updater Tool with options %s" % options)

    # Future scenarios also need the end-state canvas; base scenarios do not.
    is_future = isinstance(self.config_entity.subclassed, FutureScenario)
    if is_future:
        self.end_state_class = self.config_entity.db_entity_feature_class(DbEntityKey.END_STATE)
        logger.info("Running Future Calculations")
        energy_output_list, options = self.run_future_calculations(options, **kwargs)
    else:
        logger.info("Running Base Calculations")
        energy_output_list, options = self.run_base_calculations(options, **kwargs)

    logger.info("Writing to feature table {energy_schema}.{energy_result_table}".format(**options))
    self.write_results_to_database(options, energy_output_list)

    logger.info("Writing to relative table {energy_schema}.{rel_table}".format(
        energy_schema=options['energy_schema'],
        rel_table=self.rel_table))

    # Refresh the rel table so it mirrors the result table.
    updated = datetime.datetime.now()
    truncate_table(options['energy_schema'] + '.' + self.rel_table)
    pSql = '''
    insert into {energy_schema}.{rel_table} ({rel_column}, updated)
    select id, '{updated}' from {energy_schema}.{energy_result_table};'''.format(
        energy_schema=options['energy_schema'],
        energy_result_table=options['energy_result_table'],
        rel_table=self.rel_table,
        rel_column=self.rel_column,
        updated=updated)
    execute_sql(pSql)

    from footprint.main.publishing.data_import_publishing import create_and_populate_relations
    energy_db_entity = self.config_entity.computed_db_entities(key=DbEntityKey.ENERGY)[0]
    create_and_populate_relations(self.config_entity, energy_db_entity)

    self.report_progress(0.2, **kwargs)
    logger.info('Finished: ' + str(time.time() - analysis_start))
def write_results_to_database(self, options, public_health_output_list):
    """
    Write the public-health outcome rows to a freshly created grid table, then add
    geometry, indexes and primary key, and truncate/repopulate the rel table.
    """
    drop_table('{grid_outcome_schema}.{grid_outcome_table}'.format(**options))

    # Every outcome field except id becomes a numeric(20,8) column.
    attribute_list = filter(lambda x: x != 'id', self.outcome_fields)
    options['output_field_syntax'] = 'id int, ' + create_sql_calculations(attribute_list, '{0} numeric(20,8)')
    execute_sql(
        "create table {grid_outcome_schema}.{grid_outcome_table} ({output_field_syntax});".format(**options))

    # Build a tab-delimited text buffer and COPY it into Postgres in one shot.
    buffer = StringIO("")
    for outcome_row in public_health_output_list:
        pieces = []
        for value in outcome_row:
            if isinstance(value, int):
                pieces.append(str(value))
            else:
                # round to match the numeric(20,8) column precision
                pieces.append(str(round(value, 8)))
        buffer.write("\t".join(pieces) + "\n")
    buffer.seek(os.SEEK_SET)
    copy_from_text_to_db(buffer, '{grid_outcome_schema}.{grid_outcome_table}'.format(**options))
    buffer.close()

    # Add the geometry column, then fill it from the source grid joined on id.
    alter_sql = '''alter table {grid_outcome_schema}.{grid_outcome_table}
        add column wkb_geometry geometry (GEOMETRY, 4326);'''.format(**options)
    execute_sql(alter_sql)
    update_sql = '''update {grid_outcome_schema}.{grid_outcome_table} b set
        wkb_geometry = st_setSRID(a.wkb_geometry, 4326)
        from (select id, wkb_geometry from {source_grid_schema}.{source_grid_table}) a
        where cast(a.id as int) = cast(b.id as int);
    '''.format(**options)
    execute_sql(update_sql)

    add_geom_idx(options['grid_outcome_schema'], options['grid_outcome_table'], 'wkb_geometry')
    add_primary_key(options['grid_outcome_schema'], options['grid_outcome_table'], 'id')

    # Not every grid cell produces an outcome row, so the rel table is wiped and
    # rebuilt to match the base table exactly; stale extra rows would break cloning
    # of the DbEntity or ConfigEntity.
    logger.info("Writing to relative table {grid_outcome_schema}.{grid_outcome_table}rel".format(**options))
    truncate_table("{grid_outcome_schema}.{grid_outcome_table}rel".format(**options))
    from footprint.main.publishing.data_import_publishing import create_and_populate_relations
    create_and_populate_relations(
        self.config_entity,
        self.config_entity.computed_db_entities(key=DbEntityKey.PH_GRID_OUTCOMES)[0])
def run_water_calculations(self, **kwargs):
    """Compute water use for the scenario, write the results, and rebuild the water rel table."""
    self.format_policy_inputs()

    # Feature classes the calculations read from.
    self.water_class = self.config_entity.db_entity_feature_class(DbEntityKey.WATER)
    self.base_class = self.config_entity.db_entity_feature_class(DbEntityKey.BASE_CANVAS)
    self.climate_zone_class = self.config_entity.db_entity_feature_class(DbEntityKey.CLIMATE_ZONES)

    # Rel table name and the parent-link column (first entry of _meta.parents).
    self.rel_table = parse_schema_and_table(self.water_class._meta.db_table)[1]
    self.rel_column = self.water_class._meta.parents.values()[0].column

    # Both branches report 20% first, so do it once up front.
    self.report_progress(0.2, **kwargs)
    if isinstance(self.config_entity.subclassed, FutureScenario):
        self.end_state_class = self.config_entity.db_entity_feature_class(DbEntityKey.END_STATE)
        water_output_list, options = self.run_future_water_calculations(**kwargs)
    else:
        water_output_list, options = self.run_base_water_calculations()

    self.report_progress(0.7, **kwargs)
    self.write_water_results_to_database(options, water_output_list)

    # Rebuild the rel table: one row per result row, stamped with the update time.
    stamp = datetime.datetime.now()
    truncate_table(options['water_schema'] + '.' + self.rel_table)
    execute_sql('''
    insert into {water_schema}.{rel_table} ({rel_column}, updated)
    select id, '{updated}' from {water_schema}.{water_result_table};'''.format(
        water_schema=options['water_schema'],
        water_result_table=options['water_result_table'],
        rel_table=self.rel_table,
        rel_column=self.rel_column,
        updated=stamp))

    from footprint.main.publishing.data_import_publishing import create_and_populate_relations
    create_and_populate_relations(
        self.config_entity,
        self.config_entity.computed_db_entities(key=DbEntityKey.WATER)[0])

    self.report_progress(0.10000001, **kwargs)
def run_water_calculations(self, **kwargs):
    """Entry point for the water analysis: run the calculations and persist the results."""
    self.format_policy_inputs()

    self.water_class = self.config_entity.db_entity_feature_class(DbEntityKey.WATER)
    self.base_class = self.config_entity.db_entity_feature_class(DbEntityKey.BASE_CANVAS)
    self.climate_zone_class = self.config_entity.db_entity_feature_class(DbEntityKey.CLIMATE_ZONES)

    self.rel_table = parse_schema_and_table(self.water_class._meta.db_table)[1]
    self.rel_column = self.water_class._meta.parents.values()[0].column

    # Future scenarios additionally need the end-state canvas.
    if isinstance(self.config_entity.subclassed, FutureScenario):
        self.report_progress(0.2, **kwargs)
        self.end_state_class = self.config_entity.db_entity_feature_class(DbEntityKey.END_STATE)
        water_output_list, options = self.run_future_water_calculations(**kwargs)
    else:
        self.report_progress(0.2, **kwargs)
        water_output_list, options = self.run_base_water_calculations()

    self.report_progress(0.7, **kwargs)
    self.write_water_results_to_database(options, water_output_list)

    # The rel table is truncated and refilled so it mirrors the result table.
    updated_at = datetime.datetime.now()
    truncate_table('%s.%s' % (options['water_schema'], self.rel_table))
    insert_sql = '''
    insert into {water_schema}.{rel_table} ({rel_column}, updated)
    select id, '{updated}' from {water_schema}.{water_result_table};'''.format(
        water_schema=options['water_schema'],
        water_result_table=options['water_result_table'],
        rel_table=self.rel_table,
        rel_column=self.rel_column,
        updated=updated_at)
    execute_sql(insert_sql)

    from footprint.main.publishing.data_import_publishing import create_and_populate_relations
    water_db_entity = self.config_entity.computed_db_entities(key=DbEntityKey.WATER)[0]
    create_and_populate_relations(self.config_entity, water_db_entity)

    self.report_progress(0.10000001, **kwargs)
def importer(self, config_entity, db_entity, **kwargs):
    """
    Replaces the normal ImportProcessor importer with one to import a sql from disk.

    The db_entity.url points at an uploaded .sql.zip file; the sql inside is rewritten
    (table names, schema, index names) to match the DbEntity key and the import schema,
    then loaded via ImportData. Finally the feature_class_configuration is introspected
    from the imported table and association (rel) tables are created and populated.
    """
    if InformationSchema.objects.table_exists(db_entity.schema, db_entity.table):
        # The table already exists. Skip the import and log a warning.
        logger.warn("The target table for the feature table import already exists. Skipping table import.")
    else:
        # We don't store the upload_id alone, so pull it off the url
        upload_id = db_entity.url.replace('file:///tmp/', '').replace('.sql.zip', '')
        # Unpack the zipfile and return the path the sql file was placed at
        # NOTE(review): nesting below is reconstructed from a mangled source; table_name
        # and path are only defined when the url is a file:// url — confirm that
        # non-file urls cannot reach this branch.
        if db_entity.url.startswith('file://'):
            file_path = db_entity.url[len('file://'):]
            logger.warn(file_path)
            path = unpack_zipfile(file_path, upload_id)
            # The file is always the name of the table defined therein
            table_name = path.split('/')[-1].split('.')[0].lower()
            # Update the db_entity.url from the zip file url to the file_path.
            # This lets ImportData find it.
            db_entity.url = 'file://%s' % path
            logger.info("Url of DbEntity is %s" % db_entity.url)
            db_entity.save()

        # Perform some sed updates to get the sql file ready for import
        regex_substitutions = []
        sql_file_path = file_url_to_path(db_entity.url)

        # Add IF EXISTS to the drop table to prevent an error if the table doesn't exist yet
        regex_substitutions.append((r'DROP TABLE (?!IF EXISTS)', r'DROP TABLE IF EXISTS'))

        # TODO temp, fix an AC bug. It seems that using a capitalized column is problematic (?)
        # The suggested solution is to double quote it, but quotes cause other problems,
        # so we simply lowercase.
        regex_substitutions.append((r' OGC_FID ', ' ogc_fid ', (4, 4)))  # only line 4
        regex_substitutions.append((r'PRIMARY KEY \(ogc_fid\)', 'PRIMARY KEY (ogc_fid)', (4, 4)))  # only line 4
        # TODO end temp fix

        # Update the index name to include the schema. This format matches that created
        # for preconfigured feature tables (see import_data.py)
        spatial_index_name = '{schema}_{key}_geom_idx'.format(schema=db_entity.schema, key=db_entity.key)
        regex_substitutions.append((r'CREATE INDEX ".*" ON', 'CREATE INDEX "%s" ON' % spatial_index_name, (6, 6)))  # only line 6

        # Remove the reference to the geometry_columns, since we use a materialized view
        regex_substitutions.append((r'^DELETE FROM geometry_columns', '--DELETE FROM geometry_columns', (2, 2)))

        # Update the sql to have a unique table name which matches the DbEntity key.
        # Also change public to our import schema to keep it from causing trouble in the
        # public schema. Otherwise we run into all kinds of trouble trying to get the
        # SQL into the system.
        regex_substitutions.append((r'"public"."%s"' % table_name, '"import"."%s"' % db_entity.key))
        regex_substitutions.append((r"'%s'" % table_name, "'%s'" % db_entity.key, (2, 5)))
        regex_substitutions.append((r'"%s_pk"' % table_name, '"%s_pk"' % db_entity.key, (4, 4)))

        # Update public to the import schema
        regex_substitutions.append((r"AddGeometryColumn\('public'", "AddGeometryColumn('%s'" % settings.IMPORT_SCHEMA, (5, 5)))
        regex_substitutions.append((r'"%s_wkb_geometry_geom_idx"' % table_name, '"%s_wkb_geometry_geom_idx"' % db_entity.key, (6, 6)))

        # Log each planned substitution (pattern, replacement), then apply them all in one pass.
        for command in regex_substitutions:
            logger.info("Applying the following substitution %s" % ', '.join(command[0:2]))
        apply_regexes_to_file(sql_file_path, regex_substitutions)

        # Run the actual import of the rewritten sql file.
        ImportData(config_entity=config_entity, db_entity_key=db_entity.key).run()

    # Add our normal primary key in the id column if needed
    add_primary_key_if_needed(db_entity)

    feature_class_creator = FeatureClassCreator(config_entity, db_entity)
    # Inspect the imported table to create the feature_class_configuration
    feature_class_configuration = feature_class_creator.feature_class_configuration_from_introspection()
    # Merge the created feature_class_configuration with the one already defined for the db_entity
    feature_class_creator.update_db_entity(feature_class_configuration)
    logger.info("Finished import for DbEntity: %s, feature_class_configuration: %s" % (db_entity, db_entity.feature_class_configuration))

    # Create association classes and tables and populate them with data
    create_and_populate_relations(config_entity, feature_class_creator.db_entity)