Example #1
    def handle(self, *args, **options):
        drop_db('sample_data')
        project = Project.objects.all()[0]
        client_fixture = ConfigEntityFixture.resolve_config_entity_fixture(
            project)
        default_db_entities = client_fixture.default_db_entities
        for db_entity_config in default_db_entities:
            importer = ImportData(config_entity=project,
                                  db_entity=db_entity_config)
            importer.target_database = settings.DATABASES['sample_data']
            importer.create_target_db_string()

            # For now we only import data for DbEntity instances with a configured database url
            connection_dict = postgres_url_to_connection_dict(
                db_entity_config['url'])
            # The import database currently stores tables as public.[config_entity.key]_[feature_class._meta.db_table (with schema removed)][_sample (for samples)]
            # We always use the table name without the word sample for the target table name
            source_table = "{0}_{1}_{2}".format(project.key,
                                                db_entity_config['table'],
                                                'sample')
            importer._dump_tables_to_target('-t %s' % source_table,
                                            source_schema='public',
                                            target_schema='public',
                                            source_table=source_table,
                                            target_table=source_table,
                                            connection_dict=connection_dict)
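
The comments above spell out the import database's naming scheme: public.[config_entity.key]_[table], with a trailing _sample for sample data sets. As a concrete illustration, with made-up values standing in for project.key and db_entity_config['table']:

# Hypothetical values, purely to illustrate the naming convention above
project_key = 'demo_project'   # stands in for project.key
table = 'base_parcels'         # stands in for db_entity_config['table']

source_table = "{0}_{1}_{2}".format(project_key, table, 'sample')
# -> 'demo_project_base_parcels_sample'   (the sample table in public)
sample_free_table = "{0}_{1}".format(project_key, table)
# -> 'demo_project_base_parcels'          (the name without the word sample)
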
    def __init__(self, **arguments):

        self.arguments = arguments
        self.dump_only = self.arguments.get('dump_only', None)
        self.region_key = self.arguments.get('schema', None)
        self.target_database = database_settings('default')
        # The config_entity whose feature tables should be imported
        self.config_entity = self.arguments.get('config_entity', None)
        if self.config_entity:
            logger.info(
                "Importing DbEntity table into ConfigEntity {0}".format(
                    self.config_entity.subclassed))
        # The optional db_entity_key whose Feature class table should be imported. Otherwise all DbEntity tables
        # are imported for the config_entity, including inherited ones from parent ConfigEntities
        self.db_entity_key = self.arguments.get('db_entity_key', None)
        self.db_entities = filter(
            lambda db_entity: not self.db_entity_key or
            (db_entity.key == self.db_entity_key),
            self.config_entity.owned_db_entities())
        self.test = self.arguments.get('test', None)

        # The psql connection to the target server, normally the django server
        self.create_target_db_string()

        self.command_execution = CommandExecution(logger)

        self.target_connection_dict = dict(
            user=self.target_database['USER'],
            password=self.target_database['PASSWORD'],
            host=self.target_database.get('HOST', 'localhost'),
            port=self.target_database.get('PORT', 5432),
            database=self.target_database['NAME'])

        # Used to get around password authentication
        self.connections = [
            "{host}:*:*:{user}:{password}".format(
                **dict(host=self.target_database['HOST'],
                       user=self.target_database['USER'],
                       password=self.target_database['PASSWORD']))
        ]

        for db_entity in self.db_entities:
            # Create a password file in order to avoid dealing with stdin for passwords
            # This has been bypassed in favor of passing the password to stdin
            if not (db_entity.has_db_url or db_entity.has_file_url):
                raise Exception(
                    "This db_entity, {0}, has no database or file url".format(
                        db_entity.key))
            if db_entity.has_db_url:
                # Set up the connection strings for the db_entity so that we can get around interactive password authentication
                # TODO This is never distinct per db_entity. We could just use self.target_connection_dict
                connection_dict = postgres_url_to_connection_dict(
                    db_entity.url)
                self.connections.append(
                    "{host}:*:*:{user}:{password}".format(**connection_dict))
Example #3
    def __init__(self, **arguments):

        self.arguments = arguments
        self.dump_only = self.arguments.get('dump_only', None)
        self.region_key = self.arguments.get('schema', None)
        self.target_database = database_settings('default')
        # The config_entity whose feature tables should be imported
        self.config_entity = self.arguments.get('config_entity', None)
        if self.config_entity:
            logger.info("Importing DbEntity table into ConfigEntity {0}".format(self.config_entity.subclassed))
        # The optional db_entity_key whose Feature class table should be imported. Otherwise all DbEntity tables
        # are imported for the config_entity, including inherited ones from parent ConfigEntities
        self.db_entity_key = self.arguments.get('db_entity_key', None)
        self.db_entities = filter(lambda db_entity: not self.db_entity_key or (db_entity.key == self.db_entity_key),
                                  self.config_entity.owned_db_entities())
        self.test = self.arguments.get('test', None)

        # The psql connection to the target server, normally the django server
        self.create_target_db_string()

        self.command_execution = CommandExecution(logger)

        self.target_connection_dict = dict(
            user=self.target_database['USER'],
            password=self.target_database['PASSWORD'],
            host=self.target_database.get('HOST', 'localhost'),
            port=self.target_database.get('PORT', 5432),
            database=self.target_database['NAME']
        )

        # Used to get around password authentication
        self.connections = ["{host}:*:*:{user}:{password}".format(**dict(
                    host=self.target_database['HOST'],
                    user=self.target_database['USER'],
                    password=self.target_database['PASSWORD']))]


        for db_entity in self.db_entities:
            # Create a password file in order to avoid dealing with stdin for passwords
            # This has been bypassed in favor of passing the password to stdin
            if not (db_entity.has_db_url or db_entity.has_file_url):
                raise Exception("This db_entity, {0}, has no database or file url".format(db_entity.key))
            if db_entity.has_db_url:
                # Set up the connection strings for the db_entity so that we can get around interactive password authentication
                # TODO This is never distinct per db_entity. We could just use self.target_connection_dict
                connection_dict = postgres_url_to_connection_dict(db_entity.url)
                self.connections.append("{host}:*:*:{user}:{password}".format(**connection_dict))
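
The connection strings collected into self.connections above follow libpq's .pgpass format (hostname:port:database:username:password, with * as a wildcard), which is how the importer avoids interactive password prompts. Below is a minimal sketch of how such entries could be written to a temporary password file and handed to libpq via the PGPASSFILE environment variable; the write_pgpass helper is hypothetical and not part of the project:

import os
import stat
import tempfile

def write_pgpass(connections):
    """Write .pgpass-style lines to a temporary file and point libpq at it.

    `connections` is a list of 'host:*:*:user:password' strings, as built in
    __init__ above. Illustrative helper only, not project code.
    """
    fd, path = tempfile.mkstemp(suffix='.pgpass')
    with os.fdopen(fd, 'w') as f:
        f.write('\n'.join(connections) + '\n')
    # libpq ignores the file unless only the owner can read it
    os.chmod(path, stat.S_IRUSR | stat.S_IWUSR)
    os.environ['PGPASSFILE'] = path
    return path
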
Example #4
    def handle(self, *args, **options):
        drop_db('sample_data')
        project = Project.objects.all()[0]
        client_fixture = ConfigEntityFixture.resolve_config_entity_fixture(project)
        default_db_entities = client_fixture.default_db_entities
        for db_entity_config in default_db_entities:
            importer = ImportData(config_entity=project, db_entity=db_entity_config)
            importer.target_database = settings.DATABASES['sample_data']
            importer.create_target_db_string()

            # For now we only import data for DbEntity instances with a configured database url
            connection_dict = postgres_url_to_connection_dict(db_entity_config['url'])
            # The import database currently stores tables as public.[config_entity.key]_[feature_class._meta.db_table (with schema removed)][_sample (for samples)]
            # We always use the table name without the word sample for the target table name
            source_table = "{0}_{1}_{2}".format(project.key, db_entity_config['table'], 'sample')
            importer._dump_tables_to_target('-t %s' % source_table,
                                            source_schema='public',
                                            target_schema='public',
                                            source_table=source_table,
                                            target_table=source_table,
                                            connection_dict=connection_dict)
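
Both handle() examples rely on postgres_url_to_connection_dict to turn a DbEntity's database URL into the keyword arguments used for dumping and restoring. A rough Python 3 sketch of what such a helper might look like, assuming standard postgres://user:password@host:port/dbname URLs; the project's actual helper may handle more cases:

from urllib.parse import urlparse

def postgres_url_to_connection_dict(url):
    """Parse postgres://user:password@host:port/dbname into a connection dict.

    Sketch under the assumption of standard URLs; not the project's implementation.
    """
    parsed = urlparse(url)
    return dict(
        user=parsed.username,
        password=parsed.password,
        host=parsed.hostname or 'localhost',
        port=parsed.port or 5432,
        database=parsed.path.lstrip('/'),
    )

# e.g. postgres_url_to_connection_dict('postgres://uf:secret@db.example.com:5432/sample')
# -> {'user': 'uf', 'password': 'secret', 'host': 'db.example.com',
#     'port': 5432, 'database': 'sample'}
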
Example #5
    def import_data(self, **kwargs):
        """
            Imports data from an external source to create the test data
            :return a two item tuple containing the region that was imported and a list of the imported projects
        """

        # Calculate a sample lat/lon box of the config_entity
        config_entity = self.config_entity
        if self.test:
            bounds = chop_geom(config_entity.bounds, 0.90)
            logger.info(u"Creating subselection with extents: {0}. This will be used to crop any table that doesn't have a sample version".format(bounds))

        conn = psycopg2.connect(**pg_connection_parameters(settings.DATABASES['default']))
        conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
        cursor = conn.cursor()

        for db_entity in self.db_entities:

            # This is the index on wkb_geometry.
            spatial_index_name = '{schema}_{key}_geom_idx'.format(schema=db_entity.schema, key=db_entity.key)

            table = db_entity.table

            if db_entity.has_file_url:
                # Remove any table of the same name from the import schema. This is unlikely since imported
                # tables have timestamps
                drop_table('"%s"."%s"' % (settings.IMPORT_SCHEMA, db_entity.key))
                sql_file_path = file_url_to_path(db_entity.url)
                # Create a command that pipes shp2pgsql to psql
                db_entity.srid = db_entity.srid or '4326'
                logger.info("verifying SRID {0}".format(db_entity.srid))
                verify_srid(db_entity.srid)

                # Create the import schema if needed
                PGNamespace.objects.create_schema(settings.IMPORT_SCHEMA)

                # Import the table
                import_sql_command = '/usr/bin/psql {0} -f {1}'.format(self.target_database_connection, sql_file_path)
                stdin = "{0}\n{1}".format(self.arguments.get('password', None), self.target_database.get('PASSWORD', None))
                results = self.command_execution.run(import_sql_command, stdin=stdin)
                if results.returncode:
                    raise Exception(results.stderr.text)

                # We expect a table in the public schema with a name based on db_entity.key
                # Move the table from the public schema to the db_entity schema
                move_to_schema = "alter table {0}.{1} set schema {2};".format(settings.IMPORT_SCHEMA, db_entity.key, db_entity.schema)
                logger.info("Moving import file table to schema: %s" % move_to_schema)
                cursor.execute(move_to_schema)
                # Drop the constraint that enforces the srid of the wkb_geometry if one exists
                drop_constraint = '''alter table {0}.{1} drop constraint if exists enforce_srid_wkb_geometry'''.format(db_entity.schema, db_entity.key)
                logger.info("Dropping constraint on wkb_geometry: %s" % drop_constraint)
                cursor.execute(drop_constraint)

                # Note we're not creating an index on wkb_geometry
                # here because imported files already have an index
                # created.

            elif db_entity.has_db_url:
                # The import database currently stores tables as
                # public.[config_entity.key]_[feature_class._meta.db_table (with schema removed)][_sample (for samples)]
                #
                # We always use the table name without the word sample for the target table name
                if settings.USE_SAMPLE_DATA_SETS or self.test:
                    source_table = "{0}_{1}_{2}".format(
                        config_entity.import_key or config_entity.key, db_entity.table, 'sample')
                else:
                    source_table = "{0}_{1}".format(config_entity.import_key or config_entity.key, db_entity.table)

                connection_dict = postgres_url_to_connection_dict(db_entity.url)
                self._dump_tables_to_target(
                    '-t %s' % source_table,
                    source_schema='public',
                    target_schema=db_entity.schema,
                    source_table=source_table,
                    target_table=table,
                    connection_dict=connection_dict)

                # Create a spatial index
                spatial_index = '''create index {index_name} on {schema}.{key} using GIST (wkb_geometry);'''.format(
                    index_name=spatial_index_name,
                    schema=db_entity.schema, key=db_entity.key)
                cursor.execute(spatial_index)

            # Whether the table comes from our server or an upload, we want to transform the SRID to 4326
            transform_to_4326 = 'ALTER TABLE {schema}.{table} ALTER COLUMN wkb_geometry ' \
                                'TYPE Geometry(geometry, 4326) ' \
                                'USING ST_Transform(ST_Force_2d(wkb_geometry), 4326);'.format
            logger.info("Transforming to 4326: %s" % transform_to_4326(schema=db_entity.schema, table=db_entity.table))

            cursor.execute(transform_to_4326(schema=db_entity.schema, table=db_entity.table))

            # Now cluster the data and vacuum so that future joins are faster:
            # * CLUSTER rewrites the data on disk so that rows that are spatially near each
            #   other are also near each other on disk
            # * VACUUM cleans up disk space, removing sparse holes on disk.
            # * ANALYZE regenerates statistics about wkb_geometry so that the query planner can make
            #   better decisions.

            logger.info('Clustering %s.%s to optimize spatial joins', db_entity.schema, table)
            cluster = 'CLUSTER {index_name} ON {target_schema}.{target_table};'.format(
                index_name=spatial_index_name,
                target_schema=db_entity.schema,
                target_table=table)
            cursor.execute(cluster)

            logger.info('Vacuuming and analyzing %s.%s.', db_entity.schema, table)
            analyze = 'VACUUM ANALYZE {target_schema}.{target_table};'.format(
                target_schema=db_entity.schema,
                target_table=table)

            cursor.execute(analyze)

            logger.info("Finished importing data for DbEntity table {0}.{1}".format(db_entity.schema, db_entity.key))
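
Example #5 finishes every database-sourced import with the same maintenance pass: create a GIST index on wkb_geometry, force 2D geometry and reproject to EPSG:4326, cluster the table on that index, then vacuum and analyze. A condensed psycopg2 sketch of that sequence follows; the connection settings and the demo schema/table names are assumptions standing in for settings.DATABASES['default'], db_entity.schema and db_entity.table:

import psycopg2

# Assumed connection settings, standing in for settings.DATABASES['default']
conn = psycopg2.connect(dbname='urbanfootprint', user='postgres',
                        password='postgres', host='localhost')
# VACUUM cannot run inside a transaction block, hence autocommit
conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
cursor = conn.cursor()

schema, table = 'demo_schema', 'demo_table'   # assumed identifiers
index_name = '{0}_{1}_geom_idx'.format(schema, table)

# Spatial index on wkb_geometry (skipped for file imports, which arrive indexed)
cursor.execute('CREATE INDEX {0} ON {1}.{2} USING GIST (wkb_geometry);'.format(
    index_name, schema, table))

# Force 2D and reproject to EPSG:4326 (ST_Force2D is the current spelling of ST_Force_2d)
cursor.execute(
    'ALTER TABLE {0}.{1} ALTER COLUMN wkb_geometry '
    'TYPE Geometry(geometry, 4326) '
    'USING ST_Transform(ST_Force2D(wkb_geometry), 4326);'.format(schema, table))

# CLUSTER orders rows on disk by the index; VACUUM ANALYZE reclaims space and
# refreshes planner statistics for the new layout
cursor.execute('CLUSTER {0} ON {1}.{2};'.format(index_name, schema, table))
cursor.execute('VACUUM ANALYZE {0}.{1};'.format(schema, table))
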