Example No. 1
def query_geo_table(
        db_name,
        query,
        geom_col='geom',
        host='localhost',
        username='******',
        password=PG_PASSWORD,
        debug=False
):
    """
    Query a geo table in a SQL database and get the result as a ``geopandas.GeoDataFrame``

    Be aware of the name of the geometry column: PostGIS tables typically call it 'geom',
    while geopandas uses 'geometry' by default.

    :param db_name: 'name_of_the_database'
    :param query: 'SELECT gid, pop2015, geom FROM my_table WHERE pop2015 > 1000'
    :param geom_col: the name of the geometry column. Should either be 'geom' or 'geometry'
    :param host: defaults to 'localhost', but could also be an IP address such as '192.168.1.14'
    :return: ``geopandas.GeoDataFrame``
    """
    config = {'host': host, 'password': password, 'username': username, 'debug': debug}

    if debug:
        print('QUERYING...')
        print('-' * 40)
        print(query)

    connection = make_database_connection(db_name, 'psycopg2', **config)

    gdf = gpd.GeoDataFrame.from_postgis(query, connection, geom_col=geom_col)

    connection.close()

    return gdf
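A minimal usage sketch, assuming the module-level pieces referenced above (make_database_connection, PG_PASSWORD) are available; the database, table, and column names are made up for illustration:

# Hypothetical call: fetch tracts with more than 1,000 residents as a GeoDataFrame
tracts = query_geo_table(
    'my_gis_db',
    'SELECT gid, pop2015, geom FROM tracts WHERE pop2015 > 1000',
    geom_col='geom',
    debug=True
)
print(tracts.head())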
Example No. 2
def query_table(
        db_name,
        query,
        host='localhost',
        username='******',
        password=PG_PASSWORD,
        debug=False
):
    """
    Query a table in a database and get the result as a ``pandas.DataFrame``

    :param db_name: 'name_of_the_database'
    :param query: 'SELECT * FROM my_table'
    :param host: defaults to 'localhost', but could also be an IP address such as '192.168.1.14'
    :return: ``pandas.DataFrame``
    """
    config = {'host': host, 'password': password, 'username': username, 'debug': debug}

    if debug:
        print('QUERYING...')
        print('-' * 40)
        print(query)

    engine = make_database_connection(db_name, 'sqlalchemy', **config)

    df = pd.read_sql(query, engine)

    engine.dispose()

    return df
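The same pattern works for plain tabular queries; a brief sketch with a made-up database and table name:

# Hypothetical call: read an entire table into pandas
stops = query_table('my_gis_db', 'SELECT * FROM bus_stops', debug=True)
print(stops.shape)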
Example No. 3
def dataframe_to_postgis(db_name,
                         dataframe,
                         table_name,
                         host='localhost',
                         username='******',
                         password=PG_PASSWORD,
                         debug=False):
    """
    Write a ``pandas.DataFrame`` to a PostgreSQL database.

    :param db_name: 'name_of_the_database'
    :param dataframe: ``pandas.DataFrame``
    :param table_name: 'name_of_the_table'
    :param host: defaults to 'localhost', but could also be an IP address such as '192.168.1.14'
    :return: None
    """
    config = {
        'host': host,
        'password': password,
        'username': username,
        'debug': debug
    }

    start_time = time.time()

    if debug:
        print(f'WRITING {table_name}//{db_name}//{host} FROM DATAFRAME')

    # FORCE ALL COLUMN NAMES TO LOWER-CASE (PostgreSQL folds unquoted identifiers to lower-case)
    dataframe.columns = [x.lower() for x in dataframe.columns]

    # CONNECT TO DATABASE, WRITE DATAFRAME, THEN DISCONNECT
    engine = make_database_connection(db_name, 'sqlalchemy', **config)
    dataframe.to_sql(table_name, engine, if_exists='replace')
    engine.dispose()

    if debug:
        # REPORT THE RUNTIME
        runtime = time.time() - start_time
        print(f'FINISHED IN {runtime} SECONDS')
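A short sketch of how this writer might be called; the DataFrame and names are invented, and mixed-case column names are used on purpose to show the lower-casing step:

import pandas as pd

# Hypothetical call: column names get folded to lower-case before the write
df = pd.DataFrame({'Station': ['A', 'B'], 'Riders': [120, 340]})
dataframe_to_postgis('my_gis_db', df, 'station_riders', debug=True)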
Example No. 4
def execute_query(database,
                  query,
                  host='localhost',
                  username='******',
                  password=PG_PASSWORD,
                  debug=False):
    """
    Use ``psycopg2`` to execute and commit a SQL command in the database.

    :param database: 'name_of_the_database'
    :param query: 'DROP VIEW IF EXISTS my_view;'
    :param host: defaults to 'localhost', but could also be an IP address such as '192.168.1.14'
    :param debug: bool, controls whether the query is printed to the console
    :return: None
    """
    config = {
        'host': host,
        'password': password,
        'username': username,
        'debug': debug
    }

    if debug:
        start_time = time.time()
        print('UPDATING via psycopg2:')
        print('\t', query)

    connection = make_database_connection(database, 'psycopg2', **config)
    cursor = connection.cursor()

    cursor.execute(query)

    cursor.close()
    connection.commit()
    connection.close()

    if debug:
        runtime = round(time.time() - start_time, 2)
        print(f'\t COMMITTED IN - {runtime} seconds')
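For example, this function could be used to drop and rebuild a view; the view, table, and database names below are hypothetical:

# Hypothetical calls: each statement is executed and committed separately
execute_query('my_gis_db', 'DROP VIEW IF EXISTS busy_tracts;', debug=True)
execute_query(
    'my_gis_db',
    'CREATE VIEW busy_tracts AS SELECT * FROM tracts WHERE pop2015 > 1000;',
    debug=True
)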
Example No. 5
def geodataframe_to_postgis(database,
                            geodataframe,
                            output_table_name,
                            output_epsg=None,
                            host='localhost',
                            username='******',
                            password=PG_PASSWORD,
                            debug=False):
    """
    Write a ``geopandas.GeoDataFrame`` to a PostGIS table in a SQL database.

    Assumes that the geometry column has already been named 'geometry'

    :param database: 'name_of_the_database'
    :param geodataframe: geopandas.GeoDataFrame
    :param output_table_name: 'name_of_the_output_table'
    :param output_epsg: if not None, will reproject data from input EPSG to specified EPSG
    :param host: defaults to 'localhost', but could also be an IP address such as '192.168.1.14'
    :return: None
    """
    start_time = time.time()
    config = {
        'host': host,
        'password': password,
        'username': username,
        'debug': debug
    }

    # Get the geometry type
    # It's possible there are both MULTIPOLYGONS and POLYGONS. This grabs the MULTI variant
    geom_types = list(geodataframe.geometry.geom_type.unique())
    geom_typ = max(geom_types, key=len).upper()

    if debug:
        print(f'\t PROCESSING \t {geom_typ} \t {output_table_name}')

    # Get the EPSG value
    try:
        # gdf should have a CRS stored like this: {'init': 'epsg:4326'}
        epsg_code = int(geodataframe.crs['init'].split(':')[1])
    except (KeyError, TypeError, AttributeError):
        return 'This geodataframe does not have a valid EPSG. Aborting.'

    # Sanitize the columns before writing to the database
    # Make all column names lower case
    geodataframe.columns = [x.lower() for x in geodataframe.columns]

    # Replace the 'geom' column with 'geometry'
    if 'geom' in geodataframe.columns:
        geodataframe['geometry'] = geodataframe['geom']
        geodataframe.drop(columns='geom', inplace=True)

    # Drop the 'gid' column
    if 'gid' in geodataframe.columns:
        geodataframe.drop(columns='gid', inplace=True)

    # Rename 'unique_id' to 'old_uid'
    if 'unique_id' in geodataframe.columns:
        geodataframe['old_uid'] = geodataframe['unique_id']
        geodataframe.drop(columns='unique_id', inplace=True)

    # Build a 'geom' column using geoalchemy2 and drop the source 'geometry' column
    geodataframe['geom'] = geodataframe['geometry'].apply(
        lambda x: WKTElement(x.wkt, srid=epsg_code))
    geodataframe.drop(columns='geometry', inplace=True)

    # write geodataframe to SQL database
    if debug:
        print(f'\t WRITING TO - {database} /// {host}')

    engine = make_database_connection(database, 'sqlalchemy', **config)
    geodataframe.to_sql(output_table_name,
                        engine,
                        if_exists='replace',
                        index=True,
                        index_label='gid',
                        dtype={'geom': Geometry(geom_typ, srid=epsg_code)})
    engine.dispose()

    if debug:
        runtime = round((time.time() - start_time), 2)
        print(f'\t FINISHED IN {runtime} seconds')

    # If provided an EPSG, alter whatever the native projection was to the output_epsg
    if output_epsg:
        project_spatial_table(database, output_table_name, geom_typ, epsg_code,
                              output_epsg, **config)

    # Add a unique_id column and do a spatial index
    prep_spatial_table(database, output_table_name, **config)
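A usage sketch under the function's own assumptions (a CRS stored as {'init': 'epsg:XXXX'} and a geometry column named 'geometry'); the file path, database, and table names are made up:

import geopandas as gpd

# Hypothetical call: load a shapefile and push it to PostGIS, reprojected to EPSG 26918
tracts = gpd.read_file('tracts.shp')
geodataframe_to_postgis('my_gis_db', tracts, 'tracts', output_epsg=26918, debug=True)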
Example No. 6
def make_new_database(database_name: str,
                      host: str = 'localhost',
                      username: str = 'postgres',
                      password: str = PG_PASSWORD,
                      debug: bool = False):
    """
    Create a new PostgreSQL database, load PostGIS, and define a custom hexagon function

    :param database_name: 'name_of_the_database'
    :param host: name of the pgSQL host (string). eg: 'localhost' or '192.168.1.14'
    :param username: valid PostgreSQL database username (string). eg: 'postgres'
    :param password: password for the supplied username (string). eg: 'mypassword123'
    :param debug: boolean to print messages to console
    :return: None
    """

    config = {
        'host': host,
        'password': password,
        'username': username,
        'debug': debug
    }

    # check to see if this database already exists
    exists_qry = f""" SELECT EXISTS(
                        SELECT datname FROM pg_catalog.pg_database WHERE lower(datname) = lower('{database_name}')
                     );  """

    exist_query_response = query_table('postgres', exists_qry, **config)

    exists_result = [
        str(row.exists) for idx, row in exist_query_response.iterrows()
    ]

    if 'False' in exists_result:
        if debug:
            print(f'Make new database routine for: {database_name}')

        make_db = f"CREATE DATABASE {database_name};"

        c_postgres = make_database_connection('postgres', 'psycopg2', **config)
        c_postgres.set_isolation_level(
            psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
        cursor = c_postgres.cursor()

        cursor.execute(make_db)

        cursor.close()
        c_postgres.commit()
        c_postgres.close()

        # Check if PostGIS already exists by default. If not, add it.
        if 'geometry_columns' not in get_full_list_of_tables_in_db(
                database_name, **config):
            msg = f'Adding postGIS extension to {database_name}'
            execute_query(database_name, 'CREATE EXTENSION postGIS;', **config)
        else:
            msg = f'PostGIS exists by default in {database_name}'
        if debug:
            print(msg)

        # Load the custom SQL hexagon grid function
        load_hexgrid_function(database_name, **config)

    else:
        if debug:
            print(f'{database_name} already exists at {host}')
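Putting the pieces together, a hedged end-to-end sketch: create the database (skipped if it already exists), then load a small first table into it. The names are invented for illustration:

import pandas as pd

# Hypothetical workflow: new database, then a first table
make_new_database('my_gis_db', debug=True)
df = pd.DataFrame({'name': ['a', 'b'], 'value': [1, 2]})
dataframe_to_postgis('my_gis_db', df, 'example_table', debug=True)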