def exec_sql(sql, user=settings.get('DB_USER'), database=settings.get('DB_NAME')):
    """Run ``sql`` on a new autocommit connection and return the cursor.

    The defaults for ``user`` and ``database`` are read from ``settings``
    once, when this function is defined. The connection is intentionally
    left open: callers read their results from the returned cursor.
    """
    conn = psycopg2.connect(user=user, dbname=database)
    # Autocommit: every statement is committed on its own, without an
    # enclosing transaction block.
    conn.set_session(autocommit=True)
    cur = conn.cursor()
    cur.execute(sql)
    return cur
def download_pbf():
    """Download the PBF extract with wget unless PBF_FILE is configured.

    When the PBF_FILE setting is truthy, a notice is printed and nothing is
    downloaded. Otherwise PBF_FILE_URL is fetched into IMPORT_DIR.
    """
    local_pbf = settings.get("PBF_FILE")
    if local_pbf:
        print("skip pbf download since PBF_FILE env is defined: {}".format(local_pbf))
        return

    source_url = settings.get("PBF_FILE_URL")
    target_dir = settings.get("IMPORT_DIR")
    # --no-clobber: wget leaves an already downloaded file untouched.
    wget_command = ["wget", "--no-clobber", "--directory-prefix", target_dir, source_url]
    check_call(wget_command)
def export_to_tsv(query, path):
    """Export the rows produced by ``query`` to ``path`` as tab-separated CSV.

    The query is wrapped in ``COPY (...) TO STDOUT`` with a header row and
    psql's ``-o`` option redirects that output into ``path``.

    ``query`` is interpolated into the SQL text verbatim, so it must come
    from trusted code.

    Raises whatever ``check_call`` raises when psql exits with a non-zero
    status.
    """
    copy_statement = "COPY ({}) TO STDOUT WITH DELIMITER '\t' CSV HEADER".format(query)
    check_call([
        "psql",
        # psql's positional arguments are "dbname [username]". Passing the
        # user first and the database second connected with the two values
        # swapped whenever they differ, so both are named explicitly.
        "--username={}".format(settings.get("DB_USER")),
        "--dbname={}".format(settings.get("DB_NAME")),
        "-c", copy_statement,
        "-o", path,
    ])
def create_database():
    """Create the DB_USER role and the DB_NAME database it owns.

    The database is created from the ``template_postgis`` template. Both
    statements are executed against the ``postgres`` database.
    """
    db_user = settings.get("DB_USER")
    statements = (
        "CREATE USER {} WITH PASSWORD '{}';".format(db_user, settings.get("DB_PASSWORD")),
        "CREATE DATABASE {} WITH TEMPLATE template_postgis OWNER {};".format(
            settings.get("DB_NAME"), db_user),
    )
    # The role must exist before it can be named as the database owner.
    for statement in statements:
        exec_sql(statement, user="******", database="postgres")
def restore_wikipedia_dump():
    """Restore the downloaded Wikipedia dump and reassign its ownership.

    The dump file name is the last path segment of WIKIPEDIA_DUMP_URL and
    the file is expected inside IMPORT_DIR. The restore runs as the role
    ``brian``, which ``_create_temporary_user_for_dump`` is called to set
    up first; afterwards ``_alter_wikipedia_dump_owner`` is called.

    Raises whatever ``check_call`` raises when pg_restore exits with a
    non-zero status.
    """
    _create_temporary_user_for_dump()

    dump_filename = settings.get("WIKIPEDIA_DUMP_URL").split("/")[-1]
    dump_path = "{}/{}".format(settings.get("IMPORT_DIR"), dump_filename)

    check_call([
        "pg_restore",
        "-j", "2",
        # Restore into the configured database instead of a hard-coded
        # "osm": the surrounding helpers operate on DB_NAME, so a different
        # DB_NAME previously restored the tables into the wrong database.
        "--dbname", settings.get("DB_NAME"),
        "-U", "brian",
        dump_path,
    ])

    _alter_wikipedia_dump_owner()
def download_pbf():
    """Download the PBF extract with wget unless PBF_FILE is configured.

    When the PBF_FILE setting is truthy, a notice is printed and nothing is
    downloaded. Otherwise PBF_FILE_URL is fetched into IMPORT_DIR.

    Raises whatever ``check_call`` raises when wget exits with a non-zero
    status.
    """
    if settings.get("PBF_FILE"):
        # Call form of print: with a single argument it behaves the same on
        # Python 2 and, unlike the print statement, is valid on Python 3.
        print("skip pbf download since PBF_FILE env is defined: {}".format(
            settings.get("PBF_FILE")))
        return

    url = settings.get("PBF_FILE_URL")
    destination_dir = settings.get("IMPORT_DIR")
    # --no-clobber: wget leaves an already downloaded file untouched.
    check_call(
        ["wget", "--no-clobber", "--directory-prefix", destination_dir, url])
def _recreate_database():
    """Drop the configured database and role, then run ``init_database``."""
    print("drop database")
    # The database is dropped before the role that is dropped next.
    teardown_statements = [
        "DROP DATABASE IF EXISTS {};".format(settings.get("DB_NAME")),
        "DROP USER IF EXISTS {};".format(settings.get("DB_USER")),
    ]
    for statement in teardown_statements:
        exec_sql(statement, user="******", database="postgres")

    print("create database")
    init_database()
def create_database():
    """Create the DB_USER role, then the DB_NAME database owned by it.

    The database is created from the ``template_postgis`` template; both
    statements run against the ``postgres`` database.
    """
    role_sql = "CREATE USER {} WITH PASSWORD '{}';".format(
        settings.get("DB_USER"),
        settings.get("DB_PASSWORD"),
    )
    database_sql = "CREATE DATABASE {} WITH TEMPLATE template_postgis OWNER {};".format(
        settings.get("DB_NAME"),
        settings.get("DB_USER"),
    )

    admin_target = {"user": "******", "database": "postgres"}
    exec_sql(role_sql, **admin_target)
    exec_sql(database_sql, **admin_target)
def exec_sql_from_file(filename, user=settings.get("DB_USER"), database=settings.get("DB_NAME"), cwd=""):
    """Execute the SQL script ``filename`` with psql, discarding its stdout.

    Args:
        filename: name of the SQL file to run.
        user: role to connect as (default: DB_USER, read at definition time).
        database: database to connect to (default: DB_NAME, read at
            definition time).
        cwd: directory containing ``filename``; when empty, ``filename`` is
            passed to psql unchanged.

    Raises whatever ``check_call`` raises when psql exits with a non-zero
    status; ``ON_ERROR_STOP=1`` makes psql stop at the first failing
    statement.
    """
    # An empty cwd used to produce "/<filename>", i.e. a path in the
    # filesystem root; a non-empty cwd is joined exactly as before.
    sql_path = "{}/{}".format(cwd, filename) if cwd else filename

    log.info("start executing sql file {}".format(filename))
    # The context manager closes the devnull handle, which used to leak.
    with open(os.devnull, 'w') as devnull:
        check_call([
            "psql",
            "-v", "ON_ERROR_STOP=1",
            "--username={}".format(user),
            # Honour the ``database`` argument; it used to be ignored in
            # favour of the DB_NAME setting.
            "--dbname={}".format(database),
            "--file={}".format(sql_path),
        ], stdout=devnull)
    log.info("finished executing sql file {}".format(filename))
def exec_sql_from_file(filename, user=settings.get("DB_USER"), database=settings.get("DB_NAME"), cwd=""):
    """Execute the SQL script ``filename`` with psql, discarding its stdout.

    Args:
        filename: name of the SQL file to run.
        user: role to connect as (default: DB_USER, read at definition time).
        database: database to connect to (default: DB_NAME, read at
            definition time).
        cwd: directory containing ``filename``; when empty, ``filename`` is
            passed to psql unchanged.

    Raises whatever ``check_call`` raises when psql exits with a non-zero
    status.
    """
    # An empty cwd used to produce "/<filename>", i.e. a path in the
    # filesystem root; a non-empty cwd is joined exactly as before.
    sql_path = "{}/{}".format(cwd, filename) if cwd else filename

    log.info("start executing sql file {}".format(filename))
    # NOTE(review): ON_ERROR_STOP is not set here, so psql keeps going after
    # a failing statement -- confirm whether that is intended.
    # The context manager closes the devnull handle, which used to leak.
    with open(os.devnull, 'w') as devnull:
        check_call([
            "psql",
            "--username={}".format(user),
            # Honour the ``database`` argument; it used to be ignored in
            # favour of the DB_NAME setting.
            "--dbname={}".format(database),
            "--file={}".format(sql_path),
        ], stdout=devnull)
    log.info("finished executing sql file {}".format(filename))
def exec_sql_from_file(filename, user=settings.get("DB_USER"), database=settings.get("DB_NAME"), cwd="", parallelize=False):
    """Execute the SQL script ``filename``, discarding the client's stdout.

    Args:
        filename: name of the SQL file to run.
        user: role to connect as (default: DB_USER, read at definition time).
        database: database to connect to (default: DB_NAME, read at
            definition time).
        cwd: directory containing ``filename``; when empty, ``filename`` is
            passed to the client unchanged.
        parallelize: run the file with ``par_psql`` instead of ``psql``.

    Raises whatever ``check_call`` raises when the client exits with a
    non-zero status.
    """
    # An empty cwd used to produce "/<filename>", i.e. a path in the
    # filesystem root; a non-empty cwd is joined exactly as before.
    sql_path = "{}/{}".format(cwd, filename) if cwd else filename
    client = "par_psql" if parallelize else "psql"

    log.info("start executing sql file {}".format(filename))
    # The context manager closes the devnull handle, which used to leak.
    with open(os.devnull, 'w') as devnull:
        check_call([
            client,
            "-v", "ON_ERROR_STOP=1",
            "--username={}".format(user),
            # Honour the ``database`` argument; it used to be ignored in
            # favour of the DB_NAME setting.
            "--dbname={}".format(database),
            "--file={}".format(sql_path),
        ], stdout=devnull)
    log.info("finished executing sql file {}".format(filename))
def import_pbf_file():
    """Import the PBF extract into PostGIS by invoking ``imposm import``.

    The file name is PBF_FILE when set, otherwise the last path segment of
    PBF_FILE_URL. It is appended to IMPORT_DIR without a separator.
    """
    import_dir = settings.get("IMPORT_DIR")

    pbf_filename = settings.get("PBF_FILE")
    if not pbf_filename:
        pbf_filename = settings.get("PBF_FILE_URL").split('/')[-1]
    pbf_filepath = import_dir + pbf_filename

    connection_parts = {
        "user": settings.get("DB_USER"),
        "host": settings.get("DB_HOST"),
        "db_name": settings.get("DB_NAME"),
    }
    imposm_connection = "postgis://{user}@{host}/{db_name}".format(**connection_parts)

    imposm_command = [
        "imposm", "import",
        "-connection", imposm_connection,
        "-mapping", "{}/mapping.yml".format(import_dir),
        "-dbschema-import", settings.get("DB_SCHEMA"),
        "-read", pbf_filepath,
        "-write",
        "-overwritecache",
    ]
    check_call(imposm_command)
def vacuum_database():
    """Run ``vacuumdb --analyze`` on DB_NAME unless SKIP_VACUUM is set.

    The command connects as ``postgres``, uses VACUUM_JOBS parallel jobs and
    has its stdout discarded.

    Raises whatever ``check_call`` raises when vacuumdb exits with a
    non-zero status.
    """
    if settings.get('SKIP_VACUUM'):
        return

    log.info("start vacuum database")
    # The context manager closes the devnull handle, which used to leak.
    with open(os.devnull, 'w') as devnull:
        check_call([
            "vacuumdb",
            "--username=postgres",
            "--dbname={}".format(settings.get("DB_NAME")),
            "--analyze",
            "--jobs={}".format(settings.get('VACUUM_JOBS')),
        ], stdout=devnull)
    log.info("finished vacuum database")
def vacuum_database():
    """Issue ``VACUUM ANALYZE`` through ``exec_sql`` unless SKIP_VACUUM is set."""
    skip_requested = settings.get('SKIP_VACUUM')
    if not skip_requested:
        log.info("start vacuum database")
        exec_sql('VACUUM ANALYZE', user="******")
        log.info("finished vacuum database")
def _create_temporary_user_for_dump():
    """Ensure the role ``brian`` exists and grant it all privileges on DB_NAME.

    The role is only created when it is missing, so calling this again
    (for example after an interrupted import) no longer fails on
    ``CREATE ROLE`` with a "role already exists" error.
    """
    query = """
        DO $$
        BEGIN
            IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'brian') THEN
                CREATE ROLE brian LOGIN PASSWORD 'brian';
            END IF;
        END
        $$;
        GRANT ALL PRIVILEGES ON DATABASE {database} to brian;
    """.format(database=settings.get("DB_NAME"))
    exec_sql(query, user="******")
def _alter_wikipedia_dump_owner():
    """Make DB_USER the owner of the wikipedia_article and wikipedia_redirect tables."""
    new_owner = settings.get("DB_USER")
    ownership_sql = """
        ALTER TABLE wikipedia_article OWNER TO {username};
        ALTER TABLE wikipedia_redirect OWNER TO {username};
    """.format(username=new_owner)
    exec_sql(ownership_sql, user="******")
def init_database():
    """Initialise the database unless DB_NAME is already listed in pg_database.

    When the database is missing, the extension, database, custom-type and
    notice helpers are called in that order.
    """
    db_name = settings.get("DB_NAME")
    exists_query = "SELECT 1 AS result FROM pg_database WHERE datname='{}'".format(db_name)
    already_initialized = exists(exists_query, user="******", database="postgres")
    if already_initialized:
        print("skip database init, since it is already initialized")
        return

    create_extensions()
    create_database()
    create_custom_types()
    disable_notices()
def import_wikipedia():
    """Download and restore the Wikipedia dumps, then post-process them.

    Nothing happens when a ``wikipedia_article`` table already exists. When
    SKIP_WIKIPEDIA is set, only ``create_basic_scaffolding`` is called.
    """
    table_query = "SELECT * FROM information_schema.tables WHERE table_name='wikipedia_article'"
    if exists(table_query):
        log.info("skip wikipedia import, since table already exists")
        return

    if settings.get("SKIP_WIKIPEDIA"):
        log.info(
            "SKIP_WIKIPEDIA = True in .env file, therefore skipping import and only create basic scaffolding"
        )
        create_basic_scaffolding()
        return

    # Articles first, then redirects.
    for url_setting in ("WIKIPEDIA_DUMP_URL", "WIKIPEDIA_REDIRECTS_DUMP_URL"):
        download_dump(settings.get(url_setting))

    restore_wikipedia_dumps()
    run_in_parallel(prepare_wikipedia_redirects, create_wikipedia_index)
def init_database():
    """Run the database setup steps unless DB_NAME already exists.

    Existence is checked with a lookup in ``pg_database`` executed against
    the ``postgres`` database.
    """
    lookup_sql = "SELECT 1 AS result FROM pg_database WHERE datname='{}'".format(
        settings.get("DB_NAME"))

    if exists(lookup_sql, user="******", database="postgres"):
        print("skip database init, since it is already initialized")
        return

    # The setup steps run strictly in this order.
    setup_steps = (
        create_extensions,
        create_database,
        create_custom_types,
        disable_notices,
    )
    for step in setup_steps:
        step()
def _create_temporary_user_for_dump():
    """Create the role ``brian`` if it is missing and grant it all privileges on DB_NAME."""
    target_database = settings.get("DB_NAME")
    # The DO block only creates the role when pg_roles has no entry for it.
    role_sql = """
        DO $$
        BEGIN
            IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'brian') THEN
                CREATE ROLE brian LOGIN PASSWORD 'brian';
            END IF;
        END
        $$;
        GRANT ALL PRIVILEGES ON DATABASE {database} to brian;
    """.format(database=target_database)
    exec_sql(role_sql, user="******")
def restore_wikipedia_dumps():
    """Restore the article and redirect dumps, then reassign table ownership.

    Each dump's file name is the last path segment of its URL setting
    (WIKIPEDIA_DUMP_URL, then WIKIPEDIA_REDIRECTS_DUMP_URL) and the files
    are expected inside IMPORT_DIR. The restores run as the role ``brian``,
    which ``_create_temporary_user_for_dump`` is called to set up first;
    afterwards ``_alter_wikipedia_dump_owner`` is called.
    """
    _create_temporary_user_for_dump()

    import_dir = settings.get("IMPORT_DIR")
    # Resolve both paths before restoring anything, so a missing URL setting
    # fails before the database is touched.
    dump_paths = [
        "{}/{}".format(import_dir, settings.get(url_setting).split("/")[-1])
        for url_setting in ("WIKIPEDIA_DUMP_URL", "WIKIPEDIA_REDIRECTS_DUMP_URL")
    ]

    # Restore into the configured database instead of a hard-coded "osm":
    # the surrounding helpers operate on DB_NAME, so a different DB_NAME
    # previously restored the tables into the wrong database.
    database = settings.get("DB_NAME")
    for dump_path in dump_paths:
        logged_check_call([
            "pg_restore", "-j", "2", "--dbname", database, "-U", "brian",
            dump_path
        ])

    _alter_wikipedia_dump_owner()
def import_pbf_file():
    """Run ``imposm import`` on the configured PBF extract.

    The file name is PBF_FILE when set, otherwise the last path segment of
    PBF_FILE_URL. It is appended to IMPORT_DIR without a separator.
    """
    import_dir = settings.get("IMPORT_DIR")
    pbf_filename = settings.get("PBF_FILE") or settings.get("PBF_FILE_URL").split('/')[-1]
    pbf_filepath = "".join([import_dir, pbf_filename])

    db_user = settings.get("DB_USER")
    db_host = settings.get("DB_HOST")
    db_name = settings.get("DB_NAME")
    imposm_connection = "postgis://{user}@{host}/{db_name}".format(
        user=db_user, host=db_host, db_name=db_name)

    mapping_path = "{}/mapping.yml".format(settings.get("IMPORT_DIR"))
    schema = settings.get("DB_SCHEMA")

    check_call([
        "imposm", "import",
        "-connection", imposm_connection,
        "-mapping", mapping_path,
        "-dbschema-import", schema,
        "-read", pbf_filepath,
        "-write",
        "-overwritecache",
    ])
from osmnames import settings
from sqlalchemy.engine import create_engine
from sqlalchemy.orm.session import Session

# Connection URL assembled once, at import time, from the DB_* settings.
_DATABASE_URL = "postgresql+psycopg2://{user}:{password}@{host}/{db_name}".format(
    user=settings.get("DB_USER"),
    password=settings.get("DB_PASSWORD"),
    host=settings.get("DB_HOST"),
    db_name=settings.get("DB_NAME"),
)

# Module-level engine shared by every session created below.
engine = create_engine(_DATABASE_URL)


def session():
    """Return a new SQLAlchemy ``Session`` bound to the module-level engine."""
    return Session(engine)
def count(query, user=settings.get('DB_USER'), database=settings.get('DB_NAME')):
    """Execute ``query`` and return the first column of its first result row."""
    cursor = exec_sql(query, user, database)
    first_row = cursor.fetchone()
    return first_row[0]
def exists(query, user=settings.get('DB_USER'), database=settings.get('DB_NAME')):
    """Return the result of ``SELECT EXISTS(<query>)`` for the given query."""
    wrapped_query = "SELECT EXISTS({});".format(query)
    result_row = exec_sql(wrapped_query, user, database).fetchone()
    return result_row[0]
def geonames_export_path():
    """Return the geonames TSV path: EXPORT_DIR + PBF base name + ``_geonames.tsv``.

    No separator is inserted between EXPORT_DIR and the file name.
    """
    export_dir = settings.get("EXPORT_DIR")
    pbf_basename = imported_pbf_filename()
    return "{}{}_geonames.tsv".format(export_dir, pbf_basename)
def housenumbers_export_path():
    """Return the housenumbers TSV path: EXPORT_DIR + PBF base name + ``_housenumbers.tsv``.

    No separator is inserted between EXPORT_DIR and the file name.
    """
    path_template = "{}{}_housenumbers.tsv"
    return path_template.format(
        settings.get("EXPORT_DIR"),
        imported_pbf_filename(),
    )
def download_wikipedia_dump():
    """Download WIKIPEDIA_DUMP_URL into IMPORT_DIR with wget."""
    dump_url = settings.get("WIKIPEDIA_DUMP_URL")
    target_dir = settings.get("IMPORT_DIR")
    # --no-clobber: wget leaves an already downloaded file untouched.
    wget_command = ["wget", "--no-clobber", "--directory-prefix", target_dir, dump_url]
    check_call(wget_command)
def imported_pbf_filename():
    """Return the PBF file name up to, but excluding, its first dot.

    The name is PBF_FILE when set, otherwise the last path segment of
    PBF_FILE_URL.
    """
    pbf_name = settings.get("PBF_FILE")
    if not pbf_name:
        pbf_name = settings.get("PBF_FILE_URL").split('/')[-1]
    # Everything from the first dot onwards is dropped.
    stem, _, _ = pbf_name.partition(".")
    return stem
def download_dump(url):
    """Download ``url`` into IMPORT_DIR with wget via ``logged_check_call``."""
    wget_command = [
        "wget",
        # --no-clobber: wget leaves an already downloaded file untouched.
        "--no-clobber",
        "--directory-prefix", settings.get("IMPORT_DIR"),
        url,
    ]
    logged_check_call(wget_command)
def imported_pbf_filename():
    """Return the imported PBF file's name, cut at its first dot.

    PBF_FILE takes precedence; otherwise the last path segment of
    PBF_FILE_URL is used.
    """
    configured_file = settings.get("PBF_FILE")
    if configured_file:
        full_name = configured_file
    else:
        full_name = settings.get("PBF_FILE_URL").split('/')[-1]
    return full_name.split(".", 1)[0]
def import_admin_level_type_mapping():
    """Execute ``admin_level_type_mapping.sql`` from the ``sql`` folder under DATA_DIR."""
    sql_dir = "{}/sql/".format(settings.get("DATA_DIR"))
    exec_sql_from_file("admin_level_type_mapping.sql", cwd=sql_dir)
def _alter_wikipedia_dump_owner():
    """Make DB_USER the owner of the ``wikipedia_article`` table."""
    new_owner = settings.get("DB_USER")
    ownership_sql = """
        ALTER TABLE wikipedia_article OWNER TO {username};
    """.format(username=new_owner)
    exec_sql(ownership_sql, user="******")
def import_country_osm_grid():
    """Execute ``country_osm_grid.sql`` from the ``sql`` folder under DATA_DIR."""
    data_dir = settings.get("DATA_DIR")
    exec_sql_from_file(
        "country_osm_grid.sql",
        cwd="{}/sql/".format(data_dir),
    )
def create_export_dir():
    """Create EXPORT_DIR, including missing parents, when it does not exist yet."""
    export_dir = settings.get("EXPORT_DIR")
    if os.path.exists(export_dir):
        return
    os.makedirs(export_dir)