Ejemplo n.º 1
0
def exec_sql(sql,
             user=settings.get('DB_USER'),
             database=settings.get('DB_NAME')):
    """Run ``sql`` on a new autocommit connection and return the cursor.

    A fresh psycopg2 connection is opened for ``user`` on ``database`` on
    every call. The default values are read from ``settings`` once, when the
    function is defined. The connection is not closed here; the cursor is
    handed back to the caller after the statement has been executed.
    """
    conn = psycopg2.connect(dbname=database, user=user)
    # Autocommit: every statement takes effect without an explicit commit.
    conn.set_session(autocommit=True)

    cur = conn.cursor()
    cur.execute(sql)
    return cur
Ejemplo n.º 2
0
def download_pbf():
    """Fetch the PBF file with wget unless the PBF_FILE setting is defined.

    When PBF_FILE is set, a notice is printed and nothing is downloaded.
    Otherwise PBF_FILE_URL is downloaded into IMPORT_DIR.
    """
    configured_pbf = settings.get("PBF_FILE")
    if configured_pbf:
        print("skip pbf download since PBF_FILE env is defined: {}".format(configured_pbf))
        return

    source_url = settings.get("PBF_FILE_URL")
    target_dir = settings.get("IMPORT_DIR")
    wget_command = ["wget", "--no-clobber", "--directory-prefix", target_dir, source_url]
    check_call(wget_command)
Ejemplo n.º 3
0
def export_to_tsv(query, path):
    """Export the result of ``query`` to ``path`` as a tab-delimited file.

    Runs ``psql`` with a ``COPY (<query>) TO STDOUT ... CSV HEADER`` command
    and has psql write its output to ``path`` through the ``-o`` option.

    Args:
        query: SQL SELECT statement; it is interpolated verbatim into COPY.
        path: Output file handed to ``psql -o``.
    """
    copy_command = "COPY ({}) TO STDOUT WITH DELIMITER '\t' CSV HEADER".format(query)
    check_call([
        "psql",
        "-c", copy_command,
        "-o", path,
        # psql's positional arguments are "dbname username", in that order.
        # Naming both options explicitly keeps the user from being passed
        # where the database name is expected when the two settings differ.
        "--dbname={}".format(settings.get("DB_NAME")),
        "--username={}".format(settings.get("DB_USER")),
    ])
Ejemplo n.º 4
0
def create_database():
    """Create the application role and its database.

    The role is created with the DB_USER / DB_PASSWORD settings, and the
    DB_NAME database is created from ``template_postgis`` with that role as
    owner. Both statements are executed against the ``postgres`` database.
    """
    db_user = settings.get("DB_USER")
    db_password = settings.get("DB_PASSWORD")
    db_name = settings.get("DB_NAME")

    statements = (
        "CREATE USER {} WITH PASSWORD '{}';".format(db_user, db_password),
        "CREATE DATABASE {} WITH TEMPLATE template_postgis OWNER {};".format(db_name, db_user),
    )
    # The role has to exist before the database that names it as owner.
    for statement in statements:
        exec_sql(statement, user="******", database="postgres")
Ejemplo n.º 5
0
def restore_wikipedia_dump():
    """Restore the downloaded Wikipedia dump into the configured database.

    Steps:
      1. create the temporary ``brian`` role used for the restore,
      2. run ``pg_restore`` (2 parallel jobs) on the dump file found in
         IMPORT_DIR; its file name is the last path segment of
         WIKIPEDIA_DUMP_URL,
      3. hand ownership of the restored table to the application user.
    """
    _create_temporary_user_for_dump()

    dump_filename = settings.get("WIKIPEDIA_DUMP_URL").split("/")[-1]
    dump_path = "{}/{}".format(settings.get("IMPORT_DIR"), dump_filename)

    check_call([
        "pg_restore",
        "-j", "2",
        # Restore into the configured database rather than a hard-coded
        # "osm": the temporary role is granted privileges on DB_NAME, so the
        # restore has to target that same database.
        "--dbname", settings.get("DB_NAME"),
        "-U", "brian",
        dump_path,
    ])

    _alter_wikipedia_dump_owner()
Ejemplo n.º 6
0
def download_pbf():
    """Fetch the PBF file with wget unless the PBF_FILE setting is defined.

    When PBF_FILE is set, a notice is printed and nothing is downloaded.
    Otherwise PBF_FILE_URL is downloaded into IMPORT_DIR.
    """
    pbf_file = settings.get("PBF_FILE")
    if pbf_file:
        # print() with a single parenthesised argument produces the same
        # output on Python 2 and is valid syntax on Python 3, unlike the
        # bare ``print "..."`` statement.
        print("skip pbf download since PBF_FILE env is defined: {}".format(
            pbf_file))
        return

    url = settings.get("PBF_FILE_URL")
    destination_dir = settings.get("IMPORT_DIR")
    check_call(
        ["wget", "--no-clobber", "--directory-prefix", destination_dir, url])
Ejemplo n.º 7
0
def export_to_tsv(query, path):
    """Write the rows returned by ``query`` to ``path`` as tab-delimited CSV.

    ``psql`` executes ``COPY (<query>) TO STDOUT ... CSV HEADER`` and writes
    the output to ``path`` via its ``-o`` option.

    Args:
        query: SQL SELECT statement; it is interpolated verbatim into COPY.
        path: Output file handed to ``psql -o``.
    """
    copy_sql = "COPY ({}) TO STDOUT WITH DELIMITER '\t' CSV HEADER".format(query)
    psql_command = [
        "psql",
        "-c",
        copy_sql,
        "-o",
        path,
        # psql reads positional arguments as "dbname username"; the explicit
        # options remove the dependence on argument order, which previously
        # passed DB_USER in the database slot.
        "--dbname={}".format(settings.get("DB_NAME")),
        "--username={}".format(settings.get("DB_USER")),
    ]
    check_call(psql_command)
Ejemplo n.º 8
0
def _recreate_database():
    """Drop the configured database and role, then initialise them again.

    The drop statements run against the ``postgres`` database and use
    ``IF EXISTS``.
    """
    print("drop database")
    drop_statements = (
        "DROP DATABASE IF EXISTS {};".format(settings.get("DB_NAME")),
        "DROP USER IF EXISTS {};".format(settings.get("DB_USER")),
    )
    # The database is dropped before the role.
    for statement in drop_statements:
        exec_sql(statement, user="******", database="postgres")

    print("create database")
    init_database()
Ejemplo n.º 9
0
def create_database():
    """Create the DB_USER role and the DB_NAME database owned by it.

    The database is created from the ``template_postgis`` template. Both
    statements are executed against the ``postgres`` database.
    """
    role = settings.get("DB_USER")
    password = settings.get("DB_PASSWORD")
    role_sql = "CREATE USER {} WITH PASSWORD '{}';".format(role, password)

    name = settings.get("DB_NAME")
    database_sql = "CREATE DATABASE {} WITH TEMPLATE template_postgis OWNER {};".format(name, role)

    exec_sql(role_sql, user="******", database="postgres")
    exec_sql(database_sql, user="******", database="postgres")
Ejemplo n.º 10
0
def exec_sql_from_file(filename, user=settings.get("DB_USER"), database=settings.get("DB_NAME"), cwd=""):
    """Execute an SQL file with ``psql``, stopping at the first error.

    Args:
        filename: Name of the SQL file, relative to ``cwd``.
        user: Role passed to psql as ``--username``.
        database: Database passed to psql as ``--dbname``.
        cwd: Directory prefix; the file path is built as ``<cwd>/<filename>``.

    psql's standard output is discarded. A failing psql run surfaces through
    ``check_call`` (presumably ``subprocess.check_call`` — confirm against the
    module imports).
    """
    log.info("start executing sql file {}".format(filename))
    # Open the null device in a ``with`` block so the handle is closed once
    # psql has finished, instead of being leaked on every call.
    with open(os.devnull, 'w') as devnull:
        check_call([
                "psql",
                "-v", "ON_ERROR_STOP=1",
                "--username={}".format(user),
                # Honour the ``database`` argument; it used to be ignored in
                # favour of the DB_NAME setting (which is still the default).
                "--dbname={}".format(database),
                "--file={}/{}".format(cwd, filename)
            ], stdout=devnull
        )
    log.info("finished executing sql file {}".format(filename))
Ejemplo n.º 11
0
def exec_sql_from_file(filename,
                       user=settings.get("DB_USER"),
                       database=settings.get("DB_NAME"),
                       cwd=""):
    """Execute an SQL file with ``psql``.

    Args:
        filename: Name of the SQL file, relative to ``cwd``.
        user: Role passed to psql as ``--username``.
        database: Database passed to psql as ``--dbname``.
        cwd: Directory prefix; the file path is built as ``<cwd>/<filename>``.

    psql's standard output is discarded.
    """
    log.info("start executing sql file {}".format(filename))
    psql_command = [
        "psql",
        "--username={}".format(user),
        # Use the ``database`` argument; previously it was accepted but the
        # DB_NAME setting was always used instead (it remains the default).
        "--dbname={}".format(database),
        "--file={}/{}".format(cwd, filename),
    ]
    # The context manager closes the null-device handle after psql exits,
    # so repeated calls no longer leak file descriptors.
    with open(os.devnull, 'w') as devnull:
        check_call(psql_command, stdout=devnull)
    log.info("finished executing sql file {}".format(filename))
Ejemplo n.º 12
0
def exec_sql_from_file(filename,
                       user=settings.get("DB_USER"),
                       database=settings.get("DB_NAME"),
                       cwd="",
                       parallelize=False):
    """Execute an SQL file with ``psql`` (or ``par_psql``), stopping on error.

    Args:
        filename: Name of the SQL file, relative to ``cwd``.
        user: Role passed as ``--username``.
        database: Database passed as ``--dbname``.
        cwd: Directory prefix; the file path is built as ``<cwd>/<filename>``.
        parallelize: When true, ``par_psql`` is invoked instead of ``psql``
            with the same arguments.

    The command's standard output is discarded.
    """
    log.info("start executing sql file {}".format(filename))
    executable = "par_psql" if parallelize else "psql"
    command = [
        executable,
        "-v", "ON_ERROR_STOP=1",
        "--username={}".format(user),
        # Use the ``database`` argument; previously it was accepted but the
        # DB_NAME setting was always used instead (it remains the default).
        "--dbname={}".format(database),
        "--file={}/{}".format(cwd, filename),
    ]
    # The context manager closes the null-device handle after the command
    # exits, so repeated calls no longer leak file descriptors.
    with open(os.devnull, 'w') as devnull:
        check_call(command, stdout=devnull)
    log.info("finished executing sql file {}".format(filename))
Ejemplo n.º 13
0
def import_pbf_file():
    """Import the PBF file into the database by running ``imposm import``.

    The file name is the PBF_FILE setting or, when that is empty, the last
    path segment of PBF_FILE_URL. It is appended directly to IMPORT_DIR, so
    IMPORT_DIR is expected to end with a path separator.
    """
    import_dir = settings.get("IMPORT_DIR")

    pbf_filename = settings.get("PBF_FILE")
    if not pbf_filename:
        pbf_filename = settings.get("PBF_FILE_URL").split('/')[-1]
    pbf_filepath = import_dir + pbf_filename

    connection_parts = {
        "user": settings.get("DB_USER"),
        "host": settings.get("DB_HOST"),
        "db_name": settings.get("DB_NAME"),
    }
    imposm_connection = "postgis://{user}@{host}/{db_name}".format(**connection_parts)

    command = ["imposm", "import"]
    command += ["-connection", imposm_connection]
    command += ["-mapping", "{}/mapping.yml".format(settings.get("IMPORT_DIR"))]
    command += ["-dbschema-import", settings.get("DB_SCHEMA")]
    command += ["-read", pbf_filepath]
    command += ["-write", "-overwritecache"]
    check_call(command)
Ejemplo n.º 14
0
def vacuum_database():
    """Run ``vacuumdb --analyze`` on the configured database.

    Does nothing when the SKIP_VACUUM setting is truthy. The number of
    parallel jobs comes from the VACUUM_JOBS setting; vacuumdb's standard
    output is discarded.
    """
    if settings.get('SKIP_VACUUM'):
        return

    log.info("start vacuum database")
    vacuum_command = [
        "vacuumdb",
        "--username=postgres",
        "--dbname={}".format(settings.get("DB_NAME")),
        "--analyze",
        "--jobs={}".format(settings.get('VACUUM_JOBS')),
    ]
    # The context manager closes the null-device handle after vacuumdb
    # exits; the previous bare open() leaked it.
    with open(os.devnull, 'w') as devnull:
        check_call(vacuum_command, stdout=devnull)
    log.info("finished vacuum database")
Ejemplo n.º 15
0
def vacuum_database():
    """Run ``VACUUM ANALYZE`` via ``exec_sql`` unless SKIP_VACUUM is set."""
    if not settings.get('SKIP_VACUUM'):
        log.info("start vacuum database")
        exec_sql('VACUUM ANALYZE', user="******")
        log.info("finished vacuum database")
Ejemplo n.º 16
0
def _create_temporary_user_for_dump():
    """Create the ``brian`` login role and grant it all privileges on DB_NAME."""
    sql_template = """
        CREATE ROLE brian LOGIN PASSWORD 'brian';
        GRANT ALL PRIVILEGES ON DATABASE {database} to brian;
    """
    statement = sql_template.format(database=settings.get("DB_NAME"))

    exec_sql(statement, user="******")
Ejemplo n.º 17
0
def vacuum_database():
    """Vacuum and analyze the database, honouring the SKIP_VACUUM setting.

    When SKIP_VACUUM is truthy the function returns without doing anything.
    """
    skip_requested = settings.get('SKIP_VACUUM')
    if skip_requested:
        return None

    log.info("start vacuum database")
    exec_sql('VACUUM ANALYZE', user="******")
    log.info("finished vacuum database")
Ejemplo n.º 18
0
def _alter_wikipedia_dump_owner():
    """Make DB_USER the owner of the wikipedia_article and wikipedia_redirect tables."""
    sql_template = """
        ALTER TABLE wikipedia_article OWNER TO {username};
        ALTER TABLE wikipedia_redirect OWNER TO {username};
    """
    new_owner = settings.get("DB_USER")

    exec_sql(sql_template.format(username=new_owner), user="******")
Ejemplo n.º 19
0
def init_database():
    """Set up the database unless ``pg_database`` already lists DB_NAME.

    When the database is missing, extensions, the database itself and the
    custom types are created, and notices are disabled, in that order.
    """
    db_name = settings.get("DB_NAME")
    lookup_sql = "SELECT 1 AS result FROM pg_database WHERE datname='{}'".format(db_name)

    already_initialized = exists(lookup_sql, user="******", database="postgres")
    if already_initialized:
        print("skip database init, since it is already initialized")
        return

    create_extensions()
    create_database()
    create_custom_types()
    disable_notices()
Ejemplo n.º 20
0
def import_wikipedia():
    """Import the Wikipedia dumps unless already imported or disabled.

    * If a ``wikipedia_article`` table exists, nothing is done.
    * If SKIP_WIKIPEDIA is set, only the basic scaffolding is created.
    * Otherwise both dumps are downloaded and restored, then the redirect
      preparation and index creation are passed to ``run_in_parallel``.
    """
    table_lookup = "SELECT * FROM information_schema.tables WHERE table_name='wikipedia_article'"
    if exists(table_lookup):
        log.info("skip wikipedia import, since table already exists")
        return

    if settings.get("SKIP_WIKIPEDIA"):
        skip_message = "SKIP_WIKIPEDIA = True in .env file, therefore skipping import and only create basic scaffolding"
        log.info(skip_message)
        create_basic_scaffolding()
        return

    for url_setting in ("WIKIPEDIA_DUMP_URL", "WIKIPEDIA_REDIRECTS_DUMP_URL"):
        download_dump(settings.get(url_setting))
    restore_wikipedia_dumps()

    run_in_parallel(prepare_wikipedia_redirects, create_wikipedia_index)
Ejemplo n.º 21
0
def init_database():
    """Initialise the database when DB_NAME is not yet in ``pg_database``.

    Prints a notice and returns early if the database is already there.
    """
    exists_query = "SELECT 1 AS result FROM pg_database WHERE datname='{}'"
    exists_query = exists_query.format(settings.get("DB_NAME"))

    if not exists(exists_query, user="******", database="postgres"):
        create_extensions()
        create_database()
        create_custom_types()
        disable_notices()
    else:
        print("skip database init, since it is already initialized")
Ejemplo n.º 22
0
def _create_temporary_user_for_dump():
    """Ensure the ``brian`` login role exists and grant it privileges on DB_NAME.

    The role is created inside a ``DO`` block guarded by a ``pg_roles``
    lookup, so it is only created when it does not exist yet.
    """
    sql_template = """
        DO $$
        BEGIN
            IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'brian') THEN
                CREATE ROLE brian LOGIN PASSWORD 'brian';
            END IF;
        END
        $$;
        GRANT ALL PRIVILEGES ON DATABASE {database} to brian;
    """
    statement = sql_template.format(database=settings.get("DB_NAME"))

    exec_sql(statement, user="******")
Ejemplo n.º 23
0
def restore_wikipedia_dumps():
    """Restore the article and redirect Wikipedia dumps into the database.

    Steps:
      1. create the temporary ``brian`` role used for the restore,
      2. run ``pg_restore`` (2 parallel jobs) for the article dump and then
         the redirect dump; each file is looked up in IMPORT_DIR under the
         last path segment of its URL setting,
      3. hand ownership of the restored tables to the application user.
    """
    _create_temporary_user_for_dump()

    import_dir = settings.get("IMPORT_DIR")
    # Resolve both paths up front so a missing URL setting fails before any
    # restore has started.
    dump_paths = [
        "{}/{}".format(import_dir, settings.get(url_setting).split("/")[-1])
        for url_setting in ("WIKIPEDIA_DUMP_URL", "WIKIPEDIA_REDIRECTS_DUMP_URL")
    ]

    # Restore into the configured database rather than a hard-coded "osm":
    # the temporary role is granted privileges on DB_NAME, so the restore
    # has to target that same database.
    db_name = settings.get("DB_NAME")
    for dump_path in dump_paths:
        logged_check_call([
            "pg_restore", "-j", "2", "--dbname", db_name, "-U", "brian",
            dump_path
        ])

    _alter_wikipedia_dump_owner()
Ejemplo n.º 24
0
def import_pbf_file():
    """Run ``imposm import`` for the configured PBF file.

    The file name comes from PBF_FILE, falling back to the last path segment
    of PBF_FILE_URL, and is appended directly to IMPORT_DIR.
    """
    import_dir = settings.get("IMPORT_DIR")
    pbf_filename = settings.get("PBF_FILE")
    if not pbf_filename:
        pbf_filename = settings.get("PBF_FILE_URL").split('/')[-1]
    pbf_filepath = import_dir + pbf_filename

    db_user = settings.get("DB_USER")
    db_host = settings.get("DB_HOST")
    db_name = settings.get("DB_NAME")
    imposm_connection = "postgis://{user}@{host}/{db_name}".format(
        user=db_user, host=db_host, db_name=db_name)

    mapping_path = "{}/mapping.yml".format(settings.get("IMPORT_DIR"))
    import_schema = settings.get("DB_SCHEMA")

    imposm_command = [
        "imposm", "import",
        "-connection", imposm_connection,
        "-mapping", mapping_path,
        "-dbschema-import", import_schema,
        "-read", pbf_filepath,
        "-write",
        "-overwritecache",
    ]
    check_call(imposm_command)
Ejemplo n.º 25
0
from osmnames import settings

from sqlalchemy.engine import create_engine
from sqlalchemy.orm.session import Session

# Module-level SQLAlchemy engine, created once at import time from the
# DB_USER, DB_PASSWORD, DB_HOST and DB_NAME settings.
engine = create_engine(
    "postgresql+psycopg2://{user}:{password}@{host}/{db_name}".format(**{
        "user": settings.get("DB_USER"),
        "password": settings.get("DB_PASSWORD"),
        "host": settings.get("DB_HOST"),
        "db_name": settings.get("DB_NAME"),
    })
)


def session():
    """Return a new SQLAlchemy ``Session`` bound to the module-level engine."""
    return Session(bind=engine)
Ejemplo n.º 26
0
def count(query, user=settings.get('DB_USER'), database=settings.get('DB_NAME')):
    """Execute ``query`` and return the first column of its first row."""
    cursor = exec_sql(query, user, database)
    first_row = cursor.fetchone()
    return first_row[0]
Ejemplo n.º 27
0
def count(query,
          user=settings.get('DB_USER'),
          database=settings.get('DB_NAME')):
    """Return the first column of the first row produced by ``query``.

    The query is run through ``exec_sql`` as ``user`` on ``database``.
    """
    value, = exec_sql(query, user, database).fetchone()[:1]
    return value
Ejemplo n.º 28
0
def exists(query, user=settings.get('DB_USER'), database=settings.get('DB_NAME')):
    """Return the result of ``SELECT EXISTS(<query>)`` for the given query."""
    wrapped_query = "SELECT EXISTS({});".format(query)
    row = exec_sql(wrapped_query, user, database).fetchone()
    return row[0]
Ejemplo n.º 29
0
def geonames_export_path():
    """Return the geonames TSV path: EXPORT_DIR + PBF base name + ``_geonames.tsv``."""
    export_dir = settings.get("EXPORT_DIR")
    pbf_basename = imported_pbf_filename()
    return "{}{}_geonames.tsv".format(export_dir, pbf_basename)
Ejemplo n.º 30
0
def exists(query,
           user=settings.get('DB_USER'),
           database=settings.get('DB_NAME')):
    """Wrap ``query`` in ``SELECT EXISTS(...)`` and return the single value.

    The statement is run through ``exec_sql`` as ``user`` on ``database``.
    """
    cursor = exec_sql("SELECT EXISTS({});".format(query), user, database)
    result_row = cursor.fetchone()
    return result_row[0]
Ejemplo n.º 31
0
def housenumbers_export_path():
    """Return the housenumbers TSV path inside EXPORT_DIR.

    The name is the imported PBF base name followed by ``_housenumbers.tsv``;
    it is appended to EXPORT_DIR without adding a separator.
    """
    path_parts = (settings.get("EXPORT_DIR"), imported_pbf_filename())
    return "{}{}_housenumbers.tsv".format(*path_parts)
Ejemplo n.º 32
0
def exec_sql(sql, user=settings.get('DB_USER'), database=settings.get('DB_NAME')):
    """Execute ``sql`` on a new autocommit connection and return its cursor.

    Each call opens its own psycopg2 connection for ``user`` on ``database``.
    The connection is not closed here; the cursor is returned after the
    statement has been executed.
    """
    connect_kwargs = {"user": user, "dbname": database}
    db_connection = psycopg2.connect(**connect_kwargs)
    db_connection.set_session(autocommit=True)

    db_cursor = db_connection.cursor()
    db_cursor.execute(sql)
    return db_cursor
Ejemplo n.º 33
0
def download_wikipedia_dump():
    """Download WIKIPEDIA_DUMP_URL into IMPORT_DIR using wget."""
    dump_url = settings.get("WIKIPEDIA_DUMP_URL")
    target_dir = settings.get("IMPORT_DIR")

    wget_command = ["wget", "--no-clobber", "--directory-prefix", target_dir, dump_url]
    check_call(wget_command)
Ejemplo n.º 34
0
def housenumbers_export_path():
    """Return the housenumbers TSV path: EXPORT_DIR + PBF base name + suffix."""
    export_dir = settings.get("EXPORT_DIR")
    pbf_basename = imported_pbf_filename()
    return "{}{}_housenumbers.tsv".format(export_dir, pbf_basename)
Ejemplo n.º 35
0
def imported_pbf_filename():
    """Return the PBF file name cut off at its first dot.

    The name is the PBF_FILE setting or, when that is empty, the last path
    segment of PBF_FILE_URL.
    """
    pbf_name = settings.get("PBF_FILE")
    if not pbf_name:
        pbf_name = settings.get("PBF_FILE_URL").split('/')[-1]

    # Everything from the first "." onwards is dropped, so a name such as
    # "a.osm.pbf" yields "a".
    stem, _, _ = pbf_name.partition(".")
    return stem
Ejemplo n.º 36
0
def download_dump(url):
    """Download ``url`` into IMPORT_DIR using wget, via ``logged_check_call``."""
    wget_command = ["wget", "--no-clobber"]
    wget_command += ["--directory-prefix", settings.get("IMPORT_DIR")]
    wget_command.append(url)
    logged_check_call(wget_command)
Ejemplo n.º 37
0
def imported_pbf_filename():
    """Return the part of the PBF file name that precedes its first dot.

    PBF_FILE is used when set; otherwise the last path segment of
    PBF_FILE_URL.
    """
    configured_name = settings.get("PBF_FILE")
    if configured_name:
        filename_with_suffix = configured_name
    else:
        filename_with_suffix = settings.get("PBF_FILE_URL").split('/')[-1]

    name_parts = filename_with_suffix.split(".")
    return name_parts[0]
Ejemplo n.º 38
0
def import_admin_level_type_mapping():
    """Execute ``admin_level_type_mapping.sql`` from ``<DATA_DIR>/sql/``."""
    sql_dir = "{}/sql/".format(settings.get("DATA_DIR"))
    exec_sql_from_file("admin_level_type_mapping.sql", cwd=sql_dir)
Ejemplo n.º 39
0
def _alter_wikipedia_dump_owner():
    """Make DB_USER the owner of the ``wikipedia_article`` table."""
    sql_template = """
        ALTER TABLE wikipedia_article OWNER TO {username};
    """
    statement = sql_template.format(username=settings.get("DB_USER"))

    exec_sql(statement, user="******")
Ejemplo n.º 40
0
def import_country_osm_grid():
    """Execute ``country_osm_grid.sql`` from ``<DATA_DIR>/sql/``."""
    sql_dir = "{}/sql/".format(settings.get("DATA_DIR"))
    exec_sql_from_file("country_osm_grid.sql", cwd=sql_dir)
Ejemplo n.º 41
0
def create_export_dir():
    """Create the EXPORT_DIR directory (with parents) if the path does not exist."""
    export_dir = settings.get("EXPORT_DIR")
    if os.path.exists(export_dir):
        return
    os.makedirs(export_dir)
Ejemplo n.º 42
0
def import_country_osm_grid():
    """Load the country OSM grid by executing its SQL file.

    The file ``country_osm_grid.sql`` is looked up in ``<DATA_DIR>/sql/``.
    """
    data_dir = settings.get("DATA_DIR")
    exec_sql_from_file("country_osm_grid.sql", cwd="{}/sql/".format(data_dir))
Ejemplo n.º 43
0
def geonames_export_path():
    """Return the geonames TSV path inside EXPORT_DIR.

    The name is the imported PBF base name followed by ``_geonames.tsv``;
    it is appended to EXPORT_DIR without adding a separator.
    """
    path_parts = (settings.get("EXPORT_DIR"), imported_pbf_filename())
    return "{}{}_geonames.tsv".format(*path_parts)