コード例 #1
0
def write_geojson_to_database(catalog_index, source):
    """Store one GeoJSON document in its own ``"dataset-NNNNN"`` table.

    The table name is ``catalog_index`` zero-padded to five digits. Any
    existing table of that name is dropped and recreated with a single
    ``geojson json`` column, then ``source`` is inserted as one row.

    Args:
        catalog_index: Index of the data set; used for the table name and
            for the progress messages.
        source: The raw GeoJSON document as bytes. It is decoded by trying
            several encodings in order and keeping the first that works.

    Raises:
        ValueError: If none of the candidate encodings can decode ``source``.
    """
    print('Writing #{} geojson to database...'.format(catalog_index))
    table = '\"dataset-{}\"'.format(str(catalog_index).zfill(5))
    engine = database_io.get_postgres_engine()

    engine.execute('drop table if exists {}'.format(table))
    engine.execute(
        'create table if not exists {} (geojson json);'.format(table))
    # Normally a no-op right after the drop/create above; kept so the table
    # is guaranteed to hold exactly one row after the insert below.
    engine.execute('delete from {};'.format(table))

    source_decoded = None
    for encoding in ('utf-8', 'unicode-escape', 'ISO-8859-1', 'latin-1'):
        try:
            source_decoded = source.decode(encoding)
        except UnicodeDecodeError:
            print('Error while decoding with {}'.format(encoding))
        else:
            print('Encoding: {}'.format(encoding))
            break

    if source_decoded is None:
        raise ValueError(
            'Could not decode geojson #{} with any known encoding'.format(
                catalog_index))

    # Bind the document as a parameter instead of pasting it between quotes:
    # a single quote inside the GeoJSON would otherwise terminate the SQL
    # literal (broken statement / SQL injection). With a bound parameter the
    # driver also handles '%' itself, so no manual '%%' escaping is needed.
    engine.execute(
        'insert into {} (geojson) values (%s);'.format(table),
        (source_decoded,))
    print('Wrote #{} geojson to database!'.format(catalog_index))
コード例 #2
0
def write_map_catalog_to_database(start_index=20000, max_rows=385):
    """Insert the GeoJSON map catalog into the ``"data-catalog-map"`` table.

    The catalog is obtained from ``write_geojson_catalog_json()`` and loaded
    into a DataFrame; each row's ``description`` and ``target_url`` is
    inserted together with a running catalog index. The index only advances
    after a successful insert, so failed rows leave no gaps.

    Args:
        start_index: Catalog index given to the first inserted row. The
            default of 20000 keeps map entries from interfering with the
            CSV data sets.
        max_rows: Maximum number of catalog rows to process, or ``None`` to
            process all of them. The default matches the previously
            hard-coded limit.
    """
    gsc = write_geojson_catalog_json()
    df = pd.read_json(json.dumps(gsc), orient='records')
    engine = database_io.get_postgres_engine()

    # Values are bound as parameters so that quotes inside a description or
    # URL cannot break (or inject into) the statement.
    insert_sql = ('insert into \"data-catalog-map\" '
                  '(catalog_index, description, target_url) '
                  'values (%s, %s, %s);')

    row_count = len(df) if max_rows is None else min(max_rows, len(df))
    next_index = start_index
    for position in range(row_count):
        values = (
            next_index,
            # str() mirrors the text the old string formatting produced.
            str(df['description'].iloc[position]),
            str(df['target_url'].iloc[position]),
        )
        try:
            engine.execute(insert_sql, values)
        except Exception:
            # Best effort: report the row that failed and carry on.
            print(insert_sql, values)
            continue
        next_index += 1
    print('Wrote map catalog to database.')
コード例 #3
0
def read_geojson_from_database(catalog_index):
    """Read the stored GeoJSON of a data set and return it as a JSON string.

    The document is taken from the first column of the first row of the
    ``"dataset-NNNNN"`` table, where ``NNNNN`` is ``catalog_index``
    zero-padded to five digits.

    Args:
        catalog_index: Index of the data set whose table is read.

    Returns:
        The fetched value serialised with ``json.dumps``.

    Raises:
        IndexError: If the table contains no rows.
    """
    print('Reading #{} geojson from database...'.format(catalog_index))
    padded_index = str(catalog_index).zfill(5)
    table = '\"dataset-{}\"'.format(padded_index)

    query = 'select * from {} limit 1;'.format(table)
    cursor = database_io.get_postgres_engine().execute(query)

    # Materialise the result set, then pick the single cell we care about.
    first_row = list(cursor)[0]
    document = first_row[0]

    print('Read #{} geojson from database!'.format(catalog_index))

    return json.dumps(document)
コード例 #4
0
def get_status_code_of_data_set(catalog_index):
    """Look up the status code and its description for a data set.

    Args:
        catalog_index: Integer index of the data set (anything ``int()``
            accepts).

    Returns:
        A ``(status_code, description)`` tuple taken from the first row of
        the query result, or ``(999, '')`` if the lookup fails for any
        reason (invalid index, database error, no matching row).
    """
    sql_template = '''select status_code, description
from catalog_status cs natural join catalog_status_codes csc
where cs.status_code = csc.code
and catalog_index = {};'''

    try:
        # Coerce to int before interpolating so that nothing but a number
        # can ever end up inside the SQL text.
        sql = sql_template.format(int(catalog_index))
        result = pd.read_sql_query(sql, database_io.get_postgres_engine())
        status, description = result['status_code'][0], result['description'][0]
    except Exception:
        # Deliberate best effort; unlike a bare ``except`` this still lets
        # KeyboardInterrupt / SystemExit propagate.
        status = 999
        description = ''
        print('Status code could not be read from database.')
    return status, description
コード例 #5
0
def get_catalog(from_database=False):
    """Load the data catalog from the database or from ``db/catalog.json``.

    Args:
        from_database: When true, first try to read the ``data_catalog``
            table. If that fails, fall back to the JSON file.

    Returns:
        The ``data_catalog`` table as a DataFrame when it was read from the
        database; otherwise the parsed content of ``db/catalog.json``; or
        ``None`` if the file is missing or cannot be parsed.
    """
    if from_database:
        try:
            return pd.read_sql_table(
                'data_catalog', database_io.get_postgres_engine())
        except Exception:
            # Best effort: report and fall through to the file on disk.
            print('Catalog could not be read from database!')

    catalog_path = 'db/catalog.json'
    catalog = None
    try:
        # The context manager closes the handle even if parsing fails.
        with open(catalog_path) as catalog_file:
            catalog = json.load(catalog_file)
    except (OSError, ValueError):
        # OSError: missing/unreadable file; ValueError: invalid JSON or
        # undecodable bytes.
        print('Catalog was not found. Please run polling first!')
    return catalog
コード例 #6
0
def get_description_by_catalog_index(catalog_index, from_database=False):
    """Return the description of a data set with HTML tags removed.

    Args:
        catalog_index: Index of the data set.
        from_database: When true, query the database; otherwise look the
            entry up in the catalog returned by ``get_catalog``.

    Returns:
        The description text without HTML tags, or ``''`` if it could not
        be loaded for any reason.
    """
    try:
        if from_database:
            # Indices below 20000 are CSV data sets; map (GeoJSON) entries
            # start at 20000 and live in their own table.
            if catalog_index < 20000:
                table = 'data_catalog'
            else:
                table = '\"data-catalog-map\"'
            # int() guarantees only a number is interpolated into the SQL.
            query = 'select description from {} where catalog_index = {}'.format(
                table, int(catalog_index))
            df = pd.read_sql_query(query, database_io.get_postgres_engine())
            description = df['description'][0]
        else:
            description = get_catalog(from_database=False)[catalog_index]['description']

        description_without_html_tags = re.sub(r'<[^>]*>', '', description)
        if len(description_without_html_tags) < len(description):
            print('HTML tags were deleted from description.')
            description = description_without_html_tags
        print('Data set: {}'.format(description or '(no description available)'))
    except Exception:
        # Deliberate best effort; unlike a bare ``except`` this still lets
        # KeyboardInterrupt / SystemExit propagate.
        description = ''
        print('Data set\'s description could not be loaded.')
    return description
コード例 #7
0
def get_target_url_by_catalog_index(catalog_index):
    """Return the ``target_url`` stored for ``catalog_index``.

    Indices below 20000 are looked up in ``data_catalog`` (CSV data sets);
    all others in ``"data-catalog-map"`` (GeoJSON data sets).

    Args:
        catalog_index: Numeric index of the data set.

    Returns:
        The ``target_url`` value of the first row of the query result.
    """
    is_csv = catalog_index < 20000
    if is_csv:
        template = 'select target_url from data_catalog where catalog_index = {}'
    else:
        template = 'select target_url from \"data-catalog-map\" where catalog_index = {}'

    frame = pd.read_sql_query(
        template.format(catalog_index), database_io.get_postgres_engine())
    return frame['target_url'][0]