def write_geojson_to_database(catalog_index, source):
    """Store an encoded GeoJSON document in its own ``dataset-NNNNN`` table.

    The table is dropped, recreated with a single ``geojson json`` column and
    filled with exactly one row holding the decoded document.

    Args:
        catalog_index: Catalog number of the data set. It is zero-padded to
            five digits to build the table name.
        source: Encoded GeoJSON payload; must provide ``decode(encoding)``
            (e.g. ``bytes``).

    Raises:
        ValueError: If none of the candidate encodings can decode ``source``.
    """
    print('Writing #{} geojson to database...'.format(catalog_index))
    table = '"dataset-{}"'.format(str(catalog_index).zfill(5))
    engine = database_io.get_postgres_engine()

    engine.execute('drop table if exists {}'.format(table))
    engine.execute(
        'create table if not exists {} (geojson json);'.format(table))
    # Kept from the original flow; the table was just recreated, so this is
    # only a safeguard against leftover rows.
    engine.execute('delete from {};'.format(table))

    # Try the candidate encodings in order and stop at the first that works.
    source_decoded = None
    for encoding in ('utf-8', 'unicode-escape', 'ISO-8859-1', 'latin-1'):
        try:
            source_decoded = source.decode(encoding)
        except UnicodeDecodeError:
            print('Error while decoding with {}'.format(encoding))
        else:
            print('Encoding: {}'.format(encoding))
            break

    if source_decoded is None:
        # ISO-8859-1 maps every byte value, so this is not expected for
        # ``bytes`` input; fail clearly instead of crashing on ``None``.
        raise ValueError(
            'Could not decode geojson #{} with any known encoding.'.format(
                catalog_index))

    # Bind the document as a parameter instead of splicing it into the SQL
    # text: apostrophes or percent signs inside the GeoJSON can then neither
    # break the statement nor inject SQL.
    # NOTE(review): ``%s`` is the positional placeholder of psycopg2-style
    # drivers - confirm against the DBAPI behind get_postgres_engine().
    engine.execute(
        'insert into {} (geojson) values (%s);'.format(table),
        (source_decoded,))
    print('Wrote #{} geojson to database!'.format(catalog_index))
def write_map_catalog_to_database(start_index=20000, max_entries=385):
    """Insert the GeoJSON (map) catalog into the ``data-catalog-map`` table.

    The catalog is obtained from ``write_geojson_catalog_json()`` and each
    entry is written with a consecutive ``catalog_index``. Rows that cannot
    be inserted are reported and skipped without consuming an index, so the
    stored indices stay contiguous.

    Args:
        start_index: First ``catalog_index`` to assign. Map data sets start
            at 20000 so they do not interfere with the CSV data sets.
        max_entries: Maximum number of catalog rows to process, or ``None``
            for all rows. The default of 385 is the limit that used to be
            hard-coded here (presumably the catalog size at the time -
            TODO confirm).
    """
    # Local import: only this function needs it.
    from io import StringIO

    gsc = write_geojson_catalog_json()
    # Wrap the JSON text in a file-like object; passing a literal JSON string
    # to read_json is deprecated in recent pandas versions.
    df = pd.read_json(StringIO(json.dumps(gsc)), orient='records')
    selected = df if max_entries is None else df.head(max_entries)

    engine = database_io.get_postgres_engine()
    # Values are bound as parameters so quotes or percent signs inside a
    # description or URL cannot break the statement.
    # NOTE(review): ``%s`` is the positional placeholder of psycopg2-style
    # drivers - confirm against the DBAPI behind get_postgres_engine().
    insert_statement = (
        'insert into "data-catalog-map" '
        '(catalog_index, description, target_url) values (%s, %s, %s);')

    next_index = start_index
    for _, row in selected.iterrows():
        try:
            values = (
                next_index,
                str(row['description']),
                str(row['target_url']),
            )
            engine.execute(insert_statement, values)
        except Exception as error:
            # Best effort: report the failing entry and carry on with the
            # next one, reusing the same index.
            print('Could not insert map catalog entry #{}: {}'.format(
                next_index, error))
            continue
        next_index += 1
    print('Wrote map catalog to database.')
def read_geojson_from_database(catalog_index):
    """Load the stored GeoJSON of a data set and return it as a JSON string.

    Reads one row from the ``dataset-NNNNN`` table that belongs to
    ``catalog_index`` (zero-padded to five digits) and serializes the first
    column of that row with ``json.dumps``.

    Args:
        catalog_index: Catalog number of the data set to read.

    Returns:
        The JSON-serialized value of the first column of the first row.

    Raises:
        IndexError: If the table contains no rows.
    """
    print('Reading #{} geojson from database...'.format(catalog_index))
    padded_index = str(catalog_index).zfill(5)
    table_name = '\"dataset-{}\"'.format(padded_index)
    query = 'select * from {} limit 1;'.format(table_name)
    fetched_rows = list(database_io.get_postgres_engine().execute(query))
    first_row = fetched_rows[0]
    stored_value = first_row[0]
    print('Read #{} geojson from database!'.format(catalog_index))
    return json.dumps(stored_value)
def get_status_code_of_data_set(catalog_index):
    """Look up the status code and its description for a data set.

    Joins ``catalog_status`` with ``catalog_status_codes`` and returns the
    first matching row for ``catalog_index``.

    Args:
        catalog_index: Catalog number of the data set.

    Returns:
        A ``(status_code, description)`` tuple. If the lookup fails for any
        reason (database error, no matching row), ``(999, '')`` is returned.
    """
    sql = (
        'select status_code, description '
        'from catalog_status cs natural join catalog_status_codes csc '
        'where cs.status_code = csc.code and catalog_index = {};'
    ).format(catalog_index)
    try:
        result = pd.read_sql_query(sql, database_io.get_postgres_engine())
        status = result['status_code'][0]
        description = result['description'][0]
    except Exception:
        # Best-effort lookup: fall back to sentinel values, but let
        # KeyboardInterrupt / SystemExit propagate (a bare ``except`` would
        # swallow those as well).
        status = 999
        description = ''
        print('Status code could not be read from database.')
    return status, description
def get_catalog(from_database=False):
    """Return the data catalog from the database or from ``db/catalog.json``.

    Args:
        from_database: If true, try to read the ``data_catalog`` table first
            and fall back to the JSON file when that fails.

    Returns:
        The ``data_catalog`` table as a pandas DataFrame when it was read
        from the database, otherwise the parsed content of
        ``db/catalog.json``. ``None`` if neither source could be read.
    """
    if from_database:
        try:
            return pd.read_sql_table(
                'data_catalog', database_io.get_postgres_engine())
        except Exception:
            # Best effort: report and fall through to the JSON file below.
            print('Catalog could not be read from database!')

    catalog_path = 'db/catalog.json'
    try:
        # ``with`` guarantees the file handle is closed again.
        with open(catalog_path) as catalog_file:
            return json.load(catalog_file)
    except (OSError, ValueError):
        # OSError: file missing/unreadable; ValueError: invalid JSON or
        # undecodable bytes.
        print('Catalog was not found. Please run polling first!')
        return None
def get_description_by_catalog_index(catalog_index, from_database=False):
    """Return the description of a data set with HTML tags removed.

    Args:
        catalog_index: Catalog number of the data set. When reading from the
            database, indices below 20000 are looked up in ``data_catalog``
            and all others in ``data-catalog-map``.
        from_database: If true, query the database; otherwise use the
            catalog returned by ``get_catalog(from_database=False)``.

    Returns:
        The description text, or ``''`` if it could not be loaded.
    """
    try:
        if from_database:
            # Both catalogs share the same query; only the table differs.
            table = ('data_catalog' if catalog_index < 20000
                     else '"data-catalog-map"')
            df = pd.read_sql_query(
                'select description from {} where catalog_index = {}'.format(
                    table, catalog_index),
                database_io.get_postgres_engine())
            description = df['description'][0]
        else:
            catalog = get_catalog(from_database=False)
            description = catalog[catalog_index]['description']

        description_without_html_tags = re.sub(r'<[^>]*>', '', description)
        if len(description_without_html_tags) < len(description):
            print('HTML tags were deleted from description.')
            description = description_without_html_tags
        print('Data set: {}'.format(
            description or '(no description available)'))
    except Exception:
        # Best effort: any lookup or parsing problem yields an empty
        # description, but KeyboardInterrupt / SystemExit still propagate.
        description = ''
        print('Data set\'s description could not be loaded.')
    return description
def get_target_url_by_catalog_index(catalog_index):
    """Return the ``target_url`` stored for a data set.

    Indices below 20000 are looked up in ``data_catalog`` (CSV data sets),
    all others in ``data-catalog-map`` (GeoJSON data sets).

    Args:
        catalog_index: Catalog number of the data set.

    Returns:
        The ``target_url`` value of the first matching row.
    """
    csv_query = 'select target_url from data_catalog where catalog_index = {}'
    geojson_query = 'select target_url from \"data-catalog-map\" where catalog_index = {}'
    query_template = csv_query if catalog_index < 20000 else geojson_query
    frame = pd.read_sql_query(
        query_template.format(catalog_index),
        database_io.get_postgres_engine())
    urls = frame['target_url']
    return urls[0]