def update_service_balance(conn):
    """Recompute ``service_balance`` for every neighbourhood with business data.

    Each row of ``neighbourhoods JOIN businessstats`` is read, its five
    business counts are combined into a weighted average (education 5,
    food 4, retail 3, recreation 2, health 1, divided by the total count)
    and the result is written back to ``neighbourhoods.service_balance``.
    Areas whose counts add up to zero are left untouched.

    Args:
        conn: database connection handed straight to ``pgquery``/``pgexec``.
    """
    select_stmt = """SELECT * FROM neighbourhoods JOIN businessstats USING (area_id);"""
    # Values are bound by the driver (same pyformat style as the insert
    # statements in this file) instead of being formatted into the SQL text.
    update_stmt = """UPDATE neighbourhoods SET service_balance = %(service_balance)s WHERE area_id = %(area_id)s"""

    for row in pgquery(conn, select_stmt, None):
        area_id = row[0]

        # The counts are read positionally from the END of the joined row.
        # NOTE(review): assumes the last five columns are retail, food,
        # health, education, recreation in that order - confirm against the
        # businessstats schema.
        recreation = fix_NULL(row[-1])
        education = fix_NULL(row[-2])
        health = fix_NULL(row[-3])
        food = fix_NULL(row[-4])
        retail = fix_NULL(row[-5])

        total = recreation + education + health + food + retail
        if total == 0:
            # No businesses recorded: skip to avoid dividing by zero.
            continue

        service_balance = (
            education * 5 + food * 4 + retail * 3 + recreation * 2 + health
        ) / total

        pgexec(
            conn,
            update_stmt,
            {"service_balance": service_balance, "area_id": area_id},
            "Set service balance of area " + str(area_id)
            + " to " + str(service_balance),
        )
def create_table(file, queries):
    """Rebuild a table from ``<file>.csv`` and load every CSV row into it.

    The table named ``file`` is dropped if present, recreated with
    ``queries[0]`` and then filled by running ``queries[1]`` once per CSV
    row, with the row dictionary as the statement arguments. All statements
    run on the module-level ``conn``.

    Args:
        file: table name, also the CSV file name without the ``.csv`` suffix.
        queries: sequence whose first item is the CREATE TABLE statement and
            whose second item is the INSERT statement.
    """
    # Close the file deterministically; newline='' is what the csv module
    # asks for so that quoted fields containing line breaks parse correctly.
    with open(file + '.csv', newline='') as csv_file:
        data = list(csv.DictReader(csv_file))
    clean_empty_string(data)

    # to reset table
    pgexec(conn, "DROP TABLE IF EXISTS " + file, None, "Reset Table " + file)

    # create table using schema
    pgexec(conn, queries[0], None, "Create Table " + file)

    # insert values to table
    insert_stmt = queries[1]
    for row in data:
        pgexec(conn, insert_stmt, row, "row inserted")
from pg_tools import pgconnect, pgexec, pgquery

# Extensions enabled before anything else runs; IF NOT EXISTS keeps the
# statement safe to re-run.
cmd = (
    " create extension IF NOT EXISTS postgis;"
    " create extension IF NOT EXISTS fuzzystrmatch;"
    " create extension IF NOT EXISTS postgis_tiger_geocoder;"
    " create extension IF NOT EXISTS postgis_topology; "
)

# Single connection shared by the rest of the script.
conn = pgconnect()
pgexec(conn, cmd, None, "enabling postgis")
area_id INT NOT NULL PRIMARY KEY, area_name VARCHAR(70), parent_area_id INT )""" statistical_areas_insert_stmt = """INSERT INTO StatisticalAreas( area_id, area_name, parent_area_id) VALUES ( %(area_id)s, %(area_name)s, %(parent_area_id)s)""" # =====QUERIES END===== #force drop and create schema pgexec(conn, "DROP SCHEMA IF EXISTS cyclability CASCADE;", None, "drop shema") pgexec(conn, "CREATE SCHEMA cyclability;", None, "create schema") # Queries stored in key value pair, with file name as key, and queries stored as lists queries = { 'BusinessStats': [business_stats_schema, business_stats_insert_stmt], 'BikeSharingPods': [bike_pods_schema, bike_pods_insert_stmt], 'CensusStats': [census_stats_schema, census_stats_insert_stmt], 'Neighbourhoods': [neighbourhoods_schema, neighbourhoods_insert_stmt], 'StatisticalAreas': [statistical_areas_schema, statistical_areas_insert_stmt] } # Loop through each file and queries to create a table for k, v in queries.items(): create_table(k, v)
None) avg_pd = float(avg_pd[0][0]) avg_dd = float(avg_dd[0][0]) avg_sb = float(avg_sb[0][0]) avg_bd = float(avg_bd[0][0]) std_pd = float(std_pd[0][0]) std_dd = float(std_dd[0][0]) std_sb = float(std_sb[0][0]) std_bd = float(std_bd[0][0]) print(avg_pd) result = pgquery(conn, "SELECT * FROM neighbourhoods", None) def additional_score(): return 0 for row in result: z_score = (float(fix_NULL(row[-3])) - avg_bd) / std_bd z_score += (float(fix_NULL(row[-4])) - avg_sb) / std_sb z_score += (float(fix_NULL(row[-5])) - avg_dd) / std_dd z_score += (float(fix_NULL(row[-6])) - avg_pd) / std_pd z_score += additional_score() query = "UPDATE neighbourhoods SET cyclability_score = {} WHERE area_id = {}".format( z_score, row[0]) pgexec(conn, query, None, "Updating score for {}".format(row[1]))
def create_column(conn, col_name, table_name, type):
    """Drop ``col_name`` from ``table_name`` if it exists, then add it again.

    Both steps are sent as one ALTER TABLE statement, so any existing
    values in the column are discarded.

    Args:
        conn: database connection passed to ``pgexec``.
        col_name: name of the column to (re)create.
        table_name: table that owns the column.
        type: SQL type text placed after the column name in ADD COLUMN.
    """
    ddl_template = """ALTER TABLE {} DROP COLUMN IF EXISTS {}, ADD COLUMN {} {};"""
    ddl = ddl_template.format(table_name, col_name, col_name, type)
    log_message = "Created Column " + col_name + " on " + table_name
    pgexec(conn, ddl, None, log_message)
def update_column_with_another(conn, col_name, table_name, value):
    """Set ``col_name`` on every row of ``table_name`` to ``COALESCE<value>``.

    ``value`` is placed directly after the word COALESCE with nothing in
    between, so it has to supply its own parenthesised argument list.

    Args:
        conn: database connection passed to ``pgexec``.
        col_name: column being assigned.
        table_name: table being updated.
        value: text appended to COALESCE, also echoed in the log message.
    """
    update_template = """UPDATE {} SET {} = COALESCE{}"""
    statement = update_template.format(table_name, col_name, value)
    log_message = "Update " + col_name + " on " + table_name + " with " + value
    pgexec(conn, statement, None, log_message)
# Gather the ids of the areas already known; the set gives constant-time
# membership tests inside the shape loop below.
area_ids.extend(r[0] for r in areas)
wanted_ids = set(area_ids)

row = {}
for shape_idx in range(len(shapes)):
    record = sf.record(shape_idx)
    if int(record[0]) not in wanted_ids:
        continue

    shape = sf.shape(shape_idx)

    # prepare the polygon data
    # this is a bit complex with our dataset as it has complex polygons,
    # some with multiple parts...
    # A point whose 1-based position equals a part start index is the last
    # point of its ring, so the ring is closed and a new one opened there.
    part_starts = set(shape.parts)
    pieces = ["POLYGON(("]
    for point_count, (x, y) in enumerate(shape.points, start=1):
        separator = "),(" if point_count in part_starts else ","
        pieces.append("%s %s%s" % (x, y, separator))

    row['area_id'] = record[0]
    # properly end the polygon string: the trailing comma becomes "))"
    row['geom'] = re.sub(r",$", "))", "".join(pieces))

    # finally: send the row to the table
    pgexec(conn, update_stmt, args=row, msg="inserted " + str(record[2]))

index_command = "CREATE INDEX area_idx ON neighbourhoods USING GIST (geom);"
pgexec(conn, index_command, None, "Created spatial index")