import importlib
import json
import os
import re
import zipfile
from datetime import datetime
from functools import partial
from zipfile import ZipFile

import boto3
from shapely import wkb
from shapely.geometry import box, shape
from tqdm import tqdm

# The database connections (aigh_conn, conn, atlas_conn) and the image helpers
# (noop, rotate, mirror, distort, crop, process_file, get_gsd, dump) are
# assumed to be defined elsewhere in the project.


def upload_model(args, summary):
    '''Upload a model's weights to S3 and register it in the models table,
    unless a better-scoring model with the same name already exists.'''
    with aigh_conn.cursor() as cur:
        cur.execute("SELECT MAX(f1) FROM models WHERE name=%s", (summary['name'],))
        best_score, = cur.fetchone()
        if best_score and best_score > summary['f1']:
            print('Not uploading because a model already exists with f1 %f' % best_score)
            return

    zip_file = args.model_class.zip_weights(args.weights, base_dir='weights')
    s3 = boto3.resource('s3')
    key = os.path.join('building-detection', os.path.basename(zip_file))

    qargs = {k: summary[k] for k in ['name', 'id', 'precision', 'recall', 'threshold', 'f1']}
    qargs['instance'] = '/'.join(args.weights.split('/')[-2:])
    qargs['s3_loc'] = os.path.join('s3://aigh-deep-learning-models/', key)

    with aigh_conn.cursor() as cur:
        cur.execute("""
            INSERT INTO models(name, instance, id, tested_on, precision, recall, threshold, s3_loc, f1)
            VALUES (%(name)s, %(instance)s, %(id)s, now(), %(precision)s, %(recall)s, %(threshold)s, %(s3_loc)s, %(f1)s)
        """, qargs)

    # Bundle the summary into the zip archive before uploading
    with open('.summary.json', 'w') as f:
        json.dump(summary, f)
    with ZipFile(zip_file, 'a') as z:
        z.write('.summary.json', '%s/description.json' % qargs['id'])

    s3.meta.client.upload_file(zip_file, 'aigh-deep-learning-models', key)
    aigh_conn.commit()
def get_best_model():
    '''Select the best model based on f-score.'''
    with conn.cursor() as cur:
        cur.execute("""
            SELECT name, instance, id, s3_loc
            FROM models
            ORDER BY f1 DESC
            LIMIT 1
        """)
        name, instance, id, s3_loc = cur.fetchone()

    id = id.replace('-', '')
    if not os.path.exists(os.path.join('weights', id)):
        # Download and unpack the weights from S3
        print('Downloading weights for %s (%s)' % (name, id))
        s3 = boto3.resource('s3')
        res = re.search('s3://([^/]*)/(.*)$', s3_loc)
        bucket, key = res.group(1), res.group(2)
        s3.Bucket(bucket).download_file(key, 'weights/%s.zip' % id)

        print('Extracting weights...')
        with zipfile.ZipFile('weights/%s.zip' % id, 'r') as z:
            z.extractall('weights')
        os.remove('weights/%s.zip' % id)

    with open('weights/%s/description.json' % id) as f:
        description = json.load(f)

    # Import the model's module by name and look up its class via the
    # module-level NAME attribute
    module = importlib.import_module(description['name'])
    model_class = getattr(module, module.NAME)
    model = model_class(weights=os.path.join('weights', id, description['weights']))
    return model, id, description['threshold']
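# Hedged usage sketch (not part of the original pipeline): fetch the
# top-scoring model from the registry and use it to generate training samples
# with generate_samples, defined below. The country name and sample count are
# illustrative placeholders, not project defaults.
def _example_sample_generation():
    model, model_id, threshold = get_best_model()
    generate_samples(model, 'example-country', threshold, N=100)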
def get_latest_version(country):
    '''
    Get the most recent version number for a particular country. This provides
    a reasonable default if the user doesn't specify a version.

    Arguments:
        country : text - The country that we are checking versions for

    Returns:
        UUID - The version number
    '''
    with aigh_conn.cursor() as cur:
        # Order by timestamp descending so the newest version comes first
        cur.execute("""
            SELECT DISTINCT ON (ts) version
            FROM buildings.buildings
            WHERE project=%s
            ORDER BY ts DESC
            LIMIT 1
        """, (country,))
        return cur.fetchone()[0]
def get_srid(geom, version):
    '''
    The PostGIS function for DBSCAN clustering operates on euclidean distance.
    We'd like to specify the distance threshold (epsilon) in meters, so we
    convert the geometries to a meter-based projection in which euclidean
    distance is meaningful.

    Arguments:
        geom : shapely.Geometry - Geometry of the catchment zone
        version : UUID - Version of the buildings we are clustering

    Returns:
        int - The SRID of the projection we should transform to
    '''
    geom = shape(geom)
    with aigh_conn.cursor() as cur:
        cur.execute("""
            SELECT _ST_BestSRID(ST_Centroid(ST_ConvexHull(ST_Collect(geom))))
            FROM buildings.buildings
            WHERE version=%s AND ST_Contains(ST_GeomFromText(%s, 4326), geom);
        """, (version, geom.wkt))
        return cur.fetchone()[0]
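# Hedged sketch of how the two lookups above compose. `region` is assumed to
# be a GeoJSON Feature shaped like the one cluster() receives below.
def _example_srid_lookup(region):
    version = get_latest_version(region['properties']['project'])
    srid = get_srid(region['geometry'], version)
    print('Latest version %s projects best to SRID %d' % (version, srid))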
def generate_samples(model, country, threshold, N, ensemble=False):
    '''Main function to generate training samples.'''
    read_cur, write_cur = conn.cursor(), conn.cursor()
    read_cur.execute("""
        SELECT filename, shifted FROM buildings.images
        WHERE project=%s AND (done=false OR done IS NULL)
        ORDER BY random()
        LIMIT 1000
    """, (country,))

    augs = [
        noop,                     # leave image unchanged
        partial(rotate, 180),     # flip it upside down
        mirror,                   # mirror it
        distort,                  # keep dimensions, but distort the color channels
        partial(crop, corner=0),  # crop the top left corner and stretch
        partial(crop, corner=1),  # crop the top right corner and stretch
        partial(crop, corner=2),  # crop the bottom left corner and stretch
        partial(crop, corner=3),  # crop the bottom right corner and stretch
    ]
    if not ensemble:
        augs = [noop]

    TS = datetime.now().isoformat()
    for file, geom in read_cur:
        result = process_file(file, write_cur, augs, model, threshold, ensemble)
        if result:
            features, roff, coff, img_data = result
            # `shifted` holds the image footprint as hex-encoded WKB
            geom = wkb.loads(geom, hex=True)
            minlon, minlat, maxlon, maxlat = geom.bounds
            # Images we've gathered are from zoom level 18 on Bing Maps
            gsd = get_gsd(minlat, 18)

            # Compute the lat/lon bounds of the image sample
            cropped_geom = box(
                minlon + coff * gsd,
                minlat + roff * gsd,
                minlon + (coff + img_data.shape[1]) * gsd,
                minlat + (roff + img_data.shape[0]) * gsd)

            features['properties'] = {'image': file, 'roff': roff, 'coff': coff}
            sample_name = os.path.join('generated_training_data', TS, 'sample_%d' % N)
            dump(features, img_data, sample_name, plot=True)
            N -= 1
            tqdm.write(str(N))

        write_cur.execute(
            "UPDATE buildings.images SET done=true WHERE filename=%s AND project=%s",
            (file, country))
        conn.commit()
        if N == 0:
            return
def transfer(region, version):
    '''Copy clusters and buildings for a region from the aigh database into
    the atlas database.'''
    geom = shape(region['geometry'])
    project = region['properties']['project']
    org_id = region['properties']['organization_id']
    TS = datetime.now()

    with aigh_conn.cursor() as aigh_cur, atlas_conn.cursor() as atlas_cur:
        print('Transferring clusters...')
        aigh_cur.execute("""
            SELECT version, %s as organization_id, size as building_count, geom,
                   ST_Centroid(geom) as centroid, site_dist as site_distance
            FROM clusters
            WHERE clusters.version=%s
              AND ST_Relate(ST_GeomFromText(%s, 4326), geom, '2********')
        """, (org_id, version, geom.wkt))

        # Clear out any previous copies of this version for the organization
        atlas_cur.execute("""
            DELETE FROM buildings WHERE version=%s AND ST_Relate(ST_GeomFromText(%s, 4326), geom, '2********');
            DELETE FROM building_clusters WHERE organization_id=%s AND version=%s;
            DELETE FROM active_building_clusters WHERE organization_id=%s AND version=%s;
        """, (version, geom.wkt, org_id, version, org_id, version))

        atlas_cur.execute("""
            UPDATE active_building_clusters SET active=false WHERE organization_id=%s;
            INSERT INTO active_building_clusters (version, organization_id, active, entered)
            VALUES (%s, %s, true, %s)
        """, (org_id, version, org_id, TS))

        # mogrify returns bytes under Python 3, so decode before joining
        args_str = ','.join(
            atlas_cur.mogrify("(%s,%s,%s,%s,%s,%s)", x).decode('utf-8')
            for x in aigh_cur)
        atlas_cur.execute("""
            INSERT INTO building_clusters (
                version, organization_id, building_count, geom, centroid, site_distance
            )
            VALUES %s
        """ % args_str)

    # A named (server-side) cursor streams rows instead of loading them all at once
    with aigh_conn.cursor(name='aigh') as aigh_cur, atlas_conn.cursor() as atlas_cur:
        print('Transferring buildings...')
        aigh_cur.execute("""
            SELECT geom, %s as version
            FROM buildings.buildings as b
            WHERE version=%s AND ST_Relate(ST_GeomFromText(%s, 4326), geom, '2********')
        """, (version, version, geom.wkt))

        count = 0
        while True:
            rows = aigh_cur.fetchmany(2048)
            if len(rows) == 0:
                break
            args_str = ','.join(
                atlas_cur.mogrify("(%s,%s)", x).decode('utf-8') for x in rows)
            atlas_cur.execute(
                "INSERT INTO buildings (geom, version) VALUES %s" % args_str)
            count += len(rows)
            print('Inserted %d rows' % count)

    atlas_conn.commit()
def cluster(region, version, epsilon):
    '''
    Cluster buildings together for a given region and insert them into the
    clusters table.

    Arguments:
        region : GeoJSON (Feature) - GeoJSON object describing the geometry of
            the catchment zone for the organization we are clustering for. The
            properties field must contain a "project" field indicating which
            project in the buildings.buildings table it belongs to.
        version : UUID - Version number of the model used to predict the buildings
        epsilon : float - DBSCAN distance threshold in meters: the maximum
            distance between two buildings for them to be assigned to the
            same cluster
    '''
    geom = shape(region['geometry'])
    project = region['properties']['project']
    srid = get_srid(geom, version)

    with aigh_conn.cursor() as cur:
        # Create the table or empty out any clusters with the same version ID.
        # site_dist holds the distance (in meters) to the nearest fixture and
        # is populated by the UPDATE below.
        cur.execute("""
            CREATE TABLE IF NOT EXISTS clusters(
                id serial,
                project text,
                size int,
                geom geometry(Geometry, 4326),
                version uuid,
                site_dist double precision
            );
            DELETE FROM clusters
            WHERE ST_Relate(ST_GeomFromText(%s, 4326), geom, '2********') AND version=%s
        """, (geom.wkt, version))

        print('Clustering buildings...')
        # Note: buildings that DBSCAN labels as noise receive a NULL cid and
        # end up grouped into a single row by GROUP BY cid
        cur.execute("""
            INSERT INTO clusters (project, size, geom, version)
            SELECT %s as project, COUNT(*) as size,
                   ST_ConvexHull(ST_Collect(geom)) as geom, %s as version
            FROM (
                SELECT ST_ClusterDBSCAN(ST_Transform(geom, %s), eps := %s, minpoints := 3) over () as cid, geom
                FROM buildings.buildings
                WHERE "version"=%s AND ST_Contains(ST_GeomFromText(%s, 4326), geom)
            ) clustering
            GROUP BY cid
        """, (project, version, srid, epsilon, version, geom.wkt))

        print('Computing nearest fixtures for each cluster...')
        cur.execute("""
            UPDATE clusters SET site_dist=dist
            FROM (
                SELECT DISTINCT ON (clusters.id)
                       clusters.id as cluster_id,
                       ST_Distance(clusters.geom::geography, fixtures.geom::geography) as dist
                FROM clusters, fixtures
                WHERE clusters.version=%s
                  AND ST_Relate(ST_GeomFromText(%s, 4326), clusters.geom, '2********')
                ORDER BY clusters.id, dist
            ) q
            WHERE id=cluster_id;
        """, (version, geom.wkt))
    aigh_conn.commit()
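# Hedged end-to-end sketch of the ordering the functions above imply: cluster
# the buildings for a region, then copy the clusters and buildings into the
# atlas database. The 25-meter epsilon is an illustrative value, not a
# project default.
def _example_cluster_and_transfer(region):
    version = get_latest_version(region['properties']['project'])
    cluster(region, version, epsilon=25.0)
    transfer(region, version)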