def upload_model(args, summary):
    """Upload a trained model's weights to S3 and record its metrics in the models table.

    Arguments:
        args    : namespace with `model_class` (provides zip_weights) and
                  `weights` (path to the weight files on disk).
        summary : dict with at least the keys
                  'name', 'id', 'precision', 'recall', 'threshold', 'f1'.

    Skips the upload entirely when a model with the same name already has a
    higher f1 score in the models table.
    """
    with aigh_conn.cursor() as cur:
        cur.execute("SELECT MAX(f1) FROM models WHERE name=%s", (summary['name'],))
        best_score, = cur.fetchone()
        # MAX(f1) is NULL (falsy) when no model of this name exists yet.
        if best_score and best_score > summary['f1']:
            print('Not uploading because, a model already exists with f1 %f' % best_score)
            return

    zip_file = args.model_class.zip_weights(args.weights, base_dir='weights')
    s3 = boto3.resource('s3')
    key = os.path.join('building-detection', os.path.basename(zip_file))

    qargs = {k: summary[k] for k in ['name', 'id', 'precision', 'recall', 'threshold', 'f1']}
    # The last two path components of the weights path identify this training instance.
    qargs['instance'] = '/'.join(args.weights.split('/')[-2:])
    qargs['s3_loc'] = os.path.join('s3://aigh-deep-learning-models/', key)

    # BUG FIX: the original opened a second psycopg2 connection here that was
    # never used (every query below goes through the module-level `aigh_conn`)
    # and never closed — a pure resource leak. Removed.

    with aigh_conn.cursor() as cur:
        cur.execute("""
        INSERT INTO models(name, instance, id, tested_on, precision, recall, threshold, s3_loc, f1)
        VALUES (%(name)s, %(instance)s, %(id)s, now(), %(precision)s, %(recall)s, %(threshold)s, %(s3_loc)s, %(f1)s)
        """, qargs)

    # BUG FIX: use a context manager so the summary file is flushed and closed
    # before it is added to the zip (the original leaked the open handle).
    with open('.summary.json', 'w') as f:
        json.dump(summary, f)
    with ZipFile(zip_file, 'a') as z:
        z.write('.summary.json', '%s/description.json' % qargs['id'])

    s3.meta.client.upload_file(zip_file, 'aigh-deep-learning-models', key)
    aigh_conn.commit()
def generate_samples(model, country, threshold, N, ensemble=False):
    """Generate up to N training samples from not-yet-processed images of a project.

    Arguments:
        model     : model passed through to process_file for prediction
        country   : project identifier in buildings.images
        threshold : detection threshold passed through to process_file
        N         : number of samples to generate; returns once N samples
                    have been written
        ensemble  : when True, apply the full set of test-time augmentations;
                    otherwise only the identity augmentation is used

    Side effects: writes samples under generated_training_data/<timestamp>/,
    marks each processed image done in buildings.images, committing as it goes.
    """
    # BUG FIX: with N <= 0 the original never hit its `N == 0` early-return
    # (N is only ever decremented) and would churn through the whole batch.
    if N <= 0:
        return

    read_cur, write_cur = conn.cursor(), conn.cursor()
    # Grab a random batch of this project's images that are not done yet.
    read_cur.execute("""
    SELECT filename, shifted FROM buildings.images
    WHERE project=%s AND (done=false OR done IS NULL)
    ORDER BY random()
    LIMIT 1000
    """, (country, ))

    augs = [
        noop,                     # leave image unchanged
        partial(rotate, 180),     # flip it upside down
        mirror,                   # mirror it
        distort,                  # keep dimensions, but distort the color channels
        partial(crop, corner=0),  # crop the top left corner and stretch
        partial(crop, corner=1),  # crop the top right corner and stretch
        partial(crop, corner=2),  # crop the bottom left corner and stretch
        partial(crop, corner=3)   # crop the bottom right corner and stretch
    ]
    if not ensemble:
        augs = [noop]

    # One timestamped directory per invocation groups all samples of this run.
    TS = datetime.now().isoformat()
    for filename, geom in read_cur:
        result = process_file(filename, write_cur, augs, model, threshold, ensemble)
        if result:
            features, roff, coff, img_data = result
            geom = wkb.loads(geom, hex=True)
            # Only the southern latitude is needed to estimate ground sample
            # distance; images we've gathered are from zoom level 18 on Bing Maps.
            # NOTE(review): the original also built a lat/lon crop box from these
            # bounds (`cropped_geom`) but never used or persisted it — removed as
            # dead code.
            minlat = geom.bounds[1]
            gsd = get_gsd(minlat, 18)

            features['properties'] = {
                'image': filename,
                'roff': roff,
                'coff': coff
            }
            sample_name = os.path.join('generated_training_data', TS, 'sample_%d' % N)
            dump(features, img_data, sample_name, plot=True)
            N -= 1
            tqdm.write(str(N))
            # Mark the source image as processed and commit immediately so an
            # interrupted run does not re-sample the same image.
            write_cur.execute(
                "UPDATE buildings.images SET done=true WHERE filename=%s AND project=%s",
                (filename, country))
            conn.commit()
            if N <= 0:
                return
def cluster(region, version, epsilon):
    '''
    Cluster buildings together for a given region and insert them into the clusters table

    Arguments:
        region : GeoJSON (Feature) - GeoJSON object describing the geometry of the
            catchment zone for the organization we are clustering for. The properties
            field must contain a "project" field indicating which project in the
            buildings.buildings table that it belongs to.
        version : UUID - Version number of the model used to predict the buildings
        epsilon : float - Minimum distance for a point to be considered part of a cluster
    '''
    geom = shape(region['geometry'])
    project = region['properties']['project']
    srid = get_srid(geom, version)

    with aigh_conn.cursor() as cur:
        # Create the table or empty out any clusters with the same version ID.
        # BUG FIX: the original CREATE TABLE omitted the site_dist column that
        # the UPDATE at the bottom writes, so a fresh database failed there.
        cur.execute("""
        CREATE TABLE IF NOT EXISTS clusters(
            id serial,
            project text,
            size int,
            geom geometry('geometry', 4326),
            site_dist double precision,
            version uuid
        );
        DELETE FROM clusters WHERE ST_Relate(ST_GeomFromText(%s, 4326), geom, '2********') AND version=%s
        """, (geom.wkt, version))

        print('Clustering buildings...')
        # DBSCAN over this version's buildings inside the region; each cluster
        # is stored as the convex hull of its member buildings.
        cur.execute("""
        INSERT INTO clusters (project, size, geom, version)
        SELECT %s as project, COUNT(*) as size, ST_ConvexHull(ST_Collect(geom)) as geom, %s as version
        FROM (
            SELECT ST_ClusterDBSCAN(ST_Transform(geom, %s), eps := %s, minpoints := 3) over () as cid, geom
            FROM buildings.buildings
            WHERE "version"=%s AND ST_Contains(ST_GeomFromText(%s, 4326), geom)
        )clustering GROUP BY cid
        """, (project, version, srid, epsilon, version, geom.wkt))

        print('Computing nearest fixtures for each cluster...')
        # BUG FIX: `geom` in the ST_Relate below was ambiguous between the
        # clusters and fixtures tables in this join (Postgres rejects it);
        # qualified as clusters.geom to mirror the DELETE's region filter.
        cur.execute("""
        UPDATE clusters SET site_dist=dist
        FROM(
            SELECT DISTINCT ON (clusters.id)
                clusters.id as cluster_id,
                ST_Distance(clusters.geom::geography, fixtures.geom::geography) as dist
            FROM clusters, fixtures
            WHERE clusters.version=%s AND ST_Relate(ST_GeomFromText(%s, 4326), clusters.geom, '2********')
            ORDER BY clusters.id, dist
        )q WHERE id=cluster_id;
        """, (version, geom.wkt))

    aigh_conn.commit()