Example #1
0
def upload_model(args, summary):
    '''
    Upload a trained model's weights to S3 and record its metrics in the
    models table, skipping the upload if a better model (by f1) already
    exists under the same name.

    Arguments:
        args : argparse.Namespace - must provide model_class (with a
            zip_weights method) and a weights path
        summary : dict - evaluation summary; must contain 'name', 'id',
            'precision', 'recall', 'threshold' and 'f1'
    '''
    with aigh_conn.cursor() as cur:
        cur.execute("SELECT MAX(f1) FROM models WHERE name=%s", (summary['name'],))
        best_score, = cur.fetchone()
        if best_score and best_score > summary['f1']:
            print('Not uploading because, a model already exists with f1 %f' % best_score)
            return

    zip_file = args.model_class.zip_weights(args.weights, base_dir='weights')
    s3 = boto3.resource('s3')
    key = os.path.join('building-detection', os.path.basename(zip_file))

    qargs = {k : summary[k] for k in ['name', 'id', 'precision', 'recall', 'threshold', 'f1']}
    qargs['instance'] = '/'.join(args.weights.split('/')[-2:])
    qargs['s3_loc'] = os.path.join('s3://aigh-deep-learning-models/', key)

    # NOTE(review): the original opened a second psycopg2 connection here
    # (psycopg2.connect(...)) that was never used or closed — every query in
    # this function goes through aigh_conn — so the leaked connection has
    # been removed.

    with aigh_conn.cursor() as cur:
        cur.execute("""
            INSERT INTO models(name, instance, id, tested_on, precision, recall, threshold, s3_loc, f1)
            VALUES (%(name)s, %(instance)s, %(id)s, now(), %(precision)s, %(recall)s, %(threshold)s, %(s3_loc)s, %(f1)s)
        """, qargs)

        # Bundle the evaluation summary into the zip so the archive is
        # self-describing when downloaded later (see get_best_model).
        # Use a context manager so the file handle is closed promptly.
        with open('.summary.json', 'w') as fh:
            json.dump(summary, fh)
        with ZipFile(zip_file, 'a') as z:
            z.write('.summary.json', '%s/description.json' % qargs['id'])

        s3.meta.client.upload_file(zip_file, 'aigh-deep-learning-models', key)
        aigh_conn.commit()
Example #2
0
def get_best_model():
    '''
    Select the best model based on f-score.

    Fetches the top row from the models table (ordered by f1), downloads and
    extracts the weights from S3 if they are not already cached under
    weights/, then instantiates the model class named in the bundled
    description.json.

    Returns:
        (model, id, threshold) - the instantiated model, its id (UUID string
        with dashes stripped), and the prediction threshold to use
    '''
    with conn.cursor() as cur:
        cur.execute("""
            SELECT name, instance, id, s3_loc
            FROM models
            ORDER BY f1 DESC
            LIMIT 1
        """)
        # model_id instead of `id` to avoid shadowing the builtin
        name, instance, model_id, s3_loc = cur.fetchone()
        model_id = model_id.replace('-', '')
        if not os.path.exists(os.path.join('weights', model_id)):
            # Download weights
            print('Downloading weights for %s (%s)' % (name, model_id))
            s3 = boto3.resource('s3')
            res = re.search('s3://([^/]*)/(.*)$', s3_loc)
            bucket, key = res.group(1), res.group(2)
            s3.Bucket(bucket).download_file(key, 'weights/%s.zip' % model_id)
            print('Extracting weights...')
            with zipfile.ZipFile('weights/%s.zip' % model_id, 'r') as z:
                z.extractall('weights')
            os.remove('weights/%s.zip' % model_id)

        # Close the description file instead of leaking the handle
        with open('weights/%s/description.json' % model_id) as fh:
            description = json.load(fh)
        module = importlib.import_module(description['name'])
        model_class = getattr(module, module.NAME)
        model = model_class(
            weights=os.path.join('weights', model_id, description['weights']))
        return model, model_id, description['threshold']
Example #3
0
def get_latest_version(country):
    '''
    Get the most recent version number for a particular country.  This provides
    a reasonable default if the user doesn't specify a version.

    Arguments:
        country : text - The country that we are checking versions for
    Returns:
        UUID - The version number
    '''
    with aigh_conn.cursor() as cur:
        # ORDER BY ts DESC so the newest row wins — the original sorted
        # ascending, which returned the OLDEST version despite the docstring.
        cur.execute(
            """
            SELECT DISTINCT ON (ts) version 
            FROM buildings.buildings 
            WHERE project=%s ORDER BY ts DESC LIMIT 1
        """, (country, ))
        return cur.fetchone()[0]
Example #4
0
def get_srid(geom, version):
    '''
    Pick a meter-based projection for clustering.

    PostGIS's DBSCAN measures distance in the units of the geometry's
    coordinate system.  Since we want the epsilon threshold expressed in
    meters, we ask PostGIS for the best meter-based SRID covering the
    buildings inside the catchment zone so that euclidean distance works.

    Arguments:
        geom : shapely.Geometry - Geometry of the catchment zone
        version : UUID - Version of the buildings we are clustering
    Returns:
        int - This is the SRID of the projection we should transform to
    '''
    catchment = shape(geom)
    query = """
            SELECT _ST_BestSRID(ST_Centroid(ST_ConvexHull(ST_Collect(geom))))
            FROM buildings.buildings 
            WHERE 
                version=%s AND 
                ST_Contains(ST_GeomFromText(%s, 4326), geom);
        """
    with aigh_conn.cursor() as cur:
        cur.execute(query, (version, catchment.wkt))
        (srid,) = cur.fetchone()
        return srid
Example #5
0
def generate_samples(model, country, threshold, N, ensemble=False):
    '''
    Main function to generate training samples.

    Pulls up to 1000 unprocessed images for `country`, runs the model over
    each one, and dumps up to N accepted samples under
    generated_training_data/<timestamp>/.  Every image is marked done=true
    whether or not it produced a sample.

    Arguments:
        model - the detection model passed through to process_file
        country : text - project name used to filter buildings.images
        threshold : float - prediction threshold passed to process_file
        N : int - number of samples to generate before stopping
        ensemble : bool - when True, apply the full set of augmentations
    '''
    read_cur, write_cur = conn.cursor(), conn.cursor()
    read_cur.execute(
        """
        SELECT filename, shifted FROM buildings.images
        WHERE project=%s AND (done=false OR done IS NULL)
        ORDER BY random()
        LIMIT 1000
    """, (country, ))

    # Only build the augmentation list when it will actually be used — the
    # original always built it, then threw it away for non-ensemble runs.
    if ensemble:
        augs = [
            noop,  # leave image unchanged
            partial(rotate, 180),  # flip it upside down
            mirror,  # mirror it
            distort,  # keep dimensions, but distort the color channels
            partial(crop, corner=0),  # crop the top left corner and stretch
            partial(crop, corner=1),  # crop the top right corner and stretch
            partial(crop, corner=2),  # crop the bottom left corner and stretch
            partial(crop, corner=3)  # crop the bottom right corner and stretch
        ]
    else:
        augs = [noop]

    TS = datetime.now().isoformat()

    for file, geom in read_cur:
        result = process_file(file, write_cur, augs, model, threshold,
                              ensemble)
        if result:
            features, roff, coff, img_data = result

            # NOTE(review): the original computed a lat/lon bounding box
            # (cropped_geom, via wkb.loads/geom.bounds/get_gsd) here but never
            # used it anywhere, so that dead code has been removed.

            features['properties'] = {
                'image': file,
                'roff': roff,
                'coff': coff
            }
            sample_name = os.path.join('generated_training_data', TS,
                                       'sample_%d' % N)
            dump(features, img_data, sample_name, plot=True)
            N -= 1
            tqdm.write(str(N))

        # Mark the image processed even when it produced no sample so it is
        # not picked up again by the next run
        write_cur.execute(
            "UPDATE buildings.images SET done=true WHERE filename=%s AND project=%s",
            (file, country))
        conn.commit()
        if N == 0: return
Example #6
0
def transfer(region, version):
    '''
    Transfer clusters and buildings for one catchment region from the aigh
    database into the atlas database for a given model version, replacing any
    previous rows for the same version/organization.

    Arguments:
        region : GeoJSON (Feature) - catchment zone; properties must include
            "organization_id" (and "project")
        version : UUID - version of the clusters/buildings to transfer
    '''
    geom = shape(region['geometry'])
    project = region['properties']['project']  # NOTE(review): read but never used below — confirm it can be removed
    org_id = region['properties']['organization_id']

    # Timestamp recorded as 'entered' on the new active version row
    TS = datetime.now()

    with aigh_conn.cursor() as aigh_cur, atlas_conn.cursor() as atlas_cur:
        print('Transfering clusters...')
        # '2********' is a DE-9IM pattern: match geometries whose interior
        # shares a 2-dimensional intersection with the region's interior
        aigh_cur.execute(
            """
            SELECT
                version,
                %s as organization_id,
                size as building_count,
                geom,
                ST_Centroid(geom) as centroid,
                site_dist as site_distance
            FROM clusters
            WHERE clusters.version=%s AND ST_Relate(ST_GeomFromText(%s, 4326), geom, '2********')
        """, (org_id, version, geom.wkt))

        # Clear out any previous rows for this version/organization before
        # re-inserting
        atlas_cur.execute(
            """
            DELETE FROM buildings 
            WHERE version=%s AND 
                ST_Relate(ST_GeomFromText(%s, 4326), geom, '2********');
            DELETE FROM building_clusters WHERE organization_id=%s AND version=%s;
            DELETE FROM active_building_clusters WHERE organization_id=%s AND version=%s;
        """, (version, geom.wkt, org_id, version, org_id, version))

        # Deactivate older versions for this organization, then register the
        # new one as active
        atlas_cur.execute(
            """
            UPDATE active_building_clusters SET active=false WHERE organization_id=%s;
            INSERT INTO active_building_clusters (version, organization_id, active, entered)
            VALUES (%s, %s, true, %s)
        """, (org_id, version, org_id, TS))

        # Build one multi-row VALUES list; mogrify safely escapes each tuple.
        # NOTE(review): psycopg2's mogrify returns bytes on Python 3, which
        # would break this str.join — this appears to assume Python 2; verify
        # before porting.
        args_str = ','.join(
            atlas_cur.mogrify("(%s,%s,%s,%s,%s,%s)", x) for x in aigh_cur)
        atlas_cur.execute("""
            INSERT INTO building_clusters (
                version, 
                organization_id, 
                building_count, 
                geom, 
                centroid, 
                site_distance
            ) VALUES %s
        """ % args_str)

    # Named (server-side) cursor so building rows stream from the server
    # instead of being loaded into memory all at once
    with aigh_conn.cursor(
            name='aigh') as aigh_cur, atlas_conn.cursor() as atlas_cur:
        print('Transfering buildings...')
        aigh_cur.execute(
            """
            SELECT geom, %s as version
            FROM buildings.buildings as b
            WHERE version=%s AND ST_Relate(ST_GeomFromText(%s, 4326), geom, '2********')
        """, (version, version, geom.wkt))

        # Copy across in batches of 2048 rows
        count = 0
        while True:
            rows = aigh_cur.fetchmany(2048)
            if len(rows) == 0:
                break
            args_str = ','.join(atlas_cur.mogrify("(%s,%s)", x) for x in rows)
            atlas_cur.execute(
                "INSERT INTO buildings (geom, version) VALUES %s" % args_str)
            count += len(rows)
            print('Inserted %d rows' % count)
    atlas_conn.commit()
Example #7
0
def cluster(region, version, epsilon):
    '''
    Cluster buildings together for a given region and insert them into the clusters table
    Arguments:
        region : GeoJSON (Feature) - GeoJSON object describing the geometry of the 
            catchment zone for the organization we are clustering for.  The properties
            field must contain a "project" field indicating which project in the 
            bulidings.buildings table that it belongs to.
        version : UUID - Version number of the model used to predict the buildings
        epsilon : float - Minimum distance for a point to be considered part of a cluster
    '''

    geom = shape(region['geometry'])
    project = region['properties']['project']

    # Meter-based SRID so epsilon can be expressed in meters
    srid = get_srid(geom, version)

    with aigh_conn.cursor() as cur:
        # Create the table or empty out any clusters with the same version ID.
        # site_dist is declared here because the UPDATE below (and transfer())
        # read it — the original CREATE omitted the column, which broke the
        # site-distance UPDATE on a freshly created table.
        cur.execute(
            """
            CREATE TABLE IF NOT EXISTS clusters(
                id serial, 
                project text, 
                size int, 
                geom geometry('geometry', 4326), 
                version uuid,
                site_dist float
            );
            DELETE FROM clusters WHERE ST_Relate(ST_GeomFromText(%s, 4326), geom, '2********') AND version=%s
        """, (geom.wkt, version))

        print('Clustering buildings...')
        # DBSCAN over buildings inside the catchment zone; each cluster is
        # stored as the convex hull of its member buildings
        cur.execute(
            """
            INSERT INTO clusters (project, size, geom, version)
            SELECT
                %s as project,
                COUNT(*) as size,
                ST_ConvexHull(ST_Collect(geom)) as geom,
                %s as version
            FROM (
                SELECT
                    ST_ClusterDBSCAN(ST_Transform(geom, %s), eps := %s, minpoints := 3) over () as cid,
                    geom
                FROM buildings.buildings
                WHERE "version"=%s AND ST_Contains(ST_GeomFromText(%s, 4326), geom)
            )clustering
            GROUP BY cid
        """, (project, version, srid, epsilon, version, geom.wkt))

        print('Computing nearest fixtures for each cluster...')
        # Qualify clusters.geom in ST_Relate — with both clusters and fixtures
        # in the FROM list, a bare "geom" is ambiguous and Postgres rejects it
        cur.execute(
            """
            UPDATE clusters SET site_dist=dist FROM(
                SELECT DISTINCT ON (clusters.id)
                    clusters.id as cluster_id,
                    ST_Distance(clusters.geom::geography, fixtures.geom::geography) as dist
                FROM clusters, fixtures
                WHERE clusters.version=%s AND ST_Relate(ST_GeomFromText(%s, 4326), clusters.geom, '2********')
                ORDER BY clusters.id, dist
            )q WHERE id=cluster_id;
        """, (version, geom.wkt))

    aigh_conn.commit()