Ejemplo n.º 1
0
def upload_model(args, summary):
    """Zip a model's weights, upload them to S3 and record them in ``models``.

    Nothing is uploaded when the ``models`` table already holds a row with
    the same name and a strictly higher f1 score.

    Arguments:
        args : object exposing ``model_class`` (its
            ``zip_weights(weights, base_dir=...)`` produces the archive) and
            ``weights`` (a '/'-separated path; its last two components are
            stored as the ``instance`` column).
        summary : dict - must contain the keys 'name', 'id', 'precision',
            'recall', 'threshold' and 'f1'.  The whole dict is also written
            to ``<id>/description.json`` inside the archive.

    Side effects:
        Writes ``.summary.json`` in the current working directory, appends to
        the zip archive, uploads it to the ``aigh-deep-learning-models``
        bucket and commits the INSERT on ``aigh_conn``.  If anything fails
        after the INSERT was issued, the transaction is rolled back and the
        exception is re-raised.
    """
    with aigh_conn.cursor() as cur:
        cur.execute("SELECT MAX(f1) FROM models WHERE name=%s", (summary['name'],))
        best_score, = cur.fetchone()
        # MAX() yields NULL (None) when no model with this name exists yet.
        if best_score is not None and best_score > summary['f1']:
            print('Not uploading because, a model already exists with f1 %f' % best_score)
            return

    zip_file = args.model_class.zip_weights(args.weights, base_dir='weights')
    s3 = boto3.resource('s3')
    key = os.path.join('building-detection', os.path.basename(zip_file))

    qargs = {k: summary[k] for k in ['name', 'id', 'precision', 'recall', 'threshold', 'f1']}
    qargs['instance'] = '/'.join(args.weights.split('/')[-2:])
    qargs['s3_loc'] = os.path.join('s3://aigh-deep-learning-models/', key)

    try:
        with aigh_conn.cursor() as cur:
            cur.execute("""
                INSERT INTO models(name, instance, id, tested_on, precision, recall, threshold, s3_loc, f1)
                VALUES (%(name)s, %(instance)s, %(id)s, now(), %(precision)s, %(recall)s, %(threshold)s, %(s3_loc)s, %(f1)s)
            """, qargs)

        # Close the file before zipping it so the JSON is fully flushed to disk.
        with open('.summary.json', 'w') as summary_file:
            json.dump(summary, summary_file)
        with ZipFile(zip_file, 'a') as z:
            z.write('.summary.json', '%s/description.json' % qargs['id'])

        s3.meta.client.upload_file(zip_file, 'aigh-deep-learning-models', key)
    except Exception:
        # Do not leave the pending INSERT on the shared connection, where a
        # later, unrelated commit would persist a row for a failed upload.
        aigh_conn.rollback()
        raise

    # Only commit once the archive is actually in S3.
    aigh_conn.commit()
Ejemplo n.º 2
0
def generate_samples(model, country, threshold, N, ensemble=False):
    '''
    Main function to generate training samples

    Picks up to 1000 random, not-yet-done images of a project from
    buildings.images, runs each through process_file and dumps every
    successful result under generated_training_data/<timestamp>/sample_<N>.
    Each visited image is marked done=true and committed immediately, whether
    or not it produced a sample.

    Arguments:
        model : passed through unchanged to process_file
        country : value matched against the "project" column of buildings.images
        threshold : passed through unchanged to process_file
        N : int - number of samples to generate; counts down by one per
            sample and the function returns once it reaches zero.  Nothing is
            done when N is zero or negative.
        ensemble : bool - when true, the full list of augmentations is handed
            to process_file; otherwise only the no-op augmentation is used
    '''
    if N <= 0:
        return

    augs = [
        noop,  # leave image unchanged
        partial(rotate, 180),  # flip it upside down
        mirror,  # mirror it
        distort,  # keep dimensions, but distort the color channels
        partial(crop, corner=0),  # crop the top left corner and stretch
        partial(crop, corner=1),  # crop the top right corner and stretch
        partial(crop, corner=2),  # crop the bottom left corner and stretch
        partial(crop, corner=3)  # crop the bottom right corner and stretch
    ]

    if not ensemble:
        augs = [noop]

    TS = datetime.now().isoformat()

    read_cur, write_cur = conn.cursor(), conn.cursor()
    try:
        read_cur.execute(
            """
            SELECT filename, shifted FROM buildings.images
            WHERE project=%s AND (done=false OR done IS NULL)
            ORDER BY random()
            LIMIT 1000
        """, (country, ))

        for filename, shifted_wkb in read_cur:
            result = process_file(filename, write_cur, augs, model, threshold,
                                  ensemble)
            if result:
                features, roff, coff, img_data = result

                geom = wkb.loads(shifted_wkb, hex=True)
                minlon, minlat, maxlon, maxlat = geom.bounds
                gsd = get_gsd(
                    minlat, 18
                )  # images we've gathered are from zoom level 18 on Bing Maps

                # Compute the lat/lon bounds of the image sample
                # NOTE(review): cropped_geom is never used after this point;
                # presumably it was meant to be stored with the sample --
                # confirm and either persist it or drop the computation.
                cropped_geom = box(minlon + coff * gsd, minlat + roff * gsd,
                                   minlon + (coff + img_data.shape[1]) * gsd,
                                   minlat + (roff + img_data.shape[0]) * gsd)

                features['properties'] = {
                    'image': filename,
                    'roff': roff,
                    'coff': coff
                }
                sample_name = os.path.join('generated_training_data', TS,
                                           'sample_%d' % N)
                dump(features, img_data, sample_name, plot=True)
                N -= 1
                tqdm.write(str(N))

            # Mark the image as visited even when it yielded no sample so it
            # is not picked again on the next run.
            write_cur.execute(
                "UPDATE buildings.images SET done=true WHERE filename=%s AND project=%s",
                (filename, country))
            conn.commit()
            if N <= 0:
                return
    finally:
        # Release both cursors on every exit path, including the early return.
        read_cur.close()
        write_cur.close()
Ejemplo n.º 3
0
def cluster(region, version, epsilon):
    '''
    Cluster buildings together for a given region and insert them into the clusters table
    Arguments:
        region : GeoJSON (Feature) - GeoJSON object describing the geometry of the
            catchment zone for the organization we are clustering for.  The properties
            field must contain a "project" field indicating which project in the
            buildings.buildings table that it belongs to.
        version : UUID - Version number of the model used to predict the buildings
        epsilon : float - eps distance handed to ST_ClusterDBSCAN, measured in the
            units of the SRID returned by get_srid (buildings are transformed to
            that SRID before clustering)

    Existing clusters of the same version that overlap the region are deleted
    first, so the function can be re-run for a region.  All statements run on
    aigh_conn and are committed together at the end.
    '''

    geom = shape(region['geometry'])
    project = region['properties']['project']

    srid = get_srid(geom, version)

    with aigh_conn.cursor() as cur:
        # Create the table or empty out any clusters with the same version ID.
        # site_dist is part of the schema because the final UPDATE below
        # writes it; without it a freshly created table makes that UPDATE fail.
        cur.execute(
            """
            CREATE TABLE IF NOT EXISTS clusters(
                id serial, 
                project text, 
                size int, 
                geom geometry('geometry', 4326), 
                version uuid,
                site_dist double precision
            );
            DELETE FROM clusters WHERE ST_Relate(ST_GeomFromText(%s, 4326), geom, '2********') AND version=%s
        """, (geom.wkt, version))

        print('Clustering buildings...')
        # NOTE(review): per the PostGIS docs ST_ClusterDBSCAN returns NULL for
        # noise points, so GROUP BY cid presumably also emits one "cluster"
        # made of all unclustered buildings -- confirm whether that row is
        # wanted or should be filtered with "WHERE cid IS NOT NULL".
        cur.execute(
            """
            INSERT INTO clusters (project, size, geom, version)
            SELECT
                %s as project,
                COUNT(*) as size,
                ST_ConvexHull(ST_Collect(geom)) as geom,
                %s as version
            FROM (
                SELECT
                    ST_ClusterDBSCAN(ST_Transform(geom, %s), eps := %s, minpoints := 3) over () as cid,
                    geom
                FROM buildings.buildings
                WHERE "version"=%s AND ST_Contains(ST_GeomFromText(%s, 4326), geom)
            )clustering
            GROUP BY cid
        """, (project, version, srid, epsilon, version, geom.wkt))

        print('Computing nearest fixtures for each cluster...')
        # DISTINCT ON (clusters.id) combined with ORDER BY ..., dist keeps the
        # smallest distance per cluster.  geom is qualified as clusters.geom
        # because fixtures has a geom column too and the bare name is ambiguous.
        cur.execute(
            """
            UPDATE clusters SET site_dist=dist FROM(
                SELECT DISTINCT ON (clusters.id)
                    clusters.id as cluster_id,
                    ST_Distance(clusters.geom::geography, fixtures.geom::geography) as dist
                FROM clusters, fixtures
                WHERE clusters.version=%s AND ST_Relate(ST_GeomFromText(%s, 4326), clusters.geom, '2********')
                ORDER BY clusters.id, dist
            )q WHERE id=cluster_id;
        """, (version, geom.wkt))

    aigh_conn.commit()