예제 #1
0
def extract(reference, selection, oid, srid):

    print(f"Running postgisS2Extract for {reference}")

    parcelcountsql = """
        SELECT count(es.ogc_fid)
        FROM {} es, dias_catalogue dias, {} aoi
        WHERE es.wkb_geometry && st_transform(dias.footprint, {})
        And es.wkb_geometry && st_transform(st_buffer(aoi.wkb_geometry::geography,
            1000)::geometry, {})
        And st_area(es.wkb_geometry) > 3000.0
        And aoi.{} = '{}' and dias.id = {}
        """
    updateSql = """
    UPDATE dias_catalogue set status='{}'
    WHERE id = {} and status = '{}'"""

    incurs = inconn.cursor()

    incurs.execute(
        parcelcountsql.format(dbconfig['tables']['parcel_table'],
                              dbconfig['tables']['aoi_table'], srid, srid,
                              dbconfig['args']['aoi_field'],
                              dbconfig['args']['name'], oid,
                              dbconfig['tables']['results_table'], oid))

    nrecs = incurs.fetchone()

    # If no parcels inside, we can stop
    if nrecs[0] == 0:
        print("Image {} contains no parcels (FATAL)".format(reference))
        incurs.execute(updateSql.format('no_parcels', oid, 'inprogress'))
        inconn.commit()
        incurs.close()
        inconn.close()
        #sys.exit(1)

    # Copy input data from S3 to local disk
    # SOBLOO
    # rootpath = '{}/L1C'.format(reference.split('_')[0])
    # CREODIAS
    rootpath = 'Sentinel-2/MSI/L2A'

    obstime = reference.split('_')[2][0:8]
    obs_path = "{}/{}/{}".format(obstime[0:4], obstime[4:6], obstime[6:8])

    # There was an issue with the manifest.safe sometime during 2018, and we need
    #   to check the GRANULE directory to understand where image data is located
    # CREODIAS
    s3path = "{}/{}/{}/GRANULE/".format(rootpath, obs_path, reference)
    # SOBLOO
    # s3path = "{}/{}/{}.SAFE/GRANULE/".format(rootpath, reference, reference.replace('MSIL1C', 'MSIL2A'))

    flist = dwb.listFileFromS3(s3path)
    # print(flist)

    if not flist:
        print("Resource {} not available in S3 storage (FATAL)".format(s3path))
        incurs.execute(updateSql.format('S2_nopath', oid, 'inprogress'))
        inconn.commit()
        incurs.close()
        inconn.close()
        #sys.exit(1)

    # We want 3 image files only, e.g. to create NDVI and have some idea about local image quality
    # SOBLOO does not produce 10 m L2A bands and only B8A (not B08)!
    s3subdir = flist[1].replace(s3path, '').split('/')[0]

    print(s3path)
    print(flist[1])
    print(s3subdir)

    file_set = {}

    for k in selection.keys():
        s = selection.get(k)
        fpath = "data/{}".format(s.split('/')[-1])
        alt_s = s.replace('0m/', '0m/L2A_')

        if dwb.getFileFromS3('{}{}/IMG_DATA/{}'.format(s3path, s3subdir, s),
                             fpath) == 1:
            print("Image {} found in bucket".format(s))
            file_set[k] = fpath
        elif dwb.getFileFromS3(
                '{}{}/IMG_DATA/{}'.format(s3path, s3subdir, alt_s),
                fpath) == 1:
            # LEVEL2AP has another naming convention.
            print("Image {} found in bucket".format(alt_s))
            file_set[k] = fpath
        else:
            print("Neither Image {} nor {} found in bucket".format(s, alt_s))
            incurs.execute(
                updateSql.format('{} notfound'.format(k), oid, 'inprogress'))
            inconn.commit()
            incurs.close()
            inconn.close()
            return
            #sys.exit(1)

    # Get the parcel polygon in this image' footprint
    #print(file_set)

    mgrs_tile = reference.split('_')[5]

    outsrid = int('326{}'.format(mgrs_tile[1:3]))

    incurs.close()

    outconn = psycopg2.connect(connString)
    if not outconn:
        print("No out connection established")
        #sys.exit(1)

    # Open a named cursor
    incurs = inconn.cursor(name='fetch_image_coverage',
                           cursor_factory=psycopg2.extras.DictCursor)

    parcelsql = """
        SELECT es.ogc_fid, ST_AsGeoJSON(st_transform(es.wkb_geometry, {}))::json
        FROM {} es, dias_catalogue dias, {} aoi
        WHERE es.wkb_geometry && st_transform(dias.footprint, {})
        And es.wkb_geometry && st_transform(st_buffer(aoi.wkb_geometry::geography,
            1000)::geometry, {})
        And st_area(es.wkb_geometry) > 3000.0
        And aoi.{} = '{}' and dias.id = {}
        -- and es.ogc_fid not in (select distinct pid from {} where obsid = {})
        """

    incurs.execute(
        parcelsql.format(outsrid, dbconfig['tables']['parcel_table'],
                         dbconfig['tables']['aoi_table'], srid, srid,
                         dbconfig['args']['aoi_field'],
                         dbconfig['args']['name'], oid,
                         dbconfig['tables']['results_table'], oid))

    sqlload = time.time() - start
    print("Images loaded and {} features selected from database in {} seconds".
          format(nrecs[0], sqlload))

    nrows = {}
    for k in file_set.keys():
        nrows[k] = 0

    affine = {}
    array = {}

    bands = file_set.keys()

    for b in bands:
        with rasterio.open(file_set.get(b)) as src:
            affine[b] = src.transform
            array[b] = src.read(1)

    while True:
        rowset = incurs.fetchmany(size=2000)

        if not rowset:
            break

        features = {
            "type":
            "FeatureCollection",
            "features": [{
                "type": "feature",
                "geometry": f[1],
                "properties": {
                    "pid": int(f[0])
                }
            } for f in rowset]
        }

        for b in bands:

            zs = zonal_stats(features,
                             array[b],
                             affine=affine[b],
                             stats=[
                                 "count", "mean", "std", "min", "max",
                                 "percentile_25", "percentile_50",
                                 "percentile_75"
                             ],
                             prefix="",
                             nodata=0,
                             geojson_out=True)

            df = pd.DataFrame(zs)

            df = pd.DataFrame.from_dict(df.properties.to_dict(),
                                        orient='index')

            df['obsid'] = oid
            df['band'] = b

            df.rename(index=str,
                      columns={
                          "percentile_25": "p25",
                          "percentile_50": "p50",
                          "percentile_75": "p75"
                      },
                      inplace=True)

            nrows[b] = nrows[b] + len(df)
            # df is the dataframe
            if len(df) > 0:
                df.dropna(inplace=True)
                if len(df.values) > 0:
                    df_columns = list(df)
                    s_buf = io.StringIO()
                    df.to_csv(s_buf, header=False, index=False, sep=',')
                    s_buf.seek(0)
                    outcurs = outconn.cursor()
                    # print(tuple(df_columns))
                    try:
                        #psycopg2.extras.execute_batch(outcurs, insert_stmt, df.values)
                        outcurs.copy_from(s_buf,
                                          dbconfig['tables']['results_table'],
                                          columns=tuple(df_columns),
                                          sep=',')
                        outconn.commit()
                    except psycopg2.IntegrityError:
                        print("insert statement {} contains duplicate index")
                    # except Error as e:
                    #    print(e)
                    finally:
                        outcurs.close()
                else:
                    print("No valid data in block {}".format(nrows[b]))

    outconn.close()

    incurs.close()

    incurs = inconn.cursor()

    updateStatusSql = """update dias_catalogue set status='{}', {} = current_timestamp where id = {} and status = '{}'"""

    try:
        incurs.execute(
            updateStatusSql.format('extracted', 'time_extracted', oid,
                                   'inprogress'))
        inconn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        inconn.close()
        if outconn:
            outconn.close()

    incurs.close()
    inconn.close()
    '''
    if os.path.exists(fpath):
        print("Removing {}".format(fpath))
        os.remove(fpath)
    
    for f in file_set.keys():
        if os.path.exists(file_set.get(f)):
            print("Removing {}".format(file_set.get(f)))
            os.remove(file_set.get(f))
    '''
    print(
        "Total time required for {} features and {} bands: {} seconds".format(
            nrows.get('B8'), len(bands),
            time.time() - start))
예제 #2
0
print(flist[1])
print(s3subdir)

selection = {'B4': '{}/{}_{}_{}_{}.jp2'.format('R10m', mgrs_tile, full_tstamp, 'B04', '10m'),
             'B8': '{}/{}_{}_{}_{}.jp2'.format('R10m', mgrs_tile, full_tstamp, 'B08', '10m'),
             'SC': '{}/{}_{}_{}_{}.jp2'.format('R20m', mgrs_tile, full_tstamp, 'SCL', '20m')
             }

file_set = {}

for k in selection.keys():
    s = selection.get(k)
    fpath = "data/{}".format(s.split('/')[-1])
    alt_s = s.replace('0m/', '0m/L2A_')

    if dwb.getFileFromS3('{}{}/IMG_DATA/{}'.format(s3path, s3subdir, s), fpath) == 1:
        print("Image {} found in bucket".format(s))
        file_set[k] = fpath
    elif dwb.getFileFromS3('{}{}/IMG_DATA/{}'.format(s3path, s3subdir, alt_s), fpath) == 1:
        # LEVEL2AP has another naming convention.
        print("Image {} found in bucket".format(alt_s))
        file_set[k] = fpath
    else:
        print("Neither Image {} nor {} found in bucket".format(s, alt_s))
        incurs.execute(updateSql.format(
            '{} notfound'.format(k), oid, 'inprogress'))
        inconn.commit()
        incurs.close()
        inconn.close()
        sys.exit(1)
예제 #3
0
    sys.exit(1)

# Copy input data from S3 to local disk

# CREODIAS:
s3path = "Sentinel-1/SAR/CARD-BS/{}/{}/{}.data/Gamma0_VV.img".format(
    datetime.strftime(obstime, '%Y/%m/%d'), reference, reference)
print("Transferring {}".format(s3path))

# SOBLOO:
# s3path = "{}/GRD/{}/{}.data/Gamma0_VV.img".format(reference.split('_')[0], reference, reference)
fpath = 'data/{}_VV.img'.format(reference)

outsrid = -1

if dwb.getFileFromS3(s3path, fpath) == 0:
    print("Resource {} not available in S3 storage (FATAL)".format(s3path))
    incurs.execute(updateSql.format('No S3 VV img', oid, 'inprogress'))
    inconn.commit()
    incurs.close()
    inconn.close()
    sys.exit(1)

s3path = s3path.replace('.img', '.hdr')
fpath = 'data/{}_VV.hdr'.format(reference)

if dwb.getFileFromS3(s3path, fpath) == 0:
    print("Resource {} not available in S3 storage (FATAL)".format(s3path))
    incurs.execute(updateSql.format('No S3 VV hdr', oid, 'inprogress'))
    inconn.commit()
    incurs.close()
예제 #4
0
def extract(files):
    start = time.time()
    
    reference = files[0].replace('data/', '').replace('_VV.img', '')
    
    # Rev 1.1. configuration parsing from json
    with open('s3_config.json', 'r') as f:
        s3config = json.load(f)
    s3config = s3config['s3']
    
    with open('db_config_s1.json', 'r') as f:
        dbconfig = json.load(f)
    dbconfig = dbconfig['database']
    
    # Input data base is postgis
    connString = "host={} dbname={} user={} port={} password={}".format(
        dbconfig['connection']['host'], dbconfig['connection']['dbname'],
        dbconfig['connection']['dbuser'], dbconfig['connection']['port'],
        dbconfig['connection']['dbpasswd'])
    
    inconn = psycopg2.connect(connString)
    if not inconn:
        print("No in connection established")
        #sys.exit(1)
    
    incurs = inconn.cursor()
    
    srid = -1
    
    sridSql = "select srid from geometry_columns where f_table_name = '{}';"
    
    try:
        incurs.execute(sridSql.format(dbconfig['tables']['parcel_table']))
        result = incurs.fetchone()
        if not result:
            print("{} does not exist or is not a spatial table")
        else:
            srid = result[0]
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        inconn.close()
        #sys.exit(1)
    
    #print("Parcel srid = ", srid)
    
    # Get the first image records that is not yet processed
    imagesql = """
        SELECT id, reference, obstime from dias_catalogue, {}
        WHERE footprint && wkb_geometry and {} = '{}'
        And obstime between '{}' and '{}'
        And reference = '{}'
        And card='bs' order by id desc limit 1
        """
    
    updateSql = """
        UPDATE dias_catalogue set status='{}'
        WHERE id = {} And status = '{}'
        """
    updateStatusSql = """update dias_catalogue set status='{}', {} = current_timestamp where id = {} and status = '{}'"""
    
    try:
        incurs.execute(imagesql.format(
            dbconfig['tables']['aoi_table'],
            dbconfig['args']['aoi_field'], dbconfig['args']['name'],
            dbconfig['args']['startdate'], dbconfig['args']['enddate'], reference))
        result = incurs.fetchone()
        if not result:
            print("No images with reference found")
            inconn.close()
            #sys.exit(1)
        else:
            oid = result[0]
            reference = result[1]
            obstime = result[2]

    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        inconn.close()
        #sys.exit(1)
    
    print(f"Running postgisS1Extract for {reference}")
    
    # Count parcels inside this image footprint
    parcelcountsql = """
        SELECT count(es.ogc_fid)
        FROM {} es, dias_catalogue dias, {} aoi
        WHERE es.wkb_geometry && st_transform(dias.footprint, {})
        And es.wkb_geometry && st_transform(st_buffer(aoi.wkb_geometry::geography,
            1000)::geometry, {})
        And st_area(es.wkb_geometry) > 3000.0
        And aoi.{} = '{}' and dias.id = {}
        -- and es.ogc_fid not in (select distinct pid from {} where obsid = {})
        """
    
    incurs.execute(parcelcountsql.format(
        dbconfig['tables']['parcel_table'],
        dbconfig['tables']['aoi_table'], srid, srid,
        dbconfig['args']['aoi_field'], dbconfig['args']['name'],
        oid, dbconfig['tables']['results_table'], oid))
    
    nrecs = incurs.fetchone()
    
    # If no parcels inside, we can stop
    if nrecs[0] == 0:
        print("Image {} contains no parcels".format(reference))
        incurs.execute(updateSql.format('No_parcels', oid, 'inprogress'))
        inconn.commit()
        incurs.close()
        inconn.close()
        #sys.exit(1)
    
    
    # Copy input data from S3 to local disk
    
    # CREODIAS:
    s3path = "Sentinel-1/SAR/CARD-BS/{}/{}/{}.data/Gamma0_VV.img".format(
        datetime.strftime(obstime, '%Y/%m/%d'), reference, reference)
    print("Transferring {}".format(s3path))
    
    # SOBLOO:
    # s3path = "{}/GRD/{}/{}.data/Gamma0_VV.img".format(reference.split('_')[0], reference, reference)
    fpath = 'data/{}_VV.img'.format(reference)
    
    outsrid = -1
    
    if dwb.getFileFromS3(s3path, fpath) == 0:
        print("Resource {} not available in S3 storage (FATAL)".format(s3path))
        incurs.execute(updateSql.format('No S3 VV img', oid, 'inprogress'))
        inconn.commit()
        incurs.close()
        inconn.close()
        return
        #sys.exit(1)
    
    s3path = s3path.replace('.img', '.hdr')
    fpath = 'data/{}_VV.hdr'.format(reference)
    
    if dwb.getFileFromS3(s3path, fpath) == 0:
        print("Resource {} not available in S3 storage (FATAL)".format(s3path))
        incurs.execute(updateSql.format('No S3 VV hdr', oid, 'inprogress'))
        inconn.commit()
        incurs.close()
        inconn.close()
        return
        #sys.exit(1)
    else:
        # Only if the header file is present can we open the image to check its projection
        with rasterio.open(fpath.replace('hdr', 'img')) as src:
            outsrid = src.crs.to_epsg()
    
    #print('Out SRID: ', outsrid)
    
    # CREODIAS:
    s3path = "Sentinel-1/SAR/CARD-BS/{}/{}/{}.data/Gamma0_VH.img".format(
        datetime.strftime(obstime, '%Y/%m/%d'), reference, reference)
    print("Transferring {}".format(s3path))
    # SOBLOO:
    # s3path = "{}/GRD/{}/{}.data/Gamma0_VH.img".format(reference.split('_')[0], reference, reference)
    fpath = 'data/{}_VH.img'.format(reference)
    
    if dwb.getFileFromS3(s3path, fpath) == 0:
        print("Resource {} not available in S3 storage (FATAL)".format(s3path))
        incurs.execute(updateSql.format('No S3 VH img', oid, 'inprogress'))
        inconn.commit()
        incurs.close()
        inconn.close()
        return
        #sys.exit(1)
    
    s3path = s3path.replace('.img', '.hdr')
    fpath = 'data/{}_VH.hdr'.format(reference)
    
    if dwb.getFileFromS3(s3path, fpath) == 0:
        print("Resource {} not available in S3 storage (FATAL)".format(s3path))
        incurs.execute(updateSql.format('No S3 VH hdr', oid, 'inprogress'))
        inconn.commit()
        incurs.close()
        inconn.close()
        return
        #sys.exit(1)
    
    
    # Open a connection to save results
    outconn = psycopg2.connect(connString)
    if not outconn:
        print("No out connection established")
        incurs.execute(updateSql.format('no_out_conn', oid, 'inprogress'))
        inconn.commit()
        incurs.close()
        inconn.close()
        #sys.exit(1)
    
    # Get the parcel polygon in this image' footprint
    
    incurs.close()
    # Open a named cursor
    incurs = inconn.cursor(name='fetch_image_coverage',
                           cursor_factory=psycopg2.extras.DictCursor)
    
    parcelsql = """
        SELECT es.ogc_fid, ST_AsGeoJSON(st_transform(es.wkb_geometry, {}))::json
        FROM {} es, dias_catalogue dias, {} aoi
        WHERE es.wkb_geometry && st_transform(dias.footprint, {})
        And es.wkb_geometry && st_transform(st_buffer(aoi.wkb_geometry::geography,
            1000)::geometry, {})
        And st_area(es.wkb_geometry) > 3000.0
        And aoi.{} = '{}'
        And dias.id = {}
        -- and es.ogc_fid not in (select distinct pid from {} where obsid = {})
        """
    
    incurs.execute(parcelsql.format(
        outsrid, dbconfig['tables']['parcel_table'],
        dbconfig['tables']['aoi_table'], srid, srid,
        dbconfig['args']['aoi_field'], dbconfig['args']['name'],
        oid, dbconfig['tables']['results_table'], oid))
    
    sqlload = time.time() - start
    print("Images loaded and {} features selected from database in {} seconds".format(
        nrecs[0], sqlload))
    
    nrows = {}
    nrows['VV'] = 0
    nrows['VH'] = 0
    
    affine = {}
    array = {}
    
    bands = ['VV', 'VH']
    
    for b in bands:
        with rasterio.open('data/{}_{}.img'.format(reference, b)) as src:
            affine[b] = src.transform
            array[b] = src.read(1)
    
    while True:
        rowset = incurs.fetchmany(size=2000)
    
        if not rowset:
            break
    
        features = {"type": "FeatureCollection",
                    "features": [{"type": "feature", "geometry": f[1],
                                  "properties": {"pid": int(f[0])}} for f in rowset]}
    
        for b in bands:
    
            zs = zonal_stats(
                features, array[b], affine=affine[b], stats=[
                    "count", "mean", "std", "min", "max", "percentile_25",
                    "percentile_50", "percentile_75"],
                prefix="", nodata=0, geojson_out=True)
    
            df = pd.DataFrame(zs)
    
            df = pd.DataFrame.from_dict(df.properties.to_dict(), orient='index')
    
            df['obsid'] = oid
            df['band'] = b
    
            df.rename(index=str, columns={
                      "percentile_25": "p25", "percentile_50": "p50",
                      "percentile_75": "p75"}, inplace=True)
    
            nrows[b] = nrows[b] + len(df)
            # df is the dataframe
            if len(df) > 0:
                df.dropna(inplace=True)
                if len(df.values) > 0:
                    df_columns = list(df)
                    s_buf = io.StringIO()
                    df.to_csv(s_buf, header=False, index=False, sep=',')
                    s_buf.seek(0)
                    outcurs = outconn.cursor()
                    # print(tuple(df_columns))
                    try:
                        # psycopg2.extras.execute_batch(outcurs, insert_stmt, df.values)
                        outcurs.copy_from(
                            s_buf, dbconfig['tables']['results_table'],
                            columns=tuple(df_columns), sep=',')
                        outconn.commit()
                    except psycopg2.IntegrityError:
                        print("insert statement {insert_stmt} contains duplicate index")
                    # except Error as e:
                    #    print(e)
                    finally:
                        outcurs.close()
                else:
                    print("No valid data in block {}".format(nrows[b]))
    
    outconn.close()
    
    incurs.close()
    
    incurs = inconn.cursor()
    
    updateStatusSql = """update dias_catalogue set status='{}', {} = current_timestamp where id = {} and status = '{}'"""
    
    try:
        incurs.execute(updateStatusSql.format('extracted', 'time_extracted', oid, 'inprogress'))
        inconn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        inconn.close()
        if outconn:
            outconn.close()
    
    incurs.close()
    inconn.close()

    print("Total time required for {} features and {} bands: {} seconds".format(
        nrows['VV'], len(bands), time.time() - start))