Example #1
def shardsWithinPolygon(self, dataSet, minT, maxT, extentFilter, maskFilters=None, xCol="x", yCol="y"):
    # Accept a single MaskFilter or a list, and avoid a mutable default argument.
    if maskFilters is None:
        maskFilters = []
    if isinstance(maskFilters, MaskFilter):
        maskFilters = [maskFilters]

    # The X/Y bounds are zeroed because the spatial extent comes from extentFilter.
    gcs = self.asyncQuery.filterShards(dataSet.parentDataSet, dataSet.dataSet,
                                       dataSet.region, 0.0, 0.0, 0.0, 0.0, minT, maxT,
                                       xCol, yCol, extentFilter, maskFilters)

    gcs = [json.loads(gc) for gc in gcs]

    return [Shard(BoundingBox(gc['minX'], gc['maxX'], gc['minY'], gc['maxY'],
                              gc['minT'], gc['maxT'], gc['numberOfPoints']),
                  gc['shardName']) for gc in gcs]
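A minimal usage sketch, assuming shardsWithinPolygon is exposed on MalardClient like the other query helpers and that the polygon extent is supplied as a shapefile-backed MaskFilter; the MaskFilter import path and shapefile path are illustrative:

from datetime import datetime

from MalardClient.MalardClient import MalardClient
from MalardClient.DataSet import DataSet
from MalardClient.MaskFilter import MaskFilter  # import path assumed

client = MalardClient()
ds = DataSet("cryotempo", "poca", "greenland")

# Polygon extent as a shapefile-backed filter (path is illustrative).
extent = MaskFilter(p_shapeFile="/data/puma1/scratch/cryotempo/masks/icesheets.shp")

shards = client.shardsWithinPolygon(ds, datetime(2011, 2, 1),
                                    datetime(2011, 5, 1), extent)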
Example #2
def gridCellsWithinPolygon(self, dataSet, minT, maxT, extentFilter, maskFilters=None, xCol="x", yCol="y"):
    # Accept a single MaskFilter or a list, and avoid a mutable default argument.
    if maskFilters is None:
        maskFilters = []
    if isinstance(maskFilters, MaskFilter):
        maskFilters = [maskFilters]

    # The X/Y bounds are zeroed because the spatial extent comes from extentFilter.
    gcs = self.asyncQuery.filterGridCells(dataSet.parentDataSet, dataSet.dataSet,
                                          dataSet.region, 0.0, 0.0, 0.0, 0.0, minT, maxT,
                                          xCol, yCol, extentFilter, maskFilters)

    gcs = [json.loads(gc) for gc in gcs]

    return [BoundingBox(gc['gridCellMinX'], gc['gridCellMaxX'], gc['gridCellMinY'],
                        gc['gridCellMaxY'], gc['minTime'], gc['maxTime'],
                        gc['totalPoints']) for gc in gcs]
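The returned BoundingBox objects can feed straight into executeQuery. A sketch reusing client, ds, and extent from the sketch above, following the handle lifecycle used in Examples #6 and #8:

for cell in client.gridCellsWithinPolygon(ds, datetime(2011, 2, 1),
                                          datetime(2011, 5, 1), extent):
    info = client.executeQuery(ds, cell)
    if info.status == "Success":
        df = info.to_df
    client.releaseCacheHandle(info.resultFileName)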
Example #3
    def timeseriesFromList(self,
                           gridcells,
                           startdate,
                           enddate,
                           interval=3,
                           minT=None,
                           maxT=None,
                           save=True,
                           weighted=None):

        dfStats = pd.DataFrame(gridcells)

        if minT is None and maxT is None:
            bbx = self.client.boundingBox(self.inputDataSet)
            minT = bbx.minT
            maxT = bbx.maxT

        for idx, line in dfStats.iterrows():

            self.logger.info(
                "Calculating gridcell minX=%s maxX=%s minY=%s maxY=%s minT=%s maxT=%s ..."
                % (line['minX'], line['maxX'], line['minY'], line['maxY'],
                   minT, maxT))
            bbx_in = BoundingBox(line['minX'].item(), line['maxX'].item(),
                                 line['minY'].item(), line['maxY'].item(),
                                 minT, maxT)

            results = self.gridcellTimeseries(bbx_in,
                                              startdate,
                                              enddate,
                                              interval,
                                              weighted=weighted)

            self.logger.info("Adding timesereis results to stats...")
            for key in results:
                if isinstance(results[key], list):
                    if key not in dfStats.columns:
                        # Create the column as dtype object so a list can be stored per cell.
                        dfStats = dfStats.reindex(columns=np.append(
                            dfStats.columns.values, [key]))
                        dfStats[key] = dfStats[key].astype('object')
                    dfStats.at[idx, key] = results[key]

        if save:
            file = os.path.join(self.config("outputPath"),
                                self.config("outputFileName"))
            self.logger.info("Saving results under file=%s" % file)
            dfStats.to_json(file)

        return dfStats
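A calling sketch, assuming processor is an instance of the class defining timeseriesFromList (a hypothetical name) and that each gridcell is a dict carrying the minX/maxX/minY/maxY keys the method reads; client and ds are as in the earlier sketches:

from datetime import datetime

cells = client.gridCells(ds, client.boundingBox(ds))
gridcells = [{"minX": gc.minX, "maxX": gc.maxX,
              "minY": gc.minY, "maxY": gc.maxY} for gc in cells]

dfStats = processor.timeseriesFromList(gridcells,
                                       startdate=datetime(2011, 3, 1),
                                       enddate=datetime(2011, 6, 30),
                                       interval=3)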
Example #4
def boundingBox(self, dataSet):
    bbox = json.loads(self.query.getDataSetBoundingBox(
        dataSet.parentDataSet, dataSet.dataSet, dataSet.region))

    # Set up the bounding box; times come back as epoch seconds.
    minX = bbox['gridCellMinX']
    maxX = bbox['gridCellMaxX']
    minY = bbox['gridCellMinY']
    maxY = bbox['gridCellMaxY']
    minT = datetime.fromtimestamp(bbox['minTime'])
    maxT = datetime.fromtimestamp(bbox['maxTime'])
    numberOfPoints = bbox['totalPoints']
    return BoundingBox(minX, maxX, minY, maxY, minT, maxT, numberOfPoints)
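Typical use mirrors Example #12: take the dataset's full spatial extent, then narrow the time window before enumerating grid cells (client and ds as above):

bb = client.boundingBox(ds)

# Keep the spatial extent but restrict the query to March 2011.
bb = BoundingBox(bb.minX, bb.maxX, bb.minY, bb.maxY,
                 datetime(2011, 3, 1, 0, 0, 0), datetime(2011, 3, 31, 23, 59, 59))
gcs = client.gridCells(ds, bb)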
Example #5
def shards(self, dataSet, boundingBox, xCol="x", yCol="y", maskFilters=None):
    # Accept a single MaskFilter or a list, and avoid a mutable default argument.
    if maskFilters is None:
        maskFilters = []
    if isinstance(maskFilters, MaskFilter):
        maskFilters = [maskFilters]

    bb = boundingBox
    if len(maskFilters) == 0:
        # No masks: a plain synchronous shard lookup suffices.
        gcs = json.loads(self.query.getShards(dataSet.parentDataSet, dataSet.dataSet,
                                              dataSet.region, bb.minX, bb.maxX, bb.minY,
                                              bb.maxY, bb.minT, bb.maxT, xCol, yCol))
    else:
        # With masks, filter asynchronously; a default MaskFilter() stands in for the extent filter.
        gcs = self.asyncQuery.filterShards(dataSet.parentDataSet, dataSet.dataSet,
                                           dataSet.region, bb.minX, bb.maxX, bb.minY,
                                           bb.maxY, bb.minT, bb.maxT, xCol, yCol,
                                           MaskFilter(), maskFilters)
        gcs = [json.loads(gc) for gc in gcs]

    return [Shard(BoundingBox(gc['minX'], gc['maxX'], gc['minY'], gc['maxY'],
                              gc['minT'], gc['maxT'], gc['numberOfPoints']),
                  gc['shardName']) for gc in gcs]
Example #6
    gridCells = client.gridCells(inputDs, bb)

    minT = datetime.datetime(2011, 3, 1, 0, 0, 0)
    maxT = datetime.datetime(2011, 3, 31, 23, 59, 59)

    mask = '/data/puma1/scratch/cryotempo/masks/ice.shp'

    # Preview the mask geometry with geopandas before applying it per grid cell.
    tmp = gp.read_file(mask)
    print(tmp)

    for i, gc in enumerate(gridCells):
        #logging.log('Processing GC {} Total {}'.format(i,len(gridCells)))
        resultInfo = client.executeQuery(
            inputDs, BoundingBox(gc.minX, gc.maxX, gc.minY, gc.maxY, minT,
                                 maxT))
        if resultInfo.status == "Success":
            df = resultInfo.to_df
            #print("MinLon {} MinLat {}".format(df['lon'].min(), df['lat'].min()))
            ds = PointDataSet(df, proj4)
            client.releaseCacheHandle(resultInfo.resultFileName)
            geoDs = ds.asGeoDataSet()
            geoDs.withinMask(mask, 'Glacier')

    #fp = '/data/puma1/scratch/v2/malard/export/mtngla_tdx_1556569735.nc'
    #dataSet = 'tdx'
    #projection = "+proj=aea +lat_1=25 +lat_2=47 +lat_0=36 +lon_0=85 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"

    #glacierMask = '/data/puma1/scratch/malard/mask/mtngla/static/RGIv60/Glacier/HMA/cell_x400000_y0_s100000/mask_Glacier_x400000_y0_s100000.gpkg'
    #debrisMask = '/data/puma1/scratch/malard/mask/mtngla/static/SDCv10/Debris/HMA/cell_x400000_y0_s100000/mask_Debris_x400000_y0_s100000.gpkg'
    #minX = 400000
Example #7
"""
Created on Wed Nov 20 09:33:33 2019

@author: jon
"""

from MalardClient.MalardClient import MalardClient
from MalardClient.DataSet import DataSet
from MalardClient.BoundingBox import BoundingBox

client = MalardClient()

ds = DataSet("cryotempo", "poca", "greenland")

dsSwath = DataSet("cryotempo", "GRIS_BaselineC_Q2", "greenland")

bb = client.boundingBox(ds)

gcs = client.gridCells(ds, bb)

minX = -1600000
maxX = -1500000
minY = -2600000
maxY = -2500000
minT = 1298912551  # epoch seconds
maxT = 1298912551

bb = BoundingBox(minX, maxX, minY, maxY, minT, maxT)

resPoca = client.executeQuery(ds, bb)

resSwath = client.executeQuery(dsSwath, bb)
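The result handles follow the same lifecycle as in Examples #6 and #8: check status, materialise the DataFrame via to_df, then release the server-side cache handle:

if resPoca.status == "Success":
    dfPoca = resPoca.to_df
client.releaseCacheHandle(resPoca.resultFileName)

if resSwath.status == "Success":
    dfSwath = resSwath.to_df
client.releaseCacheHandle(resSwath.resultFileName)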
Example #8
def main(pub_month, pub_year, loadConfig):

    region = loadConfig["region"]
    parentDataSet = loadConfig["parentDataSet"]
    uncertainty_threshold = loadConfig[
        "uncertainty_threshold"] if "uncertainty_threshold" in loadConfig else None
    powerdB = loadConfig["powerdB"]
    coh = loadConfig["coh"]
    dataSetName = loadConfig["resultsetName"]

    pocaParentDataSet = loadConfig["pocaParentDataSet"]
    pocaDataSetName = loadConfig["pocaDataSet"]
    pocaDemDiff = loadConfig["pocaDemDiff"]
    output_path = os.path.join(loadConfig["resultPath"], "pointProduct")
    ensure_dir(output_path)

    malardEnv = loadConfig["MalardEnvironment"]

    client = MalardClient(malardEnv)

    uncDatasetName = "{}_unc".format(
        dataSetName) if uncertainty_threshold is not None else dataSetName
    uncDataSet = DataSet(parentDataSet, uncDatasetName, region)
    dataSet = DataSet(parentDataSet, dataSetName, region)

    pocaDataSet = DataSet(pocaParentDataSet, pocaDataSetName, region)
    pocaDataSet_noDemDiff = DataSet(pocaParentDataSet,
                                    pocaDataSetName.replace("_demDiff", ""),
                                    region)

    projections = [
        'x', 'y', 'time', 'elev', 'powerdB', 'coh', 'demDiff', 'demDiffMad',
        'swathFileId', 'Q_uStd'
    ]
    filters = [{
        'column': 'Q_uStd',
        'op': 'lte',
        'threshold': uncertainty_threshold
    }, {
        'column': 'powerdB',
        'op': 'gte',
        'threshold': powerdB
    }, {
        'column': 'coh',
        'op': 'gte',
        'threshold': coh
    }, {
        'column': 'inRegionMask',
        'op': 'eq',
        'threshold': 1.0
    }]
    filters_poca = [{
        "column": "demDiff",
        "op": "lte",
        "threshold": pocaDemDiff
    }, {
        "column": "demDiff",
        "op": "gte",
        "threshold": -pocaDemDiff
    }, {
        'column': 'inRegionMask',
        'op': 'eq',
        'threshold': 1.0
    }]

    from_dt = datetime(pub_year, pub_month, 1, 0, 0, 0)
    to_dt = from_dt + relativedelta(months=1) - timedelta(seconds=1)

    bb = client.boundingBox(uncDataSet)
    gridcells = client.gridCells(
        uncDataSet,
        BoundingBox(bb.minX, bb.maxX, bb.minY, bb.maxY, from_dt, to_dt))

    proj4 = client.getProjection(uncDataSet).proj4

    print("Number of Gridcells found to process {}".format(len(gridcells)))
    process_start = datetime.now()

    print("MinT={} MaxT={}".format(from_dt, to_dt))
    #Create a shapefile index for each month
    index = s.ShapeFileIndex(output_path, "THEM_POINT", proj4,
                             uncDataSet.region, from_dt)

    for i, gc in enumerate(gridcells):
        gc_start = datetime.now()
        month_gc = BoundingBox(gc.minX, gc.maxX, gc.minY, gc.maxY, from_dt,
                               to_dt)
        queryInfo = client.executeQuery(uncDataSet,
                                        month_gc,
                                        projections=projections,
                                        filters=filters)

        if queryInfo.status == "Success" and not queryInfo.resultFileName.startswith(
                "Error"):

            data = queryInfo.to_df

            # Tag every swath row with a fixed-width label, mirroring the poca branch below.
            dataSwathStr = np.empty(len(data), "S5")
            dataSwathStr.fill("swath")
            data["swathPoca"] = dataSwathStr
            swath_file_ids = data['swathFileId'].unique()
            pocaInfo = client.executeQuery(pocaDataSet,
                                           gc,
                                           filters=filters_poca)

            pocaDf = pd.DataFrame()
            if pocaInfo.status == "Success" and not pocaInfo.resultFileName.startswith(
                    "Error"):
                pocaDf = pocaInfo.to_df

                if len(pocaDf) > 0:
                    pocaStr = np.empty(len(pocaDf), "S5")
                    pocaStr.fill("poca")
                    pocaDf["swathPoca"] = pocaStr
                    poca_file_ids = pocaDf['swathFileId'].unique()
                    print("Poca points to include {}".format(len(pocaDf)))

                    data = pd.concat([data, pocaDf], sort=False)

            print("Found {} data rows".format(len(data)))
            if len(data) > 0:
                results = client.getSwathNamesFromIds(dataSet, swath_file_ids)
                if len(pocaDf) > 0:
                    try:
                        results.update(
                            client.getSwathNamesFromIds(
                                pocaDataSet_noDemDiff, poca_file_ids))
                    except KeyError:
                        print(
                            "Exception caught while retrieving swathIds for data set {} file ids {}"
                            .format(pocaDataSet_noDemDiff, poca_file_ids))
                        raise

                writePointProduct(output_path, dataSet, month_gc, data, proj4,
                                  results, index)

            client.releaseCacheHandle(pocaInfo.resultFileName)
        else:
            print("Grid Cells skipped X=[{}] Y=[{}] with message [{}] ".format(
                gc.minX, gc.minY, queryInfo.status))
        client.releaseCacheHandle(queryInfo.resultFileName)

        gc_elapsed = (datetime.now() - gc_start).total_seconds()
        print('Processed [{}] grid cells. Took=[{}]s'.format(i + 1, gc_elapsed))

    index.close()

    process_elapsed = (datetime.now() - process_start).total_seconds()
    print("Took [{}s] to process".format(process_elapsed))
Example #9
    def timeseriesFromStats(self,
                            startdate,
                            enddate,
                            interval=3,
                            minT=None,
                            maxT=None,
                            minCount=0,
                            save=True,
                            weighted=None):
        self.logger.info("Get run statistics for parentDS=%s runName=%s ..." %
                         (self.inputDataSet.parentDataSet, self.runName))
        stats = self.query_sync.getRunStatistics(
            self.inputDataSet.parentDataSet, self.runName)
        stats = json.loads(stats)
        dfStats = json_normalize(stats)
        if minT is None and maxT is None:
            bbx = self.client.boundingBox(self.inputDataSet)
            minT = bbx.minT
            maxT = bbx.maxT

        for idx, line in dfStats.iterrows():
            if line['statistics.afterGlacierMask'] > minCount:
                minX, maxX = line['gridCell.minX'], line[
                    'gridCell.minX'] + line['gridCell.size']
                minY, maxY = line['gridCell.minY'], line[
                    'gridCell.minY'] + line['gridCell.size']
                self.logger.info("Calculating gridcell minX=%s minY=%s ..." %
                                 (minX, maxX))

                bbx_in = BoundingBox(minX, maxX, minY, maxY, minT, maxT)

                results = self.gridcellTimeseries(bbx_in,
                                                  startdate,
                                                  enddate,
                                                  interval,
                                                  weighted=weighted)
                self.logger.info("Adding timeseries results to stats...")
                for key in results:
                    if isinstance(results[key], list):
                        if key not in dfStats.columns:
                            # Create the column as dtype object so a list can be stored per cell.
                            dfStats = dfStats.reindex(columns=np.append(
                                dfStats.columns.values, [key]))
                            dfStats[key] = dfStats[key].astype('object')
                        dfStats.at[idx, key] = results[key]
                    else:
                        dfStats.at[idx, key] = results[key]

        #size = dfStats['gridCell.size']
        #geometry = [Point(xy) for xy in zip(dfStats['gridCell.minX']+(size/2), dfStats['gridCell.minY']+(size/2))]
        #dfStats = gp.GeoDataFrame(dfStats, crs=self.projection, geometry=geometry)

        if save:
            file = os.path.join(self.config("outputPath"),
                                self.config("outputFileName"))
            self.logger.info("Saving results under file=%s" % file)
            dfStats.to_json(file)

        return dfStats
Example #10
from datetime import datetime

# Imports mirror Example #7; the MaskFilter module path is an assumption.
from MalardClient.MalardClient import MalardClient
from MalardClient.DataSet import DataSet
from MalardClient.BoundingBox import BoundingBox
from MalardClient.MaskFilter import MaskFilter

client = MalardClient()

ds = DataSet("cryotempo", "swath_c", "greenland")

proj4 = client.getProjection(ds).proj4

print(proj4)

minX = 700000
minY = -2200000
cell_size = 130000

bbox = BoundingBox(minX, minX + cell_size, minY, minY + cell_size,
                   datetime(2011, 2, 1, 0, 0), datetime(2011, 5, 1, 0, 0))

## TODO: These need to be stored in Malard by DataSet and Type.
maskFilterIce = MaskFilter(
    p_shapeFile="/data/puma1/scratch/cryotempo/masks/icesheets.shp")
maskFilterLRM = MaskFilter(
    p_shapeFile="/data/puma1/scratch/cryotempo/sarinmasks/LRM_Greenland.shp",
    p_includeWithin=False)

filters = [{
    "column": "power",
    "op": "gte",
    "threshold": 10000
}, {
    "column": "coh",
    "op": "gte",
Example #11
def gridCells(self, dataSet, boundingBox, xCol="x", yCol="y"):
    bb = boundingBox
    # Synchronous lookup of the grid cells that intersect the bounding box.
    gcs = json.loads(self.query.getGridCells(dataSet.parentDataSet, dataSet.dataSet,
                                             dataSet.region, bb.minX, bb.maxX, bb.minY,
                                             bb.maxY, bb.minT, bb.maxT, xCol, yCol))

    return [BoundingBox(gc['gridCellMinX'], gc['gridCellMaxX'], gc['gridCellMinY'],
                        gc['gridCellMaxY'], gc['minTime'], gc['maxTime'],
                        gc['totalPoints']) for gc in gcs]
Example #12
total_match = 0

for y in years:
    minT = datetime(y, 3, 1, 0, 0, 0)
    maxT = datetime(y, 6, 30, 23, 59, 59)

    #minX=-200000
    #maxX=-100000
    #minY=-2400000
    #maxY=-2300000

    #bb = BoundingBox( minX, maxX, minY, maxY, minT, maxT )
    bb = client.boundingBox(ds_oib)

    bb = BoundingBox(bb.minX, bb.maxX, bb.minY, bb.maxY, minT, maxT)

    gcs = client.gridCells(ds_oib, bb)
    nr_gcs = len(gcs)

    print("Nr of grid cells to process: {}".format(nr_gcs))

    for i, gc in enumerate(gcs):

        bb = BoundingBox(gc.minX, gc.maxX, gc.minY, gc.maxY, minT, maxT)

        resSwath = client.executeQuery(dsSwath,
                                       bb,
                                       filters=filters,
                                       projections=projections_swath)
    def regressionFromList(self,
                           gridcells,
                           linear=True,
                           robust=True,
                           weighted=None,
                           minT=None,
                           maxT=None,
                           save=True,
                           radius=None,
                           geometry='point'):

        dfStats = pd.DataFrame(gridcells)

        if minT is None and maxT is None:
            bbx = self.client.boundingBox(self.inputDataSet)
            minT = bbx.minT
            maxT = bbx.maxT

        for idx, line in dfStats.iterrows():

            self.logger.info(
                "Calculating gridcell minX=%s maxX=%s minY=%s maxY=%s minT=%s maxT=%s ..."
                % (line['minX'], line['maxX'], line['minY'], line['maxY'],
                   minT, maxT))
            bbx_in = BoundingBox(line['minX'].item(), line['maxX'].item(),
                                 line['minY'].item(), line['maxY'].item(),
                                 minT, maxT)

            results = self.gridcellRegression(bbx_in,
                                              linear=linear,
                                              robust=robust,
                                              weighted=weighted,
                                              radius=radius)

            self.logger.info("Adding regression results to stats...")
            for key in results:
                if isinstance(results[key], list):
                    if key not in dfStats.columns:
                        # Create the column as dtype object so a list can be stored per cell.
                        dfStats = dfStats.reindex(columns=np.append(
                            dfStats.columns.values, [key]))
                        dfStats[key] = dfStats[key].astype('object')
                    dfStats.at[idx, key] = results[key]
                else:
                    dfStats.at[idx, key] = results[key]

        size = dfStats['maxX'] - dfStats['minX']
        if geometry == 'point':
            self.logger.info("Converted to point geometry")
            geometry = [
                Point(xy)
                for xy in zip(dfStats['minX'] + (size / 2), dfStats['minY'] +
                              (size / 2))
            ]
        elif geometry == 'cell':
            self.logger.info("Converted to cell geometry")
            geometry = []
            for idx, line in dfStats.iterrows():
                minX, maxX = line['minX'], line['maxX']
                minY, maxY = line['minY'], line['maxY']
                geometry.append(
                    Polygon([(minX, minY), (minX, maxY), (maxX, maxY),
                             (maxX, minY), (minX, minY)]))
        else:
            self.logger.error(
                "Invalid geometry '%s' specified; must be either 'point' or 'cell'"
                % geometry)
        dfStats = gp.GeoDataFrame(dfStats,
                                  crs=self.projection,
                                  geometry=geometry)

        if save:
            file = os.path.join(self.config("outputPath"),
                                self.config("outputFileName"))
            self.logger.info("Saving results under file=%s" % file)
            dfStats.to_file(file, driver="GPKG")

        return dfStats
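A calling sketch in the same vein as the timeseriesFromList sketch above, with a hypothetical processor instance; geometry='cell' emits one polygon per gridcell into the GPKG output:

cells = client.gridCells(ds, client.boundingBox(ds))
gridcells = [{"minX": gc.minX, "maxX": gc.maxX,
              "minY": gc.minY, "maxY": gc.maxY} for gc in cells]

dfStats = processor.regressionFromList(gridcells, linear=True, robust=True,
                                       geometry='cell')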