def shardsWithinPolygon(self, dataSet, minT, maxT, extentFilter, maskFilters=None, xCol="x", yCol="y"):
    """Return the shards of ``dataSet`` selected by an extent filter and optional masks.

    Args:
        dataSet: object exposing ``parentDataSet``, ``dataSet`` and ``region``.
        minT: lower time bound forwarded to the query.
        maxT: upper time bound forwarded to the query.
        extentFilter: filter object forwarded unchanged to ``filterShards``.
        maskFilters: a single ``MaskFilter``, a list of them, or ``None`` /
            empty for no mask filtering.
        xCol: name of the x coordinate column.
        yCol: name of the y coordinate column.

    Returns:
        A list of ``Shard`` objects, each wrapping a ``BoundingBox`` built
        from the shard's extent, time range and point count, plus its name.
    """
    # A fresh list per call avoids sharing one default list with the query layer.
    if maskFilters is None:
        maskFilters = []
    elif isinstance(maskFilters, MaskFilter):
        maskFilters = [maskFilters]

    # The four spatial bounds are sent as 0.0; presumably the extent filter
    # defines the area instead -- TODO confirm against the query service.
    encodedShards = self.asyncQuery.filterShards(
        dataSet.parentDataSet, dataSet.dataSet, dataSet.region,
        0.0, 0.0, 0.0, 0.0, minT, maxT, xCol, yCol, extentFilter, maskFilters)

    # Each element returned by the service is an individual JSON document.
    decodedShards = [json.loads(encoded) for encoded in encodedShards]

    return [
        Shard(
            BoundingBox(gc['minX'], gc['maxX'], gc['minY'], gc['maxY'],
                        gc['minT'], gc['maxT'], gc['numberOfPoints']),
            gc['shardName'])
        for gc in decodedShards
    ]
def gridCellsWithinPolygon(self, dataSet, minT, maxT, extentFilter, maskFilters=None, xCol="x", yCol="y"):
    """Return the grid cells of ``dataSet`` selected by an extent filter and optional masks.

    Args:
        dataSet: object exposing ``parentDataSet``, ``dataSet`` and ``region``.
        minT: lower time bound forwarded to the query.
        maxT: upper time bound forwarded to the query.
        extentFilter: filter object forwarded unchanged to ``filterGridCells``.
        maskFilters: a single ``MaskFilter``, a list of them, or ``None`` /
            empty for no mask filtering.
        xCol: name of the x coordinate column.
        yCol: name of the y coordinate column.

    Returns:
        A list of ``BoundingBox`` objects, one per grid cell, built from the
        cell extent, time range and total point count.
    """
    # A fresh list per call avoids sharing one default list with the query layer.
    if maskFilters is None:
        maskFilters = []
    elif isinstance(maskFilters, MaskFilter):
        maskFilters = [maskFilters]

    # The four spatial bounds are sent as 0.0; presumably the extent filter
    # defines the area instead -- TODO confirm against the query service.
    encodedCells = self.asyncQuery.filterGridCells(
        dataSet.parentDataSet, dataSet.dataSet, dataSet.region,
        0.0, 0.0, 0.0, 0.0, minT, maxT, xCol, yCol, extentFilter, maskFilters)

    # Each element returned by the service is an individual JSON document.
    decodedCells = [json.loads(encoded) for encoded in encodedCells]

    return [
        BoundingBox(gc['gridCellMinX'], gc['gridCellMaxX'],
                    gc['gridCellMinY'], gc['gridCellMaxY'],
                    gc['minTime'], gc['maxTime'], gc['totalPoints'])
        for gc in decodedCells
    ]
def timeseriesFromList(self, gridcells, startdate, enddate, interval=3, minT=None, maxT=None, save=True, weighted=None):
    """Compute a timeseries for every grid cell in ``gridcells``.

    Args:
        gridcells: anything ``pd.DataFrame`` accepts; each row must provide
            ``minX``, ``maxX``, ``minY`` and ``maxY``.
        startdate: forwarded to ``gridcellTimeseries``.
        enddate: forwarded to ``gridcellTimeseries``.
        interval: forwarded to ``gridcellTimeseries``.
        minT: lower time bound of the per-cell bounding box.
        maxT: upper time bound of the per-cell bounding box. When both
            ``minT`` and ``maxT`` are ``None`` they are taken from the
            bounding box of ``self.inputDataSet``.
        save: when true, write the table as JSON to the configured
            ``outputPath`` / ``outputFileName``.
        weighted: forwarded to ``gridcellTimeseries``.

    Returns:
        The DataFrame of grid cells with one extra column per result key.
    """
    dfStats = pd.DataFrame(gridcells)

    if minT is None and maxT is None:
        bbx = self.client.boundingBox(self.inputDataSet)
        minT = bbx.minT
        maxT = bbx.maxT

    for idx, line in dfStats.iterrows():
        self.logger.info(
            "Calculating gridcell minX=%s maxX=%s minY=%s maxY=%s minT=%s maxT=%s ..."
            % (line['minX'], line['maxX'], line['minY'], line['maxY'], minT, maxT))

        # .item() converts the numpy scalars from the row into plain Python numbers.
        bbx_in = BoundingBox(line['minX'].item(), line['maxX'].item(),
                             line['minY'].item(), line['maxY'].item(), minT, maxT)

        results = self.gridcellTimeseries(bbx_in, startdate, enddate, interval, weighted=weighted)

        self.logger.info("Adding timesereis results to stats...")
        for key in results:
            value = results[key]
            if isinstance(value, list) and key not in dfStats.columns:
                # A list can only be stored in a single cell of an object
                # column, so create the column explicitly with that dtype.
                # (astype() has no ``inplace`` argument.)
                dfStats[key] = pd.Series(np.nan, index=dfStats.index, dtype=object)
            dfStats.at[idx, key] = value

    if save:
        outputFile = os.path.join(self.config("outputPath"), self.config("outputFileName"))
        self.logger.info("Saving results under file=%s" % outputFile)
        dfStats.to_json(outputFile)

    return dfStats
def boundingBox(self, dataSet):
    """Fetch the overall bounding box of ``dataSet``.

    The service response is a JSON document; its ``minTime`` / ``maxTime``
    values are converted with ``datetime.fromtimestamp`` before being placed
    in the returned ``BoundingBox`` together with the spatial extent and the
    total point count.
    """
    response = self.query.getDataSetBoundingBox(
        dataSet.parentDataSet, dataSet.dataSet, dataSet.region)
    extent = json.loads(response)

    return BoundingBox(
        extent['gridCellMinX'],
        extent['gridCellMaxX'],
        extent['gridCellMinY'],
        extent['gridCellMaxY'],
        datetime.fromtimestamp(extent['minTime']),
        datetime.fromtimestamp(extent['maxTime']),
        extent['totalPoints'],
    )
def shards(self, dataSet, boundingBox, xCol="x", yCol="y", maskFilters=None):
    """Return the shards of ``dataSet`` that fall inside ``boundingBox``.

    Args:
        dataSet: object exposing ``parentDataSet``, ``dataSet`` and ``region``.
        boundingBox: object exposing ``minX``, ``maxX``, ``minY``, ``maxY``,
            ``minT`` and ``maxT``.
        xCol: name of the x coordinate column.
        yCol: name of the y coordinate column.
        maskFilters: a single ``MaskFilter``, a list of them, or ``None`` /
            empty for no mask filtering.

    Returns:
        A list of ``Shard`` objects, each wrapping a ``BoundingBox`` built
        from the shard's extent, time range and point count, plus its name.
    """
    # A fresh list per call avoids sharing one default list with the query layer.
    if maskFilters is None:
        maskFilters = []
    elif isinstance(maskFilters, MaskFilter):
        maskFilters = [maskFilters]

    bb = boundingBox

    if not maskFilters:
        # Without masks the synchronous query returns one JSON array.
        gcs = json.loads(self.query.getShards(
            dataSet.parentDataSet, dataSet.dataSet, dataSet.region,
            bb.minX, bb.maxX, bb.minY, bb.maxY, bb.minT, bb.maxT, xCol, yCol))
    else:
        # With masks the async query returns one JSON document per shard; a
        # default-constructed MaskFilter is passed in the extent-filter slot.
        encodedShards = self.asyncQuery.filterShards(
            dataSet.parentDataSet, dataSet.dataSet, dataSet.region,
            bb.minX, bb.maxX, bb.minY, bb.maxY, bb.minT, bb.maxT, xCol, yCol,
            MaskFilter(), maskFilters)
        gcs = [json.loads(encoded) for encoded in encodedShards]

    return [
        Shard(
            BoundingBox(gc['minX'], gc['maxX'], gc['minY'], gc['maxY'],
                        gc['minT'], gc['maxT'], gc['numberOfPoints']),
            gc['shardName'])
        for gc in gcs
    ]
# Script fragment: query every grid cell of the input data set for March 2011
# and apply a shapefile mask to the points returned.
# `client`, `inputDs`, `bb` and `proj4` are defined outside this fragment.
gridCells = client.gridCells(inputDs, bb)

# Time window applied to every per-cell query.
minT = datetime.datetime(2011, 3, 1, 0, 0, 0)
maxT = datetime.datetime(2011, 3, 31, 23, 59, 59)

mask = '/data/puma1/scratch/cryotempo/masks/ice.shp'

# Load and print the mask shapefile for inspection.
tmp = gp.read_file(mask)
print(tmp)

for i, gc in enumerate(gridCells):
    #logging.log('Processing GC {} Total {}'.format(i,len(gridCells)))
    resultInfo = client.executeQuery(
        inputDs, BoundingBox(gc.minX, gc.maxX, gc.minY, gc.maxY, minT, maxT))

    # NOTE(review): the source indentation was lost; the statements below are
    # assumed to belong to the success branch because they depend on `df`.
    if resultInfo.status == "Success":
        df = resultInfo.to_df
        #print("MinLon {} MinLat {}".format(df['lon'].min(), df['lat'].min()))
        ds = PointDataSet(df, proj4)
        # The handle is released once the result has been read into `df`.
        client.releaseCacheHandle(resultInfo.resultFileName)
        geoDs = ds.asGeoDataSet()
        geoDs.withinMask(mask, 'Glacier')

#fp = '/data/puma1/scratch/v2/malard/export/mtngla_tdx_1556569735.nc'
#dataSet = 'tdx'
#projection = "+proj=aea +lat_1=25 +lat_2=47 +lat_0=36 +lon_0=85 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
#glacierMask = '/data/puma1/scratch/malard/mask/mtngla/static/RGIv60/Glacier/HMA/cell_x400000_y0_s100000/mask_Glacier_x400000_y0_s100000.gpkg'
#debrisMask = '/data/puma1/scratch/malard/mask/mtngla/static/SDCv10/Debris/HMA/cell_x400000_y0_s100000/mask_Debris_x400000_y0_s100000.gpkg'
#minX = 400000
Created on Wed Nov 20 09:33:33 2019

@author: jon
"""

from MalardClient.MalardClient import MalardClient
from MalardClient.DataSet import DataSet
from MalardClient.BoundingBox import BoundingBox

client = MalardClient()

# Two data sets under the same parent ("cryotempo") and region ("greenland").
ds = DataSet("cryotempo","poca","greenland" )
dsSwath = DataSet("cryotempo","GRIS_BaselineC_Q2","greenland" )

# Overall extent of the poca data set and the grid cells inside it.
bb = client.boundingBox(ds)
gcs = client.gridCells(ds, bb)

# Hard-coded query window; minT and maxT are the same value here.
# NOTE(review): these look like epoch seconds -- confirm the unit expected by BoundingBox.
minX=-1600000
maxX=-1500000
minY=-2600000
maxY=-2500000
minT=1298912551
maxT=1298912551

# `bb` is rebound from the data-set extent to the hard-coded window.
bb = BoundingBox( minX, maxX, minY, maxY, minT, maxT )

# Run the same window against both data sets.
resPoca = client.executeQuery( ds, bb )
resSwath = client.executeQuery( dsSwath, bb )
def main(pub_month, pub_year, loadConfig):
    """Write the monthly point product for every grid cell of a data set.

    For the month given by ``pub_year`` / ``pub_month`` the function queries
    the swath data set per grid cell, optionally appends poca points, resolves
    the swath file names and hands the combined table to ``writePointProduct``.

    NOTE(review): the indentation of this function was lost in the source; the
    nesting of the ``releaseCacheHandle`` calls, ``index.close()`` and the
    final per-cell timing message was reconstructed and should be confirmed.

    Args:
        pub_month: month number used to build the query time window.
        pub_year: year used to build the query time window.
        loadConfig: mapping providing the keys read below (``region``,
            ``parentDataSet``, ``powerdB``, ``coh``, ``resultsetName``,
            ``pocaParentDataSet``, ``pocaDataSet``, ``pocaDemDiff``,
            ``resultPath``, ``MalardEnvironment`` and, optionally,
            ``uncertainty_threshold``).

    Raises:
        KeyError: when the swath names for the poca file ids cannot be resolved.
    """
    region = loadConfig["region"]
    parentDataSet = loadConfig["parentDataSet"]
    uncertainty_threshold = loadConfig[
        "uncertainty_threshold"] if "uncertainty_threshold" in loadConfig else None
    powerdB = loadConfig["powerdB"]
    coh = loadConfig["coh"]
    dataSetName = loadConfig["resultsetName"]

    pocaParentDataSet = loadConfig["pocaParentDataSet"]
    pocaDataSetName = loadConfig["pocaDataSet"]
    pocaDemDiff = loadConfig["pocaDemDiff"]
    output_path = os.path.join(loadConfig["resultPath"], "pointProduct")
    ensure_dir(output_path)

    malardEnv = loadConfig["MalardEnvironment"]

    client = MalardClient(malardEnv)

    # With an uncertainty threshold the "<name>_unc" data set is queried.
    uncDatasetName = "{}_unc".format(
        dataSetName) if uncertainty_threshold is not None else dataSetName
    uncDataSet = DataSet(parentDataSet, uncDatasetName, region)
    dataSet = DataSet(parentDataSet, dataSetName, region)

    pocaDataSet = DataSet(pocaParentDataSet, pocaDataSetName, region)
    # Swath names for poca points are looked up on the data set without the
    # "_demDiff" suffix.
    pocaDataSet_noDemDiff = DataSet(pocaParentDataSet,
                                    pocaDataSetName.replace("_demDiff", ""),
                                    region)

    projections = [
        'x', 'y', 'time', 'elev', 'powerdB', 'coh', 'demDiff', 'demDiffMad',
        'swathFileId', 'Q_uStd'
    ]
    filters = [{
        'column': 'Q_uStd',
        'op': 'lte',
        'threshold': uncertainty_threshold
    }, {
        'column': 'powerdB',
        'op': 'gte',
        'threshold': powerdB
    }, {
        'column': 'coh',
        'op': 'gte',
        'threshold': coh
    }, {
        'column': 'inRegionMask',
        'op': 'eq',
        'threshold': 1.0
    }]
    filters_poca = [{
        "column": "demDiff",
        "op": "lte",
        "threshold": pocaDemDiff
    }, {
        "column": "demDiff",
        "op": "gte",
        "threshold": -pocaDemDiff
    }, {
        'column': 'inRegionMask',
        'op': 'eq',
        'threshold': 1.0
    }]

    # The window runs from the first second of the month to its last second.
    from_dt = datetime(pub_year, pub_month, 1, 0, 0, 0)
    to_dt = from_dt + relativedelta(months=1) - timedelta(seconds=1)

    bb = client.boundingBox(uncDataSet)
    gridcells = client.gridCells(
        uncDataSet,
        BoundingBox(bb.minX, bb.maxX, bb.minY, bb.maxY, from_dt, to_dt))

    proj4 = client.getProjection(uncDataSet).proj4

    print("Number of Gridcells found to process {}".format(len(gridcells)))
    process_start = datetime.now()

    print("MinT={} MaxT={}".format(from_dt, to_dt))
    #Create a shapefile index for each month
    index = s.ShapeFileIndex(output_path, "THEM_POINT", proj4,
                             uncDataSet.region, from_dt)

    for i, gc in enumerate(gridcells):
        gc_start = datetime.now()
        month_gc = BoundingBox(gc.minX, gc.maxX, gc.minY, gc.maxY, from_dt,
                               to_dt)
        queryInfo = client.executeQuery(uncDataSet,
                                        month_gc,
                                        projections=projections,
                                        filters=filters)

        if queryInfo.status == "Success" and not queryInfo.resultFileName.startswith(
                "Error"):

            data = queryInfo.to_df

            # One "swath" label per row, built the same way as the poca labels
            # below (np.array(n, "S5") would create a single 0-d value).
            dataSwathStr = np.empty(len(data), "S5")
            dataSwathStr.fill("swath")
            data["swathPoca"] = dataSwathStr
            swath_file_ids = data['swathFileId'].unique()

            # The poca query uses the grid cell's own bounding box `gc`.
            pocaInfo = client.executeQuery(pocaDataSet,
                                           gc,
                                           filters=filters_poca)

            pocaDf = pd.DataFrame()
            if pocaInfo.status == "Success" and not pocaInfo.resultFileName.startswith(
                    "Error"):
                pocaDf = pocaInfo.to_df

                if len(pocaDf) > 0:
                    pocaStr = np.empty(len(pocaDf), "S5")
                    pocaStr.fill("poca")
                    pocaDf["swathPoca"] = pocaStr
                    poca_file_ids = pocaDf['swathFileId'].unique()
                    print("Poca points to include {}".format(len(pocaDf)))

                    data = pd.concat([data, pocaDf], sort=False)

            print("Found {} data rows".format(len(data)))
            if len(data) > 0:
                results = client.getSwathNamesFromIds(dataSet, swath_file_ids)
                if len(pocaDf) > 0:
                    try:
                        results.update(
                            client.getSwathNamesFromIds(
                                pocaDataSet_noDemDiff, poca_file_ids))
                    except KeyError as ex:
                        print(
                            "Exception caught while retrieving swathIds for data set {} file ids {}"
                            .format(pocaDataSet_noDemDiff, poca_file_ids))
                        # Same exception type as before, with the cause chained.
                        raise KeyError(ex) from ex

                writePointProduct(output_path, dataSet, month_gc, data, proj4,
                                  results, index)

            client.releaseCacheHandle(pocaInfo.resultFileName)
        else:
            print("Grid Cells skipped X=[{}] Y=[{}] with message [{}] ".format(
                gc.minX, gc.minY, queryInfo.status))
        client.releaseCacheHandle(queryInfo.resultFileName)

    index.close()

    # `i` and `gc_start` only exist when at least one grid cell was processed.
    if len(gridcells) > 0:
        gc_elapsed = (datetime.now() - gc_start).total_seconds()
        print('Processed [{}] grid cells. Took=[{}]s'.format(i + 1, gc_elapsed))

    process_elapsed = (datetime.now() - process_start).total_seconds()
    print("Took [{}s] to process".format(process_elapsed))
def timeseriesFromStats(self, startdate, enddate, interval=3, minT=None, maxT=None, minCount=0, save=True, weighted=None):
    """Compute a timeseries for every grid cell listed in the run statistics.

    The run statistics of ``self.runName`` are fetched, flattened with
    ``json_normalize`` and extended with one column per timeseries result key.
    Only rows whose ``statistics.afterGlacierMask`` exceeds ``minCount`` are
    processed.

    Args:
        startdate: forwarded to ``gridcellTimeseries``.
        enddate: forwarded to ``gridcellTimeseries``.
        interval: forwarded to ``gridcellTimeseries``.
        minT: lower time bound of the per-cell bounding box.
        maxT: upper time bound of the per-cell bounding box. When both
            ``minT`` and ``maxT`` are ``None`` they are taken from the
            bounding box of ``self.inputDataSet``.
        minCount: rows with ``statistics.afterGlacierMask`` not greater than
            this value are skipped.
        save: when true, write the table as JSON to the configured
            ``outputPath`` / ``outputFileName``.
        weighted: forwarded to ``gridcellTimeseries``.

    Returns:
        The statistics DataFrame including the timeseries result columns.
    """
    self.logger.info("Get run statistics for parentDS=%s runName=%s ..." %
                     (self.inputDataSet.parentDataSet, self.runName))
    stats = self.query_sync.getRunStatistics(
        self.inputDataSet.parentDataSet, self.runName)
    stats = json.loads(stats)
    dfStats = json_normalize(stats)

    if minT is None and maxT is None:
        bbx = self.client.boundingBox(self.inputDataSet)
        minT = bbx.minT
        maxT = bbx.maxT

    for idx, line in dfStats.iterrows():
        if not (line['statistics.afterGlacierMask'] > minCount):
            continue

        # The cell extent is its lower-left corner plus the cell size.
        minX, maxX = line['gridCell.minX'], line['gridCell.minX'] + line['gridCell.size']
        minY, maxY = line['gridCell.minY'], line['gridCell.minY'] + line['gridCell.size']

        # Report minY under the minY label (maxX was passed here before).
        self.logger.info("Calculating gridcell minX=%s minY=%s ..." % (minX, minY))

        bbx_in = BoundingBox(minX, maxX, minY, maxY, minT, maxT)

        results = self.gridcellTimeseries(bbx_in, startdate, enddate, interval, weighted=weighted)

        self.logger.info("Adding timeseries results to stats...")
        for key in results:
            value = results[key]
            if isinstance(value, list) and key not in dfStats.columns:
                # A list can only be stored in a single cell of an object
                # column, so create the column explicitly with that dtype.
                # (astype() has no ``inplace`` argument.)
                dfStats[key] = pd.Series(np.nan, index=dfStats.index, dtype=object)
            dfStats.at[idx, key] = value

    #size = dfStats['gridCell.size']
    #geometry = [Point(xy) for xy in zip(dfStats['gridCell.minX']+(size/2), dfStats['gridCell.minY']+(size/2))]
    #dfStats = gp.GeoDataFrame(dfStats, crs=self.projection, geometry=geometry)

    if save:
        outputFile = os.path.join(self.config("outputPath"), self.config("outputFileName"))
        self.logger.info("Saving results under file=%s" % outputFile)
        dfStats.to_json(outputFile)

    return dfStats
from datetime import datetime

# Script fragment: set up a client, one data set and the query inputs
# (bounding box, mask filters, column filters).
client = MalardClient()
ds = DataSet("cryotempo", "swath_c", "greenland")

proj4 = client.getProjection(ds).proj4
print(proj4)

# Lower-left corner and edge length of the square query window.
minX = 700000
minY = -2200000
cell_size = 130000

# The window spans 2011-02-01 to 2011-05-01.
bbox = BoundingBox(minX, minX + cell_size, minY, minY + cell_size,
                   datetime(2011, 2, 1, 0, 0), datetime(2011, 5, 1, 0, 0))

## TODO: These need to be stored in Malard by DataSet and Type.
maskFilterIce = MaskFilter(
    p_shapeFile="/data/puma1/scratch/cryotempo/masks/icesheets.shp")
# NOTE(review): p_includeWithin=False presumably excludes points inside this
# shape -- confirm against MaskFilter.
maskFilterLRM = MaskFilter(
    p_shapeFile="/data/puma1/scratch/cryotempo/sarinmasks/LRM_Greenland.shp",
    p_includeWithin=False)

# Column filters; the list literal continues past the end of this fragment.
filters = [{
    "column": "power",
    "op": "gte",
    "threshold": 10000
}, {
    "column": "coh",
    "op": "gte",
def gridCells(self, dataSet, boundingBox, xCol="x", yCol="y"):
    """List the grid cells of ``dataSet`` inside ``boundingBox``.

    The synchronous query returns a JSON array; every entry becomes a
    ``BoundingBox`` holding the cell extent, its time range and its total
    point count.
    """
    response = self.query.getGridCells(
        dataSet.parentDataSet,
        dataSet.dataSet,
        dataSet.region,
        boundingBox.minX,
        boundingBox.maxX,
        boundingBox.minY,
        boundingBox.maxY,
        boundingBox.minT,
        boundingBox.maxT,
        xCol,
        yCol,
    )

    cells = []
    for entry in json.loads(response):
        cells.append(
            BoundingBox(entry['gridCellMinX'], entry['gridCellMaxX'],
                        entry['gridCellMinY'], entry['gridCellMaxY'],
                        entry['minTime'], entry['maxTime'],
                        entry['totalPoints']))
    return cells
# Script fragment: for every year, query the swath data set over each grid
# cell of the `ds_oib` data set between 1 March and 30 June.
# `years`, `client`, `ds_oib`, `dsSwath`, `filters` and `projections_swath`
# are defined outside this fragment.
total_match = 0

for y in years:
    # Per-year time window.
    minT = datetime(y, 3, 1, 0, 0, 0)
    maxT = datetime(y, 6, 30, 23, 59, 59)
    #minX=-200000
    #maxX=-100000
    #minY=-2400000
    #maxY=-2300000
    #bb = BoundingBox( minX, maxX, minY, maxY, minT, maxT )

    # Use the full spatial extent of ds_oib, restricted to this year's window.
    bb = client.boundingBox(ds_oib)
    bb = BoundingBox(bb.minX, bb.maxX, bb.minY, bb.maxY, minT, maxT)

    gcs = client.gridCells(ds_oib, bb)

    nr_gcs = len(gcs)
    print("Nr of grid cells to process: {}".format(nr_gcs))

    for i, gc in enumerate(gcs):
        # `bb` is rebound to the current grid cell with the same time window.
        bb = BoundingBox(gc.minX, gc.maxX, gc.minY, gc.maxY, minT, maxT)
        resSwath = client.executeQuery(dsSwath,
                                       bb,
                                       filters=filters,
                                       projections=projections_swath)
def regressionFromList(self, gridcells, linear=True, robust=True, weighted=None, minT=None, maxT=None, save=True, radius=None, geometry='point'):
    """Run the grid-cell regression for every cell in ``gridcells``.

    Args:
        gridcells: anything ``pd.DataFrame`` accepts; each row must provide
            ``minX``, ``maxX``, ``minY`` and ``maxY``.
        linear: forwarded to ``gridcellRegression``.
        robust: forwarded to ``gridcellRegression``.
        weighted: forwarded to ``gridcellRegression``.
        minT: lower time bound of the per-cell bounding box.
        maxT: upper time bound of the per-cell bounding box. When both
            ``minT`` and ``maxT`` are ``None`` they are taken from the
            bounding box of ``self.inputDataSet``.
        save: when true, write the result as a GeoPackage to the configured
            ``outputPath`` / ``outputFileName``.
        radius: forwarded to ``gridcellRegression``.
        geometry: ``'point'`` for one point per cell or ``'cell'`` for the
            cell outline as a polygon. Any other value is logged and passed
            to ``GeoDataFrame`` unchanged.

    Returns:
        A GeoDataFrame of the grid cells with one column per result key.
    """
    dfStats = pd.DataFrame(gridcells)

    if minT is None and maxT is None:
        bbx = self.client.boundingBox(self.inputDataSet)
        minT = bbx.minT
        maxT = bbx.maxT

    for idx, line in dfStats.iterrows():
        self.logger.info(
            "Calculating gridcell minX=%s maxX=%s minY=%s maxY=%s minT=%s maxT=%s ..."
            % (line['minX'], line['maxX'], line['minY'], line['maxY'], minT, maxT))

        # .item() converts the numpy scalars from the row into plain Python numbers.
        bbx_in = BoundingBox(line['minX'].item(), line['maxX'].item(),
                             line['minY'].item(), line['maxY'].item(), minT, maxT)

        results = self.gridcellRegression(bbx_in, linear=linear, robust=robust,
                                          weighted=weighted, radius=radius)

        self.logger.info("Adding regression results to stats...")
        for key in results:
            value = results[key]
            if isinstance(value, list) and key not in dfStats.columns:
                # A list can only be stored in a single cell of an object
                # column, so create the column explicitly with that dtype.
                # (astype() has no ``inplace`` argument.)
                dfStats[key] = pd.Series(np.nan, index=dfStats.index, dtype=object)
            dfStats.at[idx, key] = value

    size = dfStats['maxX'] - dfStats['minX']

    # 'point:' (with the stray colon) was the only spelling matched before, so
    # the documented default 'point' fell through to the error branch. Both
    # spellings are accepted to stay backward-compatible.
    if geometry in ('point', 'point:'):
        self.logger.info("Converted to point geometry")
        # NOTE(review): the x extent is used as the offset on both axes,
        # which assumes square cells -- confirm.
        geometry = [
            Point(xy) for xy in zip(dfStats['minX'] + (size / 2),
                                    dfStats['minY'] + (size / 2))
        ]
    elif geometry == 'cell':
        self.logger.info("Converted to cell geometry")
        # Closed ring around each cell, starting and ending at (minX, minY).
        geometry = [
            Polygon([(x0, y0), (x0, y1), (x1, y1), (x1, y0), (x0, y0)])
            for x0, x1, y0, y1 in zip(dfStats['minX'], dfStats['maxX'],
                                      dfStats['minY'], dfStats['maxY'])
        ]
    else:
        self.logger.info(
            "Error: not valid geometry specified. Should be either 'point' or 'cell'"
        )

    dfStats = gp.GeoDataFrame(dfStats, crs=self.projection, geometry=geometry)

    if save:
        outputFile = os.path.join(self.config("outputPath"), self.config("outputFileName"))
        self.logger.info("Saving results under file=%s" % outputFile)
        dfStats.to_file(outputFile, driver="GPKG")

    return dfStats