    with open(out_xyz, 'r') as f:
        f.readline()  # skip the header row
        for l in f:
            row = l.split()
            xy.append([float(row[0]), float(row[1])])
            values.append(float(row[2]))

    # remove the temporary xyz file
    if deleteTemporaryFiles:
        os.remove(out_xyz)

    return xy, values


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    client = MalardClient()

    parentDataSet = 'cryotempo'
    dataSet = 'GRIS_BaselineC_Q2'
    region = 'greenland'

    inputDs = DataSet(parentDataSet, dataSet, region)

    proj4 = client.getProjection(inputDs).proj4
    print(proj4)

    bb = client.boundingBox(inputDs)
    gridCells = client.gridCells(inputDs, bb)

    minT = datetime.datetime(2011, 3, 1, 0, 0, 0)
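# Hedged sketch (an assumption, not from the source): one way to produce a file
# in the format the reader above expects -- a single header line followed by
# whitespace-separated "x y value" rows. The write_xyz helper name is hypothetical.
import numpy as np

def write_xyz(out_xyz, xy, values):
    # one header row (skipped by the reader), then "x y value" per line
    rows = np.column_stack([np.asarray(xy), np.asarray(values)])
    np.savetxt(out_xyz, rows, header='x y value', comments='')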
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 20 09:33:33 2019

@author: jon
"""

from MalardClient.MalardClient import MalardClient
from MalardClient.DataSet import DataSet
from MalardClient.BoundingBox import BoundingBox

client = MalardClient()

ds = DataSet("cryotempo", "poca", "greenland")
dsSwath = DataSet("cryotempo", "GRIS_BaselineC_Q2", "greenland")

bb = client.boundingBox(ds)
gcs = client.gridCells(ds, bb)

minX = -1600000
maxX = -1500000
minY = -2600000
maxY = -2500000
minT = 1298912551
maxT = 1298912551

bb = BoundingBox(minX, maxX, minY, maxY, minT, maxT)
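# Hedged sketch (an assumption, not in the source script): executing a query
# over the bounding box defined above and releasing the cache handle afterwards,
# following the executeQuery/to_df/releaseCacheHandle pattern used elsewhere in
# this repository.
queryInfo = client.executeQuery(ds, bb, projections=[], filters=[])
if queryInfo.status == "Success":
    df = queryInfo.to_df
    print("Returned {} POCA points".format(len(df)))
client.releaseCacheHandle(queryInfo.resultFileName)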
def main(pub_month, pub_year, loadConfig):

    region = loadConfig["region"]
    parentDataSet = loadConfig["parentDataSet"]
    uncertainty_threshold = loadConfig["uncertainty_threshold"] if "uncertainty_threshold" in loadConfig else None
    powerdB = loadConfig["powerdB"]
    coh = loadConfig["coh"]
    dataSetName = loadConfig["resultsetName"]
    pocaParentDataSet = loadConfig["pocaParentDataSet"]
    pocaDataSetName = loadConfig["pocaDataSet"]
    pocaDemDiff = loadConfig["pocaDemDiff"]

    output_path = os.path.join(loadConfig["resultPath"], "pointProduct")
    ensure_dir(output_path)

    malardEnv = loadConfig["MalardEnvironment"]

    client = MalardClient(malardEnv)

    uncDatasetName = "{}_unc".format(dataSetName) if uncertainty_threshold is not None else dataSetName

    uncDataSet = DataSet(parentDataSet, uncDatasetName, region)
    dataSet = DataSet(parentDataSet, dataSetName, region)
    pocaDataSet = DataSet(pocaParentDataSet, pocaDataSetName, region)
    pocaDataSet_noDemDiff = DataSet(pocaParentDataSet, pocaDataSetName.replace("_demDiff", ""), region)

    projections = ['x', 'y', 'time', 'elev', 'powerdB', 'coh', 'demDiff',
                   'demDiffMad', 'swathFileId', 'Q_uStd']

    filters = [{'column': 'Q_uStd', 'op': 'lte', 'threshold': uncertainty_threshold},
               {'column': 'powerdB', 'op': 'gte', 'threshold': powerdB},
               {'column': 'coh', 'op': 'gte', 'threshold': coh},
               {'column': 'inRegionMask', 'op': 'eq', 'threshold': 1.0}]

    filters_poca = [{'column': 'demDiff', 'op': 'lte', 'threshold': pocaDemDiff},
                    {'column': 'demDiff', 'op': 'gte', 'threshold': -pocaDemDiff},
                    {'column': 'inRegionMask', 'op': 'eq', 'threshold': 1.0}]

    from_dt = datetime(pub_year, pub_month, 1, 0, 0, 0)
    to_dt = from_dt + relativedelta(months=1) - timedelta(seconds=1)

    bb = client.boundingBox(uncDataSet)
    gridcells = client.gridCells(uncDataSet,
                                 BoundingBox(bb.minX, bb.maxX, bb.minY, bb.maxY, from_dt, to_dt))

    proj4 = client.getProjection(uncDataSet).proj4

    print("Number of gridcells found to process: {}".format(len(gridcells)))
    process_start = datetime.now()

    print("MinT={} MaxT={}".format(from_dt, to_dt))

    # Create a shapefile index for each month
    index = s.ShapeFileIndex(output_path, "THEM_POINT", proj4, uncDataSet.region, from_dt)

    for i, gc in enumerate(gridcells):
        gc_start = datetime.now()
        month_gc = BoundingBox(gc.minX, gc.maxX, gc.minY, gc.maxY, from_dt, to_dt)
        queryInfo = client.executeQuery(uncDataSet, month_gc,
                                        projections=projections, filters=filters)

        if queryInfo.status == "Success" and not queryInfo.resultFileName.startswith("Error"):
            data = queryInfo.to_df

            # was np.array(len(data), "S5"), which creates a 0-d array instead of one entry per row
            dataSwathStr = np.empty(len(data), "S5")
            dataSwathStr.fill("swath")
            data["swathPoca"] = dataSwathStr

            swath_file_ids = data['swathFileId'].unique()

            pocaInfo = client.executeQuery(pocaDataSet, gc, filters=filters_poca)

            pocaDf = pd.DataFrame()
            if pocaInfo.status == "Success" and not pocaInfo.resultFileName.startswith("Error"):
                pocaDf = pocaInfo.to_df

            if len(pocaDf) > 0:
                pocaStr = np.empty(len(pocaDf), "S5")
                pocaStr.fill("poca")
                pocaDf["swathPoca"] = pocaStr
                poca_file_ids = pocaDf['swathFileId'].unique()
                print("Poca points to include {}".format(len(pocaDf)))
                data = pd.concat([data, pocaDf], sort=False)

            print("Found {} data rows".format(len(data)))

            if len(data) > 0:
                results = client.getSwathNamesFromIds(dataSet, swath_file_ids)

                if len(pocaDf) > 0:
                    try:
                        results.update(client.getSwathNamesFromIds(pocaDataSet_noDemDiff, poca_file_ids))
                    except KeyError as ex:
                        print("Exception caught while retrieving swathIds for data set {} file ids {}"
                              .format(pocaDataSet_noDemDiff, poca_file_ids))
                        raise KeyError(ex)

                writePointProduct(output_path, dataSet, month_gc, data, proj4, results, index)

            client.releaseCacheHandle(pocaInfo.resultFileName)
        else:
            print("Grid cells skipped X=[{}] Y=[{}] with message [{}]".format(
                gc.minX, gc.minY, queryInfo.status))

        client.releaseCacheHandle(queryInfo.resultFileName)

        gc_elapsed = (datetime.now() - gc_start).total_seconds()
        print('Processed [{}] grid cells. Took=[{}]s'.format(i + 1, gc_elapsed))

    # close the shapefile index once all grid cells for the month are written
    index.close()

    process_elapsed = (datetime.now() - process_start).total_seconds()
    print("Took [{}s] to process".format(process_elapsed))
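# Hedged usage sketch (an assumption, not in the source): calling main() for a
# single publication month with a hand-built config dict. The values are
# placeholders; the key names follow the lookups at the top of main().
if __name__ == '__main__':
    loadConfig = {
        "region": "greenland",
        "parentDataSet": "cryotempo",
        "resultsetName": "GRIS_BaselineC_Q2_demDiff",  # placeholder name
        "uncertainty_threshold": 5.0,
        "powerdB": -160.0,
        "coh": 0.6,
        "pocaParentDataSet": "cryotempo",
        "pocaDataSet": "poca_demDiff",  # placeholder name
        "pocaDemDiff": 100.0,
        "resultPath": "/tmp/pointProduct",  # placeholder path
        "MalardEnvironment": "DEVv2",
    }
    main(3, 2011, loadConfig)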
class TimeseriesRun:

    # __conf = {
    #     "outputFileName": "himalayas-mad-tdx2.json",
    #     "inputDataSet": "HimMad2",
    #     "runName": "HimMad2",
    #     "region": "himalayas",
    #     "parentDsName": "mtngla",
    #     "outputPath": "timeseries_results",
    #     "malardEnvironmentName": "DEVv2",
    #     "malardSyncURL": "http://localhost:9000",
    #     "malardAsyncURL": "ws://localhost:9000",
    #     "filters": [{'column': 'power', 'op': 'gt', 'threshold': 10000}, {'column': 'coh', 'op': 'gt', 'threshold': 0.6},
    #                 {'column': 'demDiff', 'op': 'lt', 'threshold': 100}, {'column': 'demDiffMadNew', 'op': 'lt', 'threshold': 10},
    #                 {'column': 'demDiff', 'op': 'gt', 'threshold': -100},
    #                 {'column': 'refDifference', 'op': 'gt', 'threshold': -150}, {'column': 'refDifference', 'op': 'lt', 'threshold': 150},
    #                 {'column': 'within_DataSet', 'op': 'gt', 'threshold': 1}]
    # }

    __conf = {
        "outputFileName": "alaska-gridcells-double.json",
        "inputDataSet": "AlaskaMad",
        "runName": "AlaskaMad",
        "region": "alaska",
        "parentDsName": "mtngla",
        "outputPath": "timeseries_results",
        "malardEnvironmentName": "DEVv2",
        "malardSyncURL": "http://localhost:9000",
        "malardAsyncURL": "ws://localhost:9000",
        "filters": [{'column': 'power', 'op': 'gt', 'threshold': 10000}, {'column': 'coh', 'op': 'gt', 'threshold': 0.6},
                    {'column': 'demDiff', 'op': 'lt', 'threshold': 100}, {'column': 'demDiffMad', 'op': 'lt', 'threshold': 10},
                    {'column': 'demDiff', 'op': 'gt', 'threshold': -100}, {'column': 'demDiffMad', 'op': 'gt', 'threshold': -10},
                    {'column': 'refDifference', 'op': 'gt', 'threshold': -150}, {'column': 'refDifference', 'op': 'lt', 'threshold': 150}]
    }

    def __init__(self, logFile=None):
        '''
        :param logFile: if logFile is specified the logger writes into that file instead of the terminal
        '''
        if logFile is None:
            logging.basicConfig(
                format='%(asctime)s, %(threadName)s %(thread)d: %(name)s %(levelname)s %(message)s',
                datefmt='%H:%M:%S', level=logging.INFO)
        else:
            logging.basicConfig(
                filename=logFile, filemode='a',
                format='%(asctime)s, %(threadName)s %(thread)d: %(name)s %(levelname)s %(message)s',
                datefmt='%H:%M:%S', level=logging.INFO)

        sys.excepthook = self.uncaughtErrorHandler

        self.logger = logging.getLogger(__name__)
        self.logger.info('Process started')

        self.inputDataSet = DataSet(parentDs=self.config('parentDsName'),
                                    dataSet=self.config('inputDataSet'),
                                    region=self.config('region'))
        self.runName = self.config('runName')

        self.client = MalardClient(notebook=False)
        self.query_sync = DataSetQuery(self.config('malardSyncURL'),
                                       self.config('malardEnvironmentName'))

        # get projection
        self.projection = self.client.getProjection(self.inputDataSet).proj4

    def gridcellTimeseries(self, boundingBox, startdate, enddate, interval, weighted=[]):
        filters = self.config('filters')
        self.logger.info("Filtering dataset=%s with criteria %s" % (self.inputDataSet, filters))
        result = self.client.executeQuery(self.inputDataSet, boundingBox,
                                          projections=[], filters=filters)
        self.logger.info("Result message: result=%s, message=%s" % (result.status, result.message))

        data = PointDataSet(result.resultFileName, self.projection)
        # release the cache handle of the result file
        self.client.releaseCacheHandle(result.resultFileName)

        results = {}
        if data.hasData():
            self.logger.info('Data length={}'.format(data.length()))
            r = data.timeSeries(startdate=startdate, enddate=enddate,
                                interval=interval, weighted=weighted)
            results = {**results, **r}
            self.logger.info(results)
        else:
            self.logger.info('No data in file')
        return results

    def timeseriesFromStats(self, startdate, enddate, interval=3, minT=None, maxT=None,
                            minCount=0, save=True, weighted=None):
        self.logger.info("Get run statistics for parentDS=%s runName=%s ..."
                         % (self.inputDataSet.parentDataSet, self.runName))
        stats = self.query_sync.getRunStatistics(self.inputDataSet.parentDataSet, self.runName)
        stats = json.loads(stats)
        dfStats = json_normalize(stats)

        if minT is None and maxT is None:
            bbx = self.client.boundingBox(self.inputDataSet)
            minT = bbx.minT
            maxT = bbx.maxT

        for idx, line in dfStats.iterrows():
            if line['statistics.afterGlacierMask'] > minCount:
                minX, maxX = line['gridCell.minX'], line['gridCell.minX'] + line['gridCell.size']
                minY, maxY = line['gridCell.minY'], line['gridCell.minY'] + line['gridCell.size']
                # was logged as (minX, maxX), which printed maxX in place of minY
                self.logger.info("Calculating gridcell minX=%s minY=%s ..." % (minX, minY))
                bbx_in = BoundingBox(minX, maxX, minY, maxY, minT, maxT)
                results = self.gridcellTimeseries(bbx_in, startdate, enddate, interval,
                                                  weighted=weighted)
                self.logger.info("Adding timeseries results to stats...")
                for key in results:
                    if isinstance(results[key], list):
                        if not np.isin(key, dfStats.columns):
                            newColumn = [key]
                            dfStats = dfStats.reindex(columns=np.append(dfStats.columns.values, newColumn))
                            # astype has no inplace argument; assign the converted column
                            dfStats[[key]] = dfStats[[key]].astype('object')
                        dfStats.at[idx, key] = results[key]
                    else:
                        dfStats.at[idx, key] = results[key]

        #size = dfStats['gridCell.size']
        #geometry = [Point(xy) for xy in zip(dfStats['gridCell.minX']+(size/2), dfStats['gridCell.minY']+(size/2))]
        #dfStats = gp.GeoDataFrame(dfStats, crs=self.projection, geometry=geometry)
        if save:
            file = os.path.join(self.config("outputPath"), self.config("outputFileName"))
            self.logger.info("Saving results under file=%s" % file)
            dfStats.to_json(file)
        return dfStats

    def timeseriesFromList(self, gridcells, startdate, enddate, interval=3, minT=None,
                           maxT=None, save=True, weighted=None):
        dfStats = pd.DataFrame(gridcells)
        if minT is None and maxT is None:
            bbx = self.client.boundingBox(self.inputDataSet)
            minT = bbx.minT
            maxT = bbx.maxT

        for idx, line in dfStats.iterrows():
            self.logger.info("Calculating gridcell minX=%s maxX=%s minY=%s maxY=%s minT=%s maxT=%s ..."
                             % (line['minX'], line['maxX'], line['minY'], line['maxY'], minT, maxT))
            bbx_in = BoundingBox(line['minX'].item(), line['maxX'].item(),
                                 line['minY'].item(), line['maxY'].item(), minT, maxT)
            results = self.gridcellTimeseries(bbx_in, startdate, enddate, interval,
                                              weighted=weighted)
            self.logger.info("Adding timeseries results to stats...")
            for key in results:
                if isinstance(results[key], list):
                    if not np.isin(key, dfStats.columns):
                        newColumn = [key]
                        dfStats = dfStats.reindex(columns=np.append(dfStats.columns.values, newColumn))
                        dfStats[[key]] = dfStats[[key]].astype('object')
                    dfStats.at[idx, key] = results[key]
                else:
                    dfStats.at[idx, key] = results[key]

        if save:
            file = os.path.join(self.config("outputPath"), self.config("outputFileName"))
            self.logger.info("Saving results under file=%s" % file)
            dfStats.to_json(file)
        return dfStats

    def timeseriesFromFile(self, file, startdate, enddate, interval=3, minT=None,
                           maxT=None, save=True, weighted=None):
        '''
        Calculates timeseries for the cells listed in a given input file.

        :param file: text file with one "minX,maxX,minY,maxY" gridcell extent per line
        :return: dataframe with the timeseries results per gridcell
        '''
        self.logger.info("Start timeseries from file for parentDS=%s ..."
                         % (self.inputDataSet.parentDataSet))
        if minT is None and maxT is None:
            bbx = self.client.boundingBox(self.inputDataSet)
            minT = bbx.minT
            maxT = bbx.maxT

        extents = []
        with open(file) as f:
            for line in f:
                split = line.strip().split(",")
                ext = {'minX': int(split[0]), 'maxX': int(split[1]),
                       'minY': int(split[2]), 'maxY': int(split[3])}
                extents.append(ext)

        stats = self.timeseriesFromList(extents, startdate=startdate, enddate=enddate,
                                        interval=interval, minT=minT, maxT=maxT,
                                        save=save, weighted=weighted)
        return stats

    @staticmethod
    def config(name):
        return TimeseriesRun.__conf[name]

    def uncaughtErrorHandler(self, type, value, tb):
        self.logger.error("Uncaught exception", exc_info=(type, value, tb))
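# Hedged usage sketch (an assumption, not in the source): computing timeseries
# for grid cells listed in a "minX,maxX,minY,maxY" CSV file. The file name and
# the date range are placeholders.
if __name__ == '__main__':
    from datetime import datetime

    run = TimeseriesRun()
    run.timeseriesFromFile('gridcells.csv',
                           startdate=datetime(2011, 1, 1),
                           enddate=datetime(2016, 1, 1),
                           interval=3)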
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 13 09:55:03 2019

@author: jon
"""

import MalardGDAL as mg
from MalardClient.MalardClient import MalardClient
from MalardClient.DataSet import DataSet
from MalardClient.BoundingBox import BoundingBox
from MalardClient.MaskFilter import MaskFilter

from datetime import datetime

client = MalardClient()

ds = DataSet("cryotempo", "swath_c", "greenland")

proj4 = client.getProjection(ds).proj4
print(proj4)

minX = 700000
minY = -2200000
cell_size = 130000

bbox = BoundingBox(minX, minX + cell_size, minY, minY + cell_size,
                   datetime(2011, 2, 1, 0, 0), datetime(2011, 5, 1, 0, 0))

## TODO: These need to be stored in Malard by DataSet and Type.
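# Hedged sketch (an assumption, not in the source): splitting the three-month
# bounding box above into per-month boxes before querying, mirroring the monthly
# from_dt/to_dt pattern used in the point-product script in this repository.
from dateutil.relativedelta import relativedelta

start = datetime(2011, 2, 1, 0, 0)
while start < datetime(2011, 5, 1, 0, 0):
    end = start + relativedelta(months=1)
    month_bbox = BoundingBox(minX, minX + cell_size, minY, minY + cell_size, start, end)
    print(month_bbox)
    start = end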
from dateutil.relativedelta import relativedelta
import math

import pandas as pd

import ND as nd

from MalardClient.MalardClient import MalardClient
from MalardClient.DataSet import DataSet


def distance(x1s, y1s, x2s, y2s):
    # pointwise Euclidean distance between (x1, y1) and (x2, y2) pairs
    return [math.sqrt((x1 - x2)**2 + (y1 - y2)**2)
            for x1, y1, x2, y2 in zip(x1s, y1s, x2s, y2s)]


client = MalardClient()

ds = DataSet("cryotempo", "poca", "greenland")
dsSwath = DataSet("cryotempo", "GRIS_BaselineC_Q2", "greenland")
ds_oib = DataSet("cryotempo", "oib", "greenland")

filters = [{'column': 'coh', 'op': 'gte', 'threshold': 0.5},
           {'column': 'power', 'op': 'gte', 'threshold': 1000.0}]
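# Quick check of distance(): a 3-4-5 right triangle gives exactly 5.0.
assert distance([0.0], [0.0], [3.0], [4.0]) == [5.0]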
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 22 19:16:07 2019

@author: jon
"""

from MalardClient.MalardClient import MalardClient
from MalardClient.DataSet import DataSet
from MalardClient.BoundingBox import BoundingBox

from datetime import datetime
from dateutil.relativedelta import relativedelta

import numpy as np

client = MalardClient()

ds = DataSet("cryotempo", "poca", "greenland")
dsSwath = DataSet("cryotempo", "GRIS_BaselineC_Q2", "greenland")
ds_oib = DataSet("cryotempo", "oib", "greenland")

bb = client.boundingBox(ds)

minX = -200000
maxX = -100000
minY = -2400000
maxY = -2300000
minT = datetime(2011, 3, 1, 0, 0, 0)
maxT = datetime(2011, 3, 31, 23, 59, 59)
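# Hedged sketch (an assumption, not in the source): querying the POCA, swath and
# OIB data sets over the cell defined above and comparing point counts, using
# the executeQuery/to_df/releaseCacheHandle pattern from the rest of the
# repository.
bb_gc = BoundingBox(minX, maxX, minY, maxY, minT, maxT)
for d in (ds, dsSwath, ds_oib):
    qi = client.executeQuery(d, bb_gc, projections=[], filters=[])
    n = len(qi.to_df) if qi.status == "Success" else 0
    print("{}: {} points".format(d.dataSet, n))
    client.releaseCacheHandle(qi.resultFileName)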
class MtnGlaGridcellProcess:

    # Reference DEM alternatives:
    #"referenceDem": "/data/puma1/scratch/DEMs/srtm_test.tif"
    #"referenceDem": "/data/puma1/scratch/mtngla/dems/HMA_TDX_Masked_SRTM_Merged_coreg_aea_clip.tif"

    # HIMALAYAS
    #"runName": "ReadyHim2",
    #"outputDataSet": "Ready8",
    #"parentDsName": "mtngla",
    #"region": "himalayas",
    #"maskDataSet": "RGIv60",
    #"withinDataSets": ["SDCv10", "/data/puma1/scratch/mtngla/dems/Tdx_SRTM_SurfaceSplit.tiff"],
    #"withinDataSetTypes": ["Debris", "DataSet"],
    #"referenceDem": "/data/puma1/scratch/mtngla/dems/HMA_TDX_Masked_SRTM_Merged_coreg_aea_clip.tif",
    #"inputDataSet": "tdx2",

    # "runName": "HimMad2",
    # "outputDataSet": "HimMad2",
    # "parentDsName": "mtngla",
    # "region": "himalayas",

    # ALASKA
    #"runName": "AlaskaRun1",
    #"outputDataSet": "ReadyDataAlaska2",
    #"parentDsName": "mtngla",
    #"region": "alaska",
    #"maskDataSet": "RGIv60",
    #"withinDataSets": ["SDCv10", "/data/puma1/scratch/mtngla/dems/TD_AD_Interp_SurfaceSplit.tiff"],
    #"withinDataSetTypes": ["Debris", "DataSet"],
    #"referenceDem": "/data/puma1/scratch/mtngla/dems/PCR_TdxFilledWithAD_Masked_Polar_Interp_clip.tif",
    #"inputDataSet": "ADwithTDX",

    __conf = {
        "runName": "AlaskaMad",
        "outputDataSet": "AlaskaMad",
        "parentDsName": "mtngla",
        "region": "alaska",
        "maskDataSet": "RGIv60",
        "withinDataSets": ["SDCv10", "/data/puma1/scratch/mtngla/dems/TD_AD_Interp_SurfaceSplit.tiff"],
        "withinDataSetTypes": ["Debris", "DataSet"],
        "referenceDem": "/data/puma1/scratch/mtngla/dems/PCR_TdxFilledWithAD_Masked_Polar_Interp_clip.tif",
        "inputDataSet": "tdx_mad",
        "malardEnvironmentName": "DEVv2",
        "malardSyncURL": "http://localhost:9000",
        "malardAsyncURL": "ws://localhost:9000",
        "filters": [{'column': 'power', 'op': 'gt', 'threshold': 10000}, {'column': 'coh', 'op': 'gt', 'threshold': 0.6},
                    {'column': 'demDiff', 'op': 'lt', 'threshold': 100}, {'column': 'demDiffMadNew', 'op': 'lt', 'threshold': 10},
                    {'column': 'demDiff', 'op': 'gt', 'threshold': -100}]
    }

    def __init__(self, minX, maxX, minY, maxY, logFile=None):
        '''
        :param minX: minimum x coordinate of the grid cell
        :param maxX: maximum x coordinate of the grid cell
        :param minY: minimum y coordinate of the grid cell
        :param maxY: maximum y coordinate of the grid cell
        :param logFile: if logFile is specified the logger writes into that file instead of the terminal
        '''
        if logFile is None:
            logging.basicConfig(
                format='%(asctime)s, %(threadName)s %(thread)d: %(name)s %(levelname)s %(message)s',
                datefmt='%H:%M:%S', level=logging.INFO)
        else:
            logging.basicConfig(
                filename=logFile, filemode='a',
                format='%(asctime)s, %(threadName)s %(thread)d: %(name)s %(levelname)s %(message)s',
                datefmt='%H:%M:%S', level=logging.INFO)

        sys.excepthook = self.uncaughtErrorHandler

        self.logger = logging.getLogger(__name__)
        self.logger.info('Process started')

        self.client = MalardClient(notebook=False)

        self.minX = minX
        self.maxX = maxX
        self.minY = minY
        self.maxY = maxY
        self.parentDsName = self.config('parentDsName')
        self.outputDataSet = self.config('outputDataSet')
        self.inputDataSet = self.config('inputDataSet')
        self.region = self.config('region')
        self.maskDataSet = self.config('maskDataSet')
        self.withinDataSets = self.config('withinDataSets')
        self.withinDataSetTypes = self.config('withinDataSetTypes')
        self.runName = self.config('runName')

        # grid cells must be square
        assert (self.maxX - self.minX) == (self.maxY - self.minY)
        self.size = maxX - minX

        self.dataSet = DataSet(parentDs=self.config('parentDsName'),
                               dataSet=self.config('inputDataSet'),
                               region=self.config('region'))

    def startProcess(self):
        self.logger.info('Starting gridcell: minX=%s, minY=%s, parentDs=%s, inputDataSet=%s, outputDataSet=%s, runName=%s',
                         self.minX, self.minY, self.parentDsName, self.inputDataSet,
                         self.outputDataSet, self.runName)
        self.defineVariables()

        if os.path.exists(self.maskDataSetFile):
            self.data = self.filter()

            # To Geodata
            self.logger.info('Converting to Geodataset...')
            self.data = self.data.asGeoDataSet()
            self.applyMasks()

            # Calculate elevation difference
            if self.data.hasData():
                raster = RasterDataSet(self.config('referenceDem'))
                assert (self.maxX - self.minX) == (self.maxY - self.minY)
                buffer = (self.maxX - self.minX) * 0.1
                self.data.calculateElevationDifference(raster, buffer=buffer)
                self.addStatistics()
                self.publish()
                self.logger.info("STATISTICS: %s", self.data.getStats())
        else:
            self.logger.info("No valid mask (fp=%s) found for %s, %s, %s, minX=%s, minY=%s, size=%s",
                             self.maskDataSetFile, self.maskDataSet, 'Glacier', self.region,
                             self.minX, self.minY, self.size)

        # shutdown
        self.logger.info("Finished process for: minX=%s, minY=%s, size=%s",
                         self.minX, self.minY, self.size)
        self.logger.info('------------------------------------------------------------------')
        logging.shutdown()
        # clear variables
        sys.modules[__name__].__dict__.clear()

    def filter(self):
        filters = self.config('filters')
        self.logger.info("Filtering dataset=%s with criteria %s" % (self.inputDataSet, filters))
        result = self.client.executeQuery(self.dataSet, self.bbx, [], filters)
        self.logger.info("Result message: %s, %s" % (result.status, result.message))
        fp = result.resultFileName
        data = PointDataSet(fp, self.projection)
        # release the cache handle of the result file
        self.client.releaseCacheHandle(result.resultFileName)
        data.addStatistic('%s_filtered' % self.inputDataSet, data.length())
        self.logger.info("Filter %s result count [%d]" % (self.inputDataSet, data.length()))
        return data

    def applyMasks(self):
        # Mask
        self.data.applyMask(self.maskDataSetFile, 'Glacier')
        # Add column if point is inside masks
        for idx, i in enumerate(self.withinDataSetFiles):
            self.data.withinMask(i, self.withinDataSetTypes[idx])

    def addStatistics(self):
        self.logger.info('Adding additional statistics')
        # number of srtm and number of tandemX
        self.data.addStatistic('result_total', self.data.length())
        #stats['result_srtm'] = float(data.loc[data.dataset == 'SRTM', 'dataset'].count())
        #stats['result_tandemx'] = float(data.loc[data.dataset == 'TandemX', 'dataset'].count())
        self.data.addStatistic('result_avgX', self.data.mean('x'))
        self.data.addStatistic('result_avgY', self.data.mean('y'))
        self.data.addStatistic('result_offsetX',
                               self.data.getStats()['result_avgX'] - (self.minX + (self.size / 2)))
        # was computed against self.minX, which mixed the axes
        self.data.addStatistic('result_offsetY',
                               self.data.getStats()['result_avgY'] - (self.minY + (self.size / 2)))

        # counts per year
        # @TODO do this in glacier years
        years = [x for x in range(self.minT.year, self.maxT.year + 1)]
        for year in years:
            start = datetime.datetime(year, 1, 1, 0, 0)
            end = datetime.datetime(year + 1, 1, 1, 0, 0)
            start = calendar.timegm(start.utctimetuple())
            end = calendar.timegm(end.utctimetuple())
            # count
            keyCount = "result_count_%s" % year
            peryear = float(self.data.data.loc[(self.data.data.time >= start)
                                               & (self.data.data.time < end)].shape[0])
            self.data.addStatistic(keyCount, peryear)
            # elevation difference
            elevDiff = "result_refDifference_%s" % year
            if peryear > 0.0:
                self.data.addStatistic(elevDiff,
                                       float(self.data.data.loc[(self.data.data.time >= start)
                                                                & (self.data.data.time < end),
                                                                'refDifference'].mean()))
            else:
                self.data.addStatistic(elevDiff, 0.0)

    def publish(self, outEnvironment='/data/puma1/scratch/mtngla/ReadyData'):
        # get the data as a plain pandas dataframe without the geo reference
        data = self.data.getData(geo=False)
        outPath = os.path.join(outEnvironment,
                               "ReadyData_%s_x%s_y%s.nc" % (self.minX, self.minY, self.size))
        xarr = data.to_xarray()
        xarr.to_netcdf(outPath)
        # publish
        self.logger.info('Publish new dataset...')
        result = self.query_async.publishGridCellPoints(self.parentDsName, self.outputDataSet,
                                                        self.region, self.minX, self.minY,
                                                        self.data.min('time'), self.size,
                                                        outPath, self.projection)
        self.logger.info('Response: %s' % result.json)
        # delete temporary file
        os.remove(outPath)
        # publish stats
        self.logger.info('Publish gridcell statistics...')
        response = self.query_sync.publishGridCellStats(self.parentDsName, self.runName,
                                                        self.minX, self.minY, self.size,
                                                        self.data.getStats())
        self.logger.info('Response: %s' % response)

    def defineVariables(self):
        self.query_sync = DataSetQuery(self.config('malardSyncURL'),
                                       self.config('malardEnvironmentName'))
        self.query_async = AsyncDataSetQuery(self.config('malardAsyncURL'),
                                             self.config('malardEnvironmentName'), False)

        # get projection
        self.projection = self.client.getProjection(self.dataSet).proj4

        # minT and maxT
        bbx = self.client.boundingBox(self.dataSet)
        self.minT = bbx.minT
        self.maxT = bbx.maxT
        self.bbx = BoundingBox(self.minX, self.maxX, self.minY, self.maxY, self.minT, self.maxT)

        # masks
        mGla = self.query_sync.getGridCellMask(self.parentDsName, self.maskDataSet, 'Glacier',
                                               self.region, self.minX, self.minY, self.size)
        self.maskDataSetFile = json.loads(mGla)['fileName']
        self.withinDataSetFiles = []
        for i, el in enumerate(self.withinDataSets):
            if os.path.exists(el):
                self.withinDataSetFiles.append(el)
            else:
                mask = self.query_sync.getGridCellMask(self.parentDsName, el,
                                                       self.withinDataSetTypes[i], self.region,
                                                       self.minX, self.minY, self.size)
                self.withinDataSetFiles.append(json.loads(mask)['fileName'])

    @staticmethod
    def config(name):
        return MtnGlaGridcellProcess.__conf[name]

    def uncaughtErrorHandler(self, type, value, tb):
        self.logger.error("Uncaught exception", exc_info=(type, value, tb))
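# Hedged usage sketch (an assumption, not in the source): processing a single
# grid cell. The extents are placeholders; note that startProcess() clears the
# module namespace when it finishes, so each cell is typically run in its own
# process.
if __name__ == '__main__':
    process = MtnGlaGridcellProcess(-1600000, -1500000, 600000, 700000)
    process.startProcess()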
class DemDiffMadProcess:

    # ALASKA
    #"runName": "AlaskaRun1",
    #"outputDataSet": "ReadyDataAlaska2",
    #"parentDsName": "mtngla",
    #"region": "alaska",
    #"maskDataSet": "RGIv60",
    #"withinDataSets": ["SDCv10", "/data/puma1/scratch/mtngla/dems/TD_AD_Interp_SurfaceSplit.tiff"],
    #"withinDataSetTypes": ["Debris", "DataSet"],
    #"referenceDem": "/data/puma1/scratch/mtngla/dems/PCR_TdxFilledWithAD_Masked_Polar_Interp_clip.tif",
    #"inputDataSet": "ADwithTDX",

    __conf = {
        "outputDataSet": "tdx4",
        "parentDsName": "mtngla",
        "region": "himalayas",
        "inputDataSet": "tdx2",
        "malardEnvironmentName": "DEVv2",
        "malardSyncURL": "http://localhost:9000",
        "malardAsyncURL": "ws://localhost:9000",
        "buffer": 15000,
        "maskDataSet": "RGIv60",
        "filters": [{'column': 'power', 'op': 'gt', 'threshold': 10000},
                    {'column': 'coh', 'op': 'gt', 'threshold': 0.6}]
    }

    def __init__(self, minX, maxX, minY, maxY, logFile=None, notebook=False):
        '''
        :param minX: minimum x coordinate of the grid cell
        :param maxX: maximum x coordinate of the grid cell
        :param minY: minimum y coordinate of the grid cell
        :param maxY: maximum y coordinate of the grid cell
        :param logFile: if logFile is specified the logger writes into that file instead of the terminal
        '''
        if logFile is None:
            logging.basicConfig(
                format='%(asctime)s, %(threadName)s %(thread)d: %(name)s %(levelname)s %(message)s',
                datefmt='%H:%M:%S', level=logging.INFO)
        else:
            logging.basicConfig(
                filename=logFile, filemode='a',
                format='%(asctime)s, %(threadName)s %(thread)d: %(name)s %(levelname)s %(message)s',
                datefmt='%H:%M:%S', level=logging.INFO)

        sys.excepthook = self.uncaughtErrorHandler

        self.logger = logging.getLogger(__name__)
        self.logger.info('Process started')

        self.client = MalardClient(notebook=notebook)
        self.query_async = AsyncDataSetQuery(self.config('malardAsyncURL'),
                                             self.config('malardEnvironmentName'), False)

        self.minX = minX
        self.maxX = maxX
        self.minY = minY
        self.maxY = maxY
        # grid cells must be square
        assert (self.maxX - self.minX) == (self.maxY - self.minY)
        self.size = maxX - minX

        self.inputDataSet = DataSet(parentDs=self.config('parentDsName'),
                                    dataSet=self.config('inputDataSet'),
                                    region=self.config('region'))
        self.parentDsName = self.config('parentDsName')
        self.outputDataSet = self.config('outputDataSet')
        self.region = self.config('region')
        self.buffer = self.config('buffer')

        self.projection = self.client.getProjection(self.inputDataSet).proj4

        bbx = self.client.boundingBox(self.inputDataSet)
        self.minT = bbx.minT
        self.maxT = bbx.maxT

        # masks
        maskDataSet = self.config('maskDataSet')
        query_sync = DataSetQuery(self.config('malardSyncURL'),
                                  self.config('malardEnvironmentName'))
        mGla = query_sync.getGridCellMask(self.parentDsName, maskDataSet, 'Glacier',
                                          self.region, self.minX, self.minY, self.size)
        self.maskDataSetFile = json.loads(mGla)['fileName']

    def startProcess(self):
        self.logger.info('Starting gridcell: minX=%s, minY=%s, parentDs=%s, inputDataSet=%s, outputDataSet=%s',
                         self.minX, self.minY, self.parentDsName, self.inputDataSet,
                         self.outputDataSet)

        if os.path.exists(self.maskDataSetFile):
            self.data = self.filter()

            if self.data.hasData():
                self.logger.info('Calculate demDiffMad...')
                # absolute-deviation statistic of demDiff per (swathFileId, wf_number) group;
                # note: pandas' built-in 'mad' transform was removed in pandas 2.0
                self.data.data['demDiffMadNew'] = self.data.data['demDiff'].groupby(
                    [self.data.data['swathFileId'], self.data.data['wf_number']]).transform('mad')

                # drop points outside the cell (the ones that are within the buffer zone)
                self.logger.info('Cut down to gridcell...')
                filtered = self.data.data[(self.data.data.x > self.minX) & (self.data.data.x < self.maxX)
                                          & (self.data.data.y > self.minY) & (self.data.data.y < self.maxY)]
                self.logger.info('Count data before cut to gridcell=%s, after cut=%s',
                                 self.data.data.shape[0], filtered.shape[0])
                self.data.data = filtered

                if self.data.hasData():
                    self.publish()
            else:
                self.logger.info("No data in result query")
        else:
            self.logger.info("No glacier mask for area.")

        # shutdown
        self.logger.info("Finished process for: minX=%s, minY=%s, size=%s",
                         self.minX, self.minY, self.size)
        self.logger.info('------------------------------------------------------------------')
        logging.shutdown()
        # clear variables
        sys.modules[__name__].__dict__.clear()

    def filter(self):
        filters = self.config('filters')
        self.logger.info("Filtering dataset=%s with criteria %s" % (self.inputDataSet.dataSet, filters))

        minXb = self.minX - self.buffer
        maxXb = self.maxX + self.buffer
        minYb = self.minY - self.buffer
        maxYb = self.maxY + self.buffer
        self.logger.info("Bounding box with buffer: minX=%s maxX=%s, minY=%s, maxY=%s"
                         % (minXb, maxXb, minYb, maxYb))
        bbx_in = BoundingBox(minXb, maxXb, minYb, maxYb, self.minT, self.maxT)

        result = self.client.executeQuery(self.inputDataSet, bbx_in,
                                          projections=[], filters=filters)
        self.logger.info("Result message: %s, %s" % (result.status, result.message))

        data = PointDataSet(result.resultFileName, self.projection)
        self.logger.info("Data points count: %s" % data.data.shape[0])
        # release the cache handle of the result file
        self.client.releaseCacheHandle(result.resultFileName)
        return data

    def publish(self, outEnvironment='/data/puma1/scratch/mtngla/ReadyData'):
        outPath = os.path.join(outEnvironment,
                               "ReadyData_%s_x%s_y%s.nc" % (self.minX, self.minY, self.size))
        xarr = self.data.data.to_xarray()
        xarr.to_netcdf(outPath)
        # publish
        self.logger.info('Publish new dataset...')
        result = self.query_async.publishGridCellPoints(self.parentDsName, self.outputDataSet,
                                                        self.region, self.minX, self.minY,
                                                        self.data.min('time'), self.size,
                                                        outPath, self.projection)
        self.logger.info('Response: %s' % result.json)
        # delete temporary file
        os.remove(outPath)

    @staticmethod
    def config(name):
        return DemDiffMadProcess.__conf[name]

    def uncaughtErrorHandler(self, type, value, tb):
        self.logger.error("Uncaught exception", exc_info=(type, value, tb))
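# Hedged sketch (an assumption, not in the source): pandas removed the built-in
# 'mad' (mean absolute deviation) aggregation in 2.0, so the transform('mad')
# call above only works on older pandas. An equivalent group-wise statistic can
# be computed explicitly; the mad_transform helper name is hypothetical.
import pandas as pd

def mad_transform(df):
    # mean absolute deviation of demDiff within each (swathFileId, wf_number) group
    grouped = df['demDiff'].groupby([df['swathFileId'], df['wf_number']])
    return grouped.transform(lambda s: (s - s.mean()).abs().mean())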
class RegressionRun:

    # __conf = {
    #     "outputFileName": "himalayas-gridcells.gpkg",
    #     "inputDataSet": "HimMad2",
    #     "runName": "HimMad2",
    #     "region": "himalayas",
    #     "parentDsName": "mtngla",
    #     "outputPath": "regression_results",
    #     "malardEnvironmentName": "DEVv2",
    #     "malardSyncURL": "http://localhost:9000",
    #     "malardAsyncURL": "ws://localhost:9000",
    #     "filters": [{'column': 'power', 'op': 'gt', 'threshold': 10000}, {'column': 'coh', 'op': 'gt', 'threshold': 0.6},
    #                 {'column': 'demDiff', 'op': 'lt', 'threshold': 100}, {'column': 'demDiffMadNew', 'op': 'lt', 'threshold': 10},
    #                 {'column': 'demDiff', 'op': 'gt', 'threshold': -100},
    #                 {'column': 'refDifference', 'op': 'gt', 'threshold': -150}, {'column': 'refDifference', 'op': 'lt', 'threshold': 150},
    #                 {'column': 'within_DataSet', 'op': 'gt', 'threshold': 1}]
    # }

    __conf = {
        "outputFileName": "alaska-gridcells-double.gpkg",
        "inputDataSet": "AlaskaMad",
        "runName": "AlaskaMad",
        "region": "alaska",
        "parentDsName": "mtngla",
        "outputPath": "regression_results",
        "malardEnvironmentName": "DEVv2",
        "malardSyncURL": "http://localhost:9000",
        "malardAsyncURL": "ws://localhost:9000",
        "filters": [{'column': 'power', 'op': 'gt', 'threshold': 10000}, {'column': 'coh', 'op': 'gt', 'threshold': 0.6},
                    {'column': 'demDiff', 'op': 'lt', 'threshold': 100}, {'column': 'demDiffMadNew', 'op': 'lt', 'threshold': 10},
                    {'column': 'demDiff', 'op': 'gt', 'threshold': -100},
                    {'column': 'refDifference', 'op': 'gt', 'threshold': -150}, {'column': 'refDifference', 'op': 'lt', 'threshold': 150},
                    {'column': 'within_DataSet', 'op': 'gt', 'threshold': 1}]
    }

    # __conf = {
    #     "outputFileName": "iceland5.gpkg",
    #     "inputDataSet": "tdx",
    #     "runName": "RunIce",
    #     "region": "iceland",
    #     "parentDsName": "mtngla",
    #     "outputPath": "regression_results",
    #     "malardEnvironmentName": "DEVv2",
    #     "malardSyncURL": "http://localhost:9000",
    #     "malardAsyncURL": "ws://localhost:9000",
    #     "filters": [{'column': 'powerScaled', 'op': 'gt', 'threshold': 10000}, {'column': 'coh', 'op': 'gt', 'threshold': 0.8},
    #                 {'column': 'demDiff', 'op': 'lt', 'threshold': 200}, {'column': 'demDiffMadNew', 'op': 'lt', 'threshold': 40}]
    # }

    def __init__(self, logFile=None, notebook=False):
        '''
        :param logFile: if logFile is specified the logger writes into that file instead of the terminal
        '''
        if logFile is None:
            logging.basicConfig(
                format='%(asctime)s, %(threadName)s %(thread)d: %(name)s %(levelname)s %(message)s',
                datefmt='%H:%M:%S', level=logging.INFO)
        else:
            logging.basicConfig(
                filename=logFile, filemode='a',
                format='%(asctime)s, %(threadName)s %(thread)d: %(name)s %(levelname)s %(message)s',
                datefmt='%H:%M:%S', level=logging.INFO)

        sys.excepthook = self.uncaughtErrorHandler

        self.logger = logging.getLogger(__name__)
        self.logger.info('Process started')

        self.inputDataSet = DataSet(parentDs=self.config('parentDsName'),
                                    dataSet=self.config('inputDataSet'),
                                    region=self.config('region'))
        self.runName = self.config('runName')

        self.client = MalardClient(notebook=notebook)
        self.query_sync = DataSetQuery(self.config('malardSyncURL'),
                                       self.config('malardEnvironmentName'))

        # get projection
        self.projection = self.client.getProjection(self.inputDataSet).proj4

    def gridcellRegression(self, boundingBox, linear=True, robust=True, weighted=None,
                           minCount=10, radius=None, filters=None):
        if filters is None:
            filters = self.config('filters')
        self.logger.info("Filtering dataset=%s with criteria %s" % (self.inputDataSet, filters))
        result = self.client.executeQuery(self.inputDataSet, boundingBox,
                                          projections=[], filters=filters)
        self.logger.info("Result message: status=%s, message=%s" % (result.status, result.message))

        data = PointDataSet(result.resultFileName, self.projection)
        self.logger.info("Dataset has %s points" % data.data.shape[0])

        if radius is not None:
            centerX = boundingBox.minX + (abs(boundingBox.maxX - boundingBox.minX) / 2)
            centerY = boundingBox.minY + (abs(boundingBox.maxY - boundingBox.minY) / 2)
            self.logger.info("Apply radius with centerX=%s and centerY=%s..." % (centerX, centerY))
            self.logger.info("Before radius count=%s..." % data.data.shape[0])
            data.applyRadius(radius=radius, centerX=centerX, centerY=centerY)
            self.logger.info("After radius count=%s..." % data.data.shape[0])

        # release the cache handle of the result file
        self.client.releaseCacheHandle(result.resultFileName)

        results = {}
        if data.data.shape[0] > minCount and not data.data['time'].nunique() == 1:
            if linear:
                r = data.linearRegression()
                results = {**results, **r}
            if robust:
                r = data.robustRegression()
                results = {**results, **r}
            if weighted is not None:
                for w in weighted:
                    r = data.weightedRegression(weight=w['weight'], mask=w['mask_std_dev'])
                    results = {**results, **r}
            self.logger.info(results)
        else:
            self.logger.info("Not enough data in cell (%s points)" % data.data.shape[0])
        return results

    def regressionFromStats(self, linear=True, robust=True, weighted=None, minT=None,
                            maxT=None, minCount=50, save=True):
        self.logger.info("Get run statistics for parentDS=%s runName=%s ..."
                         % (self.inputDataSet.parentDataSet, self.runName))
        stats = self.query_sync.getRunStatistics(self.inputDataSet.parentDataSet, self.runName)
        stats = json.loads(stats)
        dfStats = json_normalize(stats)

        if minT is None and maxT is None:
            bbx = self.client.boundingBox(self.inputDataSet)
            minT = bbx.minT
            maxT = bbx.maxT

        for idx, line in dfStats.iterrows():
            if line['statistics.afterGlacierMask'] > minCount:
                minX, maxX = line['gridCell.minX'], line['gridCell.minX'] + line['gridCell.size']
                minY, maxY = line['gridCell.minY'], line['gridCell.minY'] + line['gridCell.size']
                self.logger.info("Calculating gridcell minX=%s maxX=%s minY=%s maxY=%s minT=%s maxT=%s ..."
                                 % (minX, maxX, minY, maxY, minT, maxT))
                bbx_in = BoundingBox(minX, maxX, minY, maxY, minT, maxT)
                results = self.gridcellRegression(bbx_in, linear=linear, robust=robust,
                                                  weighted=weighted)
                self.logger.info("Adding regression results to stats...")
                for key in results:
                    if isinstance(results[key], list):
                        if not np.isin(key, dfStats.columns):
                            newColumn = [key]
                            dfStats = dfStats.reindex(columns=np.append(dfStats.columns.values, newColumn))
                            # astype has no inplace argument; assign the converted column
                            dfStats[[key]] = dfStats[[key]].astype('object')
                        dfStats.at[idx, key] = results[key]
                    else:
                        dfStats.at[idx, key] = results[key]

        size = dfStats['gridCell.size']
        geometry = [Point(xy) for xy in zip(dfStats['gridCell.minX'] + (size / 2),
                                            dfStats['gridCell.minY'] + (size / 2))]
        dfStats = gp.GeoDataFrame(dfStats, crs=self.projection, geometry=geometry)

        if save:
            file = os.path.join(self.config("outputPath"), self.config("outputFileName"))
            self.logger.info("Saving results under file=%s" % file)
            dfStats.to_file(file, driver="GPKG")
        return dfStats

    def regressionFromList(self, gridcells, linear=True, robust=True, weighted=None,
                           minT=None, maxT=None, save=True, radius=None, geometry='point'):
        dfStats = pd.DataFrame(gridcells)
        if minT is None and maxT is None:
            bbx = self.client.boundingBox(self.inputDataSet)
            minT = bbx.minT
            maxT = bbx.maxT

        for idx, line in dfStats.iterrows():
            self.logger.info("Calculating gridcell minX=%s maxX=%s minY=%s maxY=%s minT=%s maxT=%s ..."
                             % (line['minX'], line['maxX'], line['minY'], line['maxY'], minT, maxT))
            bbx_in = BoundingBox(line['minX'].item(), line['maxX'].item(),
                                 line['minY'].item(), line['maxY'].item(), minT, maxT)
            results = self.gridcellRegression(bbx_in, linear=linear, robust=robust,
                                              weighted=weighted, radius=radius)
            self.logger.info("Adding regression results to stats...")
            for key in results:
                if isinstance(results[key], list):
                    if not np.isin(key, dfStats.columns):
                        newColumn = [key]
                        dfStats = dfStats.reindex(columns=np.append(dfStats.columns.values, newColumn))
                        dfStats[[key]] = dfStats[[key]].astype('object')
                    dfStats.at[idx, key] = results[key]
                else:
                    dfStats.at[idx, key] = results[key]

        size = dfStats['maxX'] - dfStats['minX']
        if geometry == 'point':  # was 'point:', which made this branch unreachable
            self.logger.info("Converted to point geometry")
            geometry = [Point(xy) for xy in zip(dfStats['minX'] + (size / 2),
                                                dfStats['minY'] + (size / 2))]
        elif geometry == 'cell':
            self.logger.info("Converted to cell geometry")
            geometry = []
            for idx, line in dfStats.iterrows():
                minX, maxX = line['minX'], line['maxX']
                minY, maxY = line['minY'], line['maxY']
                geometry.append(Polygon([(minX, minY), (minX, maxY), (maxX, maxY),
                                         (maxX, minY), (minX, minY)]))
        else:
            self.logger.info("Error: invalid geometry specified. Should be either 'point' or 'cell'")

        dfStats = gp.GeoDataFrame(dfStats, crs=self.projection, geometry=geometry)
        if save:
            file = os.path.join(self.config("outputPath"), self.config("outputFileName"))
            self.logger.info("Saving results under file=%s" % file)
            dfStats.to_file(file, driver="GPKG")
        return dfStats

    def regressionFromRaster(self, file, linear=True, robust=True, weighted=None, minT=None,
                             maxT=None, save=True, rasterNoData=-1000000, radius=None,
                             geometry='point'):
        '''
        Calculates regression for cells corresponding to the cells of a given input raster.

        :param file: raster file path
        :param radius: if None the exact extent of the raster cell is used, else the center
                       of the raster cell and the points within the given radius are used
        :return: dataframe with the regression results per gridcell
        '''
        self.logger.info("Start regression from raster for parentDS=%s runName=%s ..."
                         % (self.inputDataSet.parentDataSet, self.runName))
        if minT is None and maxT is None:
            bbx = self.client.boundingBox(self.inputDataSet)
            minT = bbx.minT
            maxT = bbx.maxT

        raster = RasterDataSet(file)
        if radius is None:
            extents = raster.getCellsAsExtent()
        else:
            xy, values = raster.getCenterPoints()
            extents = []
            for i, el in enumerate(xy):
                self.logger.info("Calculating gridcell %s / %s ..." % (i + 1, len(values)))
                if values[i] != rasterNoData:
                    ext = {'minX': el[0] - radius, 'maxX': el[0] + radius,
                           'minY': el[1] - radius, 'maxY': el[1] + radius}
                    extents.append(ext)
                    self.logger.info("Extent with radius=%s is minX=%s maxX=%s minY=%s maxY=%s ..."
                                     % (radius, ext['minX'], ext['maxX'], ext['minY'], ext['maxY']))
                else:
                    self.logger.info("Raster cell=%s has no data (nodata value=%s) and is skipped ..."
                                     % (el, rasterNoData))

        stats = self.regressionFromList(extents, linear=linear, robust=robust, weighted=weighted,
                                        minT=minT, maxT=maxT, save=save, radius=radius,
                                        geometry=geometry)
        return stats

    def regressionFromFile(self, file, linear=True, robust=True, weighted=None, minT=None,
                           maxT=None, save=True, radius=None, geometry='point'):
        '''
        Calculates regression for the cells listed in a given input file.

        :param file: text file with one "minX,maxX,minY,maxY" gridcell extent per line
        :return: dataframe with the regression results per gridcell
        '''
        self.logger.info("Start regression from file for parentDS=%s ..."
                         % (self.inputDataSet.parentDataSet))
        if minT is None and maxT is None:
            bbx = self.client.boundingBox(self.inputDataSet)
            minT = bbx.minT
            maxT = bbx.maxT

        extents = []
        with open(file) as f:
            for line in f:
                split = line.strip().split(",")
                ext = {'minX': int(split[0]), 'maxX': int(split[1]),
                       'minY': int(split[2]), 'maxY': int(split[3])}
                extents.append(ext)

        stats = self.regressionFromList(extents, linear=linear, robust=robust, weighted=weighted,
                                        minT=minT, maxT=maxT, save=save, radius=radius,
                                        geometry=geometry)
        return stats

    @staticmethod
    def config(name):
        return RegressionRun.__conf[name]

    def uncaughtErrorHandler(self, type, value, tb):
        self.logger.error("Uncaught exception", exc_info=(type, value, tb))
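# Hedged usage sketch (an assumption, not in the source): fitting per-gridcell
# regressions from the published run statistics and saving the GeoPackage named
# in __conf. The keyword values are placeholders.
if __name__ == '__main__':
    run = RegressionRun()
    run.regressionFromStats(linear=True, robust=True, minCount=50, save=True)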
    df = pd.DataFrame()
    df['x'] = x
    df['y'] = y
    return df


environmentName = 'DEVv2'

gridCellSize = 100000
resolution = 500

mask_prefix = "SARIN"

client = MalardClient(environmentName, True)

dataSet = DataSet('cryotempo', 'GRIS_BaselineC_Q2', 'greenland')

proj4 = client.getProjection(dataSet).proj4

mask = '/data/puma1/scratch/cryotempo/masks/icesheets.shp' if mask_prefix == "ICE" \
    else '/data/puma1/scratch/cryotempo/sarinmasks/{}_Greenland.shp'.format(mask_prefix)

tmpPath = '/home/jon/data/masks/'

bbox = client.boundingBox(dataSet)
gridCells = client.gridCells(dataSet, bbox)

for gc in gridCells: