def get_level(resource, level):
    from flyingpigeon.ocgis_module import call
    from netCDF4 import Dataset
    from flyingpigeon.utils import get_variable
    from numpy import squeeze
    try:
        # sort before the call so the ordering actually affects the subset
        if type(resource) == list:
            resource.sort()
        level_data = call(resource, level_range=[int(level), int(level)])
        variable = get_variable(level_data)
        logger.info('found %s in file' % variable)
        ds = Dataset(level_data, mode='a')
        var = ds.variables.pop(variable)
        dims = var.dimensions
        # drop the level axis: (time, level, lat, lon) -> (time, lat, lon)
        new_var = ds.createVariable('z%s' % level, var.dtype,
                                    dimensions=(dims[0], dims[2], dims[3]))
        # i = where(var[:]==level)
        new_var[:, :, :] = squeeze(var[:, 0, :, :])
        ds.close()
        logger.info('level %s extracted' % level)
        data = call(level_data, variable='z%s' % level)
    except Exception as e:
        logger.error('failed to extract level %s ' % e)
        # re-raise: returning here would hit an unbound 'data'
        raise
    return data
def get_level(resource, level):
    from flyingpigeon.ocgis_module import call
    from netCDF4 import Dataset
    from flyingpigeon.utils import get_variable
    from numpy import squeeze
    try:
        if type(resource) == list:
            resource.sort()
        level_data = call(resource, level_range=[int(level), int(level)])
        variable = get_variable(level_data)
        LOGGER.info('found %s in file' % variable)
        ds = Dataset(level_data, mode='a')
        try:
            var = ds.variables.pop(variable)
            dims = var.dimensions
            new_var = ds.createVariable('z%s' % level, var.dtype,
                                        dimensions=(dims[0], dims[2], dims[3]))
            # i = where(var[:]==level)
            new_var[:, :, :] = squeeze(var[:, 0, :, :])
            # attribute copying fails for some datasets (e.g. 20CRV2 data);
            # keep going without the attributes in that case
            try:
                new_var.setncatts({k: var.getncattr(k) for k in var.ncattrs()})
            except Exception:
                LOGGER.info('Could not set attributes for z%s' % level)
        finally:
            # close the dataset even if the extraction above fails
            ds.close()
        LOGGER.info('level %s extracted' % level)
        data = call(level_data, variable='z%s' % level)
    except Exception:
        LOGGER.exception('failed to extract level')
        raise
    return data
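# Usage sketch (illustrative, not from the original code): the file name and
# the 500 hPa level are placeholder assumptions; get_level returns a netCDF
# whose variable is renamed to 'z<level>'.
nc_z500 = get_level(['hgt_day_NCEP_19790101-19791231.nc'], level=500)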
def get_reference(ncs_indices, period='all'):
    """
    Calculates the netCDF files containing the mean climatology
    for statistical GAM training.

    :param ncs_indices: list of climate indices defining the growing
                        conditions of tree species
    :param period: time period for statistical training
    :return present: present conditions
    """
    from datetime import datetime as dt
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.utils import get_variable
    from os.path import basename, splitext

    if not period == 'all':
        s, e = period.split('-')
        start = dt.strptime(s + '-01-01', '%Y-%m-%d')
        end = dt.strptime(e + '-12-31', '%Y-%m-%d')
        time_range = [start, end]
    else:
        time_range = None

    ref_indices = []
    for nc_indice in ncs_indices:
        variable = get_variable(nc_indice)
        # splitext instead of strip('.nc'): strip removes any of the
        # characters '.', 'n', 'c' from both ends of the file name
        f = splitext(basename(nc_indice))[0]
        prefix = '%s_ref-%s' % ('_'.join(f.split('_')[0:-1]), period)
        ref_indices.append(call(resource=nc_indice, variable=variable,
                                prefix=prefix,
                                calc=[{'func': 'mean', 'name': variable}],
                                calc_grouping=['all'],
                                time_range=time_range))
    return ref_indices
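# Usage sketch (illustrative): the indice file name is a placeholder (files of
# this kind are produced by get_indices further below), and the period string
# follows the 'YYYY-YYYY' form split inside get_reference.
ncs_indices = ['TG_jun_EUR-11_MPI-ESM-LR_historical_19700101-20001231.nc']
ref_ncs = get_reference(ncs_indices, period='1971-2000')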
def get_indices(resources, indices):
    from flyingpigeon.utils import sort_by_filename, calc_grouping, drs_filename
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.indices import indice_variable

    # names = [drs_filename(nc, skip_timestamp=False, skip_format=False,
    #          variable=None, rename_file=True, add_file_path=True) for nc in resources]
    ncs = sort_by_filename(resources, historical_concatination=True)
    ncs_indices = []
    logger.info('resources sorted, found %s datasets' % len(ncs.keys()))
    for key in ncs.keys():
        for indice in indices:
            try:
                name, month = indice.split('_')
                variable = key.split('_')[0]
                logger.debug('name=%s month=%s variable=%s' % (name, month, variable))
                if variable == indice_variable(name):
                    logger.info('calculating indice %s ' % indice)
                    grouping = calc_grouping(month)
                    calc = [{'func': 'icclim_' + name, 'name': name}]
                    prefix = key.replace(variable, name).replace('_day_', '_%s_' % month)
                    nc = call(resource=ncs[key], variable=variable, calc=calc,
                              calc_grouping=grouping, prefix=prefix,
                              memory_limit=500)
                    ncs_indices.append(nc)
                    logger.info('successfully calculated indice %s %s' % (key, indice))
            except Exception:
                logger.exception('failed to calculate indice %s %s' % (key, indice))
    return ncs_indices
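# Usage sketch (illustrative): the DRS file name is a placeholder; indice keys
# take the '<icclim indicator>_<month grouping>' form that get_indices splits
# on '_', e.g. 'TG_jun'.
resources = ['tas_day_EUR-11_MPI-ESM-LR_historical_r1i1p1_19700101-20001231.nc']
ncs_indices = get_indices(resources, indices=['TG_jun'])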
def execute(self):
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.utils import sort_by_filename, archive, get_values, get_time

    ncs = self.getInputValues(identifier='resource')
    logger.info("ncs: %s " % ncs)
    coords = self.getInputValues(identifier='coords')
    logger.info("coords %s", coords)
    filenames = []
    nc_exp = sort_by_filename(ncs, historical_concatination=True)

    from numpy import savetxt, column_stack
    from shapely.geometry import Point

    for key in nc_exp.keys():
        try:
            logger.info('start calculation for %s ' % key)
            ncs = nc_exp[key]
            times = get_time(ncs, format='%Y-%m-%d_%H:%M:%S')
            concat_vals = times
            # ['%s-%02d-%02d_%02d:%02d:%02d' %
            #  (t.year, t.month, t.day, t.hour, t.minute, t.second) for t in times]
            header = 'date_time'
            filename = '%s.csv' % key
            filenames.append(filename)

            for p in coords:
                try:
                    self.status.set('processing point : {0}'.format(p), 20)
                    # define the point:
                    p = p.split(',')
                    point = Point(float(p[0]), float(p[1]))
                    # get the values
                    timeseries = call(resource=ncs, geom=point, select_nearest=True)
                    vals = get_values(timeseries)
                    # concatenation of values
                    header = header + ',%s-%s' % (p[0], p[1])
                    concat_vals = column_stack([concat_vals, vals])
                except Exception as e:
                    logger.debug('failed for point %s %s' % (p, e))
            self.status.set('*** all points processed for {0} ****'.format(key), 50)
            savetxt(filename, concat_vals, fmt='%s', delimiter=',', header=header)
        except Exception as e:
            logger.debug('failed for %s %s' % (key, e))

    # set the outputs
    self.status.set('*** creating output tar archive ****', 90)
    tarout_file = archive(filenames)
    self.tarout.setValue(tarout_file)
def _handler(self, request, response):
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    ncs = archiveextract(
        resource=rename_complexinputs(request.inputs['resource']))
    LOGGER.info('ncs: {}'.format(ncs))

    coords = []
    for coord in request.inputs['coords']:
        coords.append(coord.data)
    LOGGER.info('coords {}'.format(coords))

    filenames = []
    nc_exp = sort_by_filename(ncs, historical_concatination=True)

    for key in nc_exp.keys():
        try:
            LOGGER.info('start calculation for {}'.format(key))
            ncs = nc_exp[key]
            times = get_time(ncs)  # , format='%Y-%m-%d_%H:%M:%S')
            concat_vals = times
            # ['%s-%02d-%02d_%02d:%02d:%02d' %
            #  (t.year, t.month, t.day, t.hour, t.minute, t.second) for t in times]
            header = 'date_time'
            filename = '{}.csv'.format(key)
            filenames.append(filename)

            for p in coords:
                try:
                    response.update_status('processing point: {}'.format(p), 20)
                    # define the point:
                    p = p.split(',')
                    point = Point(float(p[0]), float(p[1]))
                    # get the values
                    timeseries = call(resource=ncs, geom=point, select_nearest=True)
                    vals = get_values(timeseries)
                    # concatenation of values
                    header = header + ',{}-{}'.format(p[0], p[1])
                    concat_vals = column_stack([concat_vals, vals])
                except Exception as e:
                    LOGGER.debug('failed for point {} {}'.format(p, e))
            response.update_status('*** all points processed for {0} ****'.format(key), 50)
            # fmt='%s' lets savetxt write the date strings and the float
            # values alike
            savetxt(filename, concat_vals, fmt='%s', delimiter=',', header=header)
        except Exception as ex:
            LOGGER.debug('failed for {}: {}'.format(key, str(ex)))

    # set the outputs
    response.update_status('*** creating output tar archive ****', 90)
    tarout_file = archive(filenames)
    response.outputs['tarout'].file = tarout_file
    return response
def execute(self):
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.utils import sort_by_filename, archive, get_values, get_time

    ncs = self.getInputValues(identifier='netcdf_file')
    logger.info("ncs: %s " % ncs)
    coords = self.getInputValues(identifier='coords')
    logger.info("coords %s", coords)
    filenames = []
    nc_exp = sort_by_filename(ncs, historical_concatination=True)

    # (fp_tar, tarout_file) = tempfile.mkstemp(dir=".", suffix='.tar')
    # tar = tarfile.open(tarout_file, "w")

    from numpy import savetxt, column_stack
    from shapely.geometry import Point

    for key in nc_exp.keys():
        try:
            logger.info('start calculation for %s ' % key)
            ncs = nc_exp[key]
            times = get_time(ncs)
            concat_vals = ['%s-%02d-%02d_%02d:%02d:%02d' %
                           (t.year, t.month, t.day, t.hour, t.minute, t.second)
                           for t in times]
            header = 'date_time'
            filename = '%s.csv' % key
            filenames.append(filename)

            for p in coords:
                try:
                    self.status.set('processing point : {0}'.format(p), 20)
                    # define the point:
                    p = p.split(',')
                    point = Point(float(p[0]), float(p[1]))
                    # get the values
                    timeseries = call(resource=ncs, geom=point, select_nearest=True)
                    vals = get_values(timeseries)
                    # concatenation of values
                    header = header + ',%s-%s' % (p[0], p[1])
                    concat_vals = column_stack([concat_vals, vals])
                except Exception as e:
                    logger.debug('failed for point %s %s' % (p, e))
            self.status.set('*** all points processed for {0} ****'.format(key), 50)
            savetxt(filename, concat_vals, fmt='%s', delimiter=',', header=header)
        except Exception as e:
            logger.debug('failed for %s %s' % (key, e))

    # set the outputs
    self.status.set('*** creating output tar archive ****', 90)
    tarout_file = archive(filenames)
    self.tarout.setValue(tarout_file)
def execute(self):
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.utils import get_time, get_variable, sort_by_filename
    from datetime import datetime as dt
    from netCDF4 import Dataset
    from numpy import savetxt, column_stack, squeeze
    from shapely.geometry import Point

    ncs = self.getInputValues(identifier='netcdf_file')
    logging.info("ncs: %s " % ncs)
    coords = self.getInputValues(identifier='coords')
    logging.info("coords %s", coords)

    nc_exp = sort_by_filename(ncs)  # dictionary {experiment: [files]}
    filenames = []
    (fp_tar, tarout_file) = tempfile.mkstemp(dir=".", suffix='.tar')
    tar = tarfile.open(tarout_file, "w")

    for key in nc_exp.keys():
        logging.info('start calculation for %s ' % key)
        ncs = nc_exp[key]
        nc = ncs[0]
        times = get_time(nc)
        var = get_variable(nc)
        # strftime is called as a method of the datetime objects; '%Y-%m-%d'
        # keeps the date parts in ISO order
        concat_vals = [t.strftime('%Y-%m-%d_%H:%M:%S') for t in times]
        header = 'date_time'
        filename = '%s.csv' % key
        filenames.append(filename)

        for ugid, p in enumerate(coords, start=1):
            self.status.set('processing point : {0}'.format(p), 20)
            p = p.split(',')
            self.status.set('split x and y coord : {0}'.format(p), 20)
            point = Point(float(p[0]), float(p[1]))
            # get the timeseries at the nearest grid point
            timeseries = call(resource=ncs, geom=point, select_nearest=True)
            ds = Dataset(timeseries)
            vals = squeeze(ds.variables[var])
            header = header + ',%s_%s' % (p[0], p[1])
            concat_vals = column_stack([concat_vals, vals])

        savetxt(filename, concat_vals, fmt='%s', delimiter=',', header=header)
        tar.add(filename)

    tar.close()
    self.tarout.setValue(tarout_file)
def _handler(self, request, response):
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    ncs = archiveextract(
        resource=rename_complexinputs(request.inputs['resource']))
    LOGGER.info("ncs: %s " % ncs)

    # take the literal data of each input: the raw input objects cannot be
    # split into coordinates below
    coords = [coord.data for coord in request.inputs['coords']]
    LOGGER.info("coords %s", coords)

    filenames = []
    nc_exp = sort_by_filename(ncs, historical_concatination=True)

    for key in nc_exp.keys():
        try:
            LOGGER.info('start calculation for %s ' % key)
            ncs = nc_exp[key]
            times = get_time(ncs, format='%Y-%m-%d_%H:%M:%S')
            concat_vals = times
            # ['%s-%02d-%02d_%02d:%02d:%02d' %
            #  (t.year, t.month, t.day, t.hour, t.minute, t.second) for t in times]
            header = 'date_time'
            filename = '%s.csv' % key
            filenames.append(filename)

            for p in coords:
                try:
                    response.update_status('processing point : {0}'.format(p), 20)
                    # define the point:
                    p = p.split(',')
                    point = Point(float(p[0]), float(p[1]))
                    # get the values
                    timeseries = call(resource=ncs, geom=point, select_nearest=True)
                    vals = get_values(timeseries)
                    # concatenation of values
                    header = header + ',%s-%s' % (p[0], p[1])
                    concat_vals = column_stack([concat_vals, vals])
                except Exception as e:
                    LOGGER.debug('failed for point %s %s' % (p, e))
            response.update_status('*** all points processed for {0} ****'.format(key), 50)
            savetxt(filename, concat_vals, fmt='%s', delimiter=',', header=header)
        except Exception as e:
            LOGGER.debug('failed for %s %s' % (key, e))

    # set the outputs
    response.update_status('*** creating output tar archive ****', 90)
    tarout_file = archive(filenames)
    response.outputs['tarout'].file = tarout_file
    return response
def ocgis_call_wrap(tmargs):
    # unpack the single task tuple: (file, variable, level, bbox)
    _z = tmargs[0]
    _origvar = tmargs[1]
    _level = tmargs[2]
    _bbox = tmargs[3]
    _plev = [int(_level), int(_level)]
    _pref = 'levdom_' + basename(_z)[0:-3]
    _tmpf = call(resource=_z,
                 variable=_origvar,
                 level_range=_plev,
                 geom=_bbox,
                 spatial_wrapping='wrap',
                 prefix=_pref)
    return _tmpf
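# The single-tuple signature of ocgis_call_wrap suggests use with a process
# pool, whose map passes exactly one argument per task. A sketch of that
# assumed usage; the file names and the bounding box are placeholders.
from multiprocessing import Pool

tasks = [(nc, 'zg', 500, [-80.0, 20.0, 50.0, 70.0])
         for nc in ['zg_day_19790101-19791231.nc', 'zg_day_19800101-19801231.nc']]
pool = Pool()
subset_files = pool.map(ocgis_call_wrap, tasks)  # one subset file per task
pool.close()
pool.join()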
def aggregatTime(resource=[], variable=None, frequency=None, prefix=None,
                 grouping='mon', calculation='mean', historical_concatination=True):
    """
    Aggregates over the time axis.

    :param resource: input netCDF files
    :param variable: variable to be used from resource
    :param frequency: time frequency in resource
    :param grouping: time aggregation for output
    :param prefix: file name prefix
    :param calculation: calculation method (default = mean)
    :param historical_concatination: if RCPs and appropriate historical runs
                                     are present, they are concatenated
    :return: path to netCDF file
    """
    try:
        ncs = sort_by_filename(resource, historical_concatination=historical_concatination)
        group = calc_grouping(grouping=grouping)
    except Exception:
        logger.exception('failed to determine ncs or calc_grouping')
        raise

    if len(ncs.keys()) != 1:
        logger.exception('None or more than one data experiment found in resource')
        raise Exception('None or more than one data experiment found in resource')

    for key in list(ncs.keys())[0:1]:
        try:
            if frequency is None:
                frequency = get_frequency(ncs[key][0])
            if variable is None:
                variable = get_variable(ncs[key][0])

            meta_attrs = {'field': {'frequency': grouping}}
            # 'variable': {'new_attribute': 5, 'hello': 'attribute'},
            calc = [{'func': calculation, 'name': variable, 'meta_attrs': meta_attrs}]
            logger.info('calculation: %s ' % calc)
            if prefix is None:
                prefix = key.replace(frequency, grouping)
            logger.info('prefix: %s ' % prefix)
            output = call(resource=ncs[key], variable=None, calc=calc,
                          calc_grouping=group, prefix=prefix)
            logger.info('time aggregation done for %s ' % key)
        except Exception:
            logger.exception('time aggregation failed for %s' % key)
            raise
    return output  # key  # output
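# Usage sketch (illustrative): aggregate a daily temperature file to monthly
# means; the input path is a placeholder.
monthly_nc = aggregatTime(resource=['tas_day_EUR-11_19700101-20001231.nc'],
                          grouping='mon', calculation='mean')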
def get_season(nc_file, season='DJF'):
    """
    Extraction of selected months.

    :param nc_file: input netCDF
    :param season: season to be extracted (default = 'DJF')
    :returns str: netCDF with time subset
    """
    try:
        time_region = _TIMEREGIONS_[season]
        nc_season = call(nc_file, time_region=time_region)
        logger.info('season selection done %s ' % nc_season)
    except Exception as e:
        msg = 'failed to select season %s' % e
        logger.error(msg)
        raise Exception(msg)
    return nc_season
def get_season(nc_file, season='DJF'):
    """
    Extraction of selected months.

    :param nc_file: input netCDF
    :param season: season to be extracted (default = 'DJF')
    :returns str: netCDF with time subset
    """
    try:
        time_region = _TIMEREGIONS_[season]
        LOGGER.info('time_region: %s ' % time_region)
        nc_season = call(nc_file, time_region=time_region)
        LOGGER.info('season selection done %s ' % nc_season)
    except Exception:
        msg = 'failed to select season, input file is passed '
        LOGGER.exception(msg)
        nc_season = nc_file
    return nc_season
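# Both _TIMEREGIONS_-based variants of get_season assume a module-level
# mapping from season keys to ocgis time_region dictionaries. A sketch of that
# mapping, reconstructed from the month lists enumerated in the older variant
# further below:
_TIMEREGIONS_ = {
    'JJA': {'month': [6, 7, 8]},
    'SON': {'month': [9, 10, 11]},
    'DJF': {'month': [12, 1, 2]},
    'FAM': {'month': [2, 3, 4]},
    'MAM': {'month': [3, 4, 5]},
    'JJAS': {'month': [6, 7, 8, 9]},
    'DJFM': {'month': [12, 1, 2, 3]},
    'MAMJ': {'month': [3, 4, 5, 6]},
    'SOND': {'month': [9, 10, 11, 12]},
    'SONDJF': {'month': [9, 10, 11, 12, 1, 2]},
    'MAMJJA': {'month': [3, 4, 5, 6, 7, 8]},
}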
def get_segetalflora(resource=[], culture_type='fallow', climate_type=3):
    """
    Calculation of segetal flora species numbers based on yearly mean temperature.

    :param resource: list of netCDF yearly mean temperature (tas) files
    :param culture_type: type of culture. Possible values are:
                         'fallow', 'intensive', 'extensive' (default: 'fallow')
    :param climate_type: type of climate: number 1 to 7 or 'all' (default: 3)
    :return list: list of result segetal flora files
    """
    from flyingpigeon.ocgis_module import call
    from os import path

    if not type(culture_type) == list:
        culture_type = list([culture_type])
    if not type(climate_type) == list:
        climate_type = list([climate_type])

    outputs = []
    for name in resource:
        for cult in culture_type:
            for climat in climate_type:
                try:
                    calc = get_equation(culture_type=cult, climate_type=climat)
                    # 'type(calc) != None' was always True; test the value itself
                    if calc is not None:
                        var = 'sf%s%s' % (cult, climat)
                        # replace instead of strip('.nc'): strip also removes
                        # trailing 'n'/'c' characters of the name itself
                        prefix = path.basename(name).replace('tas', var).replace('.nc', '')
                        outputs.append(call(resource=name, calc=calc, prefix=prefix))
                        logger.info('segetalflora done for %s' % prefix)
                    else:
                        logger.debug('NO EQUATION found for %s %s ' % (cult, climat))
                except Exception as e:
                    logger.debug('Segetal flora failed: %s' % e)
    return outputs
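# Usage sketch (illustrative): one yearly-mean tas file, fallow culture, all
# climate zones; the file name is a placeholder.
sf_files = get_segetalflora(resource=['tas_yr_EUR-11_19700101-20001231.nc'],
                            culture_type='fallow', climate_type='all')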
def get_season(nc_file, season='DJF'):
    """
    Extraction of selected months.

    :param nc_file: input netCDF
    :param season: season to be extracted (default = 'DJF')
    :returns str: netCDF with time subset
    """
    try:
        if season == 'JJA':
            time_region = {'month': [6, 7, 8]}
        elif season == 'SON':
            time_region = {'month': [9, 10, 11]}
        elif season == 'DJF':
            time_region = {'month': [12, 1, 2]}
        elif season == 'FAM':
            time_region = {'month': [2, 3, 4]}
        elif season == 'MAM':
            time_region = {'month': [3, 4, 5]}
        elif season == 'JJAS':
            time_region = {'month': [6, 7, 8, 9]}
        elif season == 'DJFM':
            time_region = {'month': [12, 1, 2, 3]}
        elif season == 'MAMJ':
            time_region = {'month': [3, 4, 5, 6]}
        elif season == 'SOND':
            time_region = {'month': [9, 10, 11, 12]}
        elif season == 'SONDJF':
            time_region = {'month': [9, 10, 11, 12, 1, 2]}
        elif season == 'MAMJJA':
            time_region = {'month': [3, 4, 5, 6, 7, 8]}
        elif season == 'all':
            time_region = None
        else:
            # raise instead of only logging: time_region would be unbound below
            raise Exception('season %s not found' % season)
        nc_season = call(nc_file, time_region=time_region)
        logger.info('season extraction done %s ' % nc_season)
    except Exception as e:
        msg = 'failed to extract season %s ' % e
        logger.error(msg)
        raise Exception(msg)
    return nc_season
def get_anomalies(nc_file, frac=0.2, reference=None):
    """
    Anomalisation of data subsets for weather classification.
    Anomalisation is done by subtracting a smoothed annual cycle.

    :param nc_file: input netCDF file
    :param frac: number between 0 and 1 for the strength of smoothing
                 (0 = close to the original data, 1 = flat line), default = 0.2
    :param reference: period to calculate the annual cycle
    :return string: path to output netCDF file
    """
    try:
        variable = utils.get_variable(nc_file)
        calc = [{'func': 'mean', 'name': variable}]
        calc_grouping = ['day', 'year']
        nc_annual_cycle = call(nc_file, calc=calc, calc_grouping=calc_grouping,
                               time_range=reference)
        logger.info('annual cycle calculated')
    except Exception as e:
        msg = 'failed to calculate annual cycle %s' % e
        logger.error(msg)
        raise Exception(msg)

    # spline for smoothing
    import statsmodels.api as sm
    from numpy import tile, empty, linspace
    from netCDF4 import Dataset
    from cdo import Cdo
    cdo = Cdo()

    try:
        # variable = utils.get_variable(nc_file)
        ds = Dataset(nc_annual_cycle, mode='a')
        vals = ds.variables[variable]
        vals_sm = empty(vals.shape)
        ts = vals.shape[0]
        x = linspace(1, ts * 3, num=ts * 3, endpoint=True)
        for lat in range(vals.shape[1]):
            for lon in range(vals.shape[2]):
                try:
                    # tile the cycle three times so the smoother has no edge
                    # effects, then keep the middle third
                    y = tile(vals[:, lat, lon], 3)
                    # ys = smooth(y, window_size=91, order=2, deriv=0, rate=1)[ts:ts*2]
                    ys = sm.nonparametric.lowess(y, x, frac=frac)[ts:ts * 2, 1]
                    vals_sm[:, lat, lon] = ys
                except Exception as e:
                    msg = 'failed for lat %s lon %s %s ' % (lat, lon, e)
                    logger.debug(msg)
                    raise Exception(msg)
                logger.debug('done for %s - %s ' % (lat, lon))
        vals[:, :, :] = vals_sm[:, :, :]
        ds.close()
        logger.info('smoothing of annual cycle done')
    except Exception as e:
        msg = 'failed smoothing of annual cycle %s ' % e
        logger.error(msg)
        raise Exception(msg)

    try:
        ip, nc_anomal = mkstemp(dir='.', suffix='.nc')
        nc_anomal = cdo.sub(input=[nc_file, nc_annual_cycle], output=nc_anomal)
        logger.info('anomalisation done: %s ' % nc_anomal)
    except Exception as e:
        msg = 'failed subtraction of annual cycle %s ' % e
        logger.error(msg)
        raise Exception(msg)
    return nc_anomal
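# The smoothing trick above (tile the annual cycle three times, smooth, keep
# the middle third to avoid edge effects) works standalone. A minimal sketch
# with synthetic data:
import numpy as np
import statsmodels.api as sm

ts = 365
cycle = 10 * np.sin(np.linspace(0, 2 * np.pi, ts)) + np.random.randn(ts)
y = np.tile(cycle, 3)                      # periodic copies, no edges
x = np.linspace(1, ts * 3, num=ts * 3, endpoint=True)
# lowess returns (x, fitted) pairs; keep the fitted middle third
smoothed = sm.nonparametric.lowess(y, x, frac=0.2)[ts:ts * 2, 1]
assert smoothed.shape == (ts,)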
def _handler(self, request, response):
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    process_start_time = time.time()  # measure process execution time ...
    response.update_status('execution started at : %s ' % dt.now(), 5)

    start_time = time.time()  # measure init ...

    resource = archiveextract(
        resource=rename_complexinputs(request.inputs['resource']))

    refSt = request.inputs['refSt'][0].data
    refEn = request.inputs['refEn'][0].data
    dateSt = request.inputs['dateSt'][0].data
    dateEn = request.inputs['dateEn'][0].data
    regrset = request.inputs['regrset'][0].data

    # fix 31 December issue
    # refSt = dt.combine(refSt, dt_time(12, 0))
    # refEn = dt.combine(refEn, dt_time(12, 0))
    # dateSt = dt.combine(dateSt, dt_time(12, 0))
    # dateEn = dt.combine(dateEn, dt_time(12, 0))

    seasonwin = request.inputs['seasonwin'][0].data
    nanalog = request.inputs['nanalog'][0].data
    # bbox = [-80, 20, 50, 70]
    # TODO: Add checking for wrong coordinates and apply default if necessary
    bbox = []
    bboxStr = request.inputs['BBox'][0].data
    bboxStr = bboxStr.split(',')
    bbox.append(float(bboxStr[0]))
    bbox.append(float(bboxStr[2]))
    bbox.append(float(bboxStr[1]))
    bbox.append(float(bboxStr[3]))

    direction = request.inputs['direction'][0].data
    normalize = request.inputs['normalize'][0].data
    distance = request.inputs['dist'][0].data
    outformat = request.inputs['outformat'][0].data
    timewin = request.inputs['timewin'][0].data

    model_var = request.inputs['reanalyses'][0].data
    model, var = model_var.split('_')

    try:
        if direction == 're2mo':
            anaSt = dt.combine(dateSt, dt_time(0, 0))
            anaEn = dt.combine(dateEn, dt_time(0, 0))
            refSt = dt.combine(refSt, dt_time(12, 0))
            refEn = dt.combine(refEn, dt_time(12, 0))
            r_time_range = [anaSt, anaEn]
            m_time_range = [refSt, refEn]
        elif direction == 'mo2re':
            anaSt = dt.combine(dateSt, dt_time(12, 0))
            anaEn = dt.combine(dateEn, dt_time(12, 0))
            refSt = dt.combine(refSt, dt_time(0, 0))
            refEn = dt.combine(refEn, dt_time(0, 0))
            r_time_range = [refSt, refEn]
            m_time_range = [anaSt, anaEn]
        else:
            LOGGER.exception('failed to find time periods for comparison direction')
    except Exception:
        msg = 'failed to put simulation and reference time in order'
        LOGGER.exception(msg)
        raise Exception(msg)

    if normalize == 'None':
        seacyc = False
    else:
        seacyc = True

    if outformat == 'ascii':
        outformat = '.txt'
    elif outformat == 'netCDF':
        outformat = '.nc'
    else:
        LOGGER.exception('output format not valid')

    try:
        if model == 'NCEP':
            getlevel = True
            if 'z' in var:
                level = var.strip('z')
                variable = 'hgt'
                # conform_units_to='hPa'
            else:
                variable = 'slp'
                level = None
                # conform_units_to='hPa'
        elif '20CRV2' in model:
            getlevel = False
            if 'z' in var:
                variable = 'hgt'
                level = var.strip('z')
                # conform_units_to=None
            else:
                variable = 'prmsl'
                level = None
                # conform_units_to='hPa'
        else:
            LOGGER.exception('Reanalyses model not known')
        LOGGER.info('environment set')
    except Exception:
        msg = 'failed to set environment'
        LOGGER.exception(msg)
        raise Exception(msg)

    # LOGGER.debug("init took %s seconds.", time.time() - start_time)
    response.update_status('Read in the arguments', 6)

    #################
    # get input data
    #################
    # TODO: do not forget to select years
    start_time = time.time()  # measure get_input_data ...
    response.update_status('fetching input data', 7)
    try:
        if direction == 're2mo':
            nc_reanalyses = reanalyses(start=anaSt.year, end=anaEn.year,
                                       variable=var, dataset=model,
                                       getlevel=getlevel)
        else:
            nc_reanalyses = reanalyses(start=refSt.year, end=refEn.year,
                                       variable=var, dataset=model,
                                       getlevel=getlevel)

        if type(nc_reanalyses) == list:
            nc_reanalyses = sorted(nc_reanalyses,
                                   key=lambda i: path.splitext(path.basename(i))[0])
        else:
            nc_reanalyses = [nc_reanalyses]

        # For 20CRV2 geopotential height, the daily dataset for 100 years is
        # about 50 Gb, so it makes sense to operate on it step-by-step.
        # TODO: need to create a dictionary for such datasets (for models as well)
        # TODO: benchmark the method below for NCEP z500 for 60 years; maybe use the same (!)
        # TODO: Now everything is regridded to the reanalysis
        if ('20CRV2' in model) and ('z' in var):
            tmp_total = []
            origvar = get_variable(nc_reanalyses)

            for z in nc_reanalyses:
                tmp_n = 'tmp_%s' % (uuid.uuid1())
                b0 = call(resource=z, variable=origvar,
                          level_range=[int(level), int(level)], geom=bbox,
                          spatial_wrapping='wrap',
                          prefix='levdom_' + path.basename(z)[0:-3])
                tmp_total.append(b0)

            tmp_total = sorted(tmp_total,
                               key=lambda i: path.splitext(path.basename(i))[0])
            inter_subset_tmp = call(resource=tmp_total, variable=origvar,
                                    time_range=r_time_range)

            # clean up the intermediate level-domain files
            for i in tmp_total:
                tbr = 'rm -f %s' % (i)
                # system(tbr)

            # create a new variable without the level dimension
            ds = Dataset(inter_subset_tmp, mode='a')
            z_var = ds.variables.pop(origvar)
            dims = z_var.dimensions
            new_var = ds.createVariable('z%s' % level, z_var.dtype,
                                        dimensions=(dims[0], dims[2], dims[3]))
            new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
            # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
            ds.close()
            nc_subset = call(inter_subset_tmp, variable='z%s' % level)
        else:
            nc_subset = call(resource=nc_reanalyses, variable=var, geom=bbox,
                             spatial_wrapping='wrap', time_range=r_time_range,
                             # conform_units_to=conform_units_to
                             )
        # nc_subset = call(resource=nc_reanalyses, variable=var, geom=bbox, spatial_wrapping='wrap')  # XXXXXX wrap
        # LOGGER.debug("get_input_subset_model took %s seconds.", time.time() - start_time)
        response.update_status('**** Input reanalyses data fetched', 10)
    except Exception:
        msg = 'failed to fetch or subset input files'
        LOGGER.exception(msg)
        raise Exception(msg)

    ########################
    # input data preparation
    ########################
    response.update_status('Start preparing input data', 12)

    # filter resource:
    if type(resource) == list:
        resource = sorted(resource, key=lambda i: path.splitext(path.basename(i))[0])
    else:
        resource = [resource]

    tmp_resource = []
    m_start = m_time_range[0]
    m_end = m_time_range[1]

    for re in resource:
        s, e = get_timerange(re)
        tmpSt = dt.strptime(s, '%Y%m%d')
        tmpEn = dt.strptime(e, '%Y%m%d')
        if (tmpSt <= m_end) and (tmpEn >= m_start):
            tmp_resource.append(re)
            LOGGER.debug('Selected file: %s ' % (re))
    resource = tmp_resource

    start_time = time.time()  # measure data preparation ...
    # TODO: Check the calendars! model vs reanalyses.
    # TODO: Check the units! model vs reanalyses.
    try:
        m_total = []
        modvar = get_variable(resource)
        # resource properties
        ds = Dataset(resource[0])
        m_var = ds.variables[modvar]
        dims = list(m_var.dimensions)
        dimlen = len(dims)

        try:
            model_id = ds.getncattr('model_id')
        except AttributeError:
            model_id = 'Unknown model'
        LOGGER.debug('MODEL: %s ' % (model_id))

        lev_units = 'hPa'

        if (dimlen > 3):
            lev = ds.variables[dims[1]]
            # actually index [1] needs to be detected...
            # assuming zg(time, plev, lat, lon)
            lev_units = lev.units
            if (lev_units == 'Pa'):
                m_level = str(int(level) * 100)
            else:
                m_level = level
        else:
            m_level = None

        if level == None:
            level_range = None
        else:
            level_range = [int(m_level), int(m_level)]

        for z in resource:
            tmp_n = 'tmp_%s' % (uuid.uuid1())
            # select level and regrid
            b0 = call(resource=z, variable=modvar, level_range=level_range,
                      spatial_wrapping='wrap',  # cdover='system',
                      regrid_destination=nc_reanalyses[0],
                      regrid_options='bil', prefix=tmp_n)
            # select domain
            b01 = call(resource=b0, geom=bbox, spatial_wrapping='wrap',
                       prefix='levregr_' + path.basename(z)[0:-3])
            tbr = 'rm -f %s' % (b0)
            # system(tbr)
            tbr = 'rm -f %s' % (tmp_n)
            # system(tbr)
            # get full resource
            m_total.append(b01)

        ds.close()
        model_subset = call(m_total, time_range=m_time_range)

        for i in m_total:
            tbr = 'rm -f %s' % (i)
            # system(tbr)

        if m_level is not None:
            # create a new variable in the model set
            ds = Dataset(model_subset, mode='a')
            mod_var = ds.variables.pop(modvar)
            dims = mod_var.dimensions
            new_modvar = ds.createVariable('z%s' % level, mod_var.dtype,
                                           dimensions=(dims[0], dims[2], dims[3]))
            new_modvar[:, :, :] = squeeze(mod_var[:, 0, :, :])
            # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
            ds.close()
            mod_subset = call(model_subset, variable='z%s' % level)
        else:
            mod_subset = model_subset

        # if direction == 're2mo':
        #     try:
        #         response.update_status('Preparing simulation data', 15)
        #         reanalyses_subset = call(resource=nc_subset, time_range=[anaSt, anaEn])
        #     except:
        #         msg = 'failed to prepare simulation period'
        #         LOGGER.exception(msg)
        #     try:
        #         response.update_status('Preparing target data', 17)
        #         var_target = get_variable(resource)
        #         # var_simulation = get_variable(simulation)
        #         model_subset_tmp = call(resource=resource, variable=var_target,
        #                                 time_range=[refSt, refEn],
        #                                 t_calendar='standard',
        #                                 spatial_wrapping='wrap',
        #                                 regrid_destination=nc_reanalyses[0],
        #                                 regrid_options='bil')
        #         # model_subset = call(resource=resource, variable=var_target,
        #         #                     time_range=[refSt, refEn],
        #         #                     geom=bbox,
        #         #                     t_calendar='standard',
        #         #                     # conform_units_to=conform_units_to,
        #         #                     spatial_wrapping='wrap',
        #         #                     regrid_destination=reanalyses_subset,
        #         #                     regrid_options='bil')  # XXXXXXXXXXXX ADD WRAP rem calendar
        #         model_subset = call(resource=model_subset_tmp, variable=var_target,
        #                             geom=bbox, spatial_wrapping='wrap',
        #                             t_calendar='standard')
        #         # ISSUE: the regridded model has white border with null! Check it.
        #         # check t_calendar!
        #     except:
        #         msg = 'failed subset archive model'
        #         LOGGER.exception(msg)
        #         raise Exception(msg)
        # else:
        #     try:
        #         response.update_status('Preparing target data', 15)
        #         var_target = get_variable(resource)
        #         # var_simulation = get_variable(simulation)
        #         model_subset = call(resource=resource, variable=var_target,
        #                             time_range=[refSt, refEn],
        #                             geom=bbox,
        #                             t_calendar='standard',
        #                             # conform_units_to=conform_units_to,
        #                             # spatial_wrapping='wrap',
        #                             )
        #     except:
        #         msg = 'failed subset archive model'
        #         LOGGER.exception(msg)
        #         raise Exception(msg)
        #     try:
        #         response.update_status('Preparing simulation data', 17)
        #         reanalyses_subset = call(resource=nc_subset,
        #                                  time_range=[anaSt, anaEn],
        #                                  regrid_destination=model_subset,
        #                                  regrid_options='bil')
        #     except:
        #         msg = 'failed to prepare simulation period'
        #         LOGGER.exception(msg)
    except Exception:
        msg = 'failed to subset simulation or reference data'
        LOGGER.exception(msg)
        raise Exception(msg)

    # --------------------------------------------
    try:
        if direction == 'mo2re':
            simulation = mod_subset
            archive = nc_subset
            base_id = model
            sim_id = model_id
        elif direction == 're2mo':
            simulation = nc_subset
            archive = mod_subset
            base_id = model_id
            sim_id = model
        else:
            LOGGER.exception('direction not valid: %s ' % direction)
    except Exception:
        msg = 'failed to find comparison direction'
        LOGGER.exception(msg)
        raise Exception(msg)

    try:
        if level is not None:
            out_var = 'z%s' % level
        else:
            var_archive = get_variable(archive)
            var_simulation = get_variable(simulation)
            if var_archive != var_simulation:
                rename_variable(archive, oldname=var_archive, newname=var_simulation)
                out_var = var_simulation
                LOGGER.info('varname %s in netCDF renamed to %s'
                            % (var_archive, var_simulation))
    except Exception:
        msg = 'failed to rename variable in target files'
        LOGGER.exception(msg)
        raise Exception(msg)

    try:
        if seacyc is True:
            seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                archive, simulation, method=normalize)
        else:
            seasoncyc_base = None
            seasoncyc_sim = None
    except Exception:
        msg = 'failed to prepare seasonal cycle reference files'
        LOGGER.exception(msg)
        raise Exception(msg)

    ip, output = mkstemp(dir='.', suffix='.txt')
    output_file = path.abspath(output)
    files = [path.abspath(archive), path.abspath(simulation), output_file]
    # LOGGER.debug("data preparation took %s seconds.", time.time() - start_time)

    ############################
    # generating the config file
    ############################
    response.update_status('writing config file', 18)
    start_time = time.time()  # measure write config ...
    try:
        config_file = analogs.get_configfile(
            files=files,
            seasoncyc_base=seasoncyc_base,
            seasoncyc_sim=seasoncyc_sim,
            base_id=base_id,
            sim_id=sim_id,
            timewin=timewin,
            varname=var,
            seacyc=seacyc,
            cycsmooth=91,
            nanalog=nanalog,
            seasonwin=seasonwin,
            distfun=distance,
            outformat=outformat,
            calccor=True,
            silent=False,
            period=[dt.strftime(refSt, '%Y-%m-%d'),
                    dt.strftime(refEn, '%Y-%m-%d')],
            bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
    except Exception:
        msg = 'failed to generate config file'
        LOGGER.exception(msg)
        raise Exception(msg)
    # LOGGER.debug("write_config took %s seconds.", time.time() - start_time)

    #######################
    # CASTf90 call
    #######################
    import subprocess
    import shlex

    start_time = time.time()  # measure call castf90
    response.update_status('Start CASTf90 call', 20)
    try:
        # response.update_status('execution of CASTf90', 50)
        cmd = 'analogue.out %s' % path.relpath(config_file)
        # system(cmd)
        args = shlex.split(cmd)
        output, error = subprocess.Popen(
            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
        LOGGER.info('analogue.out info:\n %s ' % output)
        LOGGER.debug('analogue.out errors:\n %s ' % error)
        response.update_status('**** CASTf90 succeeded', 90)
    except Exception:
        msg = 'CASTf90 failed'
        LOGGER.exception(msg)
        raise Exception(msg)
    LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)

    response.update_status('preparing output', 91)
    # stopper to keep twitcher results, for debugging
    # dummy = dummy
    response.outputs['config'].file = config_file  # config_output_url
    response.outputs['analogs'].file = output_file
    response.outputs['output_netcdf'].file = simulation
    response.outputs['target_netcdf'].file = archive

    ########################
    # generate analog viewer
    ########################
    formated_analogs_file = analogs.reformat_analogs(output_file)
    # response.outputs['formated_analogs'].storage = FileStorage()
    response.outputs['formated_analogs'].file = formated_analogs_file
    LOGGER.info('analogs reformated')
    response.update_status('reformatted analog file', 95)

    viewer_html = analogs.render_viewer(
        # configfile=response.outputs['config'].get_url(),
        configfile=config_file,
        # datafile=response.outputs['formated_analogs'].get_url())
        datafile=formated_analogs_file)
    response.outputs['output'].file = viewer_html
    response.update_status('Successfully generated analogs viewer', 99)
    LOGGER.info('rendered pages: %s ', viewer_html)

    response.update_status('execution ended', 100)
    LOGGER.debug("total execution took %s seconds.",
                 time.time() - process_start_time)
    return response
def _handler(self, request, response):
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    LOGGER.info('Start process')
    response.update_status('execution started at : {}'.format(dt.now()), 5)

    process_start_time = time.time()  # measure process execution time ...
    start_time = time.time()  # measure init ...

    ################################
    # reading in the input arguments
    ################################
    try:
        response.update_status('read input parameter : %s ' % dt.now(), 7)
        refSt = request.inputs['refSt'][0].data
        refEn = request.inputs['refEn'][0].data
        dateSt = request.inputs['dateSt'][0].data
        dateEn = request.inputs['dateEn'][0].data
        seasonwin = request.inputs['seasonwin'][0].data
        nanalog = request.inputs['nanalog'][0].data
        timres = request.inputs['timeres'][0].data

        # bbox = [-80, 20, 50, 70]
        # TODO: Add checking for wrong coordinates and apply default if necessary
        bbox = []
        bboxStr = request.inputs['BBox'][0].data
        bboxStr = bboxStr.split(',')
        bbox.append(float(bboxStr[0]))
        bbox.append(float(bboxStr[2]))
        bbox.append(float(bboxStr[1]))
        bbox.append(float(bboxStr[3]))
        LOGGER.debug('BBOX for ocgis: %s ' % (bbox))
        LOGGER.debug('BBOX original: %s ' % (bboxStr))

        normalize = request.inputs['normalize'][0].data
        detrend = request.inputs['detrend'][0].data
        distance = request.inputs['dist'][0].data
        outformat = request.inputs['outformat'][0].data
        timewin = request.inputs['timewin'][0].data

        model_var = request.inputs['reanalyses'][0].data
        model, var = model_var.split('_')

        # experiment = self.getInputValues(identifier='experiment')[0]
        # dataset, var = experiment.split('_')
        # LOGGER.info('environment set')
        LOGGER.info('input parameters set')
        response.update_status('Read in and convert the arguments', 8)
    except Exception as e:
        msg = 'failed to read input parameter %s ' % e
        LOGGER.exception(msg)
        raise Exception(msg)

    ######################################
    # convert types and set environment
    ######################################
    try:
        response.update_status('Preparing environment, converting arguments', 9)
        LOGGER.debug('date: %s %s %s %s '
                     % (type(refSt), refEn, dateSt, dateEn))

        start = min(refSt, dateSt)
        end = max(refEn, dateEn)

        # refSt = dt.strftime(refSt, '%Y-%m-%d')
        # refEn = dt.strftime(refEn, '%Y-%m-%d')
        # dateSt = dt.strftime(dateSt, '%Y-%m-%d')
        # dateEn = dt.strftime(dateEn, '%Y-%m-%d')

        if normalize == 'None':
            seacyc = False
        else:
            seacyc = True

        if outformat == 'ascii':
            outformat = '.txt'
        elif outformat == 'netCDF':
            outformat = '.nc'
        else:
            LOGGER.exception('output format not valid')
    except Exception as e:
        msg = 'failed to set environment %s ' % e
        LOGGER.exception(msg)
        raise Exception(msg)

    ###########################
    # set the environment
    ###########################
    response.update_status('fetching data from archive', 10)
    try:
        if model == 'NCEP':
            getlevel = False
            if 'z' in var:
                level = var.strip('z')
                conform_units_to = None
            else:
                level = None
                conform_units_to = 'hPa'
        elif '20CRV2' in model:
            getlevel = False
            if 'z' in var:
                level = var.strip('z')
                conform_units_to = None
            else:
                level = None
                conform_units_to = 'hPa'
        else:
            LOGGER.exception('Reanalyses dataset not known')
        LOGGER.info('environment set for model: %s' % model)
    except Exception:
        msg = 'failed to set environment'
        LOGGER.exception(msg)
        raise Exception(msg)

    ##########################################
    # fetch Data from original data archive
    ##########################################
    # NOTE: If ref is, say, 1950-1990 and sim is just one week in 2017,
    # ALL the data (1950-2017) will be downloaded
    try:
        model_nc = rl(start=start.year, end=end.year,
                      dataset=model, variable=var,
                      timres=timres, getlevel=getlevel)
        LOGGER.info('reanalyses data fetched')
    except Exception:
        msg = 'failed to get reanalyses data'
        LOGGER.exception(msg)
        raise Exception(msg)

    response.update_status('subsetting region of interest', 17)
    # from flyingpigeon.weatherregimes import get_level
    LOGGER.debug("start and end time: %s - %s" % (start, end))
    time_range = [start, end]

    # For 20CRV2 geopotential height, the daily dataset for 100 years is
    # about 50 Gb, so it makes sense to operate on it step-by-step.
    # TODO: need to create a dictionary for such datasets (for models as well)
    # TODO: benchmark the method below for NCEP z500 for 60 years
    # if ('20CRV2' in model) and ('z' in var):
    if ('z' in var):
        tmp_total = []
        origvar = get_variable(model_nc)

        for z in model_nc:
            tmp_n = 'tmp_%s' % (uuid.uuid1())
            b0 = call(resource=z, variable=origvar,
                      level_range=[int(level), int(level)], geom=bbox,
                      spatial_wrapping='wrap',
                      prefix='levdom_' + os.path.basename(z)[0:-3])
            tmp_total.append(b0)

        tmp_total = sorted(tmp_total,
                           key=lambda i: os.path.splitext(os.path.basename(i))[0])
        inter_subset_tmp = call(resource=tmp_total, variable=origvar,
                                time_range=time_range)

        # clean up the intermediate level-domain files
        for i in tmp_total:
            tbr = 'rm -f %s' % (i)
            os.system(tbr)

        # create a new variable without the level dimension
        ds = Dataset(inter_subset_tmp, mode='a')
        z_var = ds.variables.pop(origvar)
        dims = z_var.dimensions
        new_var = ds.createVariable('z%s' % level, z_var.dtype,
                                    dimensions=(dims[0], dims[2], dims[3]))
        new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
        # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
        ds.close()
        model_subset_tmp = call(inter_subset_tmp, variable='z%s' % level)
    else:
        model_subset_tmp = call(resource=model_nc, variable=var,
                                geom=bbox, spatial_wrapping='wrap',
                                time_range=time_range,
                                # conform_units_to=conform_units_to
                                )

    # If the dataset is 20CRV2, the 6-hourly file should be converted to daily.
    # Option to use previously fetched 6h data from the cache (if any) and
    # not download daily files.
    if '20CRV2' in model:
        if timres == '6h':
            from cdo import Cdo
            cdo = Cdo()
            model_subset = '%s.nc' % uuid.uuid1()
            tmp_f = '%s.nc' % uuid.uuid1()
            cdo_op = getattr(cdo, 'daymean')
            cdo_op(input=model_subset_tmp, output=tmp_f)
            sti = '00:00:00'
            cdo_op = getattr(cdo, 'settime')
            cdo_op(sti, input=tmp_f, output=model_subset)
            LOGGER.debug('File converted from %s to daily' % (timres))
        else:
            model_subset = model_subset_tmp
    else:
        model_subset = model_subset_tmp

    LOGGER.info('Dataset subset done: %s ', model_subset)
    response.update_status('dataset subsetted', 19)

    # BLOCK OF DETRENDING of model_subset !
    # The original model subset is kept for further visualisation if needed.
    # There is currently an issue with SLP:
    # TODO 1 Keep trend as separate file
    # TODO 2 Think how to add options to plot anomalies AND original data...
    #        Maybe do archive and simulation = call(...) over NOT detrended
    #        data and keep it as well
    # TODO 3 Check with faster smoother; add removing the trend of each grid
    if detrend == 'None':
        orig_model_subset = model_subset
    else:
        orig_model_subset = remove_mean_trend(model_subset, varname=var)

    # ======================================

    # ############################################################
    # # get the required bbox and time region from resource data
    # ############################################################
    #
    # try:
    #     if dataset == 'NCEP':
    #         if 'z' in var:
    #             variable = 'hgt'
    #             level = var.strip('z')
    #             # conform_units_to=None
    #         else:
    #             variable = 'slp'
    #             level = None
    #             # conform_units_to='hPa'
    #     elif '20CRV2' in var:
    #         if 'z' in level:
    #             variable = 'hgt'
    #             level = var.strip('z')
    #             # conform_units_to=None
    #         else:
    #             variable = 'prmsl'
    #             level = None
    #             # conform_units_to='hPa'
    #     else:
    #         LOGGER.exception('Reanalyses dataset not known')
    #     LOGGER.info('environment set')
    # except Exception as e:
    #     msg = 'failed to set environment %s ' % e
    #     LOGGER.exception(msg)
    #     raise Exception(msg)
    #
    # LOGGER.debug("init took %s seconds.", time.time() - start_time)
    # response.update_status('Read in and convert the arguments done', 8)
    #
    # #################
    # # get input data
    # #################
    # start_time = time.time()  # measure get_input_data ...
    # response.update_status('fetching input data', 7)
    # try:
    #     input = reanalyses(start=start.year, end=end.year,
    #                        variable=var, dataset=dataset)
    #     LOGGER.info('input files %s' % input)
    #     nc_subset = call(resource=input, variable=var,
    #                      geom=bbox, spatial_wrapping='wrap')
    # except Exception as e:
    #     msg = 'failed to fetch or subset input files %s' % e
    #     LOGGER.exception(msg)
    #     raise Exception(msg)

    LOGGER.debug("get_input_subset_dataset took %s seconds.",
                 time.time() - start_time)
    response.update_status('**** Input data fetched', 20)

    ########################
    # input data preparation
    ########################
    response.update_status('Start preparing input data', 22)
    start_time = time.time()  # measure data preparation ...
    try:
        # Construct descriptive filenames for the three files
        # listed in the config file
        # TODO: check strftime for years < 1900 (!)
        refDatesString = dt.strftime(refSt, '%Y-%m-%d') + "_" + \
            dt.strftime(refEn, '%Y-%m-%d')
        simDatesString = dt.strftime(dateSt, '%Y-%m-%d') + "_" + \
            dt.strftime(dateEn, '%Y-%m-%d')
        archiveNameString = "base_" + var + "_" + refDatesString + \
            '_%.1f_%.1f_%.1f_%.1f' % (bbox[0], bbox[2], bbox[1], bbox[3])
        simNameString = "sim_" + var + "_" + simDatesString + \
            '_%.1f_%.1f_%.1f_%.1f' % (bbox[0], bbox[2], bbox[1], bbox[3])
        archive = call(resource=model_subset,
                       time_range=[refSt, refEn],
                       prefix=archiveNameString)
        simulation = call(resource=model_subset,
                          time_range=[dateSt, dateEn],
                          prefix=simNameString)
        LOGGER.info('archive and simulation files generated: %s, %s'
                    % (archive, simulation))
    except Exception as e:
        msg = 'failed to prepare archive and simulation files %s ' % e
        LOGGER.exception(msg)
        raise Exception(msg)

    try:
        if seacyc is True:
            LOGGER.info('normalization function with method: %s ' % normalize)
            seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                archive, simulation, method=normalize)
        else:
            seasoncyc_base = seasoncyc_sim = None
    except Exception as e:
        msg = 'failed to generate normalization files %s ' % e
        LOGGER.exception(msg)
        raise Exception(msg)

    output_file = 'output.txt'
    files = [os.path.abspath(archive), os.path.abspath(simulation), output_file]
    LOGGER.debug("Data preparation took %s seconds.", time.time() - start_time)

    ############################
    # generate the config file
    ############################
    config_file = analogs.get_configfile(
        files=files,
        seasoncyc_base=seasoncyc_base,
        seasoncyc_sim=seasoncyc_sim,
        base_id=model,
        sim_id=model,
        timewin=timewin,
        varname=var,
        seacyc=seacyc,
        cycsmooth=91,
        nanalog=nanalog,
        seasonwin=seasonwin,
        distfun=distance,
        outformat=outformat,
        calccor=True,
        silent=False,
        period=[dt.strftime(refSt, '%Y-%m-%d'),
                dt.strftime(refEn, '%Y-%m-%d')],
        bbox="{0[0]},{0[2]},{0[1]},{0[3]}".format(bbox))
    response.update_status('generated config file', 25)

    #######################
    # CASTf90 call
    #######################
    start_time = time.time()  # measure call castf90

    # -----------------------
    try:
        import ctypes
        # TODO: This lib is for Linux
        mkl_rt = ctypes.CDLL('libmkl_rt.so')
        nth = mkl_rt.mkl_get_max_threads()
        LOGGER.debug('Current number of threads: %s' % (nth))
        mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64)))
        nth = mkl_rt.mkl_get_max_threads()
        LOGGER.debug('NEW number of threads: %s' % (nth))
        # TODO: Does this work with the default shell=False in subprocess? (!)
        os.environ['MKL_NUM_THREADS'] = str(nth)
        os.environ['OMP_NUM_THREADS'] = str(nth)
    except Exception as e:
        msg = 'Failed to set THREADS %s ' % e
        LOGGER.debug(msg)
    # -----------------------

    response.update_status('Start CASTf90 call', 30)
    try:
        # response.update_status('execution of CASTf90', 50)
        cmd = ['analogue.out', config_file]
        LOGGER.debug("castf90 command: %s", cmd)
        output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        LOGGER.info('analogue output:\n %s', output)
        response.update_status('**** CASTf90 succeeded', 70)
    except CalledProcessError as e:
        msg = 'CASTf90 failed:\n{0}'.format(e.output)
        LOGGER.exception(msg)
        raise Exception(msg)
    LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)

    # TODO: Add try/except for the PDFs
    analogs_pdf = analogs.plot_analogs(configfile=config_file)
    response.update_status('preparing output', 75)
    # response.outputs['config'].storage = FileStorage()
    response.outputs['analog_pdf'].file = analogs_pdf
    response.outputs['config'].file = config_file
    response.outputs['analogs'].file = output_file
    response.outputs['output_netcdf'].file = simulation
    response.outputs['target_netcdf'].file = archive

    if seacyc is True:
        response.outputs['base_netcdf'].file = seasoncyc_base
        response.outputs['sim_netcdf'].file = seasoncyc_sim
    else:
        # TODO: Still unclear how to pass an unknown number of outputs
        dummy_base = 'dummy_base.nc'
        dummy_sim = 'dummy_sim.nc'
        with open(dummy_base, 'a'):
            os.utime(dummy_base, None)
        with open(dummy_sim, 'a'):
            os.utime(dummy_sim, None)
        response.outputs['base_netcdf'].file = dummy_base
        response.outputs['sim_netcdf'].file = dummy_sim

    ########################
    # generate analog viewer
    ########################
    formated_analogs_file = analogs.reformat_analogs(output_file)
    # response.outputs['formated_analogs'].storage = FileStorage()
    response.outputs['formated_analogs'].file = formated_analogs_file
    LOGGER.info('analogs reformated')
    response.update_status('reformatted analog file', 80)

    viewer_html = analogs.render_viewer(
        # configfile=response.outputs['config'].get_url(),
        configfile=config_file,
        # datafile=response.outputs['formated_analogs'].get_url())
        datafile=formated_analogs_file)
    response.outputs['output'].file = viewer_html
    response.update_status('Successfully generated analogs viewer', 90)
    LOGGER.info('rendered pages: %s ', viewer_html)

    response.update_status('execution ended', 100)
    LOGGER.debug("total execution took %s seconds.",
                 time.time() - process_start_time)
    return response
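# On the TODO above: os.environ changes are inherited by child processes
# spawned via subprocess even with the default shell=False, because the
# parent's environment is passed on by default. A minimal check (Linux):
import os
import subprocess

os.environ['OMP_NUM_THREADS'] = '8'
out = subprocess.check_output(['printenv', 'OMP_NUM_THREADS'])
print(out.strip())  # b'8'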
def calc_indice_simple(resource=[], variable=None, prefix=None, indices=None,
                       polygons=None, mosaik=False, groupings='yr',
                       dir_output=None, dimension_map=None, memory_limit=None):
    """
    Calculates given simple indices for suitable files in the appropriate
    time grouping and polygon.

    :param resource: list of filenames in DRS convention (netCDF)
    :param variable: variable name to be selected in the netCDF file (default=None)
    :param indices: list of indices, e.g. ['SU'] (default=None)
    :param polygons: list of polygons, e.g. ['FRA'] (default=None)
    :param groupings: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netCDF)
    :param dimension_map: optional dimension map if different to standard (default=None)
    :return: list of netCDF files with calculated indices; files are saved
             into dir_output
    """
    from os.path import join, dirname, exists
    from os import makedirs
    from flyingpigeon import ocgis_module
    from flyingpigeon.subset import clipping
    import uuid

    # DIR_SHP = config.shapefiles_dir()
    # env.DIR_SHPCABINET = DIR_SHP
    # env.OVERWRITE = True

    if type(resource) != list:
        resource = list([resource])
    if type(indices) != list:
        indices = list([indices])
    if type(polygons) != list and polygons != None:
        polygons = list([polygons])
    if type(groupings) != list:
        groupings = list([groupings])

    if dir_output != None:
        if not exists(dir_output):
            makedirs(dir_output)

    # from flyingpigeon.subset import select_ugid
    # tile_dim = 25
    output = None

    experiments = sort_by_filename(resource)
    outputs = []

    for key in experiments:
        if variable == None:
            variable = get_variable(experiments[key][0])
            # variable = key.split('_')[0]
        try:
            if variable == 'pr':
                calc = 'pr=pr*86400'
                ncs = ocgis_module.call(resource=experiments[key],
                                        variable=variable,
                                        dimension_map=dimension_map,
                                        calc=calc,
                                        memory_limit=memory_limit,
                                        # calc_grouping=calc_group,
                                        prefix=str(uuid.uuid4()),
                                        dir_output=dir_output,
                                        output_format='nc')
            else:
                ncs = experiments[key]
            for indice in indices:
                logger.info('indice: %s' % indice)
                try:
                    calc = [{'func': 'icclim_' + indice, 'name': indice}]
                    logger.info('calc: %s' % calc)
                    for grouping in groupings:
                        logger.info('grouping: %s' % grouping)
                        try:
                            calc_group = calc_grouping(grouping)
                            logger.info('calc_group: %s' % calc_group)
                            if polygons == None:
                                try:
                                    if prefix == None:
                                        prefix = key.replace(variable, indice).replace('_day_', '_%s_' % grouping)
                                    tmp = ocgis_module.call(resource=ncs,
                                                            variable=variable,
                                                            dimension_map=dimension_map,
                                                            calc=calc,
                                                            calc_grouping=calc_group,
                                                            prefix=prefix,
                                                            dir_output=dir_output,
                                                            output_format='nc')
                                    outputs.extend([tmp])
                                except Exception:
                                    msg = 'could not calc indice %s for domain in %s' % (indice, key)
                                    logger.exception(msg)
                                    raise Exception(msg)
                            else:
                                try:
                                    if prefix == None:
                                        prefix = key.replace(variable, indice).replace('_day_', '_%s_' % grouping)
                                    tmp = clipping(resource=ncs,
                                                   variable=variable,
                                                   dimension_map=dimension_map,
                                                   calc=calc,
                                                   calc_grouping=calc_group,
                                                   prefix=prefix,
                                                   polygons=polygons,
                                                   mosaik=mosaik,
                                                   dir_output=dir_output,
                                                   output_format='nc')
                                    outputs.extend([tmp])
                                except Exception:
                                    msg = 'could not calc indice %s for domain in %s' % (indice, key)
                                    logger.exception(msg)
                                    raise Exception(msg)
                            logger.info('indice file calculated')
                        except Exception:
                            msg = 'could not calc indice %s for key %s and grouping %s' % (indice, key, grouping)
                            logger.exception(msg)
                            raise Exception(msg)
                except Exception:
                    msg = 'could not calc indice %s for key %s' % (indice, key)
                    logger.exception(msg)
                    raise Exception(msg)
        except Exception:
            msg = 'could not calc key %s' % key
            logger.exception(msg)
            raise Exception(msg)
    return outputs
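# Usage sketch (illustrative): yearly 'SU' (summer days) over France; the
# file name and the 'FRA' polygon code are placeholders.
su_files = calc_indice_simple(resource=['tasmax_day_EUR-11_19700101-20001231.nc'],
                              indices=['SU'], polygons=['FRA'], groupings='yr')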
def calc_indice_percentile(resources=[], variable=None, prefix=None,
                           indices='TG90p', refperiod=None, groupings='yr',
                           polygons=None, percentile=90, mosaik=False,
                           dir_output=None, dimension_map=None):
    """
    Calculates given indices for suitable files in the appropriate time
    grouping and polygon.

    :param resources: list of filenames in DRS convention (netCDF)
    :param variable: variable name to be selected in the netCDF file (default=None)
    :param indices: list of indices (default='TG90p')
    :param prefix: filename prefix
    :param refperiod: reference period tuple = (start, end)
    :param groupings: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netCDF)
    :param dimension_map: optional dimension map if different to standard (default=None)
    :return: list of netCDF files with calculated indices; files are saved
             into dir_output
    """
    from os.path import join, dirname, exists
    from os import remove, makedirs
    import uuid
    from numpy import ma
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.subset import clipping
    from flyingpigeon.utils import get_values, get_time

    if type(resources) != list:
        resources = list([resources])
    if type(indices) != list:
        indices = list([indices])
    if type(groupings) != list:
        groupings = list([groupings])
    if type(refperiod) == list:
        refperiod = refperiod[0]

    if refperiod != None:
        start = dt.strptime(refperiod.split('-')[0], '%Y%m%d')
        end = dt.strptime(refperiod.split('-')[1], '%Y%m%d')
        time_range = [start, end]
    else:
        time_range = None

    if dir_output != None:
        if not exists(dir_output):
            makedirs(dir_output)

    ################################################
    # Compute a custom percentile basis using ICCLIM
    ################################################
    from ocgis.contrib import library_icclim as lic

    nc_indices = []
    nc_dic = sort_by_filename(resources)

    for grouping in groupings:
        calc_group = calc_grouping(grouping)
        for key in nc_dic.keys():
            resource = nc_dic[key]
            if variable == None:
                variable = get_variable(resource)
            if polygons == None:
                nc_reference = call(resource=resource,
                                    prefix=str(uuid.uuid4()),
                                    time_range=time_range,
                                    output_format='nc',
                                    dir_output=dir_output)
            else:
                nc_reference = clipping(resource=resource,
                                        prefix=str(uuid.uuid4()),
                                        time_range=time_range,
                                        output_format='nc',
                                        polygons=polygons,
                                        dir_output=dir_output,
                                        mosaik=mosaik)

            arr = get_values(nc_files=nc_reference)
            dt_arr = get_time(nc_files=nc_reference)
            arr = ma.masked_array(arr)
            dt_arr = ma.masked_array(dt_arr)
            window_width = 5

            for indice in indices:
                name = indice.replace('_', str(percentile))
                var = indice.split('_')[0]

                operation = None
                if 'T' in var:
                    if percentile >= 50:
                        operation = 'Icclim%s90p' % var
                        func = 'icclim_%s90p' % var  # e.g. icclim_TG90p
                    else:
                        operation = 'Icclim%s10p' % var
                        func = 'icclim_%s10p' % var

                ################################
                # load the appropriate operation
                ################################
                ops = [op for op in dir(lic) if operation in op]
                if len(ops) == 0:
                    raise Exception("operator does not exist: %s" % operation)

                # getattr instead of exec: same effect without evaluating a
                # string of source code
                percentile_dict = getattr(lic, ops[0]).get_percentile_dict(
                    arr, dt_arr, percentile, window_width)
                calc = [{'func': func, 'name': name,
                         'kwds': {'percentile_dict': percentile_dict}}]

                if polygons == None:
                    nc_indices.append(call(resource=resource,
                                           prefix=key.replace(variable, name).replace('_day_', '_%s_' % grouping),
                                           calc=calc,
                                           calc_grouping=calc_group,
                                           output_format='nc',
                                           dir_output=dir_output))
                else:
                    nc_indices.extend(clipping(resource=resource,
                                               prefix=key.replace(variable, name).replace('_day_', '_%s_' % grouping),
                                               calc=calc,
                                               calc_grouping=calc_group,
                                               output_format='nc',
                                               dir_output=dir_output,
                                               polygons=polygons,
                                               mosaik=mosaik))
    return nc_indices
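# Usage sketch (illustrative): judging from the name-building logic above
# (indice.replace('_', str(percentile)) and indice.split('_')[0]), indices are
# passed as '<variable>_p', e.g. 'TG_p' -> 'TG90p'. The file name and the
# reference period ('YYYYMMDD-YYYYMMDD') are placeholders.
tg90p_files = calc_indice_percentile(resources=['tas_day_EUR-11_19700101-20001231.nc'],
                                     indices=['TG_p'], percentile=90,
                                     refperiod='19710101-20001231')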
def execute(self):
    init_process_logger('log.txt')
    self.output_log.setValue('log.txt')

    logger.info('Start process')

    from datetime import datetime as dt
    from flyingpigeon import weatherregimes as wr
    from tempfile import mkstemp

    self.status.set('execution started at : %s ' % dt.now(), 5)

    ################################
    # reading in the input arguments
    ################################
    try:
        logger.info('read in the arguments')
        # resources = self.getInputValues(identifier='resources')
        season = self.getInputValues(identifier='season')[0]
        bbox_obj = self.BBox.getValue()
        model_var = self.getInputValues(identifier='reanalyses')[0]
        period = self.getInputValues(identifier='period')[0]
        anualcycle = self.getInputValues(identifier='anualcycle')[0]
        model, variable = model_var.split('_')
        kappa = int(self.getInputValues(identifier='kappa')[0])
        logger.info('period %s' % str(period))
        logger.info('season %s' % str(season))
    except Exception as e:
        logger.debug('failed to read in the arguments %s ' % e)

    try:
        start = dt.strptime(period.split('-')[0], '%Y%m%d')
        end = dt.strptime(period.split('-')[1], '%Y%m%d')

        if bbox_obj is not None:
            logger.info("bbox_obj={0}".format(bbox_obj.coords))
            bbox = [bbox_obj.coords[0][0],
                    bbox_obj.coords[0][1],
                    bbox_obj.coords[1][0],
                    bbox_obj.coords[1][1]]
            logger.info("bbox={0}".format(bbox))
        else:
            bbox = None
    except Exception as e:
        logger.debug('failed to transform BBOXObject %s ' % e)

    ###########################
    # set the environment
    ###########################
    self.status.set('fetching data from archive', 10)
    try:
        if model == 'NCEP':
            if 'z' in variable:
                level = variable.strip('z')
                conform_units_to = None
            else:
                level = None
                conform_units_to = 'hPa'
        elif '20CRV2' in model:
            if 'z' in variable:
                level = variable.strip('z')
                conform_units_to = None
            else:
                level = None
                conform_units_to = 'hPa'
        else:
            logger.error('Reanalyses dataset not known')
        logger.info('environment set')
    except Exception as e:
        msg = 'failed to set environment %s ' % e
        logger.error(msg)
        raise Exception(msg)

    ##########################################
    # fetch data from the original data archive
    ##########################################
    from flyingpigeon.datafetch import reanalyses as rl
    try:
        model_nc = rl(start=start.year,
                      end=end.year,
                      dataset=model,
                      variable=variable)
        logger.info('reanalyses data fetched')
    except Exception as e:
        msg = 'failed to get reanalyses data %s' % e
        logger.debug(msg)
        raise Exception(msg)

    self.status.set('fetching data done', 15)

    ############################################################
    # get the required bbox and time region from resource data
    ############################################################
    self.status.set('subsetting region of interest', 17)
    # from flyingpigeon.weatherregimes import get_level
    from flyingpigeon.ocgis_module import call

    time_range = [start, end]
    model_subset = call(resource=model_nc, variable=variable,
                        geom=bbox, spatial_wrapping='wrap', time_range=time_range,
                        # conform_units_to=conform_units_to
                        )
    logger.info('Dataset subset done: %s ' % model_subset)

    self.status.set('dataset subsetted', 19)

    ##############################################
    # computing anomalies
    ##############################################
    self.status.set('computing anomalies ', 19)

    cycst = anualcycle.split('-')[0]
    cycen = anualcycle.split('-')[1]  # end of the annual-cycle reference period
    reference = [dt.strptime(cycst, '%Y%m%d'), dt.strptime(cycen, '%Y%m%d')]
    logger.debug('reference time: %s' % reference)

    model_anomal = wr.get_anomalies(model_subset, reference=reference)

    #####################
    # extracting season
    #####################
    self.status.set('normalizing data', 21)
    model_season = wr.get_season(model_anomal, season=season)

    self.status.set('anomalies computed and normalized', 24)

    #######################
    # call the R scripts
    #######################
    self.status.set('Start weather regime clustering ', 25)
    import subprocess
    from flyingpigeon import config
    from os.path import curdir, exists, join

    try:
        rworkspace = curdir
        Rsrc = config.Rsrc_dir()
        Rfile = 'weatherregimes_model.R'

        infile = model_season  # model_subset  # model_ponderate
        modelname = model
        yr1 = start.year
        yr2 = end.year
        ip, output_graphics = mkstemp(dir=curdir, suffix='.pdf')
        ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
        ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')

        args = ['Rscript', join(Rsrc, Rfile),
                '%s/' % curdir,
                '%s/' % Rsrc,
                '%s' % infile,
                '%s' % variable,
                '%s' % output_graphics,
                '%s' % file_pca,
                '%s' % file_class,
                '%s' % season,
                '%s' % start.year,
                '%s' % end.year,
                '%s' % model_var,
                '%s' % kappa]
        logger.info('R call built')
    except Exception as e:
        msg = 'failed to build the R command %s' % e
        logger.debug(msg)
        raise Exception(msg)

    try:
        output, error = subprocess.Popen(
            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
        logger.info('R outlog info:\n %s ' % output)
        logger.debug('R outlog errors:\n %s ' % error)
        if len(output) > 0:
            self.status.set('**** weatherregime in R succeeded', 90)
        else:
            logger.error('NO! output returned from R call')
    except Exception as e:
        msg = 'weatherregime in R %s ' % e
        logger.error(msg)
        raise Exception(msg)

    self.status.set('Weather regime clustering done ', 80)

    ############################################
    # set the outputs
    ############################################
    self.status.set('Set the process outputs ', 95)

    self.Routput_graphic.setValue(output_graphics)
    self.output_pca.setValue(file_pca)
    self.output_classification.setValue(file_class)
    self.output_netcdf.setValue(model_season)
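# Input-format sketch: 'period' and 'anualcycle' are 'YYYYMMDD-YYYYMMDD'
# strings that are split into a [start, end] datetime pair (the value below
# is hypothetical):
#
# from datetime import datetime as dt
# period = '19700101-19991231'
# start, end = [dt.strptime(d, '%Y%m%d') for d in period.split('-')]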
def get_anomalies(nc_file, frac=0.2, reference=None, method='ocgis', sseas='serial', variable=None):
    """
    Anomalisation of data subsets for weather classification by subtracting a smoothed annual cycle

    :param nc_file: input netCDF file
    :param frac: Number between 0-1 for strength of smoothing
                 (0 = close to the original data, 1 = flat line)
                 default = 0.2
    :param reference: Period to calculate annual cycle

    :returns str: path to output netCDF file
    """
    from netCDF4 import Dataset
    from tempfile import mkstemp
    import statsmodels.api as sm
    from numpy import tile, empty, linspace

    if variable is None:
        variable = utils.get_variable(nc_file)

    # if get_variable returned a list of candidate variables
    # (a list does not contain itself, so count() is 0), pick the only 3D one:
    if variable.count(variable) == 0:
        _ds = Dataset(nc_file)
        # works only if there is exactly one 3D variable
        for j in variable:
            if len(_ds.variables[j].dimensions) == 3:
                _var = j
        variable = _var
        _ds.close()
        LOGGER.debug('3D Variable selected: %s' % (variable))

    try:
        if method == 'cdo':
            from cdo import Cdo
            ip2, nc_anual_cycle = mkstemp(dir='.', suffix='.nc')
            cdo = Cdo()
            # ip, nc_anual_cycle_tmp = mkstemp(dir='.', suffix='.nc')
            # TODO: if reference is None, use utils.get_time for nc_file to set the ref range
            #       But will need to fix the 360_day issue (use get_time_nc from analogs)
            # com = 'seldate'
            # comcdo = 'cdo %s,%s-%s-%s,%s-%s-%s %s %s' % (com, reference[0].year, reference[0].month, reference[0].day,
            #                                              reference[1].year, reference[1].month, reference[1].day,
            #                                              nc_file, nc_anual_cycle_tmp)
            # LOGGER.debug('CDO: %s' % (comcdo))
            # system(comcdo)

            # Substitute cdo seldate with this trick... cdo keeps the precision
            # and the anomalies come out as integers otherwise...
            calc = '%s=%s' % (variable, variable)
            nc_anual_cycle_tmp = call(nc_file, time_range=reference, variable=variable, calc=calc)
            nc_anual_cycle = cdo.ydaymean(input=nc_anual_cycle_tmp, output=nc_anual_cycle)
        else:
            calc = [{'func': 'mean', 'name': variable}]
            calc_grouping = ['day', 'month']
            nc_anual_cycle = call(nc_file,
                                  calc=calc,
                                  calc_grouping=calc_grouping,
                                  variable=variable,
                                  time_range=reference)
        LOGGER.info('annual cycle calculated: %s' % (nc_anual_cycle))
    except Exception as e:
        msg = 'failed to calculate annual cycle %s' % e
        LOGGER.error(msg)
        raise Exception(msg)

    try:
        # lowess smoothing of the annual cycle
        from cdo import Cdo
        cdo = Cdo()
        # variable = utils.get_variable(nc_file)

        ds = Dataset(nc_anual_cycle, mode='a')
        vals = ds.variables[variable]
        vals_sm = empty(vals.shape)
        ts = vals.shape[0]
        x = linspace(1, ts * 3, num=ts * 3, endpoint=True)

        if 'serial' not in sseas:
            # Multiprocessing =======================
            from multiprocessing import Pool
            pool = Pool()

            valex = [0.]
            valex = valex * vals.shape[1] * vals.shape[2]
            # TODO: redo with reshape
            ind = 0
            for lat in range(vals.shape[1]):
                for lon in range(vals.shape[2]):
                    valex[ind] = vals[:, lat, lon]
                    ind += 1

            LOGGER.debug('Start smoothing with multiprocessing')
            # TODO: the fraction option frac=... is not used here;
            # _smooth is assumed to be a module-level helper with its own default
            tmp_sm = pool.map(_smooth, valex)
            pool.close()
            pool.join()

            # TODO: redo with reshape
            ind = 0
            for lat in range(vals.shape[1]):
                for lon in range(vals.shape[2]):
                    vals_sm[:, lat, lon] = tmp_sm[ind]
                    ind += 1
        else:
            # Serial ==================================
            vals_sm = empty(vals.shape)
            for lat in range(vals.shape[1]):
                for lon in range(vals.shape[2]):
                    try:
                        # tile the series 3 times to avoid edge effects of the smoother
                        y = tile(vals[:, lat, lon], 3)
                        # ys = smooth(y, window_size=91, order=2, deriv=0, rate=1)[ts:ts*2]
                        ys = sm.nonparametric.lowess(y, x, frac=frac)[ts:ts * 2, 1]
                        vals_sm[:, lat, lon] = ys
                    except:
                        msg = 'failed for lat %s lon %s' % (lat, lon)
                        LOGGER.exception(msg)
                        raise Exception(msg)
                    LOGGER.debug('done for %s - %s ' % (lat, lon))

        vals[:, :, :] = vals_sm[:, :, :]
        ds.close()
        LOGGER.info('smoothing of annual cycle done')
    except:
        msg = 'failed smoothing of annual cycle'
        LOGGER.exception(msg)
        raise Exception(msg)

    try:
        ip, nc_anomal = mkstemp(dir='.', suffix='.nc')
        try:
            nc_anomal = cdo.sub(input=[nc_file, nc_anual_cycle], output=nc_anomal)
            LOGGER.info('cdo.sub; anomalisation done: %s ' % nc_anomal)
        except:
            # cdo bug: https://code.mpimet.mpg.de/boards/1/topics/3909
            ip3, nc_in1 = mkstemp(dir='.', suffix='.nc')
            ip4, nc_in2 = mkstemp(dir='.', suffix='.nc')
            ip5, nc_out = mkstemp(dir='.', suffix='.nc')
            nc_in1 = cdo.selvar(variable, input=nc_file, output=nc_in1)
            nc_in2 = cdo.selvar(variable, input=nc_anual_cycle, output=nc_in2)
            nc_out = cdo.sub(input=[nc_in1, nc_in2], output=nc_out)
            nc_anomal = nc_out
    except:
        msg = 'failed subtraction of annual cycle'
        LOGGER.exception(msg)
        raise Exception(msg)
    return nc_anomal
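# Hedged usage sketch for get_anomalies: subtract a lowess-smoothed annual
# cycle (reference period 1970-1999) from a subsetted file. The input file
# name is hypothetical; the method/sseas values are the ones handled above.
#
# from datetime import datetime as dt
# from flyingpigeon import weatherregimes as wr
# ref = [dt(1970, 1, 1), dt(1999, 12, 31)]
# nc_anomal = wr.get_anomalies('model_subset.nc', frac=0.2, reference=ref,
#                              method='ocgis', sseas='serial')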
def execute(self):
    logger.info('Start process')

    from datetime import datetime as dt
    from flyingpigeon import weatherregimes as wr
    from tempfile import mkstemp

    ################################
    # reading in the input arguments
    ################################
    try:
        resource = self.getInputValues(identifier='resource')
        url_Rdat = self.getInputValues(identifier='Rdat')[0]
        url_dat = self.getInputValues(identifier='dat')[0]
        url_ref_file = self.getInputValues(identifier='netCDF')  # can be None
        season = self.getInputValues(identifier='season')[0]
        period = self.getInputValues(identifier='period')[0]
        anualcycle = self.getInputValues(identifier='anualcycle')[0]
    except Exception as e:
        logger.debug('failed to read in the arguments %s ' % e)

    try:
        start = dt.strptime(period.split('-')[0], '%Y%m%d')
        end = dt.strptime(period.split('-')[1], '%Y%m%d')
        # kappa = int(self.getInputValues(identifier='kappa')[0])

        logger.info('period %s' % str(period))
        logger.info('season %s' % str(season))
        logger.info('read in the arguments')
        logger.info('url_ref_file: %s' % url_ref_file)
        logger.info('url_Rdat: %s' % url_Rdat)
        logger.info('url_dat: %s' % url_dat)
    except Exception as e:
        logger.debug('failed to convert arguments %s ' % e)

    ############################
    # fetching training data
    ############################
    from flyingpigeon.utils import download, get_time
    from os.path import abspath

    try:
        dat = abspath(download(url_dat))
        Rdat = abspath(download(url_Rdat))
        logger.info('training data fetched')
    except Exception as e:
        logger.error('failed to fetch training data %s' % e)

    ############################################################
    # get the required bbox and time region from resource data
    ############################################################
    # from flyingpigeon.weatherregimes import get_level
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.utils import get_variable

    time_range = [start, end]
    variable = get_variable(resource)

    # NOTE: bbox is not read in as a process input here; it is assumed to be
    # defined elsewhere before being passed to call().
    if len(url_ref_file) > 0:
        ref_file = download(url_ref_file[0])
        model_subset = call(
            resource=resource, variable=variable, time_range=time_range,
            # conform_units_to=conform_units_to,
            geom=bbox, spatial_wrapping='wrap',
            regrid_destination=ref_file, regrid_options='bil')
        logger.info('Dataset subset with regridding done: %s ' % model_subset)
    else:
        model_subset = call(
            resource=resource, variable=variable, time_range=time_range,
            # conform_units_to=conform_units_to,
            geom=bbox, spatial_wrapping='wrap',
        )
        logger.info('Dataset time period extracted: %s ' % model_subset)

    ##############################################
    # computing anomalies
    ##############################################
    cycst = anualcycle.split('-')[0]
    cycen = anualcycle.split('-')[1]  # end of the annual-cycle reference period
    reference = [dt.strptime(cycst, '%Y%m%d'), dt.strptime(cycen, '%Y%m%d')]
    model_anomal = wr.get_anomalies(model_subset, reference=reference)

    #####################
    # extracting season
    #####################
    model_season = wr.get_season(model_anomal, season=season)

    #######################
    # call the R scripts
    #######################
    import subprocess
    from flyingpigeon import config
    from os.path import curdir, exists, join

    try:
        rworkspace = curdir
        Rsrc = config.Rsrc_dir()
        Rfile = 'weatherregimes_projection.R'

        yr1 = start.year
        yr2 = end.year
        time = get_time(model_season, format='%Y%m%d')

        # ip, output_graphics = mkstemp(dir=curdir, suffix='.pdf')
        ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
        ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')
        ip, output_frec = mkstemp(dir=curdir, suffix='.txt')

        args = ['Rscript', join(Rsrc, Rfile),
                '%s/' % curdir,
                '%s/' % Rsrc,
                '%s' % model_season,
                '%s' % variable,
                '%s' % str(time).strip("[]").replace("'", "").replace(" ", ""),
                # '%s' % output_graphics,
                '%s' % dat,
                '%s' % Rdat,
                '%s' % file_pca,
                '%s' % file_class,
                '%s' % output_frec,
                '%s' % season,
                '%s' % start.year,
                '%s' % end.year,
                '%s' % 'MODEL']
        logger.info('R call built')
    except Exception as e:
        msg = 'failed to build the R command %s' % e
        logger.error(msg)
        raise Exception(msg)

    try:
        output, error = subprocess.Popen(
            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
        logger.info('R outlog info:\n %s ' % output)
        logger.debug('R outlog errors:\n %s ' % error)
        if len(output) > 0:
            self.status.set('**** weatherregime in R succeeded', 90)
        else:
            logger.error('NO! output returned from R call')
    except Exception as e:
        msg = 'weatherregime in R %s ' % e
        logger.error(msg)
        raise Exception(msg)

    ############################################
    # set the outputs
    ############################################
    # self.Routput_graphic.setValue(output_graphics)
    self.output_pca.setValue(file_pca)
    self.output_classification.setValue(file_class)
    self.output_netcdf.setValue(model_season)
    self.output_frequency.setValue(output_frec)
def execute(self):
    init_process_logger('log.txt')
    self.output_log.setValue('log.txt')

    import time  # performance test
    process_start_time = time.time()  # measure process execution time ...

    from os import path
    from tempfile import mkstemp
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon import analogs
    from flyingpigeon.datafetch import reanalyses

    self.status.set('execution started at : %s ' % dt.now(), 5)

    start_time = time.time()  # measure init ...

    #######################
    # read input parameters
    #######################
    try:
        self.status.set('read input parameter : %s ' % dt.now(), 5)
        refSt = self.getInputValues(identifier='refSt')
        refEn = self.getInputValues(identifier='refEn')
        dateSt = self.getInputValues(identifier='dateSt')
        dateEn = self.getInputValues(identifier='dateEn')
        seasonwin = int(self.getInputValues(identifier='seasonwin')[0])
        nanalog = int(self.getInputValues(identifier='nanalog')[0])
        bbox_obj = self.BBox.getValue()
        normalize = self.getInputValues(identifier='normalize')[0]
        distance = self.getInputValues(identifier='dist')[0]
        outformat = self.getInputValues(identifier='outformat')[0]
        timewin = int(self.getInputValues(identifier='timewin')[0])
        experiment = self.getInputValues(identifier='experiment')[0]
        logger.info('input parameters set')
        self.status.set('Read in and convert the arguments', 5)
    except Exception as e:
        msg = 'failed to read input parameter %s ' % e
        logger.error(msg)
        raise Exception(msg)

    ######################################
    # convert types and set environment
    ######################################
    try:
        self.status.set('Preparing environment, converting arguments', 7)
        refSt = dt.strptime(refSt[0], '%Y-%m-%d')
        refEn = dt.strptime(refEn[0], '%Y-%m-%d')
        dateSt = dt.strptime(dateSt[0], '%Y-%m-%d')
        dateEn = dt.strptime(dateEn[0], '%Y-%m-%d')

        if normalize == 'None':
            seacyc = False
        else:
            seacyc = True

        if outformat == 'ascii':
            outformat = '.txt'
        elif outformat == 'netCDF':
            outformat = '.nc'
        else:
            logger.error('output format not valid')

        start = min(refSt, dateSt)
        end = max(refEn, dateEn)

        if bbox_obj is not None:
            logger.info("bbox_obj={0}".format(bbox_obj.coords))
            bbox = [bbox_obj.coords[0][0],
                    bbox_obj.coords[0][1],
                    bbox_obj.coords[1][0],
                    bbox_obj.coords[1][1]]
            logger.info("bbox={0}".format(bbox))
        else:
            bbox = None

        # region = self.getInputValues(identifier='region')[0]
        # bbox = [float(b) for b in region.split(',')]

        dataset, var = experiment.split('_')
        logger.info('environment set')
    except Exception as e:
        msg = 'failed to set environment %s ' % e
        logger.error(msg)
        raise Exception(msg)

    try:
        if dataset == 'NCEP':
            if 'z' in var:
                variable = 'hgt'
                level = var.strip('z')
                # conform_units_to=None
            else:
                variable = 'slp'
                level = None
                # conform_units_to='hPa'
        elif '20CRV2' in dataset:
            if 'z' in var:
                variable = 'hgt'
                level = var.strip('z')
                # conform_units_to=None
            else:
                variable = 'prmsl'
                level = None
                # conform_units_to='hPa'
        else:
            logger.error('Reanalyses dataset not known')
        logger.info('environment set')
    except Exception as e:
        msg = 'failed to set environment %s ' % e
        logger.error(msg)
        raise Exception(msg)

    logger.debug("init took %s seconds.", time.time() - start_time)
    self.status.set('Read in and convert the arguments done', 8)

    #################
    # get input data
    #################
    start_time = time.time()  # measure get_input_data ...
    self.status.set('fetching input data', 7)
    try:
        input = reanalyses(start=start.year, end=end.year,
                           variable=var, dataset=dataset)
        logger.info('input files %s' % input)
        nc_subset = call(resource=input, variable=var,
                         geom=bbox, spatial_wrapping='wrap')
    except Exception as e:
        msg = 'failed to fetch or subset input files %s' % e
        logger.error(msg)
        raise Exception(msg)
    logger.debug("get_input_subset_dataset took %s seconds.",
                 time.time() - start_time)
    self.status.set('**** Input data fetched', 10)

    ########################
    # input data preparation
    ########################
    self.status.set('Start preparing input data', 12)
    start_time = time.time()  # measure data preparation ...
    try:
        # Construct descriptive filenames for the three files
        # listed in the config file
        refDatesString = dt.strftime(refSt, '%Y-%m-%d') + "_" + dt.strftime(refEn, '%Y-%m-%d')
        simDatesString = dt.strftime(dateSt, '%Y-%m-%d') + "_" + dt.strftime(dateEn, '%Y-%m-%d')
        archiveNameString = "base_" + var + "_" + refDatesString + '_%.1f_%.1f_%.1f_%.1f' \
            % (bbox[0], bbox[2], bbox[1], bbox[3])
        simNameString = "sim_" + var + "_" + simDatesString + '_%.1f_%.1f_%.1f_%.1f' \
            % (bbox[0], bbox[2], bbox[1], bbox[3])
        archive = call(resource=nc_subset,
                       time_range=[refSt, refEn],
                       prefix=archiveNameString)
        simulation = call(resource=nc_subset, time_range=[dateSt, dateEn],
                          prefix=simNameString)
        logger.info('archive and simulation files generated: %s, %s'
                    % (archive, simulation))
    except Exception as e:
        msg = 'failed to prepare archive and simulation files %s ' % e
        logger.debug(msg)
        raise Exception(msg)

    try:
        if seacyc is True:
            logger.info('normalization function with method: %s ' % normalize)
            seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                archive, simulation, method=normalize)
        else:
            seasoncyc_base = seasoncyc_sim = None
    except Exception as e:
        msg = 'failed to generate normalization files %s ' % e
        logger.debug(msg)
        raise Exception(msg)

    ip, output_file = mkstemp(dir='.', suffix='.txt')
    files = [path.abspath(archive), path.abspath(simulation), output_file]
    logger.debug("Data preparation took %s seconds.", time.time() - start_time)

    ############################
    # generate the config file
    ############################
    self.status.set('writing config file', 15)
    start_time = time.time()  # measure write config ...
    try:
        config_file = analogs.get_configfile(
            files=files,
            seasoncyc_base=seasoncyc_base,
            seasoncyc_sim=seasoncyc_sim,
            timewin=timewin,
            varname=var,
            seacyc=seacyc,
            cycsmooth=91,
            nanalog=nanalog,
            seasonwin=seasonwin,
            distfun=distance,
            outformat=outformat,
            calccor=True,
            silent=False,
            period=[dt.strftime(refSt, '%Y-%m-%d'),
                    dt.strftime(refEn, '%Y-%m-%d')],
            bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
    except Exception as e:
        msg = 'failed to generate config file %s ' % e
        logger.debug(msg)
        raise Exception(msg)
    logger.debug("write_config took %s seconds.", time.time() - start_time)

    #######################
    # CASTf90 call
    #######################
    import subprocess
    import shlex

    start_time = time.time()  # measure call castf90
    self.status.set('Start CASTf90 call', 20)
    try:
        # self.status.set('execution of CASTf90', 50)
        cmd = 'analogue.out %s' % path.relpath(config_file)
        # system(cmd)
        args = shlex.split(cmd)
        output, error = subprocess.Popen(
            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        ).communicate()
        logger.info('analogue.out info:\n %s ' % output)
        logger.debug('analogue.out errors:\n %s ' % error)
        self.status.set('**** CASTf90 succeeded', 90)
    except Exception as e:
        msg = 'CASTf90 failed %s ' % e
        logger.error(msg)
        raise Exception(msg)
    logger.debug("castf90 took %s seconds.", time.time() - start_time)

    ########################
    # generate analog viewer
    ########################
    try:
        f = analogs.reformat_analogs(output_file)
        logger.info('analogs reformatted')
        self.status.set('Successfully reformatted analog file', 50)
        # put config file into output folder
        config_output_path, config_output_url = analogs.copy_configfile(
            config_file
        )
        output_av = analogs.get_viewer(
            f,
            path.basename(config_output_path))
        logger.info('Viewer generated')
        self.status.set('Successfully generated analogs viewer', 90)
        logger.info('output_av: %s ' % output_av)
    except Exception as e:
        msg = 'Failed to reformat analogs file or generate viewer %s ' % e
        logger.debug(msg)

    self.status.set('preparing output', 99)
    self.config.setValue(config_output_url)  # config_file
    self.analogs.setValue(output_file)
    self.output_netcdf.setValue(simulation)
    self.output_html.setValue(output_av)

    self.status.set('execution ended', 100)
    logger.debug("total execution took %s seconds.",
                 time.time() - process_start_time)
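# The CASTf90 step above reduces to: write a config file, run
# `analogue.out <config>`, capture stdout/stderr. A minimal sketch of the
# same pattern, assuming analogue.out is on the PATH and config_file exists:
#
# import shlex
# import subprocess
# from os import path
# args = shlex.split('analogue.out %s' % path.relpath(config_file))
# out, err = subprocess.Popen(args, stdout=subprocess.PIPE,
#                             stderr=subprocess.PIPE).communicate()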
def reanalyses(start=1948, end=None, variable='slp', dataset='NCEP', timres='day', getlevel=True):
    """
    Fetches the reanalysis data (NCEP, 20CR or ERA_20C) to the local file system

    :param start: int for start year to fetch source data
    :param end: int for end year to fetch source data (if None, current year will be the end)
    :param variable: variable name (default='slp'), geopotential height is given as e.g. z700
    :param dataset: default='NCEP'
    :param timres: temporal resolution of the source data ('day' or '6h')
    :param getlevel: if True, extract the pressure level of a z-variable (default=True)

    :return list: list of path/files.nc
    """
    # used for netCDF conversion
    from netCDF4 import Dataset
    from os import path, system
    from flyingpigeon.ocgis_module import call
    from shutil import move

    try:
        from datetime import datetime as dt

        if end is None:
            end = dt.now().year
        obs_data = []

        if start is None:
            if dataset == 'NCEP':
                start = 1948
            if dataset == '20CR':
                start = 1851
        LOGGER.info('start / end date set')
    except:
        msg = "get_OBS module failed to get start end dates"
        LOGGER.exception(msg)
        raise Exception(msg)

    if 'z' in variable:
        level = variable.strip('z')
    else:
        level = None

    LOGGER.info('level: %s' % level)

    try:
        for year in range(start, end + 1):
            LOGGER.debug('fetching single file for %s year %s ' % (dataset, year))
            try:
                if dataset == 'NCEP':
                    if variable == 'slp':
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/surface/%s.%s.nc' % (variable, year)  # noqa
                    if 'z' in variable:
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/pressure/hgt.%s.nc' % (year)  # noqa
                elif dataset == '20CRV2':
                    if variable == 'prmsl':
                        if timres == '6h':
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/monolevel/prmsl.%s.nc' % year  # noqa
                        else:
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/Dailies/monolevel/prmsl.%s.nc' % year  # noqa
                    if 'z' in variable:
                        if timres == '6h':
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/pressure/hgt.%s.nc' % (year)  # noqa
                        else:
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/Dailies/pressure/hgt.%s.nc' % (year)  # noqa
                elif dataset == '20CRV2c':
                    if variable == 'prmsl':
                        if timres == '6h':
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/monolevel/prmsl.%s.nc' % year  # noqa
                        else:
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/Dailies/monolevel/prmsl.%s.nc' % year  # noqa
                    if 'z' in variable:
                        if timres == '6h':
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/pressure/hgt.%s.nc' % (year)  # noqa
                        else:
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/Dailies/pressure/hgt.%s.nc' % (year)  # noqa
                else:
                    LOGGER.debug('Dataset %s not known' % dataset)
                LOGGER.debug('url: %s' % url)
            except:
                msg = "could not set url"
                LOGGER.exception(msg)
            try:
                df = utils.download(url, cache=True)
                LOGGER.debug('single file fetched %s ' % year)
                # convert to NETCDF4_CLASSIC
                try:
                    ds = Dataset(df)
                    df_time = ds.variables['time']
                    # Here, we would need to check not just the calendar, but
                    # whether the file is netCDF4 classic already...
                    if hasattr(df_time, 'calendar') is False:
                        p, f = path.split(path.abspath(df))
                        LOGGER.debug("path = %s , file %s " % (p, f))
                        # May be an issue if several users are working at the same time
                        move(df, f)
                        conv = call(resource=f,
                                    output_format_options={'data_model': 'NETCDF4_CLASSIC'},
                                    dir_output=p,
                                    prefix=f.replace('.nc', ''))
                        obs_data.append(conv)
                        LOGGER.debug('file %s converted to NETCDF4_CLASSIC' % conv)
                        # Cleaning: the cache could grow to 50 GB for each (!) user
                        # TODO: check how links work
                        cmdrm = 'rm -f %s' % (f)
                        system(cmdrm)
                    else:
                        obs_data.append(df)
                    ds.close()
                except:
                    LOGGER.exception('failed to convert into NETCDF4_CLASSIC')
            except:
                msg = "download failed on {0}.".format(url)
                LOGGER.exception(msg)
        LOGGER.info('Reanalyses data fetched for %s files' % len(obs_data))
    except:
        msg = "get reanalyses module failed to fetch data"
        LOGGER.exception(msg)
        raise Exception(msg)

    if (level is None) or (getlevel is False):
        data = obs_data
    else:
        LOGGER.info('get level: %s' % level)
        data = get_level(obs_data, level=level)
    return data
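# Usage sketch: fetch daily NCEP geopotential height at 700 hPa for
# 2000-2001. With getlevel=True the 'z700' shortcut triggers get_level()
# on the downloaded hgt files:
#
# from flyingpigeon.datafetch import reanalyses
# ncs = reanalyses(start=2000, end=2001, variable='z700',
#                  dataset='NCEP', timres='day', getlevel=True)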
def reanalyses(start=1948, end=None, variable='slp', dataset='NCEP'):
    """
    Fetches the reanalysis data (NCEP, 20CR or ERA_20C) to the local file system

    :param start: int for start year to fetch source data
    :param end: int for end year to fetch source data (if None, current year will be the end)
    :param variable: variable name (default='slp'), geopotential height is given as e.g. z700
    :param dataset: default='NCEP'

    :return list: list of path/files.nc
    """
    # used for netCDF conversion
    from os import path
    from flyingpigeon.ocgis_module import call
    from shutil import move

    try:
        from datetime import datetime as dt

        if end is None:
            end = dt.now().year
        obs_data = []

        if start is None:
            if dataset == 'NCEP':
                start = 1948
            if dataset == '20CR':
                start = 1851
        LOGGER.info('start / end date set')
    except:
        msg = "get_OBS module failed to get start end dates"
        LOGGER.exception(msg)
        raise Exception(msg)

    if 'z' in variable:
        level = variable.strip('z')
    else:
        level = None

    LOGGER.info('level: %s' % level)

    try:
        for year in range(start, end + 1):
            LOGGER.debug('fetching single file for %s year %s ' % (dataset, year))
            try:
                if dataset == 'NCEP':
                    if variable == 'slp':
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/surface/%s.%s.nc' % (variable, year)  # noqa
                    if 'z' in variable:
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/pressure/hgt.%s.nc' % (year)  # noqa
                elif dataset == '20CRV2':
                    if variable == 'prmsl':
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/monolevel/prmsl.%s.nc' % year  # noqa
                    if 'z' in variable:
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/pressure/hgt.%s.nc' % (year)  # noqa
                elif dataset == '20CRV2c':
                    if variable == 'prmsl':
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/monolevel/prmsl.%s.nc' % year  # noqa
                    if 'z' in variable:
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/pressure/hgt.%s.nc' % (year)  # noqa
                else:
                    LOGGER.debug('Dataset %s not known' % dataset)
                LOGGER.debug('url: %s' % url)
            except:
                msg = "could not set url"
                LOGGER.exception(msg)
            try:
                df = utils.download(url, cache=True)
                LOGGER.debug('single file fetched %s ' % year)
                # convert to NETCDF4_CLASSIC
                try:
                    p, f = path.split(path.abspath(df))
                    LOGGER.debug("path = %s , file %s " % (p, f))
                    move(df, f)
                    conv = call(resource=f,
                                output_format_options={'data_model': 'NETCDF4_CLASSIC'},
                                dir_output=p,
                                prefix=f.replace('.nc', ''))
                    obs_data.append(conv)
                    LOGGER.debug('file %s converted to NETCDF4_CLASSIC' % conv)
                except:
                    LOGGER.exception('failed to convert into NETCDF4_CLASSIC')
            except:
                msg = "download failed on {0}.".format(url)
                LOGGER.exception(msg)
        LOGGER.info('Reanalyses data fetched for %s files' % len(obs_data))
    except:
        msg = "get reanalyses module failed to fetch data"
        LOGGER.exception(msg)
        raise Exception(msg)

    if level is None:
        data = obs_data
    else:
        LOGGER.info('get level: %s' % level)
        data = get_level(obs_data, level=level)
    return data
def get_gam(ncs_indices, coordinate):
    from netCDF4 import Dataset
    from os.path import basename
    from shapely.geometry import Point
    from numpy import squeeze, ravel, isnan, nan, array, reshape
    # NOTE: numpy min/max shadow the builtins inside this function
    from numpy import min, max, mean, append, zeros, ones

    from flyingpigeon.utils import get_variable, get_values, unrotate_pole
    from flyingpigeon.ocgis_module import call

    try:
        from rpy2.robjects.packages import importr
        import rpy2.robjects as ro
        import rpy2.robjects.numpy2ri

        rpy2.robjects.numpy2ri.activate()
        base = importr("base")
        stats = importr("stats")
        mgcv = importr("mgcv")
        logger.info('rpy2 modules imported')
    except Exception as e:
        msg = 'failed to import rpy2 modules %s' % e
        logger.debug(msg)
        raise Exception(msg)

    for i, ncs in enumerate(ncs_indices):
        # ocgis needs unrotated coordinates to extract points;
        # unrotate_pole writes lats and lons into the file.
        # ACHTUNG: will fail if the data is stored on a file system
        # without write permissions
        try:
            lats, lons = unrotate_pole(ncs, write_to_file=True)
            point = Point(float(coordinate[0]), float(coordinate[1]))
            # get the values
            variable = get_variable(ncs)
            agg = basename(ncs).split('_')[-2]
            indice = '%s_%s' % (variable, agg)
            timeseries = call(resource=ncs, geom=point, select_nearest=True)
            ts = Dataset(timeseries)
            vals = squeeze(ts.variables[variable][:])

            # extend the series by one value range below and above
            dif = max(vals) - min(vals)
            a = append(vals - dif, vals)
            vals = append(a, vals + dif)

            if i == 0:
                # pseudo absence/presence vector: 0 - 1 - 0
                a = append(zeros(len(vals)), ones(len(vals)))
                PA = append(a, zeros(len(vals)))
                data = {'PA': ro.FloatVector(PA)}
                data[str(indice)] = ro.FloatVector(vals)
                form = 'PA ~ '
                form = form + 's(%s, k=3)' % indice
            else:
                form = form + ' + s(%s, k=3)' % indice
                data[str(indice)] = ro.FloatVector(vals)
        except Exception as e:
            msg = 'Failed to prepare data %s' % e
            logger.debug(msg)

    try:
        logger.info(data)
        dataf = ro.DataFrame(data)
        eq = ro.Formula(str(form))
        gam_model = mgcv.gam(base.eval(eq), data=dataf,
                             family=stats.binomial(), scale=-1,
                             na_action=stats.na_exclude)
        # logger.info('GAM model trained')
    except Exception as e:
        msg = 'Failed to generate GAM model %s' % e
        logger.debug(msg)

    ###########################
    # plot response curves
    ###########################
    try:
        from flyingpigeon.visualisation import concat_images
        from tempfile import mkstemp

        grdevices = importr('grDevices')
        graphicDev = importr('Cairo')
        infos = []
        for i in range(1, len(ncs_indices) + 1):
            ip, info = mkstemp(dir='.', suffix='.png')
            # grdevices.png(filename=info)
            # graphicDev.CairoPDF(info, width=7, height=7, pointsize=12)
            graphicDev.CairoPNG(info, width=640, height=480, pointsize=12)
            print 'file opened!'
            infos.append(info)

            ylim = ro.IntVector([-6, 6])
            trans = ro.r('function(x){exp(x)/(1+exp(x))}')
            mgcv.plot_gam(gam_model, trans=trans, shade='T',
                          col='black', select=i,
                          ylab='Predicted Probability', rug=False,
                          cex_lab=1.4, cex_axis=1.4)  # ylim=ylim,
            # print 'gam plotted ;-)'
            grdevices.dev_off()
            # graphicDev.dev_off()
            # graphicDev.Cairo_onSave(dev_cur(), onSave=True)
        print(' %s plots generated ' % len(infos))
        infos_concat = concat_images(infos, orientation='h')
    except Exception as e:
        msg = 'Failed to plot statistical graphic %s' % e
        logger.debug(msg)
        raise Exception(msg)

    return gam_model, infos_concat
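# Hedged sketch of calling get_gam: ncs_indices would be the output of an
# indices calculation (one netCDF file per indice) and coordinate the
# (lon, lat) of a species presence point. The file names are hypothetical.
#
# gam_model, plot_png = get_gam(['TG_yr_ref.nc', 'TNn_yr_ref.nc'],
#                               coordinate=(4.05, 48.69))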
def calc_indice_percentile(resource=[], variable=None, prefix=None, indices='TG90p',
                           refperiod=None, grouping='yr', polygons=None,
                           percentile=90, mosaic=False,
                           dir_output=None, dimension_map=None):
    """
    Calculates given indices for suitable datasets in the appropriate time grouping and polygon.

    :param resource: list of filenames in data reference syntax (DRS) convention (netcdf)
    :param variable: variable name to be selected in the netcdf file (default=None)
    :param indices: string of indice (default='TG90p')
    :param prefix: filename prefix
    :param refperiod: reference period = [datetime, datetime]
    :param grouping: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: reference_file, indice_file
    """
    from os.path import join, dirname, exists
    from os import remove
    import uuid
    from numpy import ma
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.subset import clipping
    # get_variable and calc_grouping are assumed to live in flyingpigeon.utils
    from flyingpigeon.utils import get_values, get_time, get_variable, calc_grouping

    # TODO: see ticket https://github.com/bird-house/flyingpigeon/issues/200
    raise NotImplementedError('Sorry! Function is under construction.')

    # The code below is unreachable until the NotImplementedError above is
    # removed; it is retained as a draft.
    if type(resource) != list:
        resource = list([resource])

    # if type(indices) != list:
    #     indices = list([indices])
    #
    # if type(groupings) != list:
    #     groupings = list([groupings])
    #
    # if type(refperiod) == list:
    #     refperiod = refperiod[0]
    #
    # if refperiod is not None:
    #     start = dt.strptime(refperiod.split('-')[0], '%Y%m%d')
    #     end = dt.strptime(refperiod.split('-')[1], '%Y%m%d')
    #     time_range = [start, end]
    # else:
    #     time_range = None

    ################################################
    # Compute a custom percentile basis using ICCLIM
    ################################################
    from ocgis.contrib import library_icclim as lic

    calc_group = calc_grouping(grouping)

    if variable is None:
        variable = get_variable(resource)

    if polygons is None:
        nc_reference = call(resource=resource,
                            prefix=str(uuid.uuid4()),
                            time_range=refperiod,
                            output_format='nc')
    else:
        nc_reference = clipping(resource=resource,
                                prefix=str(uuid.uuid4()),
                                time_range=refperiod,
                                output_format='nc',
                                polygons=polygons,
                                mosaic=mosaic)

    # arr = get_values(resource=nc_reference)
    # dt_arr = get_time(resource=nc_reference)
    # arr = ma.masked_array(arr)
    # dt_arr = ma.masked_array(dt_arr)
    # percentile = percentile
    # window_width = 5
    #
    # for indice in indices:
    #     name = indice.replace('_', str(percentile))
    #     var = indice.split('_')[0]
    #
    #     operation = None
    #     if 'T' in var:
    #         if percentile >= 50:
    #             operation = 'Icclim%s90p' % var
    #             func = 'icclim_%s90p' % var  # icclim_TG90p
    #         else:
    #             operation = 'Icclim%s10p' % var
    #             func = 'icclim_%s10p' % var
    #
    #     ################################
    #     # load the appropriate operation
    #     ################################
    #     ops = [op for op in dir(lic) if operation in op]
    #     if len(ops) == 0:
    #         raise Exception("operator does not exist %s" % operation)
    #
    #     exec "percentile_dict = lic.%s.get_percentile_dict(arr, dt_arr, percentile, window_width)" % ops[0]
    #     calc = [{'func': func, 'name': name, 'kwds': {'percentile_dict': percentile_dict}}]
    #
    #     if polygons is None:
    #         nc_indices.extend(call(resource=resource,
    #                                prefix=key.replace(variable, name).replace('_day_', '_%s_' % grouping),
    #                                calc=calc,
    #                                calc_grouping=calc_group,
    #                                output_format='nc'))
    #     else:
    #         nc_indices.extend(clipping(resource=resource,
    #                                    prefix=key.replace(variable, name).replace('_day_', '_%s_' % grouping),
    #                                    calc=calc,
    #                                    calc_grouping=calc_group,
    #                                    output_format='nc',
    #                                    polygons=polygons,
    #                                    mosaic=mosaic,
    #                                    ))
    # if len(nc_indices) == 0:
    #     LOGGER.debug('No indices are calculated')
    #     return None
    return nc_indices
def execute(self):
    import time  # performance test
    process_start_time = time.time()  # measure process execution time ...

    from os import path
    from tempfile import mkstemp
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon import analogs
    from flyingpigeon.datafetch import reanalyses

    self.status.set('execution started at : %s ' % dt.now(), 5)

    start_time = time.time()  # measure init ...

    #######################
    # read input parameters
    #######################
    try:
        self.status.set('read input parameter : %s ' % dt.now(), 5)
        refSt = self.getInputValues(identifier='refSt')
        refEn = self.getInputValues(identifier='refEn')
        dateSt = self.getInputValues(identifier='dateSt')
        dateEn = self.getInputValues(identifier='dateEn')
        seasonwin = int(self.getInputValues(identifier='seasonwin')[0])
        nanalog = int(self.getInputValues(identifier='nanalog')[0])
        bbox_obj = self.BBox.getValue()
        normalize = self.getInputValues(identifier='normalize')[0]
        distance = self.getInputValues(identifier='dist')[0]
        outformat = self.getInputValues(identifier='outformat')[0]
        timewin = int(self.getInputValues(identifier='timewin')[0])
        experiment = self.getInputValues(identifier='experiment')[0]
        logger.info('input parameters set')
        self.status.set('Read in and convert the arguments', 5)
    except Exception as e:
        msg = 'failed to read input parameter %s ' % e
        logger.error(msg)
        raise Exception(msg)

    ######################################
    # convert types and set environment
    ######################################
    try:
        self.status.set('Start preparing environment, converting arguments', 7)
        refSt = dt.strptime(refSt[0], '%Y-%m-%d')
        refEn = dt.strptime(refEn[0], '%Y-%m-%d')
        dateSt = dt.strptime(dateSt[0], '%Y-%m-%d')
        dateEn = dt.strptime(dateEn[0], '%Y-%m-%d')

        if normalize == 'None':
            seacyc = False
        else:
            seacyc = True

        if outformat == 'ascii':
            outformat = '.txt'
        elif outformat == 'netCDF':
            outformat = '.nc'
        else:
            logger.error('output format not valid')

        start = min(refSt, dateSt)
        end = max(refEn, dateEn)

        if bbox_obj is not None:
            logger.info("bbox_obj={0}".format(bbox_obj.coords))
            bbox = [bbox_obj.coords[0][0],
                    bbox_obj.coords[0][1],
                    bbox_obj.coords[1][0],
                    bbox_obj.coords[1][1]]
            logger.info("bbox={0}".format(bbox))
        else:
            bbox = None

        # region = self.getInputValues(identifier='region')[0]
        # bbox = [float(b) for b in region.split(',')]

        dataset, var = experiment.split('_')
        logger.info('environment set')
    except Exception as e:
        msg = 'failed to set environment %s ' % e
        logger.error(msg)
        raise Exception(msg)

    try:
        if dataset == 'NCEP':
            if 'z' in var:
                variable = 'hgt'
                level = var.strip('z')
                # conform_units_to=None
            else:
                variable = 'slp'
                level = None
                # conform_units_to='hPa'
        elif '20CRV2' in dataset:
            if 'z' in var:
                variable = 'hgt'
                level = var.strip('z')
                # conform_units_to=None
            else:
                variable = 'prmsl'
                level = None
                # conform_units_to='hPa'
        else:
            logger.error('Reanalyses dataset not known')
        logger.info('environment set')
    except Exception as e:
        msg = 'failed to set environment %s ' % e
        logger.error(msg)
        raise Exception(msg)

    logger.debug("init took %s seconds.", time.time() - start_time)
    self.status.set('Read in and convert the arguments done', 8)

    #################
    # get input data
    #################
    start_time = time.time()  # measure get_input_data ...
    self.status.set('fetching input data', 7)
    try:
        input = reanalyses(start=start.year, end=end.year,
                           variable=var, dataset=dataset)
        logger.info('input files %s' % input)
        nc_subset = call(resource=input, variable=var,
                         geom=bbox, spatial_wrapping='wrap')
    except Exception as e:
        msg = 'failed to fetch or subset input files %s' % e
        logger.error(msg)
        raise Exception(msg)
    logger.debug("get_input_subset_dataset took %s seconds.",
                 time.time() - start_time)
    self.status.set('**** Input data fetched', 10)

    ########################
    # input data preparation
    ########################
    self.status.set('Start preparing input data', 12)
    start_time = time.time()  # measure data preparation ...
    try:
        # Construct descriptive filenames for the three files listed in the config file
        refDatesString = dt.strftime(refSt, '%Y-%m-%d') + "_" + dt.strftime(refEn, '%Y-%m-%d')
        simDatesString = dt.strftime(dateSt, '%Y-%m-%d') + "_" + dt.strftime(dateEn, '%Y-%m-%d')
        archiveNameString = "base_" + var + "_" + refDatesString + \
            '_%.1f_%.1f_%.1f_%.1f' % (bbox[0], bbox[2], bbox[1], bbox[3])
        simNameString = "sim_" + var + "_" + simDatesString + \
            '_%.1f_%.1f_%.1f_%.1f' % (bbox[0], bbox[2], bbox[1], bbox[3])
        archive = call(resource=nc_subset,
                       time_range=[refSt, refEn],
                       prefix=archiveNameString)
        simulation = call(resource=nc_subset,
                          time_range=[dateSt, dateEn],
                          prefix=simNameString)
        logger.info('archive and simulation files generated: %s, %s'
                    % (archive, simulation))
    except Exception as e:
        msg = 'failed to prepare archive and simulation files %s ' % e
        logger.debug(msg)
        raise Exception(msg)

    try:
        if seacyc is True:
            logger.info('normalization function with method: %s ' % normalize)
            seasoncyc_base, seasoncyc_sim = analogs.seacyc(archive,
                                                           simulation,
                                                           method=normalize)
        else:
            seasoncyc_base = seasoncyc_sim = None
    except Exception as e:
        msg = 'failed to generate normalization files %s ' % e
        logger.debug(msg)
        raise Exception(msg)

    ip, output_file = mkstemp(dir='.', suffix='.txt')

    # Leftover alternative (kept for reference): create the config with a
    # random name, then rename it descriptively.
    # # Create an empty config with a random name
    # ip, output = mkstemp(dir='.', suffix='.txt')
    # # Rename random name of config file to a more descriptive string
    # import os
    # anlgname = "ana_" + var + "_" + distance + "_sim_" + simDatesString + \
    #     "_ref_" + refDatesString + \
    #     '_%.1f_%.1f_%.1f_%.1f_seasonwin%ddays_%danalogs.txt' \
    #     % (bbox[0], bbox[2], bbox[1], bbox[3], seasonwin, nanalog)
    # os.rename(output, anlgname)
    # # Put config file in temporary working dir
    # tmppath = os.path.dirname(output)
    # output_file = os.path.join(tmppath, anlgname)
    # # Put all three files with their paths in an array

    files = [path.abspath(archive), path.abspath(simulation), output_file]

    logger.debug("Data preparation took %s seconds.", time.time() - start_time)

    ############################
    # generate the config file
    ############################
    self.status.set('writing config file', 15)
    start_time = time.time()  # measure write config ...
    try:
        config_file = analogs.get_configfile(
            files=files,
            seasoncyc_base=seasoncyc_base,
            seasoncyc_sim=seasoncyc_sim,
            timewin=timewin,
            varname=var,
            seacyc=seacyc,
            cycsmooth=91,
            nanalog=nanalog,
            seasonwin=seasonwin,
            distfun=distance,
            outformat=outformat,
            calccor=True,
            silent=False,
            period=[dt.strftime(refSt, '%Y-%m-%d'),
                    dt.strftime(refEn, '%Y-%m-%d')],
            bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
    except Exception as e:
        msg = 'failed to generate config file %s ' % e
        logger.debug(msg)
        raise Exception(msg)
    logger.debug("write_config took %s seconds.", time.time() - start_time)

    #######################
    # CASTf90 call
    #######################
    import subprocess
    import shlex

    start_time = time.time()  # measure call castf90
    self.status.set('Start CASTf90 call', 20)
    try:
        # self.status.set('execution of CASTf90', 50)
        cmd = 'analogue.out %s' % path.relpath(config_file)
        # system(cmd)
        args = shlex.split(cmd)
        output, error = subprocess.Popen(
            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
        logger.info('analogue.out info:\n %s ' % output)
        logger.debug('analogue.out errors:\n %s ' % error)
        self.status.set('**** CASTf90 succeeded', 90)
    except Exception as e:
        msg = 'CASTf90 failed %s ' % e
        logger.error(msg)
        raise Exception(msg)
    logger.debug("castf90 took %s seconds.", time.time() - start_time)

    ########################
    # generate analog viewer
    ########################
    try:
        f = analogs.reformat_analogs(output_file)
        logger.info('analogs reformatted')
        self.status.set('Successfully reformatted analog file', 50)
        # put config file into output folder
        config_output_path, config_output_url = analogs.copy_configfile(config_file)
        output_av = analogs.get_viewer(f, path.basename(config_output_path))
        logger.info('Viewer generated')
        self.status.set('Successfully generated analogs viewer', 90)
        logger.info('output_av: %s ' % output_av)
    except Exception as e:
        msg = 'Failed to reformat analogs file or generate viewer %s ' % e
        logger.debug(msg)

    self.status.set('preparing output', 99)
    self.config.setValue(config_output_url)  # config_file
    self.analogs.setValue(output_file)
    self.output_netcdf.setValue(simulation)
    self.output_html.setValue(output_av)

    self.status.set('execution ended', 100)
    logger.debug("total execution took %s seconds.",
                 time.time() - process_start_time)
def calc_indice_percentile(resources=[], variable=None,
                           prefix=None, indices='TG90p',
                           refperiod=None, groupings='yr',
                           polygons=None, percentile=90, mosaic=False,
                           dir_output=None, dimension_map=None):
    """
    Calculates given indices for suitable files in the appropriate time grouping and polygon.

    :param resources: list of filenames in data reference syntax (DRS) convention (netcdf)
    :param variable: variable name to be selected in the netcdf file (default=None)
    :param indices: list of indices (default='TG90p')
    :param prefix: filename prefix
    :param refperiod: reference period tuple = (start, end)
    :param groupings: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices. Files are saved into dir_output.
    """
    from os.path import join, dirname, exists
    from os import remove, makedirs
    import uuid
    from numpy import ma
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.subset import clipping
    # get_variable, calc_grouping and sort_by_filename are assumed to live
    # in flyingpigeon.utils (they are used below but were not imported)
    from flyingpigeon.utils import get_values, get_time, get_variable
    from flyingpigeon.utils import calc_grouping, sort_by_filename

    if type(resources) != list:
        resources = list([resources])
    if type(indices) != list:
        indices = list([indices])
    if type(groupings) != list:
        groupings = list([groupings])
    if type(refperiod) == list:
        refperiod = refperiod[0]

    if refperiod is not None:
        start = dt.strptime(refperiod.split('-')[0], '%Y%m%d')
        end = dt.strptime(refperiod.split('-')[1], '%Y%m%d')
        time_range = [start, end]
    else:
        time_range = None

    if dir_output is not None:
        if not exists(dir_output):
            makedirs(dir_output)

    ################################################
    # Compute a custom percentile basis using ICCLIM
    ################################################
    from ocgis.contrib import library_icclim as lic

    nc_indices = []
    nc_dic = sort_by_filename(resources)

    for grouping in groupings:
        calc_group = calc_grouping(grouping)
        for key in nc_dic.keys():
            resource = nc_dic[key]
            if variable is None:
                variable = get_variable(resource)
            if polygons is None:
                nc_reference = call(resource=resource,
                                    prefix=str(uuid.uuid4()),
                                    time_range=time_range,
                                    output_format='nc',
                                    dir_output=dir_output)
            else:
                nc_reference = clipping(resource=resource,
                                        prefix=str(uuid.uuid4()),
                                        time_range=time_range,
                                        output_format='nc',
                                        polygons=polygons,
                                        dir_output=dir_output,
                                        mosaic=mosaic)

            arr = get_values(resource=nc_reference)
            dt_arr = get_time(resource=nc_reference)
            arr = ma.masked_array(arr)
            dt_arr = ma.masked_array(dt_arr)
            window_width = 5

            for indice in indices:
                name = indice.replace('_', str(percentile))
                var = indice.split('_')[0]

                operation = None
                if 'T' in var:
                    if percentile >= 50:
                        operation = 'Icclim%s90p' % var
                        func = 'icclim_%s90p' % var  # icclim_TG90p
                    else:
                        operation = 'Icclim%s10p' % var
                        func = 'icclim_%s10p' % var

                ################################
                # load the appropriate operation
                ################################
                ops = [op for op in dir(lic) if operation in op]
                if len(ops) == 0:
                    raise Exception("operator does not exist %s" % operation)

                exec "percentile_dict = lic.%s.get_percentile_dict(arr, dt_arr, percentile, window_width)" % ops[0]
                calc = [{'func': func, 'name': name, 'kwds': {'percentile_dict': percentile_dict}}]

                if polygons is None:
                    nc_indices.append(call(resource=resource,
                                           prefix=key.replace(variable, name).replace('_day_', '_%s_' % grouping),
                                           calc=calc,
                                           calc_grouping=calc_group,
                                           output_format='nc',
                                           dir_output=dir_output))
                else:
                    nc_indices.extend(clipping(resource=resource,
                                               prefix=key.replace(variable, name).replace('_day_', '_%s_' % grouping),
                                               calc=calc,
                                               calc_grouping=calc_group,
                                               output_format='nc',
                                               dir_output=dir_output,
                                               polygons=polygons,
                                               mosaic=mosaic))
    return nc_indices


# def calc_indice_unconventional(resource=[], variable=None, prefix=None,
#                                indices=None, polygons=None, groupings=None,
#                                dir_output=None, dimension_map=None):
#     """
#     Calculates given indices for suitable files in the appropriate time grouping and polygon.
#
#     :param resource: list of filenames in data reference syntax (DRS) convention (netcdf)
#     :param variable: variable name to be selected in the netcdf file (default=None)
#     :param indices: list of indices (default='TGx')
#     :param polygons: list of polygons (default=None)
#     :param grouping: indices time aggregation (default='yr')
#     :param out_dir: output directory for result file (netcdf)
#     :param dimension_map: optional dimension map if different to standard (default=None)
#
#     :return: list of netcdf files with calculated indices. Files are saved into dir_output
#     """
#     from os.path import join, dirname, exists
#     from os import remove
#     import uuid
#     from flyingpigeon import ocgis_module
#     from flyingpigeon.subset import get_ugid, get_geom
#
#     if type(resource) != list:
#         resource = list([resource])
#     if type(indices) != list:
#         indices = list([indices])
#     if type(polygons) != list and polygons is not None:
#         polygons = list([polygons])
#     elif polygons is None:
#         polygons = [None]
#     else:
#         logger.error('Polygons not found')
#     if type(groupings) != list:
#         groupings = list([groupings])
#
#     if dir_output is not None:
#         if not exists(dir_output):
#             makedirs(dir_output)
#
#     experiments = sort_by_filename(resource)
#     outputs = []
#
#     print('environment for calc_indice_unconventional set')
#     logger.info('environment for calc_indice_unconventional set')
#
#     for key in experiments:
#         if variable is None:
#             variable = get_variable(experiments[key][0])
#         try:
#             ncs = experiments[key]
#             for indice in indices:
#                 logger.info('indice: %s' % indice)
#                 try:
#                     for grouping in groupings:
#                         logger.info('grouping: %s' % grouping)
#                         try:
#                             calc_group = calc_grouping(grouping)
#                             logger.info('calc_group: %s' % calc_group)
#                             for polygon in polygons:
#                                 try:
#                                     domain = key.split('_')[1].split('-')[0]
#                                     if polygon is None:
#                                         if prefix is None:
#                                             prefix = key.replace(variable, indice).replace('_day_', '_%s_' % grouping)
#                                         geom = None
#                                         ugid = None
#                                     else:
#                                         if prefix is None:
#                                             prefix = key.replace(variable, indice).replace('_day_', '_%s_' % grouping).replace(domain, polygon)
#                                         geom = get_geom(polygon=polygon)
#                                         ugid = get_ugid(polygons=polygon, geom=geom)
#                                     if indice == 'TGx':
#                                         calc = [{'func': 'max', 'name': 'TGx'}]
#                                         tmp = ocgis_module.call(resource=ncs,  # conform_units_to='celcius',
#                                                                 variable=variable, dimension_map=dimension_map,
#                                                                 calc=calc, calc_grouping=calc_group, prefix=prefix,
#                                                                 dir_output=dir_output, geom=geom, select_ugid=ugid)
#                                     elif indice == 'TGn':
#                                         calc = [{'func': 'min', 'name': 'TGn'}]
#                                         tmp = ocgis_module.call(resource=ncs,  # conform_units_to='celcius',
#                                                                 variable=variable, dimension_map=dimension_map,
#                                                                 calc=calc, calc_grouping=calc_group, prefix=prefix,
#                                                                 dir_output=dir_output, geom=geom, select_ugid=ugid)
#                                     elif indice == 'TGx5day':
#                                         calc = [{'func': 'moving_window', 'name': 'TGx5day',
#                                                  'kwds': {'k': 5, 'operation': 'mean', 'mode': 'same'}}]
#                                         tmp2 = ocgis_module.call(resource=ncs,  # conform_units_to='celcius',
#                                                                  variable=variable, dimension_map=dimension_map,
#                                                                  calc=calc, prefix=str(uuid.uuid4()),
#                                                                  geom=geom, select_ugid=ugid)
#                                         calc = [{'func': 'max', 'name': 'TGx5day'}]
#                                         logger.info('moving window calculated : %s' % tmp2)
#                                         tmp = ocgis_module.call(resource=tmp2,
#                                                                 variable=indice, dimension_map=dimension_map,
#                                                                 calc=calc, calc_grouping=calc_group, prefix=prefix,
#                                                                 dir_output=dir_output)
#                                         remove(tmp2)
#                                     elif indice == 'TGn5day':
#                                         calc = [{'func': 'moving_window', 'name': 'TGn5day',
#                                                  'kwds': {'k': 5, 'operation': 'mean', 'mode': 'same'}}]
#                                         tmp2 = ocgis_module.call(resource=ncs,  # conform_units_to='celcius',
#                                                                  variable=variable, dimension_map=dimension_map,
#                                                                  calc=calc, prefix=str(uuid.uuid4()),
#                                                                  geom=geom, select_ugid=ugid)
#                                         calc = [{'func': 'min', 'name': 'TGn5day'}]
#                                         logger.info('moving window calculated : %s' % tmp2)
#                                         tmp = ocgis_module.call(resource=tmp2,
#                                                                 variable=indice, dimension_map=dimension_map,
#                                                                 calc=calc, calc_grouping=calc_group, prefix=prefix,
#                                                                 dir_output=dir_output)
#                                         remove(tmp2)
#                                     else:
#                                         logger.error('Indice %s is not a known indice' % (indice))
#                                     outputs.append(tmp)
#                                     logger.info('indice file calculated %s ' % (tmp))
#                                 except Exception as e:
#                                     logger.debug('could not calc indice %s for key %s, polygon %s and calc_grouping %s : %s'
#                                                  % (indice, key, polygon, grouping, e))
#                         except Exception as e:
#                             logger.debug('could not calc indice %s for key %s and calc_grouping %s : %s'
#                                          % (indice, key, grouping, e))
#                 except Exception as e:
#                     logger.debug('could not calc indice %s for key %s: %s' % (indice, key, e))
#         except Exception as e:
#             logger.debug('could not calc key %s: %s' % (key, e))
#     return outputs
def _handler(self, request, response):
    ocgis.env.DIR_OUTPUT = tempfile.mkdtemp(dir=os.getcwd())
    ocgis.env.OVERWRITE = True

    tic = dt.now()
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    LOGGER.info('Start process')
    response.update_status('Execution started at : {}'.format(tic), 1)

    ######################################
    # Read inputs
    ######################################
    try:
        candidate = archiveextract(
            resource=rename_complexinputs(request.inputs['candidate']))
        target = archiveextract(
            resource=rename_complexinputs(request.inputs['target']))
        location = request.inputs['location'][0].data
        indices = [el.data for el in request.inputs['indices']]
        dist = request.inputs['dist'][0].data
        dateStartCandidate = request.inputs['dateStartCandidate'][0].data
        dateEndCandidate = request.inputs['dateEndCandidate'][0].data
        dateStartTarget = request.inputs['dateStartTarget'][0].data
        dateEndTarget = request.inputs['dateEndTarget'][0].data
    except Exception as ex:
        msg = 'Failed to read input parameter {}'.format(ex)
        LOGGER.error(msg)
        raise Exception(msg)

    response.update_status('Input parameters ingested', 2)

    ######################################
    # Process inputs
    ######################################
    try:
        point = Point(*map(float, location.split(',')))
        dateStartCandidate = dt.strptime(dateStartCandidate, '%Y-%m-%d')
        dateEndCandidate = dt.strptime(dateEndCandidate, '%Y-%m-%d')
        dateStartTarget = dt.strptime(dateStartTarget, '%Y-%m-%d')
        dateEndTarget = dt.strptime(dateEndTarget, '%Y-%m-%d')
    except Exception as ex:
        msg = 'failed to process inputs {}'.format(ex)
        LOGGER.error(msg)
        raise Exception(msg)

    LOGGER.debug("init took {}".format(dt.now() - tic))
    response.update_status('Processed input parameters', 3)

    ######################################
    # Extract target time series
    ######################################
    savetarget = False
    try:
        # Using `call` creates a netCDF file in the tmp directory.
        # Here we keep this stuff in memory.
        if savetarget:
            prefix = 'target_ts'
            target_ts = call(resource=target, geom=point, variable=indices,
                             time_range=[dateStartTarget, dateEndTarget],
                             select_nearest=True, prefix=prefix)
            # target_ts = [get_values(prefix + '.nc', ind) for ind in indices]
        else:
            trd = RequestDataset(
                target, variable=indices,
                time_range=[dateStartTarget, dateEndTarget])
            op = OcgOperations(trd, geom=point, select_nearest=True,
                               search_radius_mult=1.75)
            out = op.execute()
            target_ts = out.get_element()
    except Exception as ex:
        msg = 'Target extraction failed {}'.format(ex)
        LOGGER.debug(msg)
        raise Exception(msg)

    response.update_status('Extracted target series', 5)

    ######################################
    # Compute dissimilarity metric
    ######################################
    response.update_status('Computing spatial analog', 6)
    try:
        output = call(
            resource=candidate,
            calc=[{'func': 'dissimilarity',
                   'name': 'spatial_analog',
                   'kwds': {'dist': dist,
                            'target': target_ts,
                            'candidate': indices}}],
            time_range=[dateStartCandidate, dateEndCandidate],
        )
    except Exception as ex:
        msg = 'Spatial analog failed: {}'.format(ex)
        LOGGER.exception(msg)
        raise Exception(msg)

    add_metadata(output,
                 dist=dist,
                 indices=",".join(indices),
                 target_location=location,
                 candidate_time_range="{},{}".format(dateStartCandidate, dateEndCandidate),
                 target_time_range="{},{}".format(dateStartTarget, dateEndTarget))

    response.update_status('Computed spatial analog', 95)
    response.outputs['output_netcdf'].file = output

    response.update_status('Execution completed', 100)
    LOGGER.debug("Total execution took {}".format(dt.now() - tic))
    return response
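# Reduced sketch of the core ocgis 'dissimilarity' calc used above, outside
# the WPS plumbing. The metric and index names below are hypothetical
# placeholders; the keyword structure is the one passed in _handler:
#
# output = call(resource=candidate_files,
#               calc=[{'func': 'dissimilarity', 'name': 'spatial_analog',
#                      'kwds': {'dist': 'seuclidean',   # hypothetical metric name
#                               'target': target_ts,
#                               'candidate': indices}}],
#               time_range=[dateStartCandidate, dateEndCandidate])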
def get_anomalies(nc_file, frac=0.2, reference=None):
    """
    Anomalisation of data subsets for weather classification by subtracting a smoothed annual cycle

    :param nc_file: input netCDF file
    :param frac: Number between 0-1 for strength of smoothing
                 (0 = close to the original data, 1 = flat line)
                 default = 0.2
    :param reference: Period to calculate annual cycle

    :returns str: path to output netCDF file
    """
    try:
        variable = utils.get_variable(nc_file)
        calc = [{'func': 'mean', 'name': variable}]
        calc_grouping = ['day', 'month']
        nc_anual_cycle = call(nc_file,
                              calc=calc,
                              calc_grouping=calc_grouping,
                              time_range=reference)
        logger.info('annual cycle calculated')
    except Exception as e:
        msg = 'failed to calculate annual cycle %s' % e
        logger.error(msg)
        raise Exception(msg)

    try:
        # spline for smoothing
        import statsmodels.api as sm
        from numpy import tile, empty, linspace
        from netCDF4 import Dataset
        from cdo import Cdo
        cdo = Cdo()

        # variable = utils.get_variable(nc_file)
        ds = Dataset(nc_anual_cycle, mode='a')
        vals = ds.variables[variable]
        vals_sm = empty(vals.shape)
        ts = vals.shape[0]
        x = linspace(1, ts * 3, num=ts * 3, endpoint=True)

        for lat in range(vals.shape[1]):
            for lon in range(vals.shape[2]):
                try:
                    y = tile(vals[:, lat, lon], 3)
                    # ys = smooth(y, window_size=91, order=2, deriv=0, rate=1)[ts:ts*2]
                    ys = sm.nonparametric.lowess(y, x, frac=frac)[ts:ts * 2, 1]
                    vals_sm[:, lat, lon] = ys
                except Exception:
                    msg = 'failed for lat %s lon %s' % (lat, lon)
                    logger.exception(msg)
                    raise Exception(msg)
                logger.debug('done for %s - %s ' % (lat, lon))
        vals[:, :, :] = vals_sm[:, :, :]
        ds.close()
        logger.info('smoothing of annual cycle done')
    except Exception:
        msg = 'failed smoothing of annual cycle'
        logger.exception(msg)
        raise Exception(msg)

    try:
        ip, nc_anomal = mkstemp(dir='.', suffix='.nc')
        nc_anomal = cdo.sub(input=[nc_file, nc_anual_cycle], output=nc_anomal)
        logger.info('cdo.sub; anomalisation done: %s ' % nc_anomal)
    except Exception:
        msg = 'failed subtraction of annual cycle'
        logger.exception(msg)
        raise Exception(msg)
    return nc_anomal
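# Illustrative sketch (not part of flyingpigeon): the LOWESS smoothing used
# in get_anomalies, applied to a synthetic daily cycle. The cycle is tiled
# three times and the middle copy kept, so the smoother sees no
# year-boundary edges.
import numpy as np
import statsmodels.api as sm

ts = 365
cycle = np.sin(np.linspace(0, 2 * np.pi, ts)) + np.random.normal(0, 0.3, ts)
y = np.tile(cycle, 3)                              # wrap the cycle three times
x = np.linspace(1, ts * 3, num=ts * 3, endpoint=True)
smoothed = sm.nonparametric.lowess(y, x, frac=0.2)[ts:ts * 2, 1]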
def execute(self):
    logger.info('Start process')
    init_process_logger('log.txt')
    self.output_log.setValue('log.txt')

    from datetime import datetime as dt
    from flyingpigeon import weatherregimes as wr
    from tempfile import mkstemp

    ################################
    # reading in the input arguments
    ################################
    try:
        resource = self.getInputValues(identifier='resource')
        url_Rdat = self.getInputValues(identifier='Rdat')[0]
        url_dat = self.getInputValues(identifier='dat')[0]
        url_ref_file = self.getInputValues(identifier='netCDF')  # can be None
        season = self.getInputValues(identifier='season')[0]
        period = self.getInputValues(identifier='period')[0]
        anualcycle = self.getInputValues(identifier='anualcycle')[0]
    except Exception as e:
        logger.debug('failed to read in the arguments %s ' % e)

    try:
        start = dt.strptime(period.split('-')[0], '%Y%m%d')
        end = dt.strptime(period.split('-')[1], '%Y%m%d')
        # kappa = int(self.getInputValues(identifier='kappa')[0])

        logger.info('period %s' % str(period))
        logger.info('season %s' % str(season))
        logger.info('read in the arguments')
        logger.info('url_ref_file: %s' % url_ref_file)
        logger.info('url_Rdat: %s' % url_Rdat)
        logger.info('url_dat: %s' % url_dat)
    except Exception as e:
        logger.debug('failed to convert arguments %s ' % e)

    ############################
    # fetching training data
    ############################
    from flyingpigeon.utils import download, get_time
    from os.path import abspath

    try:
        dat = abspath(download(url_dat))
        Rdat = abspath(download(url_Rdat))
        logger.info('training data fetched')
    except Exception as e:
        logger.error('failed to fetch training data %s' % e)

    ##########################################################
    # get the required bbox and time region from resource data
    ##########################################################
    # from flyingpigeon.weatherregimes import get_level
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.utils import get_variable

    time_range = [start, end]
    variable = get_variable(resource)

    # assumption: `bbox` is used below but was never read above;
    # the related weatherregimes processes read it from a 'BBox' input:
    bbox = [float(b) for b in
            self.getInputValues(identifier='BBox')[0].split(',')]

    if len(url_ref_file) > 0:
        ref_file = download(url_ref_file[0])
        model_subset = call(
            resource=resource, variable=variable, time_range=time_range,
            # conform_units_to=conform_units_to,
            geom=bbox, spatial_wrapping='wrap',
            regrid_destination=ref_file, regrid_options='bil')
        logger.info('Dataset subset with regridding done: %s ' % model_subset)
    else:
        model_subset = call(
            resource=resource, variable=variable, time_range=time_range,
            # conform_units_to=conform_units_to,
            geom=bbox, spatial_wrapping='wrap',
        )
        logger.info('Dataset time period extracted: %s ' % model_subset)

    #######################
    # computing anomalies
    #######################
    cycst = anualcycle.split('-')[0]
    cycen = anualcycle.split('-')[1]
    reference = [dt.strptime(cycst, '%Y%m%d'), dt.strptime(cycen, '%Y%m%d')]
    model_anomal = wr.get_anomalies(model_subset, reference=reference)

    #####################
    # extracting season
    #####################
    model_season = wr.get_season(model_anomal, season=season)

    #######################
    # call the R scripts
    #######################
    import shlex
    import subprocess
    from flyingpigeon import config
    from os.path import curdir, exists, join

    try:
        rworkspace = curdir
        Rsrc = config.Rsrc_dir()
        Rfile = 'weatherregimes_projection.R'

        yr1 = start.year
        yr2 = end.year
        time = get_time(model_season, format='%Y%m%d')

        # ip, output_graphics = mkstemp(dir=curdir, suffix='.pdf')
        ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
        ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')
        ip, output_frec = mkstemp(dir=curdir, suffix='.txt')

        args = ['Rscript', join(Rsrc, Rfile),
                '%s/' % curdir,
                '%s/' % Rsrc,
                '%s' % model_season,
                '%s' % variable,
                '%s' % str(time).strip("[]").replace("'", "").replace(" ", ""),
                # '%s' % output_graphics,
                '%s' % dat,
                '%s' % Rdat,
                '%s' % file_pca,
                '%s' % file_class,
                '%s' % output_frec,
                '%s' % season,
                '%s' % start.year,
                '%s' % end.year,
                '%s' % 'MODEL']
        logger.info('R call built')
    except Exception as e:
        msg = 'failed to build the R command %s' % e
        logger.error(msg)
        raise Exception(msg)

    try:
        output, error = subprocess.Popen(
            args, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE).communicate()  # , shell=True
        logger.info('R outlog info:\n %s ' % output)
        logger.debug('R outlog errors:\n %s ' % error)
        if len(output) > 0:
            self.status.set('**** weatherregime in R succeeded', 90)
        else:
            logger.error('No output returned from R call')
    except Exception as e:
        msg = 'weatherregime in R %s ' % e
        logger.error(msg)
        raise Exception(msg)

    #################
    # set the outputs
    #################
    # self.Routput_graphic.setValue( output_graphics )
    self.output_pca.setValue(file_pca)
    self.output_classification.setValue(file_class)
    self.output_netcdf.setValue(model_season)
    self.output_frequency.setValue(output_frec)
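# Illustrative sketch (not part of flyingpigeon): running an R script and
# capturing its output, as done above. Script and arguments are
# hypothetical; checking the return code is more reliable than testing
# len(output), which misses scripts that fail silently.
import subprocess

args = ['Rscript', 'weatherregimes_example.R', 'arg1', 'arg2']
proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, error = proc.communicate()
if proc.returncode != 0:
    raise RuntimeError('R call failed:\n%s' % error)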
def clipping(resource=[], variable=None, dimension_map=None, calc=None,
             output_format='nc', calc_grouping=None, time_range=None,
             time_region=None, historical_concatination=True, prefix=None,
             spatial_wrapping='wrap', polygons=None, mosaic=False,
             dir_output=None, memory_limit=None):
    """
    returns list of clipped netCDF files

    :param resource: list of input netCDF files
    :param variable: variable (string) to be used in netCDF
    :param dimension_map: specify a dimension map if input netCDF has unconventional dimensions
    :param calc: ocgis calculation argument
    :param calc_grouping: ocgis calculation grouping
    :param historical_concatination: concat files of RCPs with appropriate historical runs into one timeseries
    :param prefix: prefix for output file name
    :param polygons: list of polygons to be used. If more than 1 in the list, an appropriate mosaic will be clipped
    :param mosaic: Whether the polygons are aggregated into a single geometry (True)
                   or individual files are created for each geometry (False).
    :param output_format: output_format (default='nc')
    :param dir_output: specify an output location
    :param time_range: [start, end] of time subset
    :param time_region: year, months or days to be extracted in the timeseries

    :returns list: path to clipped files
    """
    if not isinstance(resource, list):
        resource = [resource]
    if not isinstance(polygons, list):
        polygons = [polygons]
    if prefix is not None and not isinstance(prefix, list):
        prefix = [prefix]

    geoms = set()
    ncs = sort_by_filename(resource,
                           historical_concatination=historical_concatination)
    geom_files = []
    if mosaic is True:
        try:
            nameadd = '_'
            for polygon in polygons:
                geoms.add(get_geom(polygon))
                nameadd = nameadd + polygon.replace(' ', '')
            if len(geoms) > 1:
                LOGGER.error('polygons belong to different shapefiles! '
                             'mosaic option is not possible %s', geoms)
            else:
                geom = geoms.pop()
                ugids = get_ugid(polygons=polygons, geom=geom)
        except Exception:
            LOGGER.exception('geom identification failed')
        for i, key in enumerate(ncs.keys()):
            try:
                # if variable is None:
                variable = get_variable(ncs[key])
                LOGGER.info('variable %s detected in resource' % variable)
                if prefix is None:
                    name = key + nameadd
                else:
                    name = prefix[i]
                geom_file = call(resource=ncs[key], variable=variable,
                                 calc=calc, calc_grouping=calc_grouping,
                                 output_format=output_format, prefix=name,
                                 geom=geom, select_ugid=ugids,
                                 time_range=time_range,
                                 time_region=time_region,
                                 spatial_wrapping=spatial_wrapping,
                                 memory_limit=memory_limit,
                                 dir_output=dir_output,
                                 dimension_map=dimension_map)
                geom_files.append(geom_file)
                LOGGER.info('ocgis mosaic clipping done for %s ' % key)
            except Exception:
                msg = 'ocgis mosaic clipping failed for %s ' % key
                LOGGER.exception(msg)
    else:
        for i, polygon in enumerate(polygons):
            try:
                geom = get_geom(polygon)
                ugid = get_ugid(polygons=polygon, geom=geom)
                for key in ncs.keys():
                    try:
                        # if variable is None:
                        variable = get_variable(ncs[key])
                        LOGGER.info('variable %s detected in resource'
                                    % variable)
                        if prefix is None:
                            name = key + '_' + polygon.replace(' ', '')
                        else:
                            name = prefix[i]
                        geom_file = call(resource=ncs[key],
                                         variable=variable,
                                         calc=calc,
                                         calc_grouping=calc_grouping,
                                         output_format=output_format,
                                         prefix=name,
                                         geom=geom, select_ugid=ugid,
                                         dir_output=dir_output,
                                         dimension_map=dimension_map,
                                         spatial_wrapping=spatial_wrapping,
                                         memory_limit=memory_limit,
                                         time_range=time_range,
                                         time_region=time_region,
                                         )
                        geom_files.append(geom_file)
                        LOGGER.info('ocgis clipping done for %s ' % key)
                    except Exception:
                        msg = 'ocgis clipping failed for %s ' % key
                        LOGGER.exception(msg)
            except Exception:
                LOGGER.exception('geom identification failed')
    return geom_files
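# Illustrative usage sketch for clipping() above; the file name and region
# identifiers are hypothetical and must match the shapefiles known to
# get_geom/get_ugid.
clipped = clipping(resource=['tas_EUR-44_example_day_19700101-19991231.nc'],
                   polygons=['FRA', 'DEU'],  # two regions from one shapefile
                   mosaic=True,              # aggregate them into one geometry
                   dir_output='.')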
def get_data(variable, resource=None, polygons=None,
             dir_output=None, start=1950, end=2014):
    from os import rename, path, makedirs
    from flyingpigeon import utils
    from flyingpigeon import subset as sb
    from flyingpigeon import ocgis_module as om

    try:
        # ocgis.env.OVERWRITE = True
        # ocgis.env.DIR_SHPCABINET = config.shapefiles_dir()
        # geoms = sb.get_geom()
        # sci = ShpCabinetIterator(geoms)
        if dir_output is not None and not path.exists(dir_output):
            makedirs(dir_output)

        if polygons is not None:
            geom = sb.get_geom(polygon=polygons[0:1])
            ugid = sb.get_ugid(polygons=polygons, geom=geom)
        else:
            ugid = None
            geom = None

        if resource is None:
            resource = get_url(variable)

        dimension_map = {'X': {'variable': 'Actual_longitude',
                               'dimension': 'x', 'pos': 2},
                         'Y': {'variable': 'Actual_latitude',
                               'dimension': 'y', 'pos': 1},
                         'T': {'variable': 'time',
                               'dimension': 'time', 'pos': 0}}
        time_region = {'year': range(start, end + 1)}

        if variable == 'tg':
            var = 'tas'
            unit = 'K'
        elif variable == 'tn':
            var = 'tasmin'
            unit = 'K'
        elif variable == 'tx':
            var = 'tasmax'
            unit = 'K'
        elif variable == 'rr':
            var = 'pr'
            unit = 'kg m-2 s-1'

        prefix = path.split(resource)[1].replace(variable, var).replace('.nc', '')
        logger.info('processing variable %s' % var)
    except Exception:
        logger.exception('could not set processing environment')
        raise

    if variable == 'rr':
        try:
            calc = 'rr=rr/86400'  # mm/day to kg m-2 s-1 (86400 s per day)
            EOBS_file = om.call(resource=resource, variable=variable,
                                memory_limit=450,
                                dimension_map=dimension_map,
                                prefix=prefix, calc=calc,
                                geom=geom, select_ugid=ugid,
                                dir_output=dir_output,
                                time_region=time_region)
        except Exception:
            logger.error('ocgis failed for rr with url: %s' % resource)
    else:
        try:
            unit = 'K'
            EOBS_file = om.call(resource=resource, variable=variable,
                                memory_limit=450,
                                dimension_map=dimension_map,
                                conform_units_to=unit,
                                prefix=prefix,
                                geom=geom, select_ugid=ugid,
                                dir_output=dir_output,
                                time_region=time_region)
        except Exception:
            logger.exception('ocgis failed for tg, tx or tn')

    try:
        if polygons is None:
            domain = att_dict['CORDEX_domain']
        else:
            domain = att_dict['CORDEX_domain'].replace('EUR', polygons)
        EOBS_filename = '%s_%s_%s_%s_%s_%s_%s_%s_%s-%s.nc' % (
            var, domain,
            att_dict['driving_model_id'],
            att_dict['experiment_id'],
            att_dict['driving_model_ensemble_member'],
            att_dict['model_id'],
            att_dict['rcm_version_id'],
            att_dict['frequency'],
            start, end)
        fpath, basename = path.split(EOBS_file)
        set_attributes(EOBS_file, variable)
        rename(EOBS_file, path.join(fpath, EOBS_filename))
    except Exception:
        logger.exception('attributes not set for: %s' % EOBS_file)
    return path.join(fpath, EOBS_filename)
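# Illustrative sketch (not part of flyingpigeon): the string-function `calc`
# used above converts E-OBS precipitation from mm/day to kg m-2 s-1 by
# dividing by the 86400 seconds of a day. File name and prefix are
# hypothetical.
from flyingpigeon.ocgis_module import call

out = call(resource='rr_0.25deg_day_example.nc', variable='rr',
           calc='rr=rr/86400', prefix='pr_example')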
def _handler(self, request, response):
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    LOGGER.info('Start process')
    response.update_status('execution started at : {}'.format(dt.now()), 5)

    process_start_time = time.time()  # measure process execution time ...
    start_time = time.time()  # measure init ...

    ################################
    # reading in the input arguments
    ################################
    try:
        response.update_status('read input parameter : %s ' % dt.now(), 5)
        resource = archiveextract(
            resource=rename_complexinputs(request.inputs['resource']))
        refSt = request.inputs['refSt'][0].data
        refEn = request.inputs['refEn'][0].data
        dateSt = request.inputs['dateSt'][0].data
        dateEn = request.inputs['dateEn'][0].data
        seasonwin = request.inputs['seasonwin'][0].data
        nanalog = request.inputs['nanalog'][0].data

        # bbox = [-80, 20, 50, 70]
        # TODO: Add checking for wrong coordinates and apply default if necessary

        # level = 500
        level = request.inputs['level'][0].data
        if level == 500:
            dummylevel = 1000  # dummy workaround for cdo sellevel
        else:
            dummylevel = 500
        LOGGER.debug('LEVEL selected: %s hPa' % level)

        bbox = []
        bboxStr = request.inputs['BBox'][0].data
        bboxStr = bboxStr.split(',')
        # reorder to the sequence expected by ocgis: minx, maxx, miny, maxy
        bbox.append(float(bboxStr[0]))
        bbox.append(float(bboxStr[2]))
        bbox.append(float(bboxStr[1]))
        bbox.append(float(bboxStr[3]))
        LOGGER.debug('BBOX for ocgis: %s ' % bbox)
        LOGGER.debug('BBOX original: %s ' % bboxStr)

        # if bbox_obj is not None:
        #     bbox = [bbox_obj.coords[0][0], bbox_obj.coords[0][1],
        #             bbox_obj.coords[1][0], bbox_obj.coords[1][1]]
        # else:
        #     bbox = None
        # region = self.getInputValues(identifier='region')[0]
        # bbox = [float(b) for b in region.split(',')]
        # bbox_obj = self.BBox.getValue()

        normalize = request.inputs['normalize'][0].data
        distance = request.inputs['dist'][0].data
        outformat = request.inputs['outformat'][0].data
        timewin = request.inputs['timewin'][0].data

        # model_var = request.inputs['reanalyses'][0].data
        # model, var = model_var.split('_')

        LOGGER.info('input parameters set')
        response.update_status('Read in and convert the arguments', 5)
    except Exception as e:
        msg = 'failed to read input parameter %s ' % e
        LOGGER.error(msg)
        raise Exception(msg)

    ######################################
    # convert types and set environment
    ######################################
    try:
        # not necessary once ocgis_module.py is fixed:
        refSt = dt.combine(refSt, dt_time(12, 0))
        refEn = dt.combine(refEn, dt_time(12, 0))
        dateSt = dt.combine(dateSt, dt_time(12, 0))
        dateEn = dt.combine(dateEn, dt_time(12, 0))

        if normalize == 'None':
            seacyc = False
        else:
            seacyc = True

        if outformat == 'ascii':
            outformat = '.txt'
        elif outformat == 'netCDF':
            outformat = '.nc'
        else:
            LOGGER.error('output format not valid')

        start = min(refSt, dateSt)
        end = max(refEn, dateEn)

        LOGGER.info('environment set')
    except Exception as e:
        msg = 'failed to set environment %s ' % e
        LOGGER.error(msg)
        raise Exception(msg)

    LOGGER.debug("init took %s seconds.", time.time() - start_time)
    response.update_status('Read in and convert the arguments', 5)

    ########################
    # input data preparation
    ########################
    # TODO: Check if files contain more than one dataset
    response.update_status('Start preparing input data', 12)
    start_time = time.time()  # measure data preparation ...
    try:
        # TODO: Add selection of the level,
        # maybe below in call(..., level_range=[..., ...])
        if type(resource) == list:
            resource = sorted(resource,
                              key=lambda i: path.splitext(path.basename(i))[0])
        else:
            resource = [resource]

        # ==============================================================
        # Remove resources that are out of interest from the list
        # (years > and < than requested for calculation)
        tmp_resource = []
        for re in resource:
            s, e = get_timerange(re)
            tmpSt = dt.strptime(s, '%Y%m%d')
            tmpEn = dt.strptime(e, '%Y%m%d')
            if (tmpSt <= end) and (tmpEn >= start):
                tmp_resource.append(re)
                LOGGER.debug('Selected file: %s ' % re)
        resource = tmp_resource

        # ==============================================================
        # Try to fix memory issue (ocgis call for files of 20-30 GB...):
        # if 4D, select the pressure level before the domain cut.
        #
        # resource properties
        ds = Dataset(resource[0])
        variable = get_variable(resource[0])
        var = ds.variables[variable]
        dims = list(var.dimensions)
        dimlen = len(dims)

        try:
            model_id = ds.getncattr('model_id')
        except AttributeError:
            model_id = 'Unknown model'
        LOGGER.debug('MODEL: %s ' % model_id)

        lev_units = 'hPa'
        if dimlen > 3:
            # index [1] should really be detected;
            # assuming zg(time, plev, lat, lon)
            lev = ds.variables[dims[1]]
            lev_units = lev.units
            if lev_units == 'Pa':
                level = level * 100
                dummylevel = dummylevel * 100
                # TODO: check the name and units of the vertical level
                # and find 200, 300 or 500 mbar in it,
                # not just level = level * 100.

        # get levels
        from cdo import Cdo
        cdo = Cdo()

        lev_res = []
        if dimlen > 3:
            for res_fn in resource:
                tmp_f = 'lev_' + path.basename(res_fn)
                comcdo = '%s,%s' % (level, dummylevel)
                cdo.sellevel(comcdo, input=res_fn, output=tmp_f)
                lev_res.append(tmp_f)
        else:
            lev_res = resource

        # get domain
        regr_res = []
        for res_fn in lev_res:
            tmp_f = 'dom_' + path.basename(res_fn)
            comcdo = '%s,%s,%s,%s' % (bbox[0], bbox[2], bbox[1], bbox[3])
            cdo.sellonlatbox(comcdo, input=res_fn, output=tmp_f)
            regr_res.append(tmp_f)

        # archive_tmp = call(resource=resource, time_range=[refSt, refEn],
        #                    geom=bbox, spatial_wrapping='wrap')
        # simulation_tmp = call(resource=resource, time_range=[dateSt, dateEn],
        #                       geom=bbox, spatial_wrapping='wrap')
        archive_tmp = call(resource=regr_res, time_range=[refSt, refEn],
                           spatial_wrapping='wrap')
        simulation_tmp = call(resource=regr_res, time_range=[dateSt, dateEn],
                              spatial_wrapping='wrap')

        ###############################################################
        # TEMPORARY dirty workaround to get the level and its units;
        # will become a function in utils.py
        if dimlen > 3:
            archive = get_level(archive_tmp, level=level)
            simulation = get_level(simulation_tmp, level=level)
            variable = 'z%s' % level  # TODO: should be modulated here
        else:
            archive = archive_tmp
            simulation = simulation_tmp  # 3D, move forward
        ###############################################################

        if seacyc is True:
            seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                archive, simulation, method=normalize)
        else:
            seasoncyc_base = None
            seasoncyc_sim = None
    except Exception as e:
        msg = 'failed to prepare archive and simulation files %s ' % e
        LOGGER.debug(msg)
        raise Exception(msg)

    ip, output = mkstemp(dir='.', suffix='.txt')
    output_file = path.abspath(output)
    files = [path.abspath(archive), path.abspath(simulation), output_file]

    LOGGER.debug("data preparation took %s seconds.",
                 time.time() - start_time)

    ############################
    # generating the config file
    ############################
    # TODO: add MODEL name as argument
    response.update_status('writing config file', 15)
    start_time = time.time()  # measure write config ...
    try:
        config_file = analogs.get_configfile(
            files=files,
            seasoncyc_base=seasoncyc_base,
            seasoncyc_sim=seasoncyc_sim,
            base_id=model_id,
            sim_id=model_id,
            timewin=timewin,
            varname=variable,
            seacyc=seacyc,
            cycsmooth=91,
            nanalog=nanalog,
            seasonwin=seasonwin,
            distfun=distance,
            outformat=outformat,
            calccor=True,
            silent=False,
            period=[dt.strftime(refSt, '%Y-%m-%d'),
                    dt.strftime(refEn, '%Y-%m-%d')],
            bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
    except Exception as e:
        msg = 'failed to generate config file %s ' % e
        LOGGER.debug(msg)
        raise Exception(msg)

    LOGGER.debug("write_config took %s seconds.", time.time() - start_time)

    ##############
    # CASTf90 call
    ##############
    import subprocess
    import shlex

    start_time = time.time()  # measure call castf90
    response.update_status('Start CASTf90 call', 20)
    try:
        # response.update_status('execution of CASTf90', 50)
        cmd = 'analogue.out %s' % path.relpath(config_file)
        args = shlex.split(cmd)
        output, error = subprocess.Popen(
            args, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE).communicate()
        LOGGER.info('analogue.out info:\n %s ' % output)
        LOGGER.debug('analogue.out errors:\n %s ' % error)
        response.update_status('**** CASTf90 succeeded', 70)
    except Exception as e:
        msg = 'CASTf90 failed %s ' % e
        LOGGER.error(msg)
        raise Exception(msg)

    LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)
    response.update_status('preparing output', 70)

    response.outputs['config'].file = config_file
    response.outputs['analogs'].file = output_file
    response.outputs['output_netcdf'].file = simulation

    ########################
    # generate analog viewer
    ########################
    formated_analogs_file = analogs.reformat_analogs(output_file)
    # response.outputs['formated_analogs'].storage = FileStorage()
    response.outputs['formated_analogs'].file = formated_analogs_file
    LOGGER.info('analogs reformatted')
    response.update_status('reformatted analog file', 80)

    viewer_html = analogs.render_viewer(
        # configfile=response.outputs['config'].get_url(),
        configfile=config_file,
        # datafile=response.outputs['formated_analogs'].get_url())
        datafile=formated_analogs_file)
    response.outputs['output'].file = viewer_html
    response.update_status('Successfully generated analogs viewer', 90)
    LOGGER.info('rendered pages: %s ', viewer_html)

    response.update_status('execution ended', 100)
    LOGGER.debug("total execution took %s seconds.",
                 time.time() - process_start_time)
    return response
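# Illustrative sketch (not part of the process): pre-selecting a pressure
# level and a lon/lat box with cdo before any ocgis call, the memory-saving
# trick used above for large 4D files. File names and values are
# hypothetical; the levels are in Pa and the box is
# lonmin,lonmax,latmin,latmax.
from cdo import Cdo
cdo = Cdo()

lev_file = cdo.sellevel('50000,100000', input='zg_example.nc',
                        output='lev_zg_example.nc')
dom_file = cdo.sellonlatbox('-80,50,20,70', input=lev_file,
                            output='dom_zg_example.nc')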
def execute(self):
    import time  # performance test
    process_start_time = time.time()  # measure process execution time ...

    from os import path
    from tempfile import mkstemp
    from flyingpigeon import analogs
    from datetime import datetime as dt
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.datafetch import reanalyses
    from flyingpigeon.utils import get_variable, rename_variable

    self.status.set('execution started at : %s ' % dt.now(), 5)
    start_time = time.time()  # measure init ...

    resource = self.getInputValues(identifier='resource')
    bbox_obj = self.BBox.getValue()
    refSt = self.getInputValues(identifier='refSt')
    refEn = self.getInputValues(identifier='refEn')
    dateSt = self.getInputValues(identifier='dateSt')
    dateEn = self.getInputValues(identifier='dateEn')
    normalize = self.getInputValues(identifier='normalize')[0]
    distance = self.getInputValues(identifier='dist')[0]
    outformat = self.getInputValues(identifier='outformat')[0]
    timewin = int(self.getInputValues(identifier='timewin')[0])
    experiment = self.getInputValues(identifier='experiment')[0]
    dataset, var = experiment.split('_')

    # assumption: seasonwin and nanalog are used below but were never read
    # here; read them as in the related analogs processes
    seasonwin = int(self.getInputValues(identifier='seasonwin')[0])
    nanalog = int(self.getInputValues(identifier='nanalog')[0])

    refSt = dt.strptime(refSt[0], '%Y-%m-%d')
    refEn = dt.strptime(refEn[0], '%Y-%m-%d')
    dateSt = dt.strptime(dateSt[0], '%Y-%m-%d')
    dateEn = dt.strptime(dateEn[0], '%Y-%m-%d')

    if normalize == 'None':
        seacyc = False
    else:
        seacyc = True

    if outformat == 'ascii':
        outformat = '.txt'
    elif outformat == 'netCDF':
        outformat = '.nc'
    else:
        logger.error('output format not valid')

    if bbox_obj is not None:
        logger.info("bbox_obj={0}".format(bbox_obj.coords))
        bbox = [bbox_obj.coords[0][0], bbox_obj.coords[0][1],
                bbox_obj.coords[1][0], bbox_obj.coords[1][1]]
        logger.info("bbox={0}".format(bbox))
    else:
        bbox = None

    # start = min(refSt, dateSt)
    # end = max(refEn, dateEn)
    # region = self.getInputValues(identifier='region')[0]
    # bbox = [float(b) for b in region.split(',')]

    try:
        if dataset == 'NCEP':
            if 'z' in var:
                variable = 'hgt'
                level = var.strip('z')
                # conform_units_to = None
            else:
                variable = 'slp'
                level = None
                # conform_units_to = 'hPa'
        elif '20CRV2' in dataset:
            if 'z' in var:
                variable = 'hgt'
                level = var.strip('z')
                # conform_units_to = None
            else:
                variable = 'prmsl'
                level = None
                # conform_units_to = 'hPa'
        else:
            logger.error('Reanalyses dataset not known')
        logger.info('environment set')
    except Exception as e:
        msg = 'failed to set environment %s ' % e
        logger.error(msg)
        raise Exception(msg)

    logger.debug("init took %s seconds.", time.time() - start_time)
    self.status.set('Read in the arguments', 5)

    #################
    # get input data
    #################
    start_time = time.time()  # measure get_input_data ...
    self.status.set('fetching input data', 7)
    try:
        input = reanalyses(start=dateSt.year, end=dateEn.year,
                           variable=var, dataset=dataset)
        nc_subset = call(resource=input, variable=var, geom=bbox)
    except Exception as e:
        msg = 'failed to fetch or subset input files %s' % e
        logger.error(msg)
        raise Exception(msg)

    logger.debug("get_input_subset_dataset took %s seconds.",
                 time.time() - start_time)
    self.status.set('**** Input data fetched', 10)

    ########################
    # input data preparation
    ########################
    self.status.set('Start preparing input data', 12)
    start_time = time.time()  # measure data preparation ...
    try:
        self.status.set('Preparing simulation data', 15)
        simulation = call(resource=nc_subset, time_range=[dateSt, dateEn])
    except Exception:
        msg = 'failed to prepare simulation period'
        logger.debug(msg)

    try:
        self.status.set('Preparing target data', 17)
        var_target = get_variable(resource)
        # var_simulation = get_variable(simulation)
        archive = call(resource=resource, variable=var_target,
                       time_range=[refSt, refEn],
                       geom=bbox, t_calendar='standard',
                       # conform_units_to=conform_units_to,
                       spatial_wrapping='wrap',
                       regrid_destination=simulation,
                       regrid_options='bil')
    except Exception as e:
        msg = 'failed to subset archive dataset %s ' % e
        logger.debug(msg)
        raise Exception(msg)

    try:
        if var != var_target:
            rename_variable(archive, oldname=var_target, newname=var)
            logger.info('varname %s in netCDF renamed to %s'
                        % (var_target, var))
    except Exception as e:
        msg = 'failed to rename variable in target files %s ' % e
        logger.debug(msg)
        raise Exception(msg)

    try:
        if seacyc is True:
            seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                archive, simulation, method=normalize)
        else:
            seasoncyc_base = seasoncyc_sim = None
    except Exception as e:
        msg = 'failed to prepare seasonal cycle reference files %s ' % e
        logger.debug(msg)
        raise Exception(msg)

    ip, output = mkstemp(dir='.', suffix='.txt')
    output_file = path.abspath(output)
    files = [path.abspath(archive), path.abspath(simulation), output_file]

    logger.debug("data preparation took %s seconds.",
                 time.time() - start_time)

    ############################
    # generating the config file
    ############################
    self.status.set('writing config file', 15)
    start_time = time.time()  # measure write config ...
    try:
        config_file = analogs.get_configfile(
            files=files,
            seasoncyc_base=seasoncyc_base,
            seasoncyc_sim=seasoncyc_sim,
            timewin=timewin,
            varname=var,
            seacyc=seacyc,
            cycsmooth=91,
            nanalog=nanalog,
            seasonwin=seasonwin,
            distfun=distance,
            outformat=outformat,
            calccor=True,
            silent=False,
            period=[dt.strftime(refSt, '%Y-%m-%d'),
                    dt.strftime(refEn, '%Y-%m-%d')],
            bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
    except Exception as e:
        msg = 'failed to generate config file %s ' % e
        logger.debug(msg)
        raise Exception(msg)

    logger.debug("write_config took %s seconds.", time.time() - start_time)

    #######################
    # CASTf90 call
    #######################
    import subprocess
    import shlex

    start_time = time.time()  # measure call castf90
    self.status.set('Start CASTf90 call', 20)
    try:
        # self.status.set('execution of CASTf90', 50)
        cmd = 'analogue.out %s' % path.relpath(config_file)
        args = shlex.split(cmd)
        output, error = subprocess.Popen(
            args, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE).communicate()
        logger.info('analogue.out info:\n %s ' % output)
        logger.debug('analogue.out errors:\n %s ' % error)
        self.status.set('**** CASTf90 succeeded', 90)
    except Exception as e:
        msg = 'CASTf90 failed %s ' % e
        logger.error(msg)
        raise Exception(msg)

    logger.debug("castf90 took %s seconds.", time.time() - start_time)

    self.status.set('preparing output', 99)
    self.config.setValue(config_file)
    self.analogs.setValue(output_file)
    self.simulation_netcdf.setValue(simulation)
    self.target_netcdf.setValue(archive)

    self.status.set('execution ended', 100)
    logger.debug("total execution took %s seconds.",
                 time.time() - process_start_time)
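# Illustrative sketch: bilinear regridding of one dataset onto another's
# grid with the module's ocgis wrapper, as done for `archive` above.
# File names are hypothetical.
from flyingpigeon.ocgis_module import call

regridded = call(resource='model_field.nc',
                 regrid_destination='reanalysis_field.nc',
                 regrid_options='bil',
                 spatial_wrapping='wrap')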
def execute(self):
    init_process_logger('log.txt')
    self.output_log.setValue('log.txt')

    import time  # performance test
    process_start_time = time.time()  # measure process execution time ...

    from os import path
    from tempfile import mkstemp
    from datetime import datetime as dt
    from flyingpigeon import analogs
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.datafetch import reanalyses
    from flyingpigeon.utils import get_variable

    self.status.set('execution started at : %s ' % dt.now(), 5)
    start_time = time.time()  # measure init ...

    #######################
    # read input parameters
    #######################
    try:
        self.status.set('read input parameter : %s ' % dt.now(), 5)
        resource = self.getInputValues(identifier='resource')
        refSt = self.getInputValues(identifier='refSt')
        refEn = self.getInputValues(identifier='refEn')
        dateSt = self.getInputValues(identifier='dateSt')
        dateEn = self.getInputValues(identifier='dateEn')
        normalize = self.getInputValues(identifier='normalize')[0]
        distance = self.getInputValues(identifier='dist')[0]
        outformat = self.getInputValues(identifier='outformat')[0]
        timewin = int(self.getInputValues(identifier='timewin')[0])
        bbox_obj = self.BBox.getValue()
        seasonwin = int(self.getInputValues(identifier='seasonwin')[0])
        nanalog = int(self.getInputValues(identifier='nanalog')[0])
        # region = self.getInputValues(identifier='region')[0]
        # bbox = [float(b) for b in region.split(',')]
        # experiment = self.getInputValues(identifier='experiment')[0]
        # dataset, var = experiment.split('_')
        logger.info('input parameters set')
    except Exception as e:
        msg = 'failed to read input parameter %s ' % e
        logger.error(msg)
        raise Exception(msg)

    ######################################
    # convert types and set environment
    ######################################
    try:
        refSt = dt.strptime(refSt[0], '%Y-%m-%d')
        refEn = dt.strptime(refEn[0], '%Y-%m-%d')
        dateSt = dt.strptime(dateSt[0], '%Y-%m-%d')
        dateEn = dt.strptime(dateEn[0], '%Y-%m-%d')

        if normalize == 'None':
            seacyc = False
        else:
            seacyc = True

        if outformat == 'ascii':
            outformat = '.txt'
        elif outformat == 'netCDF':
            outformat = '.nc'
        else:
            logger.error('output format not valid')

        start = min(refSt, dateSt)
        end = max(refEn, dateEn)

        if bbox_obj is not None:
            logger.info("bbox_obj={0}".format(bbox_obj.coords))
            bbox = [bbox_obj.coords[0][0], bbox_obj.coords[0][1],
                    bbox_obj.coords[1][0], bbox_obj.coords[1][1]]
            logger.info("bbox={0}".format(bbox))
        else:
            bbox = None

        logger.info('environment set')
    except Exception as e:
        msg = 'failed to set environment %s ' % e
        logger.error(msg)
        raise Exception(msg)

    logger.debug("init took %s seconds.", time.time() - start_time)
    self.status.set('Read in and convert the arguments', 5)

    ########################
    # input data preparation
    ########################
    # TODO: Check if files contain more than one dataset
    self.status.set('Start preparing input data', 12)
    start_time = time.time()  # measure data preparation ...
    try:
        variable = get_variable(resource)
        archive = call(resource=resource, time_range=[refSt, refEn],
                       geom=bbox, spatial_wrapping='wrap')
        simulation = call(resource=resource, time_range=[dateSt, dateEn],
                          geom=bbox, spatial_wrapping='wrap')
        if seacyc is True:
            seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                archive, simulation, method=normalize)
        else:
            seasoncyc_base = None
            seasoncyc_sim = None
    except Exception as e:
        msg = 'failed to prepare archive and simulation files %s ' % e
        logger.debug(msg)
        raise Exception(msg)

    ip, output = mkstemp(dir='.', suffix='.txt')
    output_file = path.abspath(output)
    files = [path.abspath(archive), path.abspath(simulation), output_file]

    logger.debug("data preparation took %s seconds.",
                 time.time() - start_time)

    ############################
    # generating the config file
    ############################
    self.status.set('writing config file', 15)
    start_time = time.time()  # measure write config ...
    try:
        config_file = analogs.get_configfile(
            files=files,
            seasoncyc_base=seasoncyc_base,
            seasoncyc_sim=seasoncyc_sim,
            timewin=timewin,
            varname=variable,
            seacyc=seacyc,
            cycsmooth=91,
            nanalog=nanalog,
            seasonwin=seasonwin,
            distfun=distance,
            outformat=outformat,
            calccor=True,
            silent=False,
            period=[dt.strftime(refSt, '%Y-%m-%d'),
                    dt.strftime(refEn, '%Y-%m-%d')],
            bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
    except Exception as e:
        msg = 'failed to generate config file %s ' % e
        logger.debug(msg)
        raise Exception(msg)

    logger.debug("write_config took %s seconds.", time.time() - start_time)

    ##############
    # CASTf90 call
    ##############
    import subprocess
    import shlex

    start_time = time.time()  # measure call castf90
    self.status.set('Start CASTf90 call', 20)
    try:
        # self.status.set('execution of CASTf90', 50)
        cmd = 'analogue.out %s' % path.relpath(config_file)
        args = shlex.split(cmd)
        output, error = subprocess.Popen(
            args, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE).communicate()
        logger.info('analogue.out info:\n %s ' % output)
        logger.debug('analogue.out errors:\n %s ' % error)
        self.status.set('**** CASTf90 succeeded', 90)
    except Exception as e:
        msg = 'CASTf90 failed %s ' % e
        logger.error(msg)
        raise Exception(msg)

    logger.debug("castf90 took %s seconds.", time.time() - start_time)

    self.status.set('preparing output', 99)
    self.config.setValue(config_file)
    self.analogs.setValue(output_file)
    self.output_netcdf.setValue(simulation)

    self.status.set('execution ended', 100)
    logger.debug("total execution took %s seconds.",
                 time.time() - process_start_time)
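# Sketch of the mkstemp pattern used above; note that mkstemp also returns
# an open file descriptor, which should be closed if only the path is used,
# otherwise descriptors leak on long-running WPS services.
from os import close, path
from tempfile import mkstemp

fd, output = mkstemp(dir='.', suffix='.txt')
close(fd)  # keep the path, release the descriptor
output_file = path.abspath(output)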
def _handler(self, request, response):
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    LOGGER.info('Start process')

    from datetime import datetime as dt
    from flyingpigeon import weatherregimes as wr
    from tempfile import mkstemp

    response.update_status('execution started at : {}'.format(dt.now()), 5)

    ################################
    # reading in the input arguments
    ################################
    LOGGER.info('read in the arguments')
    # resources = self.getInputValues(identifier='resources')
    season = request.inputs['season'][0].data
    LOGGER.info('season %s', season)

    # bbox = [-80, 20, 50, 70]
    # TODO: Add checking for wrong coordinates and apply default if necessary
    bbox = []
    bboxStr = request.inputs['BBox'][0].data
    bboxStr = bboxStr.split(',')
    bbox.append(float(bboxStr[0]))
    bbox.append(float(bboxStr[2]))
    bbox.append(float(bboxStr[1]))
    bbox.append(float(bboxStr[3]))
    LOGGER.debug('BBOX for ocgis: {}'.format(bbox))
    LOGGER.debug('BBOX original: {}'.format(bboxStr))

    model_var = request.inputs['reanalyses'][0].data
    model, variable = model_var.split('_')

    period = request.inputs['period'][0].data
    LOGGER.info('period: {}'.format(period))
    anualcycle = request.inputs['anualcycle'][0].data
    kappa = request.inputs['kappa'][0].data
    LOGGER.info('kappa: {}'.format(kappa))

    method = request.inputs['method'][0].data
    LOGGER.info('Calc annual cycle with {}'.format(method))
    sseas = request.inputs['sseas'][0].data
    LOGGER.info('Annual cycle calc with {}'.format(sseas))

    start = dt.strptime(period.split('-')[0], '%Y%m%d')
    end = dt.strptime(period.split('-')[1], '%Y%m%d')
    LOGGER.debug('start: {0}, end: {1}'.format(start, end))

    ###########################
    # set the environment
    ###########################
    response.update_status('fetching data from archive', 10)
    try:
        if model == 'NCEP':
            getlevel = False
            if 'z' in variable:
                level = variable.strip('z')
                conform_units_to = None
            else:
                level = None
                conform_units_to = 'hPa'
        elif '20CRV2' in model:
            getlevel = False
            if 'z' in variable:
                level = variable.strip('z')
                conform_units_to = None
            else:
                level = None
                conform_units_to = 'hPa'
        else:
            LOGGER.error('Reanalyses dataset not known')
        LOGGER.info('environment set for model: {}'.format(model))
    except Exception as ex:
        msg = 'failed to set environment: {}'.format(ex)
        LOGGER.exception(msg)
        raise Exception(msg)

    ##########################################
    # fetch Data from original data archive
    ##########################################
    from flyingpigeon.datafetch import reanalyses as rl
    from flyingpigeon.utils import get_variable
    from os.path import basename, splitext
    from os import remove
    from netCDF4 import Dataset
    from numpy import squeeze

    try:
        model_nc = rl(start=start.year, end=end.year,
                      dataset=model, variable=variable, getlevel=getlevel)
        LOGGER.info('reanalyses data fetched')
    except Exception as ex:
        msg = 'failed to get reanalyses data: {}'.format(ex)
        LOGGER.exception(msg)
        raise Exception(msg)

    response.update_status('fetching data done', 15)

    ############################################################
    # get the required bbox and time region from resource data
    ############################################################
    response.update_status('subsetting region of interest', 17)
    # from flyingpigeon.weatherregimes import get_level
    from flyingpigeon.ocgis_module import call

    time_range = [start, end]

    ############################################################
    # Block of level and domain selection for huge geopotential datasets
    ############################################################
    LevMulti = False
    # ==========================================================
    if 'z' in variable:
        tmp_total = []
        origvar = get_variable(model_nc)

        if LevMulti is False:
            for z in model_nc:
                b0 = call(resource=z, variable=origvar,
                          level_range=[int(level), int(level)],
                          geom=bbox, spatial_wrapping='wrap',
                          prefix='levdom_' + basename(z)[0:-3])
                tmp_total.append(b0)
        else:
            # multiprocessing: no improvements yet, needs to be checked
            # on a high-performance machine...
            # -----------------------
            try:
                import ctypes
                import os
                # TODO: This lib is for linux
                mkl_rt = ctypes.CDLL('libmkl_rt.so')
                nth = mkl_rt.mkl_get_max_threads()
                LOGGER.debug('Current number of threads: {}'.format(nth))
                mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64)))
                nth = mkl_rt.mkl_get_max_threads()
                LOGGER.debug('NEW number of threads: {}'.format(nth))
                # TODO: Does it work with default shell=False in subprocess?
                os.environ['MKL_NUM_THREADS'] = str(nth)
                os.environ['OMP_NUM_THREADS'] = str(nth)
            except Exception as ex:
                msg = 'Failed to set THREADS: {}'.format(ex)
                LOGGER.debug(msg)
            # -----------------------

            from multiprocessing import Pool
            pool = Pool()
            # from multiprocessing.dummy import Pool as ThreadPool
            # pool = ThreadPool()
            tup_var = [origvar] * len(model_nc)
            tup_lev = [level] * len(model_nc)
            tup_bbox = [bbox] * len(model_nc)
            tup_args = zip(model_nc, tup_var, tup_lev, tup_bbox)

            tmp_total = pool.map(ocgis_call_wrap, tup_args)
            pool.close()
            pool.join()

        LOGGER.debug('Temporal subset files: {}'.format(tmp_total))

        tmp_total = sorted(tmp_total,
                           key=lambda i: splitext(basename(i))[0])
        inter_subset_tmp = call(resource=tmp_total, variable=origvar,
                                time_range=time_range)

        # clean up the temporary level/domain files
        for i in tmp_total:
            try:
                remove(i)
            except OSError:
                LOGGER.debug('could not remove temporary file %s' % i)

        # create a new variable for the selected level
        ds = Dataset(inter_subset_tmp, mode='a')
        z_var = ds.variables.pop(origvar)
        dims = z_var.dimensions
        new_var = ds.createVariable('z{}'.format(level), z_var.dtype,
                                    dimensions=(dims[0], dims[2], dims[3]))
        new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
        # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
        ds.close()
        model_subset = call(inter_subset_tmp,
                            variable='z{}'.format(level))
    else:
        model_subset = call(resource=model_nc, variable=variable,
                            geom=bbox, spatial_wrapping='wrap',
                            time_range=time_range,
                            # conform_units_to=conform_units_to
                            )
    # ==========================================================

    LOGGER.info('Dataset subset done: {}'.format(model_subset))
    response.update_status('dataset subsetted', 18)

    ##############################################
    # computing anomalies
    ##############################################
    response.update_status('computing anomalies ', 19)

    cycst = anualcycle.split('-')[0]
    cycen = anualcycle.split('-')[1]
    reference = [dt.strptime(cycst, '%Y%m%d'), dt.strptime(cycen, '%Y%m%d')]
    LOGGER.info('reference time: {}'.format(reference))

    model_anomal = wr.get_anomalies(model_subset, reference=reference,
                                    method=method,
                                    sseas=sseas)  # , variable=variable)

    #####################
    # extracting season
    #####################
    response.update_status('normalizing data', 21)
    model_season = wr.get_season(model_anomal, season=season)
    response.update_status('anomalies computed and normalized', 24)

    #######################
    # call the R scripts
    #######################
    response.update_status('Start weather regime clustering ', 25)
    import subprocess
    from flyingpigeon import config
    from os.path import curdir, join

    try:
        rworkspace = curdir
        Rsrc = config.Rsrc_dir()
        Rfile = 'weatherregimes_model.R'

        infile = model_season  # model_subset  # model_ponderate
        modelname = model
        yr1 = start.year
        yr2 = end.year
        ip, output_graphics = mkstemp(dir=curdir, suffix='.pdf')
        ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
        ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')

        # TODO: Rewrite this using os.path.join or pathlib
        args = ['Rscript', join(Rsrc, Rfile),
                '%s/' % curdir,
                '%s/' % Rsrc,
                '%s' % infile,
                '%s' % variable,
                '%s' % output_graphics,
                '%s' % file_pca,
                '%s' % file_class,
                '%s' % season,
                '%s' % start.year,
                '%s' % end.year,
                '%s' % model_var,
                '%s' % kappa]
        LOGGER.info('R call built')
        LOGGER.debug('ARGS: %s' % args)
    except Exception as ex:
        msg = 'failed to build the R command: {}'.format(ex)
        LOGGER.exception(msg)
        raise Exception(msg)

    try:
        output, error = subprocess.Popen(
            args, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE).communicate()
        LOGGER.info('R outlog info:\n {}'.format(output))
        LOGGER.debug('R outlog errors:\n {}'.format(error))
        if len(output) > 0:
            response.update_status('**** weatherregime in R succeeded', 90)
        else:
            LOGGER.error('No output returned from R call')
    except Exception as ex:
        msg = 'failed to run the R weatherregime: {}'.format(ex)
        LOGGER.exception(msg)
        raise Exception(msg)

    response.update_status('Weather regime clustering done ', 93)

    ############################################
    # set the outputs
    ############################################
    response.update_status('Set the process outputs ', 95)
    response.outputs['Routput_graphic'].file = output_graphics
    response.outputs['output_pca'].file = file_pca
    response.outputs['output_classification'].file = file_class
    response.outputs['output_netcdf'].file = model_subset
    response.update_status('done', 100)
    return response
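# Illustrative sketch (not part of the process): fanning per-file subsetting
# out to a process pool, as attempted in the LevMulti branch above.
# `subset_one` is a hypothetical, module-level (hence picklable) worker.
from multiprocessing import Pool

def subset_one(args):
    filename, variable, level, bbox = args
    # ... subset a single file (e.g. via ocgis) and return the output path
    return filename

if __name__ == '__main__':
    jobs = [('f1.nc', 'hgt', 500, [-80, 20, 50, 70]),
            ('f2.nc', 'hgt', 500, [-80, 20, 50, 70])]
    pool = Pool()
    results = pool.map(subset_one, jobs)
    pool.close()
    pool.join()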
def _handler(self, request, response):
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    LOGGER.info('Start process')
    response.update_status('execution started at : {}'.format(dt.now()), 5)

    process_start_time = time.time()  # measure process execution time ...
    start_time = time.time()  # measure init ...

    ################################
    # reading in the input arguments
    ################################
    try:
        response.update_status('read input parameter : %s ' % dt.now(), 5)
        refSt = request.inputs['refSt'][0].data
        refEn = request.inputs['refEn'][0].data
        dateSt = request.inputs['dateSt'][0].data
        dateEn = request.inputs['dateEn'][0].data
        seasonwin = request.inputs['seasonwin'][0].data
        nanalog = request.inputs['nanalog'][0].data
        bbox = [-80, 20, 50, 70]

        # if bbox_obj is not None:
        #     LOGGER.info("bbox_obj={0}".format(bbox_obj.coords))
        #     bbox = [bbox_obj.coords[0][0], bbox_obj.coords[0][1],
        #             bbox_obj.coords[1][0], bbox_obj.coords[1][1]]
        #     LOGGER.info("bbox={0}".format(bbox))
        # else:
        #     bbox = None
        # region = self.getInputValues(identifier='region')[0]
        # bbox = [float(b) for b in region.split(',')]
        # bbox_obj = self.BBox.getValue()

        normalize = request.inputs['normalize'][0].data
        distance = request.inputs['dist'][0].data
        outformat = request.inputs['outformat'][0].data
        timewin = request.inputs['timewin'][0].data

        model_var = request.inputs['reanalyses'][0].data
        model, var = model_var.split('_')

        # experiment = self.getInputValues(identifier='experiment')[0]
        # dataset, var = experiment.split('_')

        LOGGER.info('input parameters set')
        response.update_status('Read in and convert the arguments', 5)
    except Exception as e:
        msg = 'failed to read input parameter %s ' % e
        LOGGER.error(msg)
        raise Exception(msg)

    ######################################
    # convert types and set environment
    ######################################
    try:
        response.update_status('Preparing environment, converting arguments', 7)
        LOGGER.debug('date: %s %s %s %s '
                     % (type(refSt), refEn, dateSt, dateEn))

        start = min(refSt, dateSt)
        end = max(refEn, dateEn)

        # refSt = dt.strftime(refSt, '%Y-%m-%d')
        # refEn = dt.strftime(refEn, '%Y-%m-%d')
        # dateSt = dt.strftime(dateSt, '%Y-%m-%d')
        # dateEn = dt.strftime(dateEn, '%Y-%m-%d')

        if normalize == 'None':
            seacyc = False
        else:
            seacyc = True

        if outformat == 'ascii':
            outformat = '.txt'
        elif outformat == 'netCDF':
            outformat = '.nc'
        else:
            LOGGER.error('output format not valid')
    except Exception as e:
        msg = 'failed to set environment %s ' % e
        LOGGER.error(msg)
        raise Exception(msg)

    ###########################
    # set the environment
    ###########################
    response.update_status('fetching data from archive', 10)
    try:
        if model == 'NCEP':
            if 'z' in var:
                level = var.strip('z')
                conform_units_to = None
            else:
                level = None
                conform_units_to = 'hPa'
        elif '20CRV2' in model:
            if 'z' in var:
                level = var.strip('z')
                conform_units_to = None
            else:
                level = None
                conform_units_to = 'hPa'
        else:
            LOGGER.error('Reanalyses dataset not known')
        LOGGER.info('environment set for model: %s' % model)
    except Exception:
        msg = 'failed to set environment'
        LOGGER.exception(msg)
        raise Exception(msg)

    ##########################################
    # fetch Data from original data archive
    ##########################################
    try:
        model_nc = rl(start=start.year, end=end.year,
                      dataset=model, variable=var)
        LOGGER.info('reanalyses data fetched')
    except Exception:
        msg = 'failed to get reanalyses data'
        LOGGER.exception(msg)
        raise Exception(msg)

    response.update_status('subsetting region of interest', 17)
    # from flyingpigeon.weatherregimes import get_level
    LOGGER.debug("start and end time: %s - %s" % (start, end))
    time_range = [start, end]

    model_subset = call(resource=model_nc, variable=var,
                        geom=bbox, spatial_wrapping='wrap',
                        time_range=time_range,
                        # conform_units_to=conform_units_to
                        )
    LOGGER.info('Dataset subset done: %s ', model_subset)
    response.update_status('dataset subsetted', 19)

    LOGGER.debug("get_input_subset_dataset took %s seconds.",
                 time.time() - start_time)
    response.update_status('**** Input data fetched', 10)

    ########################
    # input data preparation
    ########################
    response.update_status('Start preparing input data', 12)
    start_time = time.time()  # measure data preparation ...
    try:
        # Construct descriptive filenames for the three files
        # listed in the config file
        refDatesString = (dt.strftime(refSt, '%Y-%m-%d') + "_" +
                          dt.strftime(refEn, '%Y-%m-%d'))
        simDatesString = (dt.strftime(dateSt, '%Y-%m-%d') + "_" +
                          dt.strftime(dateEn, '%Y-%m-%d'))
        archiveNameString = "base_" + var + "_" + refDatesString + \
            '_%.1f_%.1f_%.1f_%.1f' % (bbox[0], bbox[2], bbox[1], bbox[3])
        simNameString = "sim_" + var + "_" + simDatesString + \
            '_%.1f_%.1f_%.1f_%.1f' % (bbox[0], bbox[2], bbox[1], bbox[3])
        archive = call(resource=model_subset,
                       time_range=[refSt, refEn],
                       prefix=archiveNameString)
        simulation = call(resource=model_subset,
                          time_range=[dateSt, dateEn],
                          prefix=simNameString)
        LOGGER.info('archive and simulation files generated: %s, %s'
                    % (archive, simulation))
    except Exception as e:
        msg = 'failed to prepare archive and simulation files %s ' % e
        LOGGER.debug(msg)
        raise Exception(msg)

    try:
        if seacyc is True:
            LOGGER.info('normalization function with method: %s '
                        % normalize)
            seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                archive, simulation, method=normalize)
        else:
            seasoncyc_base = seasoncyc_sim = None
    except Exception as e:
        msg = 'failed to generate normalization files %s ' % e
        LOGGER.debug(msg)
        raise Exception(msg)

    ip, output_file = mkstemp(dir='.', suffix='.txt')
    files = [path.abspath(archive), path.abspath(simulation), output_file]
    LOGGER.debug("Data preparation took %s seconds.",
                 time.time() - start_time)

    ############################
    # generate the config file
    ############################
    response.update_status('writing config file', 15)
    start_time = time.time()  # measure write config ...
    try:
        config_file = analogs.get_configfile(
            files=files,
            seasoncyc_base=seasoncyc_base,
            seasoncyc_sim=seasoncyc_sim,
            timewin=timewin,
            varname=var,
            seacyc=seacyc,
            cycsmooth=91,
            nanalog=nanalog,
            seasonwin=seasonwin,
            distfun=distance,
            outformat=outformat,
            calccor=True,
            silent=False,
            period=[dt.strftime(refSt, '%Y-%m-%d'),
                    dt.strftime(refEn, '%Y-%m-%d')],
            bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
    except Exception as e:
        msg = 'failed to generate config file %s ' % e
        LOGGER.debug(msg)
        raise Exception(msg)

    LOGGER.debug("write_config took %s seconds.", time.time() - start_time)

    #######################
    # CASTf90 call
    #######################
    start_time = time.time()  # measure call castf90
    response.update_status('Start CASTf90 call', 20)
    try:
        # response.update_status('execution of CASTf90', 50)
        cmd = ['analogue.out', path.relpath(config_file)]
        LOGGER.debug("castf90 command: %s", cmd)
        output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        LOGGER.info('analogue output:\n %s', output)
        response.update_status('**** CASTf90 succeeded', 90)
    except CalledProcessError as e:
        msg = 'CASTf90 failed:\n{0}'.format(e.output)
        LOGGER.error(msg)
        raise Exception(msg)

    LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)

    ########################
    # generate analog viewer
    ########################
    response.update_status('preparing output', 50)
    response.outputs['config'].file = config_file
    response.outputs['analogs'].file = output_file
    response.outputs['output_netcdf'].file = simulation

    try:
        formated_analogs_file = analogs.reformat_analogs(output_file)
        response.outputs['formated_analogs'].file = formated_analogs_file
        LOGGER.info('analogs reformatted')
        response.update_status('Successfully reformatted analog file', 60)
    except Exception as e:
        msg = 'Failed to reformat analogs file: %s' % e
        LOGGER.error(msg)
        raise Exception(msg)

    try:
        output_av = analogs.get_viewer(
            formated_analogs_file,
            path.basename(config_file))
        response.outputs['output_html'].file = output_av.name
        response.update_status('Successfully generated analogs viewer', 90)
        LOGGER.info('output_av: %s ', output_av)
    except Exception as e:
        msg = 'Failed to generate viewer: %s' % e
        LOGGER.error(msg)
        raise Exception(msg)

    response.update_status('execution ended', 100)
    LOGGER.debug("total execution took %s seconds.",
                 time.time() - process_start_time)
    return response
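# Sketch of the check_output pattern used for CASTf90 above: stderr is
# merged into stdout, so a failing run raises CalledProcessError carrying
# the solver's own message. Command and config name are hypothetical.
import subprocess

try:
    out = subprocess.check_output(['analogue.out', 'config_example.txt'],
                                  stderr=subprocess.STDOUT)
except subprocess.CalledProcessError as err:
    print('CASTf90 failed:\n%s' % err.output)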
def calc_indice_percentile(resources=[], variable=None,
                           prefix=None, indices='TG90p',
                           refperiod=None, groupings='yr',
                           polygons=None, percentile=90, mosaic=False,
                           dir_output=None, dimension_map=None):
    """
    Calculates given indices for suitable files in the appropriate time grouping and polygon.

    :param resources: list of filenames in data reference syntax (DRS) convention (netcdf)
    :param variable: variable name to be selected in the netcdf file (default=None)
    :param indices: list of indices (default='TG90p')
    :param prefix: filename prefix
    :param refperiod: reference period as string 'YYYYMMDD-YYYYMMDD'
    :param groupings: indices time aggregation (default='yr')
    :param dir_output: output directory for result files (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices. Files are saved into dir_output.
    """
    from os.path import exists
    from os import makedirs
    import uuid
    from numpy import ma
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.subset import clipping
    from flyingpigeon.utils import get_values, get_time, get_variable, sort_by_filename

    if type(resources) != list:
        resources = list([resources])
    if type(indices) != list:
        indices = list([indices])
    if type(groupings) != list:
        groupings = list([groupings])
    if type(refperiod) == list:
        refperiod = refperiod[0]

    if refperiod is not None:  # was 'is None', which crashed on the split below
        start = dt.strptime(refperiod.split('-')[0], '%Y%m%d')
        end = dt.strptime(refperiod.split('-')[1], '%Y%m%d')
        time_range = [start, end]
    else:
        time_range = None

    if dir_output is not None and not exists(dir_output):
        makedirs(dir_output)

    ################################################
    # Compute a custom percentile basis using ICCLIM
    ################################################
    from ocgis.contrib import library_icclim as lic

    nc_indices = []
    nc_dic = sort_by_filename(resources)

    for grouping in groupings:
        calc_group = calc_grouping(grouping)
        for key in nc_dic.keys():
            resource = nc_dic[key]
            if variable is None:
                variable = get_variable(resource)

            # subset (and optionally clip) the reference period
            if polygons is None:
                nc_reference = call(resource=resource,
                                    prefix=str(uuid.uuid4()),
                                    time_range=time_range,
                                    output_format='nc',
                                    dir_output=dir_output)
            else:
                nc_reference = clipping(resource=resource,
                                        prefix=str(uuid.uuid4()),
                                        time_range=time_range,
                                        output_format='nc',
                                        polygons=polygons,
                                        dir_output=dir_output,
                                        mosaic=mosaic)

            arr = ma.masked_array(get_values(resource=nc_reference))
            dt_arr = ma.masked_array(get_time(resource=nc_reference))
            window_width = 5

            for indice in indices:
                name = indice.replace('_', str(percentile))
                var = indice.split('_')[0]

                operation = None
                if 'T' in var:
                    if percentile >= 50:
                        operation = 'Icclim%s90p' % var
                        func = 'icclim_%s90p' % var  # e.g. icclim_TG90p
                    else:
                        operation = 'Icclim%s10p' % var
                        func = 'icclim_%s10p' % var
                if operation is None:
                    # only temperature based indices are handled here
                    raise Exception('unsupported indice for percentile basis: %s' % indice)

                ################################
                # load the appropriate operation
                ################################
                ops = [op for op in dir(lic) if operation in op]
                if len(ops) == 0:
                    raise Exception('operator does not exist %s' % operation)

                # getattr replaces the former exec-string call
                percentile_dict = getattr(lic, ops[0]).get_percentile_dict(
                    arr, dt_arr, percentile, window_width)
                calc = [{'func': func,
                         'name': name,
                         'kwds': {'percentile_dict': percentile_dict}}]

                if polygons is None:
                    nc_indices.extend(
                        call(resource=resource,
                             prefix=key.replace(variable, name).replace('_day_', '_%s_' % grouping),
                             calc=calc,
                             calc_grouping=calc_group,
                             output_format='nc',
                             dir_output=dir_output))
                else:
                    nc_indices.extend(
                        clipping(resource=resource,
                                 prefix=key.replace(variable, name).replace('_day_', '_%s_' % grouping),
                                 calc=calc,
                                 calc_grouping=calc_group,
                                 output_format='nc',
                                 dir_output=dir_output,
                                 polygons=polygons,
                                 mosaic=mosaic))
    if len(nc_indices) == 0:
        logger.debug('No indices are calculated')
        return None
    return nc_indices
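# Usage sketch (not part of the module): file paths are hypothetical, and the
# indice is given with an underscore placeholder that the function replaces by
# the percentile value (e.g. 'TG_p' with percentile=90 yields 'TG90p'):
#
#     nc_files = calc_indice_percentile(
#         resources=['/data/tas_day_MODEL_historical_r1i1p1.nc'],
#         indices=['TG_p'], percentile=90, groupings='yr',
#         refperiod='19710101-20001231', dir_output='/tmp/indices')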
def execute(self):
    logger.info('Start process')
    from datetime import datetime as dt
    from flyingpigeon import weatherregimes as wr
    from tempfile import mkstemp

    ################################
    # reading in the input arguments
    ################################
    try:
        logger.info('read in the arguments')
        resource = self.getInputValues(identifier='resource')
        season = self.getInputValues(identifier='season')[0]
        bbox = self.getInputValues(identifier='BBox')[0]
        # model_var = self.getInputValues(identifier='reanalyses')[0]
        period = self.getInputValues(identifier='period')[0]
        anualcycle = self.getInputValues(identifier='anualcycle')[0]
        # model, var = model_var.split('_')

        bbox = [float(b) for b in bbox.split(',')]
        start = dt.strptime(period.split('-')[0], '%Y%m%d')
        end = dt.strptime(period.split('-')[1], '%Y%m%d')
        kappa = int(self.getInputValues(identifier='kappa')[0])

        logger.info('bbox %s' % bbox)
        logger.info('period %s' % str(period))
        logger.info('season %s' % str(season))
    except Exception as e:
        logger.debug('failed to read in the arguments %s ' % e)

    ############################################################
    # get the required bbox and time region from resource data
    ############################################################
    # from flyingpigeon.weatherregimes import get_level
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.utils import get_variable

    time_range = [start, end]
    variable = get_variable(resource)

    model_subset = call(resource=resource, variable=variable,
                        geom=bbox, spatial_wrapping='wrap',
                        time_range=time_range,
                        # conform_units_to=conform_units_to
                        )
    logger.info('Dataset subset done: %s ' % model_subset)

    ##############################
    # computing anomalies
    ##############################
    cycst = anualcycle.split('-')[0]
    cycen = anualcycle.split('-')[1]  # end of the reference cycle (was erroneously [0])
    reference = [dt.strptime(cycst, '%Y%m%d'), dt.strptime(cycen, '%Y%m%d')]
    model_anomal = wr.get_anomalies(model_subset, reference=reference)

    #####################
    # extracting season
    #####################
    model_season = wr.get_season(model_anomal, season=season)

    #######################
    # call the R scripts
    #######################
    import subprocess
    from flyingpigeon import config
    from os.path import curdir, join

    try:
        rworkspace = curdir
        Rsrc = config.Rsrc_dir()
        Rfile = 'weatherregimes_model.R'

        infile = model_season  # model_subset  # model_ponderate
        modelname = 'MODEL'
        yr1 = start.year
        yr2 = end.year
        ip, output_graphics = mkstemp(dir=curdir, suffix='.pdf')
        ip, file_pca = mkstemp(dir=curdir, suffix='.dat')
        ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')

        args = ['Rscript', join(Rsrc, Rfile),
                '%s/' % curdir, '%s/' % Rsrc,
                '%s' % infile, '%s' % variable,
                '%s' % output_graphics, '%s' % file_pca,
                '%s' % file_class, '%s' % season,
                '%s' % yr1, '%s' % yr2,
                '%s' % modelname, '%s' % kappa]
        logger.info('R call built')
    except Exception as e:
        msg = 'failed to build the R command %s' % e
        logger.error(msg)
        raise Exception(msg)

    try:
        output, error = subprocess.Popen(args,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE).communicate()
        logger.info('R stdout:\n %s ' % output)
        logger.debug('R stderr:\n %s ' % error)
        if len(output) > 0:
            self.status.set('**** weatherregime in R succeeded', 90)
        else:
            logger.error('no output returned from R call')
    except Exception as e:
        msg = 'weatherregime in R %s ' % e
        logger.error(msg)
        raise Exception(msg)

    ############################################
    # set the outputs
    ############################################
    self.Routput_graphic.setValue(output_graphics)
    self.output_pca.setValue(file_pca)
    self.output_classification.setValue(file_class)
    self.output_netcdf.setValue(model_season)
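# For orientation (not part of the process): the argument list built above
# corresponds to a shell call of the form, with illustrative placeholders:
#
#     Rscript <Rsrc>/weatherregimes_model.R ./ <Rsrc>/ <infile.nc> <variable> \
#         <output.pdf> <pca.dat> <classes.Rdat> <season> <yr1> <yr2> MODEL <kappa>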
def calc_indice_simple(resource=[], variable=None, prefix=None, indice='SU',
                       polygons=None, mosaic=False, grouping='yr',
                       dir_output=None, dimension_map=None, memory_limit=None):
    """
    Calculates given simple indices for suitable files in the appropriate time grouping and polygon.

    :param resource: list of filenames in data reference syntax (DRS) convention (netcdf)
    :param variable: variable name to be selected in the netcdf file (default=None)
    :param indice: indice (default='SU')
    :param polygons: list of polygons (default=None)
    :param grouping: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices. Files are saved into dir_output.
    """
    from os.path import exists
    from os import makedirs
    import uuid

    from flyingpigeon import ocgis_module
    from flyingpigeon.subset import clipping

    if type(resource) != list:
        resource = list([resource])
    if type(polygons) != list and polygons is not None:
        polygons = list([polygons])

    if dir_output is not None and not exists(dir_output):
        makedirs(dir_output)

    datasets = list(sort_by_filename(resource).keys())
    if len(datasets) > 1:
        LOGGER.warning('more than one dataset in resource; using %s' % datasets[0])
    # fall back to the first dataset so 'key' is always defined
    key = datasets[0]

    outputs = []

    if variable is None:
        variable = get_variable(resource)
        LOGGER.debug('Variable detected %s ' % variable)

    try:
        # icclim can't handle 'kg m-2 s-1'; precipitation needs to be 'mm/day'
        if variable == 'pr':
            calc = 'pr=pr*86400'
            ncs = ocgis_module.call(resource=resource,
                                    variable=variable,
                                    dimension_map=dimension_map,
                                    calc=calc,
                                    memory_limit=memory_limit,
                                    prefix=str(uuid.uuid4()),
                                    dir_output=dir_output,
                                    output_format='nc')
        else:
            ncs = resource
        try:
            calc = [{'func': 'icclim_' + indice, 'name': indice}]
            LOGGER.info('calc: %s' % calc)
            try:
                calc_group = calc_grouping(grouping)
                LOGGER.info('calc_group: %s' % calc_group)
                if polygons is None:
                    try:
                        prefix = key.replace(variable, indice).replace('_day_', '_%s_' % grouping)
                        LOGGER.debug(' **** dir_output = %s ' % dir_output)
                        tmp = ocgis_module.call(resource=ncs,
                                                variable=variable,
                                                dimension_map=dimension_map,
                                                calc=calc,
                                                calc_grouping=calc_group,
                                                prefix=prefix,
                                                dir_output=dir_output,
                                                output_format='nc')
                        if len(tmp) != 0:
                            outputs.extend(tmp)
                        else:
                            msg = 'could not calc indice %s for domain' % indice
                            LOGGER.exception(msg)
                    except:
                        msg = 'could not calc indice %s for domain' % indice
                        LOGGER.exception(msg)
                else:
                    try:
                        prefix = key.replace(variable, indice).replace('_day_', '_%s_' % grouping)
                        tmp = clipping(resource=ncs,
                                       variable=variable,
                                       dimension_map=dimension_map,
                                       calc=calc,
                                       calc_grouping=calc_group,
                                       prefix=prefix,
                                       polygons=polygons,
                                       mosaic=mosaic,
                                       dir_output=dir_output,
                                       output_format='nc')
                        if len(tmp) != 0:
                            outputs.extend(tmp)
                        else:
                            msg = 'could not calc clipped indice %s ' % indice
                            LOGGER.exception(msg)
                    except:
                        msg = 'could not calc clipped indice %s ' % indice
                        LOGGER.debug(msg)
                LOGGER.info('indice file calculated: %s' % tmp)
            except:
                msg = 'could not calc indice %s for key %s and grouping %s' % (indice, key, grouping)
                LOGGER.exception(msg)
        except:
            msg = 'could not calc indice %s ' % indice
            LOGGER.exception(msg)
    except:
        msg = 'could not calculate indices'
        LOGGER.exception(msg)

    LOGGER.info('indice outputs %s ' % outputs)
    if len(outputs) == 0:
        LOGGER.debug('No indices are calculated')
        return None
    return outputs
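# Usage sketch (not part of the module; the input path is hypothetical).
# 'SU' (summer days) is a simple icclim indice computed from daily tasmax:
#
#     nc_files = calc_indice_simple(
#         resource=['/data/tasmax_day_MODEL_rcp45_r1i1p1.nc'],
#         indice='SU', grouping='yr', dir_output='/tmp/indices')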
def execute(self):
    logger.info('Start process')
    from datetime import datetime as dt
    from flyingpigeon import weatherregimes as wr
    from tempfile import mkstemp

    self.status.set('execution started at : %s ' % dt.now(), 5)

    ################################
    # reading in the input arguments
    ################################
    try:
        logger.info('read in the arguments')
        # resources = self.getInputValues(identifier='resources')
        season = self.getInputValues(identifier='season')[0]
        bbox_obj = self.BBox.getValue()
        model_var = self.getInputValues(identifier='reanalyses')[0]
        period = self.getInputValues(identifier='period')[0]
        anualcycle = self.getInputValues(identifier='anualcycle')[0]
        model, variable = model_var.split('_')
        kappa = int(self.getInputValues(identifier='kappa')[0])
        logger.info('period %s' % str(period))
        logger.info('season %s' % str(season))
    except Exception as e:
        logger.debug('failed to read in the arguments %s ' % e)

    try:
        start = dt.strptime(period.split('-')[0], '%Y%m%d')
        end = dt.strptime(period.split('-')[1], '%Y%m%d')

        if bbox_obj is not None:
            logger.info("bbox_obj={0}".format(bbox_obj.coords))
            bbox = [bbox_obj.coords[0][0], bbox_obj.coords[0][1],
                    bbox_obj.coords[1][0], bbox_obj.coords[1][1]]
            logger.info("bbox={0}".format(bbox))
        else:
            bbox = None
    except Exception as e:
        logger.debug('failed to transform BBOXObject %s ' % e)

    ###########################
    # set the environment
    ###########################
    self.status.set('fetching data from archive', 10)
    try:
        if model == 'NCEP':
            if 'z' in variable:
                level = variable.strip('z')
                conform_units_to = None
            else:
                level = None
                conform_units_to = 'hPa'
        elif '20CRV2' in model:
            if 'z' in variable:
                level = variable.strip('z')
                conform_units_to = None
            else:
                level = None
                conform_units_to = 'hPa'
        else:
            logger.error('Reanalyses dataset not known')
        logger.info('environment set')
    except Exception as e:
        msg = 'failed to set environment %s ' % e
        logger.error(msg)
        raise Exception(msg)

    ##########################################
    # fetch Data from original data archive
    ##########################################
    from flyingpigeon.datafetch import reanalyses as rl
    try:
        model_nc = rl(start=start.year, end=end.year,
                      dataset=model, variable=variable)
        logger.info('reanalyses data fetched')
    except Exception as e:
        msg = 'failed to get reanalyses data %s' % e
        logger.debug(msg)
        raise Exception(msg)
    self.status.set('fetching data done', 15)

    ############################################################
    # get the required bbox and time region from resource data
    ############################################################
    self.status.set('start subsetting', 17)
    # from flyingpigeon.weatherregimes import get_level
    from flyingpigeon.ocgis_module import call

    time_range = [start, end]
    model_subset = call(resource=model_nc, variable=variable,
                        geom=bbox, spatial_wrapping='wrap',
                        time_range=time_range,
                        # conform_units_to=conform_units_to
                        )
    logger.info('Dataset subset done: %s ' % model_subset)
    self.status.set('dataset subsetted', 19)

    ##############################
    # computing anomalies
    ##############################
    self.status.set('computing anomalies ', 19)
    cycst = anualcycle.split('-')[0]
    cycen = anualcycle.split('-')[1]  # end of the reference cycle (was erroneously [0])
    reference = [dt.strptime(cycst, '%Y%m%d'), dt.strptime(cycen, '%Y%m%d')]
    model_anomal = wr.get_anomalies(model_subset, reference=reference)

    #####################
    # extracting season
    #####################
    model_season = wr.get_season(model_anomal, season=season)
    self.status.set('values normalized', 20)

    #######################
    # call the R scripts
    #######################
    self.status.set('Start weather regime clustering ', 25)
    import subprocess
    from flyingpigeon import config
    from os.path import curdir, join

    try:
        rworkspace = curdir
        Rsrc = config.Rsrc_dir()
        Rfile = 'weatherregimes_model.R'

        infile = model_season  # model_subset  # model_ponderate
        modelname = model
        yr1 = start.year
        yr2 = end.year
        ip, output_graphics = mkstemp(dir=curdir, suffix='.pdf')
        ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
        ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')

        args = ['Rscript', join(Rsrc, Rfile),
                '%s/' % curdir, '%s/' % Rsrc,
                '%s' % infile, '%s' % variable,
                '%s' % output_graphics, '%s' % file_pca,
                '%s' % file_class, '%s' % season,
                '%s' % yr1, '%s' % yr2,
                '%s' % model_var, '%s' % kappa]
        logger.info('R call built')
    except Exception as e:
        msg = 'failed to build the R command %s' % e
        logger.debug(msg)
        raise Exception(msg)

    try:
        output, error = subprocess.Popen(args,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE).communicate()
        logger.info('R stdout:\n %s ' % output)
        logger.debug('R stderr:\n %s ' % error)
        if len(output) > 0:
            self.status.set('**** weatherregime in R succeeded', 90)
        else:
            logger.error('no output returned from R call')
    except Exception as e:
        msg = 'weatherregime in R %s ' % e
        logger.error(msg)
        raise Exception(msg)
    self.status.set('Weather regime clustering done ', 80)

    ############################################
    # set the outputs
    ############################################
    self.status.set('Set the process outputs ', 95)
    self.Routput_graphic.setValue(output_graphics)
    self.output_pca.setValue(file_pca)
    self.output_classification.setValue(file_class)
    self.output_netcdf.setValue(model_season)
def _handler(self, request, response):
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    try:
        resources = archiveextract(
            resource=rename_complexinputs(request.inputs['resource']))

        if 'region' in request.inputs:
            region = request.inputs['region'][0].data
        else:
            region = None

        if 'mosaic' in request.inputs:
            mosaic = request.inputs['mosaic'][0].data
        else:
            mosaic = False

        percentile = request.inputs['percentile'][0].data

        LOGGER.debug("mosaic %s " % mosaic)
        LOGGER.debug('percentile: %s' % percentile)
        LOGGER.debug('region %s' % region)
        LOGGER.debug('Nr of input files %s ' % len(resources))
    except:
        LOGGER.exception('failed to read in the arguments')

    from flyingpigeon.utils import sort_by_filename
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.subset import clipping

    datasets = sort_by_filename(resources, historical_concatination=True)
    results = []

    # use the requested percentile (was hard-coded to 90)
    kwds = {'percentile': percentile, 'window_width': 5}
    calc = [{'func': 'daily_perc', 'name': 'dp', 'kwds': kwds}]

    try:
        for key in datasets.keys():
            try:
                if region is None:
                    result = call(resource=datasets[key],
                                  output_format='nc',
                                  calc=calc,
                                  # prefix=key,
                                  # time_region={'year': [1995, 2000]}
                                  # calc_grouping='year'
                                  )
                    results.extend([result])
                    LOGGER.debug('percentile based indice done for %s' % result)
                else:
                    result = clipping(resource=datasets[key],
                                      calc=calc,
                                      polygons=region,
                                      mosaic=mosaic)
                    results.extend(result)
            except:
                LOGGER.exception(
                    "failed to calculate percentile based indice for %s " % key)
    except:
        LOGGER.exception("failed to calculate percentile indices")

    tarf = archive(results)
    response.outputs['output_archive'].file = tarf

    i = next((i for i, x in enumerate(results) if x), None)
    if i is None:
        response.outputs['ncout'].file = 'dummy.nc'  # no valid result produced
    else:
        response.outputs['ncout'].file = results[i]

    response.update_status("done", 100)
    return response
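# The core of the handler above is the 'daily_perc' calc dictionary; the same
# percentile climatology can be computed outside the WPS context (hypothetical
# input path, values mirror the defaults used above):
#
#     from flyingpigeon.ocgis_module import call
#     calc = [{'func': 'daily_perc', 'name': 'dp',
#              'kwds': {'percentile': 90, 'window_width': 5}}]
#     out_nc = call(resource='/data/tas_day_obs.nc', calc=calc, output_format='nc')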
def calc_indice_unconventional(resource=[], variable=None, prefix=None,
                               indices=None, polygons=None, groupings=None,
                               dir_output=None, dimension_map=None):
    """
    Calculates given indices for suitable files in the appropriate time grouping and polygon.

    :param resource: list of filenames in drs convention (netcdf)
    :param variable: variable name to be selected in the netcdf file (default=None)
    :param indices: list of indices, e.g. 'TGx' (default=None)
    :param polygons: list of polygons (default=None)
    :param groupings: indices time aggregation (default=None)
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices. Files are saved into dir_output
    """
    from os.path import exists
    from os import remove, makedirs
    import uuid
    from flyingpigeon import ocgis_module
    from flyingpigeon.subset import get_ugid, get_geom

    if type(resource) != list:
        resource = list([resource])
    if type(indices) != list:
        indices = list([indices])
    if type(polygons) != list and polygons is not None:
        polygons = list([polygons])
    elif polygons is None:
        polygons = [None]
    if type(groupings) != list:
        groupings = list([groupings])

    if dir_output is not None and not exists(dir_output):
        makedirs(dir_output)

    experiments = sort_by_filename(resource)
    outputs = []

    logger.info('environment for calc_indice_unconventional set')

    for key in experiments:
        if variable is None:
            variable = get_variable(experiments[key][0])
        try:
            ncs = experiments[key]
            for indice in indices:
                logger.info('indice: %s' % indice)
                try:
                    for grouping in groupings:
                        logger.info('grouping: %s' % grouping)
                        try:
                            calc_group = calc_grouping(grouping)
                            logger.info('calc_group: %s' % calc_group)
                            for polygon in polygons:
                                try:
                                    domain = key.split('_')[1].split('-')[0]
                                    # build a per-iteration prefix so a generated
                                    # name is not carried over between iterations
                                    if polygon is None:
                                        if prefix is None:
                                            prfx = key.replace(variable, indice).replace('_day_', '_%s_' % grouping)
                                        else:
                                            prfx = prefix
                                        geom = None
                                        ugid = None
                                    else:
                                        if prefix is None:
                                            prfx = key.replace(variable, indice).replace('_day_', '_%s_' % grouping).replace(domain, polygon)
                                        else:
                                            prfx = prefix
                                        geom = get_geom(polygon=polygon)
                                        ugid = get_ugid(polygons=polygon, geom=geom)

                                    if indice == 'TGx':
                                        calc = [{'func': 'max', 'name': 'TGx'}]
                                        tmp = ocgis_module.call(resource=ncs,
                                                                variable=variable,
                                                                dimension_map=dimension_map,
                                                                calc=calc,
                                                                calc_grouping=calc_group,
                                                                prefix=prfx,
                                                                dir_output=dir_output,
                                                                geom=geom,
                                                                select_ugid=ugid)
                                    elif indice == 'TGn':
                                        calc = [{'func': 'min', 'name': 'TGn'}]
                                        tmp = ocgis_module.call(resource=ncs,
                                                                variable=variable,
                                                                dimension_map=dimension_map,
                                                                calc=calc,
                                                                calc_grouping=calc_group,
                                                                prefix=prfx,
                                                                dir_output=dir_output,
                                                                geom=geom,
                                                                select_ugid=ugid)
                                    elif indice == 'TGx5day':
                                        # 5-day moving-window mean, then grouped maximum
                                        calc = [{'func': 'moving_window', 'name': 'TGx5day',
                                                 'kwds': {'k': 5, 'operation': 'mean', 'mode': 'same'}}]
                                        tmp2 = ocgis_module.call(resource=ncs,
                                                                 variable=variable,
                                                                 dimension_map=dimension_map,
                                                                 calc=calc,
                                                                 prefix=str(uuid.uuid4()),
                                                                 geom=geom,
                                                                 select_ugid=ugid)
                                        calc = [{'func': 'max', 'name': 'TGx5day'}]
                                        logger.info('moving window calculated : %s' % tmp2)
                                        tmp = ocgis_module.call(resource=tmp2,
                                                                variable=indice,
                                                                dimension_map=dimension_map,
                                                                calc=calc,
                                                                calc_grouping=calc_group,
                                                                prefix=prfx,
                                                                dir_output=dir_output)
                                        remove(tmp2)
                                    elif indice == 'TGn5day':
                                        # 5-day moving-window mean, then grouped minimum
                                        calc = [{'func': 'moving_window', 'name': 'TGn5day',
                                                 'kwds': {'k': 5, 'operation': 'mean', 'mode': 'same'}}]
                                        tmp2 = ocgis_module.call(resource=ncs,
                                                                 variable=variable,
                                                                 dimension_map=dimension_map,
                                                                 calc=calc,
                                                                 prefix=str(uuid.uuid4()),
                                                                 geom=geom,
                                                                 select_ugid=ugid)
                                        calc = [{'func': 'min', 'name': 'TGn5day'}]
                                        logger.info('moving window calculated : %s' % tmp2)
                                        tmp = ocgis_module.call(resource=tmp2,
                                                                variable=indice,
                                                                dimension_map=dimension_map,
                                                                calc=calc,
                                                                calc_grouping=calc_group,
                                                                prefix=prfx,
                                                                dir_output=dir_output)
                                        remove(tmp2)
                                    else:
                                        logger.error('Indice %s is not a known indice' % indice)
                                        continue
                                    outputs.append(tmp)
                                    logger.info('indice file calculated %s ' % tmp)
                                except Exception as e:
                                    logger.exception('could not calc indice %s for key %s, polygon %s and grouping %s : %s' %
                                                     (indice, key, polygon, grouping, e))
                        except Exception as e:
                            logger.exception('could not calc indice %s for key %s and grouping %s : %s' %
                                             (indice, key, grouping, e))
                except Exception as e:
                    logger.exception('could not calc indice %s for key %s: %s' % (indice, key, e))
        except Exception as e:
            logger.exception('could not calc key %s: %s' % (key, e))
    return outputs
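# Usage sketch (not part of the module; input path hypothetical). For
# 'TGx5day' the function first applies a 5-day moving-window mean and then
# takes the grouped maximum, as implemented above:
#
#     nc_files = calc_indice_unconventional(
#         resource=['/data/tas_day_MODEL_historical_r1i1p1.nc'],
#         indices=['TGx5day'], groupings=['yr'], dir_output='/tmp/indices')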
def clipping(resource=[], variable=None, dimension_map=None, calc=None,
             output_format='nc', calc_grouping=None, time_range=None,
             time_region=None, historical_concatination=True, prefix=None,
             spatial_wrapping='wrap', polygons=None, mosaic=False,
             dir_output=None, memory_limit=None):
    """
    Returns a list of clipped netCDF files.

    :param resource: list of input netCDF files
    :param variable: variable (string) to be used in netCDF
    :param dimension_map: specify a dimension map if the input netCDF has unconventional dimensions
    :param calc: ocgis calculation argument
    :param calc_grouping: ocgis calculation grouping
    :param historical_concatination: concat files of RCPs with appropriate historical runs into one timeseries
    :param prefix: prefix for the output file name
    :param polygons: list of polygons to be used. If more than one in the list, an appropriate mosaic will be clipped
    :param mosaic: if True, the polygons are clipped as one common mosaic (keyword renamed from the misspelled 'mosaik' to match the callers)
    :param output_format: output_format (default='nc')
    :param dir_output: specify an output location
    """
    from flyingpigeon.utils import get_variable, sort_by_filename
    from flyingpigeon.ocgis_module import call

    if type(resource) != list:
        resource = list([resource])
    if type(polygons) != list:
        polygons = list([polygons])
    if prefix is not None and type(prefix) != list:
        prefix = list([prefix])

    geoms = set()
    ncs = sort_by_filename(resource, historical_concatination=historical_concatination)
    geom_files = []

    if mosaic is True:
        try:
            nameadd = '_'
            for polygon in polygons:
                geoms.add(get_geom(polygon))
                nameadd = nameadd + '-' + polygon
            if len(geoms) > 1:
                logger.error('polygons belong to different shapefiles! mosaic option is not possible %s', geoms)
            else:
                geom = geoms.pop()
                ugids = get_ugid(polygons=polygons, geom=geom)
        except Exception as e:
            logger.debug('geom identification failed %s ' % e)

        for i, key in enumerate(ncs.keys()):
            try:
                if variable is None:
                    variable = get_variable(ncs[key])
                    logger.info('variable %s detected in resource' % variable)
                if prefix is None:
                    name = key + nameadd
                else:
                    name = prefix[i]
                geom_file = call(resource=ncs[key], variable=variable,
                                 calc=calc, calc_grouping=calc_grouping,
                                 output_format=output_format, prefix=name,
                                 geom=geom, select_ugid=ugids,
                                 time_range=time_range, time_region=time_region,
                                 spatial_wrapping=spatial_wrapping,
                                 memory_limit=memory_limit,
                                 dir_output=dir_output,
                                 dimension_map=dimension_map)
                geom_files.append(geom_file)
            except Exception as e:
                msg = 'ocgis calculations failed for %s: %s' % (key, e)
                logger.debug(msg)
    else:
        for i, polygon in enumerate(polygons):
            try:
                geom = get_geom(polygon)
                ugid = get_ugid(polygons=polygon, geom=geom)
                for key in ncs.keys():
                    try:
                        if variable is None:
                            variable = get_variable(ncs[key])
                            logger.info('variable %s detected in resource' % variable)
                        if prefix is None:
                            name = key + '_' + polygon
                        else:
                            name = prefix[i]
                        geom_file = call(resource=ncs[key], variable=variable,
                                         calc=calc, calc_grouping=calc_grouping,
                                         output_format=output_format, prefix=name,
                                         geom=geom, select_ugid=ugid,
                                         dir_output=dir_output,
                                         dimension_map=dimension_map,
                                         spatial_wrapping=spatial_wrapping,
                                         memory_limit=memory_limit,
                                         time_range=time_range,
                                         time_region=time_region)
                        geom_files.append(geom_file)
                    except Exception as e:
                        msg = 'ocgis calculations failed for %s: %s' % (key, e)
                        logger.debug(msg)
                        raise
            except Exception:
                logger.debug('geom identification failed')
                raise
    return geom_files
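# Usage sketch (not part of the module): the polygon identifier depends on the
# shapefiles configured for the subset module; 'FRA' is used elsewhere in this
# codebase as an example region, and the paths are hypothetical:
#
#     clipped = clipping(resource=['/data/tas_day_MODEL_historical_r1i1p1.nc'],
#                        polygons=['FRA'], dir_output='/tmp/clipped')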
def execute(self):
    import time  # performance test
    process_start_time = time.time()  # measure total process execution time

    from os import path
    from tempfile import mkstemp
    from flyingpigeon import analogs
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.datafetch import reanalyses
    from flyingpigeon.utils import get_variable, rename_variable

    self.status.set('execution started at : %s ' % dt.now(), 5)

    start_time = time.time()  # measure init ...

    resource = self.getInputValues(identifier='resource')
    bbox_obj = self.BBox.getValue()
    refSt = self.getInputValues(identifier='refSt')
    refEn = self.getInputValues(identifier='refEn')
    dateSt = self.getInputValues(identifier='dateSt')
    dateEn = self.getInputValues(identifier='dateEn')
    normalize = self.getInputValues(identifier='normalize')[0]
    distance = self.getInputValues(identifier='dist')[0]
    outformat = self.getInputValues(identifier='outformat')[0]
    timewin = int(self.getInputValues(identifier='timewin')[0])
    experiment = self.getInputValues(identifier='experiment')[0]
    # nanalog and seasonwin are required by get_configfile below but were never
    # read in; they are assumed to be process inputs of the same identifiers
    nanalog = int(self.getInputValues(identifier='nanalog')[0])
    seasonwin = int(self.getInputValues(identifier='seasonwin')[0])
    dataset, var = experiment.split('_')

    refSt = dt.strptime(refSt[0], '%Y-%m-%d')
    refEn = dt.strptime(refEn[0], '%Y-%m-%d')
    dateSt = dt.strptime(dateSt[0], '%Y-%m-%d')
    dateEn = dt.strptime(dateEn[0], '%Y-%m-%d')

    if normalize == 'None':
        seacyc = False
    else:
        seacyc = True

    if outformat == 'ascii':
        outformat = '.txt'
    elif outformat == 'netCDF':
        outformat = '.nc'
    else:
        logger.error('output format not valid')

    if bbox_obj is not None:
        logger.info("bbox_obj={0}".format(bbox_obj.coords))
        bbox = [bbox_obj.coords[0][0], bbox_obj.coords[0][1],
                bbox_obj.coords[1][0], bbox_obj.coords[1][1]]
        logger.info("bbox={0}".format(bbox))
    else:
        bbox = None

    try:
        if dataset == 'NCEP':
            if 'z' in var:
                variable = 'hgt'
                level = var.strip('z')
                # conform_units_to=None
            else:
                variable = 'slp'
                level = None
                # conform_units_to='hPa'
        elif '20CRV2' in dataset:  # was erroneously tested against 'var'
            if 'z' in var:  # was erroneously tested against the undefined 'level'
                variable = 'hgt'
                level = var.strip('z')
                # conform_units_to=None
            else:
                variable = 'prmsl'
                level = None
                # conform_units_to='hPa'
        else:
            logger.error('Reanalyses dataset not known')
        logger.info('environment set')
    except Exception as e:
        msg = 'failed to set environment %s ' % e
        logger.error(msg)
        raise Exception(msg)

    logger.debug("init took %s seconds.", time.time() - start_time)
    self.status.set('Read in the arguments', 5)

    #################
    # get input data
    #################
    start_time = time.time()  # measure get_input_data ...
    self.status.set('fetching input data', 7)
    try:
        # avoid shadowing the built-in 'input'
        nc_reanalyses = reanalyses(start=dateSt.year, end=dateEn.year,
                                   variable=var, dataset=dataset)
        nc_subset = call(resource=nc_reanalyses, variable=var, geom=bbox)
    except Exception as e:
        msg = 'failed to fetch or subset input files %s' % e
        logger.error(msg)
        raise Exception(msg)
    logger.debug("get_input_subset_dataset took %s seconds.",
                 time.time() - start_time)
    self.status.set('**** Input data fetched', 10)

    ########################
    # input data preparation
    ########################
    self.status.set('Start preparing input data', 12)
    start_time = time.time()  # measure data preparation ...

    try:
        self.status.set('Preparing simulation data', 15)
        simulation = call(resource=nc_subset, time_range=[dateSt, dateEn])
    except Exception as e:
        msg = 'failed to prepare simulation period %s ' % e
        logger.debug(msg)
        raise Exception(msg)

    try:
        self.status.set('Preparing target data', 17)
        var_target = get_variable(resource)
        archive = call(resource=resource,
                       variable=var_target,
                       time_range=[refSt, refEn],
                       geom=bbox,
                       t_calendar='standard',
                       # conform_units_to=conform_units_to,
                       spatial_wrapping='wrap',
                       regrid_destination=simulation,
                       regrid_options='bil')
    except Exception as e:
        msg = 'failed to subset archive dataset %s ' % e
        logger.debug(msg)
        raise Exception(msg)

    try:
        if var != var_target:
            rename_variable(archive, oldname=var_target, newname=var)
            logger.info('varname %s in netCDF renamed to %s' % (var_target, var))
    except Exception as e:
        msg = 'failed to rename variable in target files %s ' % e
        logger.debug(msg)
        raise Exception(msg)

    try:
        if seacyc:
            seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                archive, simulation, method=normalize)
        else:
            seasoncyc_base, seasoncyc_sim = None, None  # was a broken single-value unpacking
    except Exception as e:
        msg = 'failed to prepare seasonal cycle reference files %s ' % e
        logger.debug(msg)
        raise Exception(msg)

    ip, output = mkstemp(dir='.', suffix='.txt')
    output_file = path.abspath(output)
    files = [path.abspath(archive), path.abspath(simulation), output_file]

    logger.debug("data preparation took %s seconds.", time.time() - start_time)

    ############################
    # generating the config file
    ############################
    self.status.set('writing config file', 15)
    start_time = time.time()  # measure write config ...
    try:
        config_file = analogs.get_configfile(
            files=files,
            seasoncyc_base=seasoncyc_base,
            seasoncyc_sim=seasoncyc_sim,
            timewin=timewin,
            varname=var,
            seacyc=seacyc,
            cycsmooth=91,
            nanalog=nanalog,
            seasonwin=seasonwin,
            distfun=distance,
            outformat=outformat,
            calccor=True,
            silent=False,
            period=[dt.strftime(refSt, '%Y-%m-%d'),
                    dt.strftime(refEn, '%Y-%m-%d')],
            bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
    except Exception as e:
        msg = 'failed to generate config file %s ' % e
        logger.debug(msg)
        raise Exception(msg)
    logger.debug("write_config took %s seconds.", time.time() - start_time)

    #######################
    # CASTf90 call
    #######################
    import subprocess
    import shlex

    start_time = time.time()  # measure call castf90
    self.status.set('Start CASTf90 call', 20)
    try:
        cmd = 'analogue.out %s' % path.relpath(config_file)
        args = shlex.split(cmd)
        output, error = subprocess.Popen(
            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
        logger.info('analogue.out info:\n %s ' % output)
        logger.debug('analogue.out errors:\n %s ' % error)
        self.status.set('**** CASTf90 succeeded', 90)
    except Exception as e:
        msg = 'CASTf90 failed %s ' % e
        logger.error(msg)
        raise Exception(msg)
    logger.debug("castf90 took %s seconds.", time.time() - start_time)

    self.status.set('preparing output', 99)
    self.config.setValue(config_file)
    self.analogs.setValue(output_file)
    self.simulation_netcdf.setValue(simulation)
    self.target_netcdf.setValue(archive)

    self.status.set('execution ended', 100)
    logger.debug("total execution took %s seconds.",
                 time.time() - process_start_time)
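# For orientation (not part of the process): the CASTf90 step above is
# equivalent to running
#
#     analogue.out <config_file.txt>
#
# where the configuration file is the one written by analogs.get_configfile().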