def get_indices(resources, indices):
    """
    Calculate simple climate indices for the given netCDF resources.

    :param resources: list of netCDF file paths
    :param indices: list of index identifiers of the form '<name>_<month>'
                    (split on '_' into the icclim index name and the time grouping)
    :return list: paths to the calculated index netCDF files
    """
    from flyingpigeon.utils import sort_by_filename, calc_grouping, drs_filename
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.indices import indice_variable, calc_indice_simple

    #names = [drs_filename(nc, skip_timestamp=False, skip_format=False,
    #                     variable=None, rename_file=True, add_file_path=True) for nc in resources]

    # group input files into one dataset per experiment
    # (historical runs are concatenated with their RCP continuation)
    ncs = sort_by_filename(resources, historical_concatination=True)
    ncs_indices = []
    logger.info('resources sorted found %s datasets' % len(ncs.keys()) )
    for key in ncs.keys():
        for indice in indices:
            try:
                name , month = indice.split('_')
                # first token of the DRS file name is the variable
                variable=key.split('_')[0]
                # print name, month , variable
                # only compute the index on datasets holding the variable it needs
                if variable == indice_variable(name):
                    logger.info('calculating indice %s ' % indice)
                    # output name: swap the variable for the index name, 'day' for the grouping
                    prefix=key.replace(variable, name).replace('_day_','_%s_' % month)
                    nc = calc_indice_simple(resource=ncs[key],
                        variable=variable,
                        prefix=prefix, indices=name, groupings=month,
                        memory_limit=500)
                    #grouping = calc_grouping(month)
                    #calc = [{'func' : 'icclim_' + name, 'name' : name}]
                    #nc = call(resource=ncs[key], variable=variable, calc=calc, calc_grouping=grouping, prefix=prefix , memory_limit=500)
                    #memory_limit=500
                    # NOTE(review): assumes calc_indice_simple returns a non-empty list
                    # on success; a None return raises here and is only logged below
                    ncs_indices.append(nc[0])
                    logger.info('Successful calculated indice %s %s' % (key, indice))
            except Exception as e:
                # best effort: one failing index must not abort the remaining ones
                logger.exception('failed to calculate indice %s %s' % (key, indice))
    return ncs_indices
def execute(self):
    """
    Extract time series at the requested coordinate points from the input
    netCDF resources and write one CSV file per experiment; all CSVs are
    bundled into a tar archive set on the 'tarout' output.
    """
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.utils import sort_by_filename, archive, get_values, get_time

    ncs = self.getInputValues(identifier='resource')
    logger.info("ncs: %s " % ncs)
    # coordinate points as 'lon,lat' strings
    coords = self.getInputValues(identifier='coords')
    logger.info("coords %s", coords)
    filenames = []
    # one entry per experiment; historical runs concatenated with their RCPs
    nc_exp = sort_by_filename(ncs, historical_concatination=True)

    from numpy import savetxt, column_stack
    from shapely.geometry import Point

    for key in nc_exp.keys():
        try:
            logger.info('start calculation for %s ' % key)
            ncs = nc_exp[key]
            # first CSV column: formatted time stamps of the dataset
            times = get_time(ncs, format='%Y-%m-%d_%H:%M:%S')
            concat_vals = times
            # ['%s-%02d-%02d_%02d:%02d:%02d' %
            # (t.year, t.month, t.day, t.hour, t.minute, t.second) for t in times]
            header = 'date_time'
            filename = '%s.csv' % key
            filenames.append(filename)

            for p in coords:
                try:
                    self.status.set('processing point : {0}'.format(p), 20)
                    # define the point:
                    p = p.split(',')
                    point = Point(float(p[0]), float(p[1]))
                    # get the values at the nearest grid point
                    timeseries = call(resource=ncs, geom=point, select_nearest=True)
                    vals = get_values(timeseries)
                    # concatenation of values: one extra CSV column per point
                    header = header + ',%s-%s' % (p[0], p[1])
                    concat_vals = column_stack([concat_vals, vals])
                except Exception as e:
                    # a failing point is skipped; its column is simply absent
                    logger.debug('failed for point %s %s' % (p, e))
            self.status.set('*** all points processed for {0} ****'.format(key), 50)
            savetxt(filename, concat_vals, fmt='%s', delimiter=',', header=header)
        except Exception as e:
            logger.debug('failed for %s %s' % (key, e))

    # set the outputs
    self.status.set('*** creating output tar archive ****', 90)
    tarout_file = archive(filenames)
    self.tarout.setValue(tarout_file)
def _handler(self, request, response):
    """
    Process handler: compute SDM climate indices for every input dataset
    and return them as one archive plus a single sample netCDF output.
    """
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'
    response.update_status('Start process', 0)

    try:
        LOGGER.info('reading the arguments')
        # unpack archives so resources is a flat list of netCDF paths
        resources = archiveextract(
            resource=rename_complexinputs(request.inputs['resource']))
        indices = [inpt.data for inpt in request.inputs['indices']]
        LOGGER.debug("indices = %s", indices)
        archive_format = request.inputs['archive_format'][0].data
    except:
        msg = 'failed to read the arguments.'
        LOGGER.exception(msg)
        raise Exception(msg)
    LOGGER.info('indices %s ' % indices)

    #################################
    # calculate the climate indices
    #################################

    # indices calculation
    ncs_indices = None
    datasets = sort_by_filename(resources, historical_concatination=True)
    LOGGER.debug("datasets=%s", datasets.keys())
    for ds_name in datasets:
        try:
            response.update_status('calculation of {}'.format(ds_name), 30)
            # TODO: what is happening with the results for each ds?
            # NOTE(review): ncs_indices is overwritten on every iteration, so only
            # the LAST dataset's indices reach the archive below -- confirm intended
            ncs_indices = sdm.get_indices(resource=datasets[ds_name],
                                          indices=indices)
        except:
            msg = 'indice calculation failed for {}'.format(ds_name)
            LOGGER.exception(msg)
            raise Exception(msg)

    # archive multiple output files to one archive file
    try:
        archive_indices = archive(ncs_indices, format=archive_format)
        LOGGER.info('indices 3D added to tarfile')
    except:
        msg = 'failed adding indices to tar'
        LOGGER.exception(msg)
        raise Exception(msg)

    response.outputs['output_indices'].file = archive_indices

    # first truthy entry is exposed as the sample netCDF output
    # NOTE(review): if every entry is falsy, i is None and ncs_indices[None]
    # raises TypeError -- no fallback is implemented here
    i = next((i for i, x in enumerate(ncs_indices) if x), None)
    response.outputs['ncout'].file = ncs_indices[i]

    response.update_status('done', 100)
    return response
def _handler(self, request, response):
    """
    Extract a time series at each requested coordinate from every input
    dataset; one CSV per dataset, bundled into a tar archive output.
    """
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    ncs = archiveextract(
        resource=rename_complexinputs(request.inputs['resource']))
    LOGGER.info('ncs: {}'.format(ncs))

    # collect the 'lon,lat' strings from the literal inputs
    coords = [coord.data for coord in request.inputs['coords']]
    LOGGER.info('coords {}'.format(coords))

    nc_exp = sort_by_filename(ncs, historical_concatination=True)
    filenames = []

    for dataset_key in nc_exp.keys():
        try:
            LOGGER.info('start calculation for {}'.format(dataset_key))
            members = nc_exp[dataset_key]
            # first CSV column holds the time axis
            table = get_time(members)  # , format='%Y-%m-%d_%H:%M:%S')
            header = 'date_time'
            csv_name = '{}.csv'.format(dataset_key)
            filenames.append(csv_name)

            for p in coords:
                try:
                    response.update_status('processing point: {}'.format(p), 20)
                    # split 'lon,lat' and build the geometry
                    p = p.split(',')
                    location = Point(float(p[0]), float(p[1]))
                    # nearest-gridpoint time series for this location
                    nearest = call(resource=members, geom=location,
                                   select_nearest=True)
                    point_vals = get_values(nearest)
                    # append one column per successfully processed point
                    header = header + ',{}-{}'.format(p[0], p[1])
                    table = column_stack([table, point_vals])
                except Exception as e:
                    LOGGER.debug('failed for point {} {}'.format(p, e))

            response.update_status('*** all points processed for {0} ****'.format(dataset_key), 50)
            # TODO: Ascertain whether this 'savetxt' is a valid command without string formatting argument: '%s'
            savetxt(csv_name, table, fmt='%s', delimiter=',', header=header)
        except Exception as ex:
            LOGGER.debug('failed for {}: {}'.format(dataset_key, str(ex)))

    # bundle all per-dataset CSVs into the tar output
    response.update_status('*** creating output tar archive ****', 90)
    response.outputs['tarout'].file = archive(filenames)

    return response
def execute(self):
    """
    Extract time series at the requested coordinate points from the input
    netCDF files; one CSV per experiment, returned as a tar archive.
    """
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.utils import sort_by_filename, archive, get_values, get_time

    ncs = self.getInputValues(identifier='netcdf_file')
    logger.info("ncs: %s " % ncs)
    # coordinate points as 'lon,lat' strings
    coords = self.getInputValues(identifier='coords')
    logger.info("coords %s", coords)
    filenames = []
    nc_exp = sort_by_filename(ncs, historical_concatination=True)
    #(fp_tar, tarout_file) = tempfile.mkstemp(dir=".", suffix='.tar')
    #tar = tarfile.open(tarout_file, "w")

    from numpy import savetxt, column_stack
    from shapely.geometry import Point

    for key in nc_exp.keys():
        try:
            logger.info('start calculation for %s ' % key )
            ncs = nc_exp[key]
            times = get_time(ncs)
            # first CSV column: time stamps formatted from the datetime objects
            concat_vals = ['%s-%02d-%02d_%02d:%02d:%02d' %
                           (t.year, t.month, t.day, t.hour, t.minute, t.second) for t in times]
            header = 'date_time'
            filename = '%s.csv' % key
            filenames.append(filename)

            for p in coords:
                try:
                    self.status.set('processing point : {0}'.format(p), 20)
                    # define the point:
                    p = p.split(',')
                    point = Point(float(p[0]), float(p[1]))
                    # get the values at the nearest grid point
                    timeseries = call(resource=ncs, geom=point, select_nearest=True)
                    vals = get_values(timeseries)
                    # concatination of values: one extra CSV column per point
                    header = header + ',%s-%s' % (p[0], p[1])
                    concat_vals = column_stack([concat_vals, vals])
                except Exception as e:
                    # a failing point is skipped; its column is simply absent
                    logger.debug('failed for point %s %s' % (p , e))
            self.status.set('*** all points processed for {0} ****'.format(key), 50)
            savetxt(filename, concat_vals, fmt='%s', delimiter=',', header=header)
        except Exception as e:
            logger.debug('failed for %s %s' % (key, e))

    ### set the outputs
    self.status.set('*** creating output tar archive ****',90)
    tarout_file = archive(filenames)
    self.tarout.setValue( tarout_file )
def execute(self):
    """
    Extract point time series from the input netCDF files and write one CSV
    per experiment; all CSVs are collected into a tar archive output.
    """
    from os import close
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.utils import get_time, get_variable, sort_by_filename
    from netCDF4 import Dataset
    from numpy import savetxt, column_stack, squeeze

    ncs = self.getInputValues(identifier='netcdf_file')
    logging.info("ncs: %s " % ncs)
    coords = self.getInputValues(identifier='coords')
    logging.info("coords %s", coords)

    nc_exp = sort_by_filename(ncs)  # dictionary {experiment: [files]}
    filenames = []
    (fp_tar, tarout_file) = tempfile.mkstemp(dir=".", suffix='.tar')
    close(fp_tar)  # FIX: the mkstemp file descriptor was previously leaked
    tar = tarfile.open(tarout_file, "w")
    try:
        for key in nc_exp.keys():
            logging.info('start calculation for %s ' % key)
            ncs = nc_exp[key]
            nc = ncs[0]
            # NOTE(review): time axis and variable are taken from the first
            # file only -- assumes all files of an experiment agree; verify.
            times = get_time(nc)
            var = get_variable(nc)
            # FIX: format was '%Y-%d-%m_...' (year-DAY-month); use the
            # year-month-day ordering as in the sibling point-extraction processes
            concat_vals = [t.strftime('%Y-%m-%d_%H:%M:%S') for t in times]
            header = 'date_time'
            filename = '%s.csv' % key
            filenames.append(filename)

            for ugid, p in enumerate(coords, start=1):
                self.status.set('processing point : {0}'.format(p), 20)
                p = p.split(',')
                self.status.set('splited x and y coord : {0}'.format(p), 20)
                point = Point(float(p[0]), float(p[1]))

                # time series at the grid point nearest to the coordinate
                timeseries = call(resource=ncs, geom=point, select_nearest=True)
                ds = Dataset(timeseries)
                try:
                    vals = squeeze(ds.variables[var])
                    header = header + ',%s_%s' % (p[0], p[1])
                    concat_vals = column_stack([concat_vals, vals])
                finally:
                    ds.close()  # FIX: dataset handle was never closed

            savetxt(filename, concat_vals, fmt='%s', delimiter=',', header=header)
            tar.add(filename)
    finally:
        tar.close()  # FIX: ensure the archive is finalized even on error

    self.tarout.setValue(tarout_file)
def _handler(self, request, response):
    """
    Extract a time series at each requested coordinate from every input
    dataset; one CSV per dataset, bundled into a tar archive output.
    """
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    ncs = archiveextract(
        resource=rename_complexinputs(request.inputs['resource']))
    LOGGER.info("ncs: %s " % ncs)

    # FIX: read .data of each literal input -- the raw input objects have no
    # .split(), so every point silently failed (sibling handler does the same)
    coords = [inpt.data for inpt in request.inputs['coords']]
    LOGGER.info("coords %s", coords)

    filenames = []
    nc_exp = sort_by_filename(ncs, historical_concatination=True)

    for key in nc_exp.keys():
        try:
            LOGGER.info('start calculation for %s ' % key)
            ncs = nc_exp[key]
            # first CSV column: formatted time stamps of the dataset
            times = get_time(ncs, format='%Y-%m-%d_%H:%M:%S')
            concat_vals = times
            header = 'date_time'
            filename = '%s.csv' % key
            filenames.append(filename)

            for p in coords:
                try:
                    response.update_status('processing point : {0}'.format(p), 20)
                    # define the point:
                    p = p.split(',')
                    point = Point(float(p[0]), float(p[1]))
                    # get the values at the nearest grid point
                    timeseries = call(resource=ncs, geom=point, select_nearest=True)
                    vals = get_values(timeseries)
                    # concatenation of values: one extra CSV column per point
                    header = header + ',%s-%s' % (p[0], p[1])
                    concat_vals = column_stack([concat_vals, vals])
                except Exception as e:
                    LOGGER.debug('failed for point %s %s' % (p, e))

            response.update_status('*** all points processed for {0} ****'.format(key), 50)
            savetxt(filename, concat_vals, fmt='%s', delimiter=',', header=header)
        except Exception as e:
            LOGGER.debug('failed for %s %s' % (key, e))

    # set the outputs
    response.update_status('*** creating output tar archive ****', 90)
    tarout_file = archive(filenames)
    response.outputs['tarout'].file = tarout_file

    return response
def get_indices(resources, indices):
    """
    calculating indices (netCDF files) defined in _SDMINDICES_

    :param resources: list of netCDF input file paths
    :param indices: indices defined in _SDMINDICES_, each of the form
                    '<name>_<month>' (index name and time grouping)

    :return list: list of filepathes to netCDF files
    """
    from flyingpigeon.utils import sort_by_filename, calc_grouping, drs_filename, unrotate_pole
    # from flyingpigeon.ocgis_module import call
    from flyingpigeon.indices import indice_variable, calc_indice_simple

    # names = [drs_filename(nc, skip_timestamp=False, skip_format=False,
    #               variable=None, rename_file=True, add_file_path=True) for nc in resources]

    # one dataset per experiment; historical runs concatenated with their RCPs
    ncs = sort_by_filename(resources, historical_concatination=True)
    ncs_indices = []
    logger.info('resources sorted found %s datasets' % len(ncs.keys()))
    for key in ncs.keys():
        for indice in indices:
            try:
                name, month = indice.split('_')
                # first token of the DRS file name is the variable
                variable = key.split('_')[0]
                # print name, month , variable
                # only compute the index on datasets holding the variable it needs
                if variable == indice_variable(name):
                    logger.info('calculating indice %s ' % indice)
                    # output name: swap the variable for the index name, 'day' for the grouping
                    prefix = key.replace(variable, name).replace('_day_', '_%s_' % month)
                    # clip to a mosaic of all continents before computing the index
                    nc = calc_indice_simple(resource=ncs[key],
                                            variable=variable,
                                            polygons=['Europe', 'Africa', 'Asia', 'North America', 'Oceania',
                                                      'South America', 'Antarctica'],
                                            mosaic=True,
                                            prefix=prefix, indices=name, groupings=month)
                    if nc is not None:
                        # write unrotated coordinates back into the result file
                        coords = unrotate_pole(nc[0], write_to_file=True)
                        ncs_indices.append(nc[0])
                        logger.info('Successful calculated indice %s %s' % (key, indice))
                    else:
                        msg = 'failed to calculate indice %s %s' % (key, indice)
                        logger.exception(msg)
            except:
                # unlike the sibling implementation this variant re-raises,
                # aborting the whole calculation on the first failure
                msg = 'failed to calculate indice %s %s' % (key, indice)
                logger.exception(msg)
                raise
    return ncs_indices
def aggregatTime(resource=[], variable=None, frequency=None, prefix=None,
                 grouping='mon', calculation='mean', historical_concatination=True):
    """
    Aggregates over the time axis.

    :param resource: input netCDF files
    :param variable: variable to be used from resource (detected from the file if None)
    :param frequency: time frequency in resource (detected from the file if None)
    :param grouping: time aggregation for output
    :param prefix: file name prefix (derived from the dataset key if None)
    :param calculation: calculation method (default = 'mean')
    :param historical_concatination: if rcps and appropriate historical runs are
                                     present they are concatenated
    :return: path to netCDF file
    """
    try:
        ncs = sort_by_filename(resource, historical_concatination=historical_concatination)
        group = calc_grouping(grouping=grouping)
    except Exception:
        logger.exception('failed to determine ncs or calc_grouping')
        raise

    # exactly one experiment is expected in resource
    if len(ncs.keys()) != 1:
        # FIX: logger.exception outside an except block logs no traceback;
        # use logger.error for the plain message
        msg = 'None or more than one data experiments found in resource'
        logger.error(msg)
        raise Exception(msg)

    # FIX: list(...) -- dict.keys() is not sliceable on Python 3
    for key in list(ncs.keys())[0:1]:
        try:
            if frequency is None:
                frequency = get_frequency(ncs[key][0])
            if variable is None:
                variable = get_variable(ncs[key][0])

            # record the new time frequency in the output metadata
            meta_attrs = {'field': {'frequency': grouping}}
            calc = [{'func': calculation, 'name': variable, 'meta_attrs': meta_attrs}]
            logger.info('calculation: %s ' % (calc))

            if prefix is None:
                # output name: swap the old frequency token for the new grouping
                prefix = key.replace(frequency, grouping)
            logger.info('prefix: %s ' % (prefix))

            output = call(resource=ncs[key], variable=None, calc=calc,
                          calc_grouping=group, prefix=prefix)
            logger.info('time aggregation done for %s ' % (key))
        except Exception:
            logger.exception('time aggregation failed for %s' % key)
            raise
    return output
def get_indices(resources, indices):
    """
    Compute simple climate indices for the given netCDF resources.

    :param resources: list of netCDF file paths
    :param indices: index identifiers of the form '<name>_<month>'
    :return list: paths to the calculated index netCDF files
    """
    from flyingpigeon.utils import sort_by_filename, calc_grouping, drs_filename
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.indices import indice_variable, calc_indice_simple

    # one dataset per experiment; historical runs joined with their RCPs
    datasets = sort_by_filename(resources, historical_concatination=True)
    ncs_indices = []
    logger.info('resources sorted found %s datasets' % len(datasets.keys()))

    for key in datasets.keys():
        for indice in indices:
            try:
                name, month = indice.split('_')
                # the variable is the leading token of the DRS dataset key
                variable = key.split('_')[0]
                if variable != indice_variable(name):
                    # this dataset does not carry the variable the index needs
                    continue
                logger.info('calculating indice %s ' % indice)
                # derive the output name from the dataset key
                prefix = key.replace(variable, name).replace('_day_', '_%s_' % month)
                nc = calc_indice_simple(resource=datasets[key],
                                        variable=variable,
                                        prefix=prefix,
                                        indices=name,
                                        groupings=month,
                                        memory_limit=500)
                ncs_indices.append(nc[0])
                logger.info('Successful calculated indice %s %s' % (key, indice))
            except Exception as e:
                # best effort: keep going with the remaining indices
                logger.exception('failed to calculate indice %s %s' % (key, indice))
    return ncs_indices
def get_yrmean(resource=[]):
    """
    calculation of annual mean temperature and clipping Europe

    :param resource: list or netCDF tas input files
    :return list: list of output files
    """
    from flyingpigeon.utils import calc_grouping, sort_by_filename
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.subset import clipping

    experiments = sort_by_filename(resource)
    nc_tasmean = []
    try:
        for key in experiments.keys():
            try:
                logger.info('process %s' % (key))
                # annual mean of tas over the Europe polygon
                calc = [{'func': 'mean', 'name': 'tas'}]
                calc_group = calc_grouping('yr')
                # DRS token 8 is the frequency -- swap it for 'yr' in the output name
                prefix = key.replace(key.split('_')[7], 'yr')
                clipped = clipping(resource=experiments[key], variable='tas',
                                   calc=calc, calc_grouping=calc_group,
                                   prefix=prefix, polygons='Europe')
                nc_tasmean.append(clipped[0])
                logger.info('clipping and mean tas calculation done for %s' % (key))
            except Exception as e:
                logger.debug('mean tas calculation failed for %s : %s ' % (key, e))
    except Exception as e:
        logger.debug('clipping failed for %s: %s' % (key, e))
    return nc_tasmean
def _handler(self, request, response):
    """
    Compute percentile-based indices (daily percentile with a 5-day window)
    for every input dataset; outputs an archive of results and one sample file.
    """
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    try:
        resources = archiveextract(
            resource=rename_complexinputs(request.inputs['resource']))
        grouping = request.inputs['grouping'][0].data
        if 'region' in request.inputs:
            region = request.inputs['region'][0].data
        else:
            region = None
        if 'mosaic' in request.inputs:
            mosaic = request.inputs['mosaic'][0].data
        else:
            mosaic = False
        percentile = request.inputs['percentile'][0].data

        LOGGER.debug("grouping %s " % grouping)
        LOGGER.debug("mosaic %s " % mosaic)
        LOGGER.debug('percentile: %s' % percentile)
        LOGGER.debug('region %s' % region)
        LOGGER.debug('Nr of input files %s ' % len(resources))
    except Exception as e:
        LOGGER.exception("failed to read in the arguments: %s" % e)

    from flyingpigeon.utils import sort_by_filename
    from flyingpigeon.ocgis_module import call

    datasets = sort_by_filename(resources, historical_concatination=True)
    results = []

    kwds = {'percentile': percentile, 'window_width': 5}
    # FIX: the calculation spec used to be bound to the name `calc`, shadowing
    # the imported `call` function which was then *called* via the list object
    percentile_calc = [{'func': 'daily_perc', 'name': 'dp', 'kwds': kwds}]

    try:
        for key in datasets.keys():
            try:
                # FIX: invoke ocgis via `call` (the list is passed as calc=...)
                result = call(resource=datasets[key],
                              output_format='nc',
                              calc=percentile_calc)
                LOGGER.debug('percentile based indice done for %s' % result)
                # call returns a single file path -- wrap it before extending
                results.extend([result])
            except Exception as e:
                # FIX: '%s: %s' % key, e passed `e` to LOGGER.exception as an
                # extra arg and crashed the format -- use a tuple
                LOGGER.exception(
                    "failed to calculate percentile based indice for %s: %s"
                    % (key, e))
    except Exception as e:
        LOGGER.exception("failed to calculate percentile indices: %s" % e)

    output_archive = archive(results)
    response.outputs['output_archive'].file = output_archive

    # first truthy result is exposed as the sample output
    # FIX: previously indexed results with the string "dummy.nc" when empty
    i = next((i for i, x in enumerate(results) if x), None)
    response.outputs['ncout'].file = results[i] if i is not None else "dummy.nc"

    response.update_status("done", 100)
    return response
def clipping(resource=[], variable=None, dimension_map=None, calc=None,
             output_format='nc', calc_grouping=None, time_range=None,
             time_region=None, historical_concatination=True, prefix=None,
             spatial_wrapping='wrap', polygons=None, mosaic=False,
             dir_output=None, memory_limit=None):
    """
    returns list of clipped netCDF files

    :param resource: list of input netCDF files
    :param variable: variable (string) to be used in netCDF
    :param dimesion_map: specify a dimension map if input netCDF has unconventional dimension
    :param calc: ocgis calculation argument
    :param calc_grouping: ocgis calculation grouping
    :param historical_concatination: concat files of RCPs with appropriate historical runs into one timeseries
    :param prefix: prefix for output file name
    :param polygons: list of polygons to be used. If more than 1 in the list, an appropriate mosaic will be clipped
    :param mosaic: Whether the polygons are aggregated into a single geometry (True)
                   or individual files are created for each geometry (False).
    :param output_format: output_format (default='nc')
    :param dir_output: specify an output location
    :param time_range: [start, end] of time subset
    :param time_region: year, months or days to be extracted in the timeseries

    :returns list: path to clipped files
    """
    # normalise scalar arguments to lists
    if type(resource) != list:
        resource = list([resource])
    if type(polygons) != list:
        polygons = list([polygons])
    if prefix is not None:
        if type(prefix) != list:
            prefix = list([prefix])

    geoms = set()
    ncs = sort_by_filename(resource,
                           historical_concatination=historical_concatination)  # historical_concatenation=True
    geom_files = []
    if mosaic is True:
        # one output per dataset, clipped to the union of all polygons
        try:
            nameadd = '_'
            for polygon in polygons:
                geoms.add(get_geom(polygon))
                nameadd = nameadd + polygon.replace(' ', '')
            # all polygons must come from the same shapefile to form a mosaic
            if len(geoms) > 1:
                LOGGER.error('polygons belong to different shapefiles! mosaic option is not possible %s',
                             geoms)
            else:
                geom = geoms.pop()
                ugids = get_ugid(polygons=polygons, geom=geom)
        except:
            # NOTE(review): if this fails, `geom`/`ugids`/`nameadd` may be
            # unbound and the loop below fails per dataset (logged there)
            LOGGER.exception('geom identification failed')
        for i, key in enumerate(ncs.keys()):
            try:
                # if variable is None:
                variable = get_variable(ncs[key])
                LOGGER.info('variable %s detected in resource' % (variable))
                if prefix is None:
                    name = key + nameadd
                else:
                    name = prefix[i]
                geom_file = call(resource=ncs[key], variable=variable, calc=calc,
                                 calc_grouping=calc_grouping, output_format=output_format,
                                 prefix=name, geom=geom, select_ugid=ugids,
                                 time_range=time_range, time_region=time_region,
                                 spatial_wrapping=spatial_wrapping, memory_limit=memory_limit,
                                 dir_output=dir_output, dimension_map=dimension_map)
                geom_files.append(geom_file)
                LOGGER.info('ocgis mosaik clipping done for %s ' % (key))
            except:
                msg = 'ocgis mosaik clipping failed for %s ' % (key)
                LOGGER.exception(msg)
    else:
        # one output per (polygon, dataset) combination
        for i, polygon in enumerate(polygons):
            try:
                geom = get_geom(polygon)
                ugid = get_ugid(polygons=polygon, geom=geom)
                for key in ncs.keys():
                    try:
                        # if variable is None:
                        variable = get_variable(ncs[key])
                        LOGGER.info('variable %s detected in resource' % (variable))
                        if prefix is None:
                            name = key + '_' + polygon.replace(' ', '')
                        else:
                            # NOTE(review): prefix is indexed by polygon here but by
                            # dataset in the mosaic branch -- confirm intended
                            name = prefix[i]
                        geom_file = call(resource=ncs[key], variable=variable, calc=calc,
                                         calc_grouping=calc_grouping, output_format=output_format,
                                         prefix=name, geom=geom, select_ugid=ugid,
                                         dir_output=dir_output, dimension_map=dimension_map,
                                         spatial_wrapping=spatial_wrapping, memory_limit=memory_limit,
                                         time_range=time_range, time_region=time_region,
                                         )
                        geom_files.append(geom_file)
                        LOGGER.info('ocgis clipping done for %s ' % (key))
                    except:
                        msg = 'ocgis clipping failed for %s ' % (key)
                        LOGGER.exception(msg)
            except:
                LOGGER.exception('geom identification failed')
    return geom_files
def _handler(self, request, response):
    """
    Compute a daily-percentile index (5-day window) for every input dataset,
    optionally clipped to a region; outputs an archive plus one sample file.
    """
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    try:
        resources = archiveextract(
            resource=rename_complexinputs(request.inputs['resource']))

        if 'region' in request.inputs:
            region = request.inputs['region'][0].data
        else:
            region = None
        if 'mosaic' in request.inputs:
            mosaic = request.inputs['mosaic'][0].data
        else:
            mosaic = False
        percentile = request.inputs['percentile'][0].data

        LOGGER.debug("mosaic %s " % mosaic)
        LOGGER.debug('percentile: %s' % percentile)
        LOGGER.debug('region %s' % region)
        LOGGER.debug('Nr of input files %s ' % len(resources))
    except Exception:
        LOGGER.exception('failed to read in the arguments')

    from flyingpigeon.utils import sort_by_filename
    from flyingpigeon.ocgis_module import call

    datasets = sort_by_filename(resources, historical_concatination=True)
    results = []

    # FIX: the user-supplied percentile was read but a hard-coded 90 was used
    kwds = {'percentile': percentile, 'window_width': 5}
    calc = [{'func': 'daily_perc', 'name': 'dp', 'kwds': kwds}]

    try:
        for key in datasets.keys():
            try:
                if region is None:
                    # whole domain: call returns a single file path
                    result = call(resource=datasets[key],
                                  output_format='nc',
                                  calc=calc)
                    results.extend([result])
                    LOGGER.debug('percentile based indice done for %s' % result)
                else:
                    # clipped to the requested region(s); clipping returns a list
                    result = clipping(resource=datasets[key],
                                      calc=calc,
                                      polygons=region,
                                      mosaic=mosaic)
                    results.extend(result)
            except Exception:
                LOGGER.exception(
                    "failed to calculate percentil based indice for %s " % key)
    except Exception:
        LOGGER.exception("failed to calculate percentile indices")

    tarf = archive(results)
    response.outputs['output_archive'].file = tarf

    # first truthy result is exposed as the sample output
    # FIX: previously indexed results with the string "dummy.nc" when empty
    i = next((i for i, x in enumerate(results) if x), None)
    response.outputs['ncout'].file = results[i] if i is not None else "dummy.nc"

    response.update_status("done", 100)
    return response
def calc_indice_percentile(resources=[], variable=None, prefix=None, indices='TG90p',
                           refperiod=None, groupings='yr', polygons=None,
                           percentile=90, mosaic=False,
                           dir_output=None, dimension_map=None):
    """
    Calculates given indices for suitable files in the appropriate time
    grouping and polygon.

    :param resources: list of filenames in data reference syntax (DRS) convention (netcdf)
    :param variable: variable name to be selected in the netcdf file (default=None -> detected)
    :param indices: list of indices (default='TG90p')
    :param prefix: filename prefix
    :param refperiod: reference period string 'YYYYMMDD-YYYYMMDD' (default=None -> full series)
    :param groupings: indices time aggregation (default='yr')
    :param percentile: percentile threshold (default=90)
    :param mosaic: aggregate all polygons into one geometry
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices, saved into dir_output.
    """
    from os.path import exists
    from os import makedirs
    import uuid
    from numpy import ma
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.subset import clipping
    from flyingpigeon.utils import get_values, get_time

    # normalise scalar arguments to lists
    if type(resources) != list:
        resources = list([resources])
    if type(indices) != list:
        indices = list([indices])
    if type(groupings) != list:
        groupings = list([groupings])
    if type(refperiod) == list:
        refperiod = refperiod[0]

    # FIX: the reference period must be parsed when it IS given -- the
    # condition was inverted and crashed on refperiod=None
    if refperiod is not None:
        start = dt.strptime(refperiod.split('-')[0], '%Y%m%d')
        end = dt.strptime(refperiod.split('-')[1], '%Y%m%d')
        time_range = [start, end]
    else:
        time_range = None

    # FIX: create the output directory when one IS requested (was inverted:
    # makedirs(None) on the dir_output=None path)
    if dir_output is not None:
        if not exists(dir_output):
            makedirs(dir_output)

    ################################################
    # Compute a custom percentile basis using ICCLIM
    ################################################
    from ocgis.contrib import library_icclim as lic

    nc_indices = []
    nc_dic = sort_by_filename(resources)

    for grouping in groupings:
        calc_group = calc_grouping(grouping)
        for key in nc_dic.keys():
            resource = nc_dic[key]
            if variable is None:
                variable = get_variable(resource)

            # reference dataset over the reference period
            # (optionally clipped to the polygons)
            if polygons is None:
                nc_reference = call(resource=resource,
                                    prefix=str(uuid.uuid4()),
                                    time_range=time_range,
                                    output_format='nc',
                                    dir_output=dir_output)
            else:
                nc_reference = clipping(resource=resource,
                                        prefix=str(uuid.uuid4()),
                                        time_range=time_range,
                                        output_format='nc',
                                        polygons=polygons,
                                        dir_output=dir_output,
                                        mosaic=mosaic)

            arr = ma.masked_array(get_values(resource=nc_reference))
            dt_arr = ma.masked_array(get_time(resource=nc_reference))
            window_width = 5

            for indice in indices:
                name = indice.replace('_', str(percentile))
                var = indice.split('_')[0]

                # NOTE(review): only temperature ('T*') indices are mapped to an
                # icclim operation here; other variables leave operation=None
                # and fail in the lookup below -- confirm intended scope
                operation = None
                if 'T' in var:
                    if percentile >= 50:
                        operation = 'Icclim%s90p' % var
                        func = 'icclim_%s90p' % var  # e.g. icclim_TG90p
                    else:
                        operation = 'Icclim%s10p' % var
                        func = 'icclim_%s10p' % var

                ################################
                # load the appropriate operation
                ################################
                ops = [op for op in dir(lic) if operation in op]
                if len(ops) == 0:
                    # FIX: message was built with a tuple, never formatted
                    raise Exception("operator does not exist %s" % operation)

                # FIX: getattr instead of (py2-only) exec -- same lookup,
                # no dynamic code execution
                percentile_dict = getattr(lic, ops[0]).get_percentile_dict(
                    arr, dt_arr, percentile, window_width)

                calc = [{'func': func,
                         'name': name,
                         'kwds': {'percentile_dict': percentile_dict}}]

                # output name: swap variable for index name, 'day' for grouping
                out_prefix = key.replace(variable, name).replace('_day_', '_%s_' % grouping)
                if polygons is None:
                    nc_indices.extend(call(resource=resource,
                                           prefix=out_prefix,
                                           calc=calc,
                                           calc_grouping=calc_group,
                                           output_format='nc',
                                           dir_output=dir_output))
                else:
                    nc_indices.extend(clipping(resource=resource,
                                               prefix=out_prefix,
                                               calc=calc,
                                               calc_grouping=calc_group,
                                               output_format='nc',
                                               dir_output=dir_output,
                                               polygons=polygons,
                                               mosaic=mosaic,
                                               ))
    # FIX: `len(...) is 0` relied on small-int identity; use truthiness
    if not nc_indices:
        logger.debug('No indices are calculated')
        return None
    return nc_indices
def calc_indice_percentile(resources=[], variable=None, prefix=None, indices='TG90p',
                           refperiod=None, groupings='yr', polygons=None, percentile=90,
                           mosaik = False, dir_output=None, dimension_map = None):
    """
    Calculates given indices for suitable files in the appopriate time grouping and polygon.

    NOTE(review): Python 2 only -- uses the `exec` statement below.

    :param resource: list of filenames in drs convention (netcdf)
    :param variable: variable name to be selected in the in netcdf file (default=None)
    :param indices: list of indices (default ='TG90p')
    :param prefix: filename prefix
    :param refperiod: reference period string 'YYYYMMDD-YYYYMMDD' (default=None)
    :param grouping: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices. Files are saved into out_dir
    """
    from os.path import join, dirname, exists
    from os import remove
    import uuid
    from numpy import ma
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.subset import clipping
    from flyingpigeon.utils import get_values, get_time

    # normalise scalar arguments to lists
    if type(resources) != list:
        resources = list([resources])
    if type(indices) != list:
        indices = list([indices])
    if type(groupings) != list:
        groupings = list([groupings])
    if type(refperiod) == list:
        refperiod = refperiod[0]

    # parse 'YYYYMMDD-YYYYMMDD' into a [start, end] time range
    if refperiod != None:
        start = dt.strptime(refperiod.split('-')[0] , '%Y%m%d')
        end = dt.strptime(refperiod.split('-')[1] , '%Y%m%d')
        time_range = [start, end]
    else:
        time_range = None

    if dir_output != None:
        if not exists(dir_output):
            # NOTE(review): makedirs is not imported in this function --
            # presumably provided at module level; verify
            makedirs(dir_output)

    ################################################
    # Compute a custom percentile basis using ICCLIM.
    ################################################
    from ocgis.contrib import library_icclim as lic
    nc_indices = []
    nc_dic = sort_by_filename(resources)
    for grouping in groupings:
        calc_group = calc_grouping(grouping)
        for key in nc_dic.keys():
            resource = nc_dic[key]
            if variable == None:
                variable = get_variable(resource)
            # reference dataset over the reference period
            # (optionally clipped to the polygons)
            if polygons == None:
                nc_reference = call(resource=resource,
                                    prefix=str(uuid.uuid4()),
                                    time_range=time_range,
                                    output_format='nc',
                                    dir_output=dir_output)
            else:
                nc_reference = clipping(resource=resource,
                                        prefix=str(uuid.uuid4()),
                                        time_range=time_range,
                                        output_format='nc',
                                        polygons=polygons,
                                        dir_output=dir_output,
                                        mosaik = mosaik)

            arr = get_values(nc_files=nc_reference)
            dt_arr = get_time(nc_files=nc_reference)
            arr = ma.masked_array(arr)
            dt_arr = ma.masked_array(dt_arr)
            percentile = percentile
            window_width = 5

            for indice in indices:
                name = indice.replace('_', str(percentile))
                var = indice.split('_')[0]

                # map temperature indices to their icclim percentile operation;
                # NOTE(review): non-'T' variables leave operation=None and fail
                # in the lookup below
                operation = None
                if 'T' in var:
                    if percentile >= 50:
                        operation = 'Icclim%s90p' % var
                        func = 'icclim_%s90p' % var  # icclim_TG90p
                    else:
                        operation = 'Icclim%s10p' % var
                        func = 'icclim_%s10p' % var

                ################################
                # load the appropriate operation
                ################################
                ops = [op for op in dir(lic) if operation in op]
                if len(ops) == 0:
                    raise Exception("operator does not exist %s", operation)

                # py2 exec statement: binds percentile_dict in local scope
                exec "percentile_dict = lic.%s.get_percentile_dict(arr, dt_arr, percentile, window_width)" % ops[0]
                calc = [{'func': func, 'name': name, 'kwds': {'percentile_dict': percentile_dict}}]

                if polygons == None:
                    # NOTE(review): append vs extend -- if call returns a list,
                    # this branch nests it; the clipping branch flattens
                    nc_indices.append(call(resource=resource,
                                           prefix=key.replace(variable,name).replace('_day_', '_%s_' % grouping),
                                           calc=calc,
                                           calc_grouping=calc_group,
                                           output_format='nc',
                                           dir_output=dir_output))
                else:
                    nc_indices.extend(clipping(resource=resource,
                                               prefix=key.replace(variable,name).replace('_day_', '_%s_' % grouping),
                                               calc=calc,
                                               calc_grouping=calc_group,
                                               output_format='nc',
                                               dir_output=dir_output,
                                               polygons=polygons,
                                               mosaik = mosaik,
                                               ))
    return nc_indices
def calc_indice_unconventional(resource=[], variable=None, prefix=None,
                               indices=None, polygons=None, groupings=None,
                               dir_output=None, dimension_map=None):
    """
    Calculates given indices for suitable files in the appropriate time
    grouping and polygon.

    :param resource: list of filenames in drs convention (netcdf)
    :param variable: variable name to be selected in the netcdf file
                     (default=None: detected from the first file of each dataset)
    :param prefix: output filename prefix; if None it is derived from the
                   dataset key (NOTE(review): once set it is never reset, so all
                   following indices/polygons reuse the first prefix -- confirm intent)
    :param indices: list of indices (default=None; supported: 'TGx', 'TGn',
                    'TGx5day', 'TGn5day')
    :param polygons: list of polygons (default=None: no subsetting)
    :param groupings: indices time aggregation (e.g. 'yr')
    :param dir_output: output directory for result files (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices. Files are saved into dir_output
    """
    from os.path import join, dirname, exists
    from os import remove
    import uuid
    from flyingpigeon import ocgis_module
    from flyingpigeon.subset import get_ugid, get_geom

    # normalize scalar arguments to lists
    if type(resource) != list:
        resource = list([resource])
    if type(indices) != list:
        indices = list([indices])
    if type(polygons) != list and polygons != None:
        polygons = list([polygons])
    elif polygons == None:
        # a single None entry drives one un-subsetted pass through the polygon loop
        polygons = [None]
    else:
        logger.error('Polygons not found')
    if type(groupings) != list:
        groupings = list([groupings])

    if dir_output != None:
        if not exists(dir_output):
            # makedirs is expected at module level -- TODO confirm it is imported
            makedirs(dir_output)

    # group input files into datasets keyed by their DRS filename
    experiments = sort_by_filename(resource)
    outputs = []

    # print('environment for calc_indice_unconventional set')
    logger.info('environment for calc_indice_unconventional set')

    # nested try/except so a failure at any level (key / indice / grouping /
    # polygon) is logged and the remaining combinations still run
    for key in experiments:
        if variable == None:
            variable = get_variable(experiments[key][0])
        try:
            ncs = experiments[key]
            for indice in indices:
                logger.info('indice: %s' % indice)
                try:
                    for grouping in groupings:
                        logger.info('grouping: %s' % grouping)
                        try:
                            calc_group = calc_grouping(grouping)
                            logger.info('calc_group: %s' % calc_group)
                            for polygon in polygons:
                                try:
                                    # domain token of the DRS key, e.g. 'EUR' from 'EUR-44'
                                    domain = key.split('_')[1].split('-')[0]
                                    if polygon == None:
                                        if prefix == None:
                                            prefix = key.replace(variable, indice).replace('_day_', '_%s_' % grouping)
                                        geom = None
                                        ugid = None
                                    else:
                                        if prefix == None:
                                            prefix = key.replace(variable, indice).replace('_day_', '_%s_' % grouping).replace(domain, polygon)
                                        geom = get_geom(polygon=polygon)
                                        ugid = get_ugid(polygons=polygon, geom=geom)
                                    if indice == 'TGx':
                                        # yearly/grouped maximum of the daily values
                                        calc = [{'func': 'max', 'name': 'TGx'}]
                                        tmp = ocgis_module.call(resource=ncs,  # conform_units_to='celcius',
                                                                variable=variable, dimension_map=dimension_map,
                                                                calc=calc, calc_grouping=calc_group, prefix=prefix,
                                                                dir_output=dir_output, geom=geom, select_ugid=ugid)
                                    elif indice == 'TGn':
                                        # yearly/grouped minimum of the daily values
                                        calc = [{'func': 'min', 'name': 'TGn'}]
                                        tmp = ocgis_module.call(resource=ncs,  # conform_units_to='celcius',
                                                                variable=variable, dimension_map=dimension_map,
                                                                calc=calc, calc_grouping=calc_group, prefix=prefix,
                                                                dir_output=dir_output, geom=geom, select_ugid=ugid)
                                    elif indice == 'TGx5day':
                                        # two-step: 5-day moving mean to a temp file,
                                        # then the grouped maximum of that series
                                        calc = [{'func': 'moving_window', 'name': 'TGx5day', 'kwds': {'k': 5, 'operation': 'mean', 'mode': 'same'}}]
                                        tmp2 = ocgis_module.call(resource=ncs,  # conform_units_to='celcius',
                                                                 variable=variable, dimension_map=dimension_map,
                                                                 calc=calc, prefix=str(uuid.uuid4()),
                                                                 geom=geom, select_ugid=ugid)
                                        calc = [{'func': 'max', 'name': 'TGx5day'}]
                                        logger.info('moving window calculated : %s' % tmp2)
                                        tmp = ocgis_module.call(resource=tmp2,
                                                                variable=indice, dimension_map=dimension_map,
                                                                calc=calc, calc_grouping=calc_group, prefix=prefix,
                                                                dir_output=dir_output)
                                        # clean up the intermediate moving-window file
                                        remove(tmp2)
                                    elif indice == 'TGn5day':
                                        # two-step: 5-day moving mean, then grouped minimum
                                        calc = [{'func': 'moving_window', 'name': 'TGn5day', 'kwds': {'k': 5, 'operation': 'mean', 'mode': 'same'}}]
                                        tmp2 = ocgis_module.call(resource=ncs,  # conform_units_to='celcius',
                                                                 variable=variable, dimension_map=dimension_map,
                                                                 calc=calc, prefix=str(uuid.uuid4()),
                                                                 geom=geom, select_ugid=ugid)
                                        calc = [{'func': 'min', 'name': 'TGn5day'}]
                                        logger.info('moving window calculated : %s' % tmp2)
                                        tmp = ocgis_module.call(resource=tmp2,
                                                                variable=indice, dimension_map=dimension_map,
                                                                calc=calc, calc_grouping=calc_group, prefix=prefix,
                                                                dir_output=dir_output)
                                        remove(tmp2)
                                    else:
                                        logger.error('Indice %s is not a known inidce' % (indice))
                                    # NOTE(review): if the indice was unknown, `tmp` here
                                    # still holds the previous iteration's value (or is
                                    # undefined) -- the surrounding except absorbs that
                                    outputs.append(tmp)
                                    logger.info('indice file calcualted %s ' % (tmp))
                                except Exception as e:
                                    logger.exception('could not calc indice %s for key %s, polygon %s and calc_grouping %s : %s' % (indice, key, polygon, grouping, e))
                        except Exception as e:
                            # NOTE(review): message says calc_grouping but passes `polygon`
                            logger.exception('could not calc indice %s for key %s and calc_grouping %s : %s' % (indice, key, polygon, e))
                except Exception as e:
                    logger.exception('could not calc indice %s for key %s: %s' % (indice, key, e))
        except Exception as e:
            logger.exception('could not calc key %s: %s' % (key, e))
    return outputs
def calc_indice_percentile(resources=[], variable=None, prefix=None, indices='TG90p', refperiod=None,
                           groupings='yr', polygons=None, percentile=90, mosaic=False,
                           dir_output=None, dimension_map=None):
    """
    Calculates percentile-based indices for suitable files in the appropriate
    time grouping and polygon.

    :param resources: list of filenames in data reference syntax (DRS) convention (netcdf)
    :param variable: variable name to be selected in the netcdf file (default=None: auto-detected)
    :param prefix: filename prefix (NOTE(review): currently unused in the body -- confirm intent)
    :param indices: list of indices (default='TG90p')
    :param refperiod: reference period as 'YYYYMMDD-YYYYMMDD' string (None: full period)
    :param groupings: indices time aggregation (default='yr')
    :param polygons: polygons to clip to before computation (default=None: no clipping)
    :param percentile: percentile threshold (default=90)
    :param mosaic: merge polygon clips into one mosaic (default=False)
    :param dir_output: output directory for result files (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with the calculated indices
    """
    from os.path import join, dirname, exists
    from os import remove
    import uuid
    from numpy import ma
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.subset import clipping
    from flyingpigeon.utils import get_values, get_time

    # normalize scalar arguments to lists
    if type(resources) != list:
        resources = list([resources])
    if type(indices) != list:
        indices = list([indices])
    if type(groupings) != list:
        groupings = list([groupings])
    if type(refperiod) == list:
        refperiod = refperiod[0]

    if refperiod is not None:
        start = dt.strptime(refperiod.split('-')[0], '%Y%m%d')
        end = dt.strptime(refperiod.split('-')[1], '%Y%m%d')
        time_range = [start, end]
    else:
        time_range = None

    if dir_output is not None:
        if not exists(dir_output):
            # makedirs is expected at module level -- TODO confirm it is imported
            makedirs(dir_output)

    #################################################
    # Compute a custom percentile basis using ICCLIM.
    #################################################
    from ocgis.contrib import library_icclim as lic
    nc_indices = []
    nc_dic = sort_by_filename(resources)

    for grouping in groupings:
        calc_group = calc_grouping(grouping)
        for key in nc_dic.keys():
            resource = nc_dic[key]
            if variable is None:
                variable = get_variable(resource)
            # subset the reference period (and optionally clip to polygons)
            if polygons is None:
                nc_reference = call(resource=resource, prefix=str(uuid.uuid4()),
                                    time_range=time_range, output_format='nc',
                                    dir_output=dir_output)
            else:
                nc_reference = clipping(resource=resource, prefix=str(uuid.uuid4()),
                                        time_range=time_range, output_format='nc',
                                        polygons=polygons, dir_output=dir_output,
                                        mosaic=mosaic)

            arr = ma.masked_array(get_values(resource=nc_reference))
            dt_arr = ma.masked_array(get_time(resource=nc_reference))
            window_width = 5

            for indice in indices:
                # e.g. 'TG_90p' -> 'TG90p'
                name = indice.replace('_', str(percentile))
                var = indice.split('_')[0]

                operation = None
                if 'T' in var:
                    # percentiles >= median map to the 90p operators, below to
                    # the 10p operators (e.g. icclim_TG90p / icclim_TG10p)
                    if percentile >= 50:
                        operation = 'Icclim%s90p' % var
                        func = 'icclim_%s90p' % var  # icclim_TG90p
                    else:
                        operation = 'Icclim%s10p' % var
                        func = 'icclim_%s10p' % var
                if operation is None:
                    # FIX: previously fell through and crashed with a TypeError
                    # ('x in None') in the dir() filter below
                    raise Exception('unsupported percentile indice %s' % indice)

                ################################
                # load the appropriate operation
                ################################
                ops = [op for op in dir(lic) if operation in op]
                if len(ops) == 0:
                    # FIX: was raise Exception("...%s", operation) -- the comma
                    # made the message an unformatted tuple
                    raise Exception('operator does not exist %s' % operation)
                # FIX: replaced string-built `exec` with getattr -- same effect,
                # no dynamic code execution
                percentile_dict = getattr(lic, ops[0]).get_percentile_dict(arr, dt_arr, percentile, window_width)

                calc = [{'func': func, 'name': name, 'kwds': {'percentile_dict': percentile_dict}}]
                out_prefix = key.replace(variable, name).replace('_day_', '_%s_' % grouping)
                if polygons is None:
                    nc_indices.append(call(resource=resource, prefix=out_prefix,
                                           calc=calc, calc_grouping=calc_group,
                                           output_format='nc', dir_output=dir_output))
                else:
                    nc_indices.extend(clipping(resource=resource, prefix=out_prefix,
                                               calc=calc, calc_grouping=calc_group,
                                               output_format='nc', dir_output=dir_output,
                                               polygons=polygons, mosaic=mosaic,
                                               ))
    return nc_indices

# NOTE(review): a fully commented-out duplicate of calc_indice_unconventional that
# trailed this function was removed; the live implementation exists in this file.
def calc_indice_simple(resource=[], variable=None, prefix=None, indice='SU',
                       polygons=None, mosaic=False, grouping='yr',
                       dir_output=None, dimension_map=None, memory_limit=None):
    """
    Calculates a given simple indice for suitable files in the appropriate
    time grouping and polygon.

    :param resource: list of filenames in data reference syntax (DRS) convention (netcdf)
    :param variable: variable name to be selected in the netcdf file (default=None: auto-detected)
    :param prefix: output filename prefix (NOTE(review): always overwritten from the
                   dataset key below -- confirm intent)
    :param indice: indice name (default='SU')
    :param polygons: list of polygons (default=None: no clipping)
    :param mosaic: merge polygon clips into one mosaic (default=False)
    :param grouping: indices time aggregation (default='yr')
    :param dir_output: output directory for result files (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)
    :param memory_limit: memory limit forwarded to the ocgis call

    :return: list of netcdf files with calculated indices, or None if nothing was calculated
    """
    from os.path import join, dirname, exists
    from flyingpigeon import ocgis_module
    from flyingpigeon.subset import clipping
    import uuid

    if type(resource) != list:
        resource = list([resource])
    # FIX: condition was `polygons is None`, which wrapped None into [None] and
    # left a single polygon string unwrapped; the intent is to wrap a scalar.
    if type(polygons) != list and polygons is not None:
        polygons = list([polygons])

    if dir_output is not None:
        if not exists(dir_output):
            makedirs(dir_output)

    datasets = sort_by_filename(resource).keys()
    if len(datasets) == 1:  # FIX: was `is 1` (identity check on an int)
        key = datasets[0]
    else:
        LOGGER.warning('more than one dataset in resource')
        # FIX: `key` was left undefined here and crashed (silently, inside the
        # bare excepts) further down; fall back to the first dataset key.
        key = datasets[0] if datasets else None

    output = None
    outputs = []

    if variable is None:
        variable = get_variable(resource)
        LOGGER.debug('Variable detected % s ' % variable)

    try:
        # icclim can't handle 'kg m2 sec'; precipitation needs to be 'mm/day'
        if variable == 'pr':
            calc = 'pr=pr*86400'
            ncs = ocgis_module.call(resource=resource,
                                    variable=variable,
                                    dimension_map=dimension_map,
                                    calc=calc,
                                    memory_limit=memory_limit,
                                    prefix=str(uuid.uuid4()),
                                    dir_output=dir_output,
                                    output_format='nc')
        else:
            ncs = resource
        try:
            calc = [{'func': 'icclim_' + indice, 'name': indice}]
            LOGGER.info('calc: %s' % calc)
            try:
                calc_group = calc_grouping(grouping)
                LOGGER.info('calc_group: %s' % calc_group)
                if polygons is None:
                    try:
                        prefix = key.replace(variable, indice).replace('_day_', '_%s_' % grouping)
                        LOGGER.debug(' **** dir_output = %s ' % dir_output)
                        tmp = ocgis_module.call(resource=ncs,
                                                variable=variable,
                                                dimension_map=dimension_map,
                                                calc=calc,
                                                calc_grouping=calc_group,
                                                prefix=prefix,
                                                dir_output=dir_output,
                                                output_format='nc')
                        if len(tmp) != 0:  # FIX: was `is not 0`
                            outputs.extend(tmp)
                        else:
                            msg = 'could not calc indice %s for domain ' % (indice)
                            LOGGER.exception(msg)
                    except Exception:
                        # FIX: message had two placeholders but one argument and
                        # raised a TypeError inside the handler itself
                        msg = 'could not calc indice %s for domain' % (indice)
                        LOGGER.exception(msg)
                else:
                    try:
                        prefix = key.replace(variable, indice).replace('_day_', '_%s_' % grouping)
                        tmp = clipping(resource=ncs,
                                       variable=variable,
                                       dimension_map=dimension_map,
                                       calc=calc,
                                       calc_grouping=calc_group,
                                       prefix=prefix,
                                       polygons=polygons,
                                       mosaic=mosaic,
                                       dir_output=dir_output,
                                       output_format='nc')
                        if len(tmp) != 0:
                            outputs.extend(tmp)
                        else:
                            msg = 'could not calc clipped indice %s ' % (indice)
                            LOGGER.exception(msg)
                    except Exception:
                        msg = 'could not calc indice %s for domain' % (indice)
                        LOGGER.debug(msg)
                        # raise Exception(msg)
                LOGGER.info('indice file calculated: %s' % tmp)
            except Exception:
                # FIX: message had three placeholders but two arguments
                msg = 'could not calc indice %s for grouping %s' % (indice, grouping)
                LOGGER.exception(msg)
                # raise Exception(msg)
        except Exception:
            msg = 'could not calc indice %s ' % (indice)
            LOGGER.exception(msg)
            # raise Exception(msg)
    except Exception:
        msg = 'could not calculate indices'
        LOGGER.exception(msg)
        # raise Exception(msg)

    LOGGER.info('indice outputs %s ' % outputs)

    if not outputs:  # FIX: was `len(outputs) is 0`
        LOGGER.debug('No indices are calculated')
        return None
    return outputs
def get_indices(resource, indices):
    """
    Calculates indices (netCDF files) defined in _SDMINDICES_.

    Each input file is first masked to land area (mask found via ESGF), the
    masked files are grouped into one dataset, and every requested indice whose
    variable matches the dataset's variable is computed with calc_indice_simple.

    :param resource: files containing one dataset
    :param indices: list of indices defined in _SDMINDICES_, formatted as
                    '<name>_<month-grouping>'. Index needs to be based on the
                    resource variable.

    :return list: list of filepaths to netCDF files
    """
    from flyingpigeon.utils import sort_by_filename, calc_grouping, drs_filename, unrotate_pole, get_variable
    # from flyingpigeon.ocgis_module import call
    from flyingpigeon.indices import indice_variable, calc_indice_simple
    from flyingpigeon.subset import masking
    from flyingpigeon.utils import searchfile
    from flyingpigeon.utils import search_landsea_mask_by_esgf
    from os.path import basename

    # names = [drs_filename(nc, skip_timestamp=False, skip_format=False,
    #                       variable=None, rename_file=True, add_file_path=True) for nc in resources]

    variable = get_variable(resource)

    masked_datasets = []
    max_count = len(resource)

    for ds in resource:
        ds_name = basename(ds)
        LOGGER.debug('masking dataset: %s', ds_name)
        try:
            # locate the matching land/sea mask for this dataset via ESGF
            landsea_mask = search_landsea_mask_by_esgf(ds)
            LOGGER.debug("using landsea_mask: %s", landsea_mask)
            prefix = ds_name.replace('.nc', '')
            new_ds = masking(ds, landsea_mask, land_area=True, prefix=prefix)
            masked_datasets.append(new_ds)
        except:
            # one failure aborts the masking loop entirely (break below)
            LOGGER.exception("Could not subset dataset.")
            break
    else:
        # for/else: only reached when the loop completed without break
        LOGGER.info("masked: %d/%d", len(masked_datasets), max_count)

    if not masked_datasets:
        raise Exception("Could not mask input files.")

    ncs = sort_by_filename(masked_datasets, historical_concatination=True)
    # NOTE(review): Python-2 idiom -- dict.keys()[0] fails on Python 3
    key = ncs.keys()[0]
    ncs_indices = []
    LOGGER.info('resources sorted found %s datasets', len(ncs.keys()))
    for indice in indices:
        try:
            # indices are encoded as '<name>_<month>', e.g. 'TG_JJA'
            name, month = indice.split('_')
            # print name, month , variable
            if variable == indice_variable(name):
                LOGGER.info('calculating indice %s ' % indice)
                prefix = key.replace(variable, name).replace('_day_', '_%s_' % month)
                # NOTE(review): the unmasked `resource` is passed here, not the
                # masked datasets -- confirm whether that is intended
                nc = calc_indice_simple(resource=resource,
                                        variable=variable,
                                        # polygons=['Europe', 'Africa', 'Asia', 'North America', 'Oceania',
                                        #           'South America', 'Antarctica'],
                                        # mosaic=True,
                                        prefix=prefix, indice=name, grouping=month)

                if nc is not None:
                    # coords = unrotate_pole(nc[0], write_to_file=True)
                    ncs_indices.append(nc[0])
                    LOGGER.info('Successful calculated indice %s %s' % (key, indice))
                else:
                    msg = 'failed to calculate indice %s %s' % (key, indice)
                    LOGGER.exception(msg)
        except:
            msg = 'failed to calculate indice %s %s' % (key, indice)
            LOGGER.exception(msg)
    return ncs_indices
def uncertainty(resouces, variable=None, ylim=None, title=None, file_extension='png', window=None):
    """
    Creates a png file containing the appropriate uncertainty plot.

    :param resouces: list of files containing the same variable
    :param variable: variable to be visualised. If None (default), variable will be detected
    :param ylim: y-axis limits forwarded to matplotlib (default=None)
    :param title: string to be used as title
    :param file_extension: output image format (default='png')
    :param window: window size of the rolling mean; if None it is derived from
                   the data frequency

    :returns str: path/to/file.png
    """
    LOGGER.debug('Start visualisation uncertainty plot')

    import pandas as pd
    import numpy as np
    from os.path import basename
    from flyingpigeon.utils import get_time, sort_by_filename
    from flyingpigeon.calculation import fieldmean
    from flyingpigeon.metadata import get_frequency

    # === prepare environment
    if type(resouces) == str:
        resouces = list([resouces])
    if variable is None:
        variable = utils.get_variable(resouces[0])
    if title is None:
        title = "Field mean of %s " % variable

    try:
        fig = plt.figure(figsize=(20, 10), facecolor='w', edgecolor='k')  # dpi=600,
        #  variable = utils.get_variable(resouces[0])
        df = pd.DataFrame()

        LOGGER.info('variable %s found in resources.' % variable)
        datasets = sort_by_filename(resouces, historical_concatination=True)

        # one field-mean timeseries per dataset, collected as DataFrame columns
        for key in datasets.keys():
            try:
                data = fieldmean(datasets[key])  # get_values(f)
                ts = get_time(datasets[key])
                ds = pd.Series(data=data, index=ts, name=key)
                # ds_yr = ds.resample('12M', ).mean()   # yearly mean loffset='6M'
                df[key] = ds
            except Exception:
                LOGGER.exception('failed to calculate timeseries for %s ' % (key))

        frq = get_frequency(resouces[0])
        # FIX: replaced a stray Python-2 `print frq` debug statement with a logger call
        LOGGER.debug('frequency: %s' % frq)

        if window is None:
            # window sizes approximate 30 years at the given frequency
            # NOTE(review): 'man' and 'sem' look like project-specific frequency
            # codes (presumably monthly / semestral) -- confirm against get_frequency
            if frq == 'day':
                window = 10951
            elif frq == 'man':
                window = 359
            elif frq == 'sem':
                window = 119
            elif frq == 'yr':
                window = 30
            else:
                LOGGER.debug('frequency %s is not included' % frq)
                window = 30

        if len(df.index.values) >= window * 2:
            # TODO: calculate windowsize according to timestapms (day,mon,yr ... with get_frequency)
            df_smooth = df.rolling(window=window, center=True).mean()
            LOGGER.info('rolling mean calculated for all input data')
        else:
            df_smooth = df
            LOGGER.debug('timeseries too short for moving mean')
            fig.text(0.95, 0.05, '!!! timeseries too short for moving mean over 30years !!!',
                     fontsize=20, color='red', ha='right', va='bottom', alpha=0.5)

        try:
            # ensemble median and quantile bands across the dataset columns
            rmean = df_smooth.quantile([0.5], axis=1,)  # df_smooth.median(axis=1)
            # skipna=False  quantile([0.5], axis=1, numeric_only=False )
            q05 = df_smooth.quantile([0.10], axis=1,)  # numeric_only=False)
            q33 = df_smooth.quantile([0.33], axis=1,)  # numeric_only=False)
            q66 = df_smooth.quantile([0.66], axis=1, )  # numeric_only=False)
            q95 = df_smooth.quantile([0.90], axis=1, )  # numeric_only=False)
            LOGGER.info('quantile calculated for all input data')
        except Exception:
            LOGGER.exception('failed to calculate quantiles')

        try:
            plt.fill_between(df_smooth.index.values, np.squeeze(q05.values), np.squeeze(q95.values),
                             alpha=0.5, color='grey')
            plt.fill_between(df_smooth.index.values, np.squeeze(q33.values), np.squeeze(q66.values),
                             alpha=0.5, color='grey')
            plt.plot(df_smooth.index.values, np.squeeze(rmean.values), c='r', lw=3)

            plt.xlim(min(df.index.values), max(df.index.values))
            plt.ylim(ylim)
            plt.title(title, fontsize=20)
            plt.grid()  # .grid_line_alpha=0.3

            output_png = fig2plot(fig=fig, file_extension=file_extension)
            plt.close()
            LOGGER.debug('timeseries uncertainty plot done for %s' % variable)
        except Exception as err:
            # FIX: `err.message` is unreliable (not defined for every exception
            # type); use str(err) instead
            raise Exception('failed to calculate quantiles. %s' % str(err))
    except Exception:
        # best effort: return an (empty) temp png so callers still get a path
        LOGGER.exception('uncertainty plot failed for %s.' % variable)
        _, output_png = mkstemp(dir='.', suffix='.png')
    return output_png
# --- ad-hoc exploration script: field mean of a local AFR-44 tas archive ---
from os import listdir
from os.path import join
from flyingpigeon import utils
from flyingpigeon import metadata as md
from pandas import DataFrame
from flyingpigeon import calculation as cal

# hard-coded local data path; one model run is excluded from the file list
p = '/home/nils/data/AFR-44/tas/'

ncs = [
    join(p, nc) for nc in listdir(p)
    if not 'tas_AFR-44_MOHC-HadGEM2-ES_historical_r1i1p1_KNMI-RACMO22T_v2_day' in nc
]

# group files into datasets and compute time axis + field mean of the first one
# NOTE(review): dict.keys()[0] is Python-2 only -- fails on Python 3
ncs_dic = utils.sort_by_filename(ncs)
ts = utils.get_time(ncs_dic[ncs_dic.keys()[0]])
data = cal.fieldmean(ncs_dic[ncs_dic.keys()[0]])
def execute(self):
    """
    WPS process entry point: weather-regime clustering.

    Reads the process inputs (resources, method, time_region, BBox), merges and
    subsets each dataset with cdo, computes a PCA, then clusters with tSNE
    and/or kMEAN, writing cluster plots, pressure maps and a tar archive of
    per-dataset cluster assignments to the process outputs.
    """
    logger.info('Start process')

    try:
        logger.info('read in the arguments')
        resources = self.getInputValues(identifier='resources')
        method = self.getInputValues(identifier='method')
        time_region = self.getInputValues(identifier='time_region')[0]
        bbox = self.getInputValues(identifier='BBox')[0]
        logger.info('bbox %s' % str(bbox))
        logger.info('time_region %s' % str(time_region))
        logger.info('method: %s' % str(method))
    except Exception as e:
        # NOTE(review): if this fails, `bbox` etc. are undefined and the next
        # line raises NameError -- the error is only logged, not re-raised
        logger.error('failed to read in the arguments %s ' % e)

    # bbox = '-80,22.5,50,70'
    logger.info('bbox is set to %s' % bbox)

    #####################
    # get the required bbox from resource
    #####################
    # from flyingpigeon.ocgis_module import call

    from flyingpigeon.utils import sort_by_filename, get_time  # , calc_grouping
    from flyingpigeon import weatherclass as wc
    from flyingpigeon.visualisation import plot_tSNE, plot_kMEAN, concat_images, plot_pressuremap

    from datetime import datetime as dt
    from numpy import savetxt, column_stack
    import tarfile

    from cdo import *
    cdo = Cdo()

    # grouping = calc_grouping(time_region)
    ncs = sort_by_filename(resources, historical_concatination=True)

    png_clusters = []
    txt_info = []
    png_pressuremaps = []

    try:
        # open tar file collecting the per-dataset cluster-assignment text files
        tar_info = tarfile.open('info.tar', "w")
        logger.info('tar files prepared')
    except:
        msg = 'tar file preparation failed'
        logger.exception(msg)
        raise Exception(msg)

    for key in ncs.keys():
        # merge multi-file datasets into one netcdf (note: `input` shadows the builtin)
        if len(ncs[key]) > 1:
            input = cdo.timmerge(input=ncs[key], output='merge.nc')
        elif len(ncs[key]) == 1:
            input = ncs[key]
        else:
            # NOTE(review): `input` keeps its previous value in this branch
            logger.debug('invalid number of input files for dataset %s' % key)

        # for tr in time_region:
        # select the requested months (time_region arrives as a string; 'None' disables it)
        if not time_region == 'None':
            nc_grouped = cdo.selmon(time_region, input=input, output='grouped.nc')
        else:
            nc_grouped = input

        # for bb in bbox:
        nc = cdo.sellonlatbox('%s' % bbox, input=nc_grouped, output='subset.nc')
        logger.info('nc subset: %s ' % nc)

        try:
            vals, pca = wc.get_pca(nc)
            logger.info('PCa calculated')
        except:
            logger.debug('failed to calculate PCs')
            raise

        for md in method:
            try:
                if md == 'tSNE':
                    data = wc.calc_tSNE(pca)
                    png_clusters.append(plot_tSNE(data, title='tSNE month: %s [lonlat: %s]' % (time_region, bbox),
                                                  sub_title='file: %s' % key))
                    logger.info('tSNE calculated for %s ' % key)
                if md == 'kMEAN':
                    kmeans = wc.calc_kMEAN(pca)
                    c = kmeans.predict(pca)
                    times = get_time(nc)
                    # NOTE(review): format '%Y-%d-%m_...' puts day before month -- confirm intended
                    timestr = [dt.strftime(t, format='%Y-%d-%m_%H:%M:%S') for t in times]
                    tc = column_stack([timestr, c])
                    fn = '%s.txt' % key
                    savetxt(fn, tc, fmt='%s', header='Date_Time WeatherRegime')
                    tar_info.add(fn)  # , arcname = basename(nc)
                    png_clusters.append(plot_kMEAN(kmeans, pca, title='kMEAN month: %s [lonlat: %s]' % (time_region, bbox),
                                                   sub_title='file: %s' % key))
                    logger.info('kMEAN calculated for %s ' % key)

                    # one pressure map per cluster (4 clusters assumed), values in hPa
                    subplots = []
                    for i in range(4):
                        subplots.append(plot_pressuremap((vals[c == i] / 100),
                                                         title='Weather Regime %s: Month %s ' % (i, time_region),
                                                         sub_title='file: %s' % key))

                    from PIL import Image
                    import sys
                    from tempfile import mkstemp
                    # NOTE(review): indexing map() requires Python 2 (returns a list there)
                    open_subplots = map(Image.open, subplots)
                    w = max(i.size[0] for i in open_subplots)
                    h = max(i.size[1] for i in open_subplots)

                    # paste the 4 subplots into a 2x2 composite image
                    result = Image.new("RGB", (w * 2, h * 2))
                    # p = h / len(open_subplots)
                    c = 0
                    for i, iw in enumerate([0, w]):
                        for j, jh in enumerate([0, h]):
                            oi = open_subplots[c]
                            c = c + 1
                            cw = oi.size[0]
                            ch = oi.size[1]
                            box = [iw, jh, iw + cw, jh + ch]
                            result.paste(oi, box=box)

                    ip, pressuremap = mkstemp(dir='.', suffix='.png')
                    result.save(pressuremap)
                    png_pressuremaps.append(pressuremap)
            except:
                logger.debug('faild to calculate cluster for %s' % key)
                raise

    c_clusters = concat_images(png_clusters)
    c_maps = concat_images(png_pressuremaps)

    try:
        tar_info.close()
        logger.info('tar files closed')
    except Exception as e:
        logger.exception('tar file closing failed')

    # call
    # self.output_nc.setValue( nc )
    self.output_clusters.setValue(c_clusters)
    self.output_maps.setValue(c_maps)
    self.output_info.setValue('info.tar')
def calc_indice_percentile(resources=[], variable=None, prefix=None, indices='TG90p', refperiod=None,
                           groupings='yr', polygons=None, percentile=90, mosaic=False,
                           dir_output=None, dimension_map=None):
    """
    Calculates percentile-based indices for suitable files in the appropriate
    time grouping and polygon.

    :param resources: list of filenames in data reference syntax (DRS) convention (netcdf)
    :param variable: variable name to be selected in the netcdf file (default=None: auto-detected)
    :param prefix: filename prefix (NOTE(review): currently unused in the body -- confirm intent)
    :param indices: list of indices (default='TG90p')
    :param refperiod: reference period as 'YYYYMMDD-YYYYMMDD' string (None: full period)
    :param groupings: indices time aggregation (default='yr')
    :param polygons: polygons to clip to before computation (default=None: no clipping)
    :param percentile: percentile threshold (default=90)
    :param mosaic: merge polygon clips into one mosaic (default=False)
    :param dir_output: output directory for result files (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with the calculated indices
    """
    from os.path import join, dirname, exists
    from os import remove
    import uuid
    from numpy import ma
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.subset import clipping
    from flyingpigeon.utils import get_values, get_time

    # normalize scalar arguments to lists
    if type(resources) != list:
        resources = list([resources])
    if type(indices) != list:
        indices = list([indices])
    if type(groupings) != list:
        groupings = list([groupings])
    if type(refperiod) == list:
        refperiod = refperiod[0]

    if refperiod is not None:
        start = dt.strptime(refperiod.split('-')[0], '%Y%m%d')
        end = dt.strptime(refperiod.split('-')[1], '%Y%m%d')
        time_range = [start, end]
    else:
        time_range = None

    if dir_output is not None:
        if not exists(dir_output):
            # makedirs is expected at module level -- TODO confirm it is imported
            makedirs(dir_output)

    #################################################
    # Compute a custom percentile basis using ICCLIM.
    #################################################
    from ocgis.contrib import library_icclim as lic
    nc_indices = []
    nc_dic = sort_by_filename(resources)

    for grouping in groupings:
        calc_group = calc_grouping(grouping)
        for key in nc_dic.keys():
            resource = nc_dic[key]
            if variable is None:
                variable = get_variable(resource)
            # subset the reference period (and optionally clip to polygons)
            if polygons is None:
                nc_reference = call(resource=resource, prefix=str(uuid.uuid4()),
                                    time_range=time_range, output_format='nc',
                                    dir_output=dir_output)
            else:
                nc_reference = clipping(resource=resource, prefix=str(uuid.uuid4()),
                                        time_range=time_range, output_format='nc',
                                        polygons=polygons, dir_output=dir_output,
                                        mosaic=mosaic)

            arr = ma.masked_array(get_values(resource=nc_reference))
            dt_arr = ma.masked_array(get_time(resource=nc_reference))
            window_width = 5

            for indice in indices:
                # e.g. 'TG_90p' -> 'TG90p'
                name = indice.replace('_', str(percentile))
                var = indice.split('_')[0]

                operation = None
                if 'T' in var:
                    # percentiles >= median map to the 90p operators, below to
                    # the 10p operators (e.g. icclim_TG90p / icclim_TG10p)
                    if percentile >= 50:
                        operation = 'Icclim%s90p' % var
                        func = 'icclim_%s90p' % var  # icclim_TG90p
                    else:
                        operation = 'Icclim%s10p' % var
                        func = 'icclim_%s10p' % var
                if operation is None:
                    # FIX: previously fell through and crashed with a TypeError
                    # ('x in None') in the dir() filter below
                    raise Exception('unsupported percentile indice %s' % indice)

                ################################
                # load the appropriate operation
                ################################
                ops = [op for op in dir(lic) if operation in op]
                if len(ops) == 0:
                    # FIX: was raise Exception("...%s", operation) -- the comma
                    # made the message an unformatted tuple
                    raise Exception('operator does not exist %s' % operation)
                # FIX: replaced string-built `exec` with getattr -- same effect,
                # no dynamic code execution
                percentile_dict = getattr(lic, ops[0]).get_percentile_dict(arr, dt_arr, percentile, window_width)

                calc = [{'func': func, 'name': name, 'kwds': {'percentile_dict': percentile_dict}}]
                out_prefix = key.replace(variable, name).replace('_day_', '_%s_' % grouping)
                if polygons is None:
                    nc_indices.append(call(resource=resource, prefix=out_prefix,
                                           calc=calc, calc_grouping=calc_group,
                                           output_format='nc', dir_output=dir_output))
                else:
                    nc_indices.extend(clipping(resource=resource, prefix=out_prefix,
                                               calc=calc, calc_grouping=calc_group,
                                               output_format='nc', dir_output=dir_output,
                                               polygons=polygons, mosaic=mosaic,
                                               ))
    return nc_indices

# NOTE(review): a fully commented-out duplicate of calc_indice_unconventional that
# trailed this function was removed; the live implementation exists in this file.
def method_A(resource=[], start=None, end=None, timeslice=20,
             variable=None, title=None, cmap='seismic'):
    """Compute ensemble robustness (method A) for a set of climate simulations.

    The change signal is the difference between the mean of the last
    *timeslice* years and the mean of the first *timeslice* years of the
    ensemble mean; it is compared against the inter-model standard deviation
    to derive high/low agreement masks.

    :param resource: list of paths to netCDF files
    :param start: beginning of reference period (if None (default),
                  the first year of the consistent ensemble will be detected)
    :param end: end of comparison period (if None (default),
                the last year of the consistent ensemble will be detected)
    :param timeslice: period length for mean calculation of reference and comparison period
    :param variable: OBSOLETE
    :param title: str to be used as title for the signal mal
    :param cmap: define the color scheme for signal map plotting
    :return: signal.nc, low_agreement_mask.nc, high_agreement_mask.nc, text.txt
    """
    from os.path import split
    from tempfile import mkstemp
    from cdo import Cdo
    cdo = Cdo()
    cdo.forceOutput = True

    # preparing the resource
    try:
        file_dic = sort_by_filename(resource, historical_concatination=True)
        LOGGER.info('file names sorted experimets: %s' % len(file_dic.keys()))
    except Exception:
        msg = 'failed to sort the input files'
        LOGGER.exception(msg)

    # check that all datasets contain the same variable
    try:
        var_name = set()
        for key in file_dic.keys():
            var_name = var_name.union([get_variable(file_dic[key])])
        LOGGER.debug(var_name)
    except Exception:
        LOGGER.exception('failed to get the variable in common')

    if len(var_name) == 1:
        variable = [str(n) for n in var_name][0]
        LOGGER.info('varible %s detected in all members of the ensemble' % variable)
    else:
        raise Exception(
            'none or more than one variables are found in the ensemble members'
        )

    # TODO: drop missfitting grids

    # timemerge for seperate datasets
    try:
        mergefiles = []
        for key in file_dic.keys():
            try:
                if type(file_dic[key]) == list and len(file_dic[key]) > 1:
                    _, nc_merge = mkstemp(dir='.', suffix='.nc')
                    mergefiles.append(
                        cdo.mergetime(input=file_dic[key], output=nc_merge))
                else:
                    mergefiles.extend(file_dic[key])
            except Exception:
                LOGGER.exception('failed to merge files for %s ' % key)
        LOGGER.info('datasets merged %s ' % mergefiles)
    except Exception:
        msg = 'seltime and mergetime failed'
        LOGGER.exception(msg)

    # dataset documentation: record the source dataset keys
    try:
        text_src = open('infiles.txt', 'a')
        for key in file_dic.keys():
            text_src.write(key + '\n')
        text_src.close()
    except Exception:
        msg = 'failed to write source textfile'
        LOGGER.exception(msg)
        # fall back to an empty temp file so the return value is still a path
        _, text_src = mkstemp(dir='.', suffix='.txt')

    # configure reference and compare period
    # TODO: filter files by time
    try:
        # FIX: scan the ensemble whenever EITHER bound is missing.  The
        # original only filled en_set while start was None, so calling with
        # start set but end=None raised a NameError on en_set.
        if start is None or end is None:
            st_set = set()
            en_set = set()
            for f in mergefiles:
                times = get_time(f)
                st_set.update([times[0].year])
                en_set.update([times[-1].year])
            if start is None:
                # latest first year of all members -> common period start
                start = max(st_set)
            if end is None:
                # earliest last year of all members -> common period end
                end = min(en_set)
        LOGGER.info('Start and End: %s - %s ' % (start, end))
        if start >= end:
            LOGGER.error(
                'ensemble is inconsistent!!! start year is later than end year'
            )
    except Exception:
        msg = 'failed to detect start and end times of the ensemble'
        LOGGER.exception(msg)

    # set the periodes:
    try:
        LOGGER.debug(type(start))
        if timeslice is None:
            # default: one third of the common period, at least one year
            timeslice = int((end - start) / 3)
            if timeslice == 0:
                timeslice = 1
        else:
            timeslice = int(timeslice)
        start1 = start
        start2 = start1 + timeslice - 1
        end1 = end - timeslice + 1
        end2 = end
        LOGGER.info('timeslice and periodes set')
    except Exception:
        msg = 'failed to set the periodes'
        LOGGER.exception(msg)

    # subset every member to the common period
    try:
        files = []
        for i, mf in enumerate(mergefiles):
            files.append(
                cdo.selyear('{0}/{1}'.format(start1, end2),
                            input=[mf.replace(' ', '\ ')],
                            output='file_{0}_.nc'.format(i)))  # python version
        LOGGER.info('timeseries selected from defined start to end year')
    except Exception:
        msg = 'seltime and mergetime failed'
        LOGGER.exception(msg)

    try:
        # ensemble mean
        nc_ensmean = cdo.ensmean(input=files, output='nc_ensmean.nc')
        LOGGER.info('ensemble mean calculation done')
    except Exception:
        msg = 'ensemble mean failed'
        LOGGER.exception(msg)

    try:
        # ensemble std
        nc_ensstd = cdo.ensstd(input=files, output='nc_ensstd.nc')
        LOGGER.info('ensemble std and calculation done')
    except Exception:
        msg = 'ensemble std or failed'
        LOGGER.exception(msg)

    # signal: difference between the mean over the last and the first
    # timeslice years of the ensemble mean
    try:
        selyearstart = cdo.selyear('%s/%s' % (start1, start2),
                                   input=nc_ensmean,
                                   output='selyearstart.nc')
        selyearend = cdo.selyear('%s/%s' % (end1, end2),
                                 input=nc_ensmean,
                                 output='selyearend.nc')
        meanyearst = cdo.timmean(input=selyearstart, output='meanyearst.nc')
        meanyearend = cdo.timmean(input=selyearend, output='meanyearend.nc')
        signal = cdo.sub(input=[meanyearend, meanyearst], output='signal.nc')
        LOGGER.info('Signal calculation done')
    except Exception:
        msg = 'calculation of signal failed'
        LOGGER.exception(msg)
        _, signal = mkstemp(dir='.', suffix='.nc')

    # intermodel standard deviation (mean over whole period)
    try:
        std = cdo.timmean(input=nc_ensstd, output='std.nc')
        std2 = cdo.mulc('2', input=std, output='std2.nc')
        LOGGER.info('calculation of internal model std for time period done')
    except Exception:
        msg = 'calculation of internal model std failed'
        LOGGER.exception(msg)

    # robustness masks: |signal| > 2*std -> high agreement,
    #                   |signal| < std   -> low agreement / small signal
    try:
        absolut = cdo.abs(input=signal, output='absolut_signal.nc')
        high_agreement_mask = cdo.gt(
            input=[absolut, std2],
            output='large_change_with_high_model_agreement.nc')
        low_agreement_mask = cdo.lt(
            input=[absolut, std],
            output='small_signal_or_low_agreement_of_models.nc')
        LOGGER.info('high and low mask done')
    except Exception:
        msg = 'calculation of robustness mask failed'
        LOGGER.exception(msg)
        _, high_agreement_mask = mkstemp(dir='.', suffix='.nc')
        _, low_agreement_mask = mkstemp(dir='.', suffix='.nc')

    return signal, low_agreement_mask, high_agreement_mask, text_src
def get_segetalflora( resource=[], dir_output=".", culture_type="fallow", climate_type=2, region=None, dimension_map=None ):
    """productive worker for segetalflora jobs

    Pipeline: yearly-mean tas per experiment (clipped to Europe) ->
    ascii export via a cdo shell call -> segetal flora species-number
    netCDF per culture/climate combination -> ascii export + plot.

    :param resources: list of tas netCDF files. (Any time aggregation is possible)
    :param culture_type: Type of culture. Possible values are:
                         'fallow', 'intensive', 'extensive' (default:'fallow')
    :param climate_type: Type of climate: number 1 to 7 or 'all' (default: 2)
    :param region: Region for subset. If 'None' (default), the values will be calculated for Europe
    """
    from flyingpigeon.subset import clipping
    from flyingpigeon.utils import calc_grouping, sort_by_filename
    import os
    from os import remove
    from tempfile import mkstemp
    from ocgis import RequestDataset, OcgOperations
    from cdo import Cdo
    cdo = Cdo()

    # all output paths below are relative to dir_output
    if not os.path.exists(dir_output):
        os.makedirs(dir_output)
    os.chdir(dir_output)
    # outputs = []

    if region == None:
        region = "Europe"

    # accept scalars as well as lists for the combination loops below
    if not type(culture_type) == list:
        culture_type = list([culture_type])
    if not type(climate_type) == list:
        climate_type = list([climate_type])

    ncs = sort_by_filename(resource)
    print "%s experiments found" % (len(ncs))
    print "keys: %s " % (ncs.keys())

    # generate outfolder structure:
    dir_netCDF = "netCDF"
    dir_ascii = "ascii"
    dir_netCDF_tas = dir_netCDF + "/tas"
    dir_ascii_tas = dir_ascii + "/tas"

    if not os.path.exists(dir_netCDF):
        os.makedirs(dir_netCDF)
    if not os.path.exists(dir_ascii):
        os.makedirs(dir_ascii)
    if not os.path.exists(dir_netCDF_tas):
        os.makedirs(dir_netCDF_tas)
    if not os.path.exists(dir_ascii_tas):
        os.makedirs(dir_ascii_tas)

    # step 1: yearly mean tas, clipped to Europe, one file per experiment
    tas_files = []
    for key in ncs.keys():
        try:
            print "process %s" % (key)
            calc = [{"func": "mean", "name": "tas"}]
            calc_group = calc_grouping("yr")
            # NOTE(review): assumes the DRS key has at least 8 '_'-separated
            # fields, with the frequency at index 7 — verify against
            # sort_by_filename's key format.
            prefix = key.replace(key.split("_")[7], "yr")
            if not os.path.exists(os.path.join(dir_netCDF_tas, prefix + ".nc")):
                nc_tas = clipping(
                    resource=ncs[key],
                    variable="tas",
                    calc=calc,
                    dimension_map=dimension_map,
                    calc_grouping=calc_group,
                    prefix=prefix,
                    polygons="Europe",
                    dir_output=dir_netCDF_tas,
                )[0]
                print "clipping done for %s" % (key)
                if os.path.exists(os.path.join(dir_netCDF_tas, prefix + ".nc")):
                    tas_files.append(prefix)
                else:
                    print "clipping failed for %s: No output file exists" % (key)
            else:
                print "netCDF file already exists %s" % (key)
                nc_tas = os.path.join(dir_netCDF_tas, prefix + ".nc")
        except Exception as e:
            print "clipping failed for %s: %s" % (key, e)
        # step 2: export the clipped tas file to ascii table via cdo
        try:
            asc_tas = os.path.join(dir_ascii_tas, prefix + ".asc")
            if not os.path.exists(asc_tas):
                f, tmp = mkstemp(dir=os.curdir, suffix=".asc")
                # make the temp path relative so the shell command stays short
                tmp = tmp.replace(os.path.abspath(os.curdir), ".")
                # cdo.outputtab('name,date,lon,lat,value', input = nc_tas , output = tmp)
                cmd = "cdo outputtab,name,date,lon,lat,value %s > %s" % (nc_tas, tmp)
                print cmd
                os.system(cmd)
                print ("tanslation to ascii done")
                remove_rows(tmp, asc_tas)
                remove(tmp)
                print ("rows with missing values removed")
            else:
                print ("tas ascii already exists")
            plot_ascii(asc_tas)
        except Exception as e:
            print "translation to ascii failed %s: %s" % (key, e)
            # NOTE(review): 'tmp' is unbound here when the failure happened
            # before mkstemp (or when the ascii file already existed) — this
            # cleanup can itself raise a NameError.
            if os.path.exists(tmp):
                remove(tmp)

    # step 3: apply the segetal flora equations to every yearly tas file
    tas_files = [os.path.join(dir_netCDF_tas, nc) for nc in os.listdir(dir_netCDF_tas)]
    outputs = []
    for name in tas_files:
        for cult in culture_type:
            for climat in climate_type:
                try:
                    calc = get_equation(culture_type=cult, climate_type=climat)
                    # NOTE(review): type(calc) != None is always True —
                    # 'calc is not None' was almost certainly intended, so the
                    # else-branch below is unreachable.
                    if type(calc) != None:
                        try:
                            var = "sf%s%s" % (cult, climat)
                            # NOTE(review): str.strip(".nc") strips the
                            # characters '.', 'n', 'c' from both ends, not the
                            # suffix — works here only by accident.
                            prefix = os.path.basename(name).replace("tas", var).strip(".nc")
                            infile = name  # os.path.join(dir_netCDF_tas,name+'.nc')
                            dir_sf = os.path.join(dir_netCDF, var)
                            if not os.path.exists(dir_sf):
                                os.makedirs(dir_sf)
                            if os.path.exists(os.path.join(dir_sf, prefix + ".nc")):
                                nc_sf = os.path.join(dir_sf, prefix + ".nc")
                                print "netCDF file already exists: %s %s " % (dir_sf, prefix)
                            else:
                                rd = RequestDataset(name, variable="tas", dimension_map=dimension_map)
                                op = OcgOperations(
                                    dataset=rd,
                                    calc=calc,
                                    prefix=prefix,
                                    output_format="nc",
                                    dir_output=dir_sf,
                                    add_auxiliary_files=False,
                                )
                                nc_sf = op.execute()
                                print "segetalflora done for %s" % (prefix)
                                outputs.append(prefix)
                            # step 4: ascii export of the segetalflora result
                            dir_ascii_sf = os.path.join(dir_ascii, var)
                            if not os.path.exists(dir_ascii_sf):
                                os.makedirs(dir_ascii_sf)
                            asc_sf = os.path.join(dir_ascii_sf, prefix + ".asc")
                            if not os.path.exists(asc_sf):
                                f, tmp = mkstemp(dir=os.curdir, suffix=".asc")
                                tmp = tmp.replace(os.path.abspath(os.curdir), ".")
                                # cdo.outputtab('name,date,lon,lat,value', input = nc_sf , output = tmp)
                                cmd = "cdo outputtab,name,date,lon,lat,value %s > %s" % (nc_sf, tmp)
                                os.system(cmd)
                                print ("translation to ascii done")
                                remove_rows(tmp, asc_sf)
                                remove(tmp)
                                print ("rows with missing values removed")
                            else:
                                print "ascii file already exists"
                            plot_ascii(asc_sf)
                        except Exception as e:
                            print "failed for ascii file: %s %s " % (name, e)
                            # NOTE(review): same unbound-'tmp' hazard as above.
                            if os.path.exists(tmp):
                                remove(tmp)
                    else:
                        print "NO EQUATION found for %s %s " % (cult, climat)
                except Exception as e:
                    print "Segetal flora failed: %s" % (e)
    return outputs
def _handler(self, request, response):
    """WPS process handler: calculate one climate indice per input dataset.

    Reads 'resource', 'indices', 'grouping' and the optional 'mosaic' /
    'region' inputs from *request*, runs calc_indice_simple for each
    sorted dataset, and fills the response outputs 'output_archive'
    (tar of all result files), 'ncout' (first non-empty result) and
    'output_log'.

    :param request: WPS request object
    :param response: WPS response object (mutated in place)
    :return: the updated response object
    """
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    try:
        resources = archiveextract(
            resource=rename_complexinputs(request.inputs['resource']))

        indices = [inpt.data for inpt in request.inputs['indices']]
        grouping = [inpt.data for inpt in request.inputs['grouping']]

        if 'mosaic' in request.inputs:
            mosaic = request.inputs['mosaic'][0].data
        else:
            mosaic = False

        if 'region' in request.inputs:
            region = [inpt.data for inpt in request.inputs['region']]
        else:
            region = None

        LOGGER.debug('grouping: {}'.format(grouping))
        LOGGER.debug('mosaic: {}'.format(mosaic))
        LOGGER.debug('indices: {}'.format(indices))
        LOGGER.debug('region: {}'.format(region))
        LOGGER.debug('Nr of input files: {}'.format(len(resources)))
    except Exception as ex:
        LOGGER.exception('failed to read in the arguments: {}'.format(str(ex)))

    response.update_status(
        'starting: indices={}, grouping={}, num_files={}'.format(
            indices, grouping, len(resources)), 2)

    from flyingpigeon.utils import sort_by_filename
    datasets = sort_by_filename(resources, historical_concatination=True)

    results = []
    try:
        # only the first grouping / indice is processed
        group = grouping[0]  # for group in grouping:
        indice = indices[0]  # for indice in indices:
        for key in datasets.keys():
            try:
                response.update_status(
                    'Dataset {}: {}'.format(len(results) + 1, key), 10)
                LOGGER.debug('grouping: {}'.format(grouping))
                LOGGER.debug('mosaic: {}'.format(mosaic))
                LOGGER.debug('indice: {}'.format(indice))
                LOGGER.debug('region: {}'.format(region))
                LOGGER.debug('Nr of input files: {}'.format(len(datasets[key])))

                result = calc_indice_simple(
                    resource=datasets[key],
                    mosaic=mosaic,
                    indice=indice,
                    polygons=region,
                    grouping=group,
                    # dir_output=path.curdir,
                )
                LOGGER.debug('result: {}'.format(result))
                results.extend(result)
            except Exception as ex:
                msg = 'failed for {}: {}'.format(key, str(ex))
                LOGGER.exception(msg)
                raise Exception(msg)
    except Exception as ex:
        msg = 'Failed to calculate indices: {}'.format(str(ex))
        LOGGER.exception(msg)
        raise Exception(msg)

    # if not results:
    #     raise Exception("failed to produce results")
    # response.update_status('num results %s' % len(results), 90)

    tarf = archive(results)
    response.outputs['output_archive'].file = tarf

    # FIX: the original assigned i = 'dummy.nc' and then indexed
    # results[i], which raises a TypeError whenever no valid result
    # exists.  Use the placeholder file directly instead.
    i = next((i for i, x in enumerate(results) if x), None)
    if i is None:
        response.outputs['ncout'].file = 'dummy.nc'
    else:
        response.outputs['ncout'].file = results[i]

    # response.update_status("done", 100)
    return response
def calc_indice_simple(resource=[], variable=None, prefix=None, indices=None,
                       polygons=None, mosaic=False, groupings='yr',
                       dir_output=None, dimension_map=None, memory_limit=None):
    """
    Calculates given simple indices for suitable files in the appropriate
    time grouping and polygon.

    :param resource: list of filenames in data reference syntax (DRS) convention (netcdf)
    :param variable: variable name to be selected in the in netcdf file (default=None)
    :param prefix: NOTE(review): accepted but not used — the output prefix is
                   always derived from the dataset key; kept for interface
                   compatibility
    :param indices: list of indices (default ='SU')
    :param polygons: list of polgons (default ='FRA')
    :param mosaic: if True, subsets are merged into one polygon mosaic
    :param groupings: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)
    :param memory_limit: memory limit passed through to the ocgis call
    :return: list of netcdf files with calculated indices. Files are saved into out_dir.
    """
    from os.path import join, dirname, exists
    from flyingpigeon import ocgis_module
    from flyingpigeon.subset import clipping
    import uuid

    # normalise scalar arguments to lists
    if not isinstance(resource, list):
        resource = [resource]
    if not isinstance(indices, list):
        indices = [indices]
    if polygons is not None and not isinstance(polygons, list):
        polygons = [polygons]
    if not isinstance(groupings, list):
        groupings = [groupings]

    if dir_output is not None and not exists(dir_output):
        makedirs(dir_output)

    experiments = sort_by_filename(resource)
    outputs = []

    for key in experiments:
        if variable is None:
            variable = get_variable(experiments[key][0])
        try:
            if variable == 'pr':
                # convert precipitation flux to per-day totals before the
                # icclim calculation
                calc = 'pr=pr*86400'
                ncs = ocgis_module.call(
                    resource=experiments[key],
                    variable=variable,
                    dimension_map=dimension_map,
                    calc=calc,
                    memory_limit=memory_limit,
                    prefix=str(uuid.uuid4()),
                    dir_output=dir_output,
                    output_format='nc')
            else:
                ncs = experiments[key]
            for indice in indices:
                logger.info('indice: %s' % indice)
                try:
                    calc = [{'func': 'icclim_' + indice, 'name': indice}]
                    logger.info('calc: %s' % calc)
                    for grouping in groupings:
                        logger.info('grouping: %s' % grouping)
                        try:
                            calc_group = calc_grouping(grouping)
                            logger.info('calc_group: %s' % calc_group)
                            # output name: swap variable for indice name and
                            # day-frequency for the grouping
                            out_prefix = key.replace(variable, indice).replace(
                                '_day_', '_%s_' % grouping)
                            if polygons is None:
                                try:
                                    tmp = ocgis_module.call(
                                        resource=ncs,
                                        variable=variable,
                                        dimension_map=dimension_map,
                                        calc=calc,
                                        calc_grouping=calc_group,
                                        prefix=out_prefix,
                                        dir_output=dir_output,
                                        output_format='nc')
                                    outputs.append(tmp)
                                    # FIX: log only on success — the original
                                    # logged 'tmp' after the except branch,
                                    # where it could be unbound (NameError).
                                    logger.info('indice file calculated: %s' % tmp)
                                except Exception:
                                    msg = 'could not calc indice %s for domain in %s' % (
                                        indice, key)
                                    logger.debug(msg)
                            else:
                                try:
                                    tmp = clipping(
                                        resource=ncs,
                                        variable=variable,
                                        dimension_map=dimension_map,
                                        calc=calc,
                                        calc_grouping=calc_group,
                                        prefix=out_prefix,
                                        polygons=polygons,
                                        mosaic=mosaic,
                                        dir_output=dir_output,
                                        output_format='nc')
                                    outputs.append(tmp)
                                    logger.info('indice file calculated: %s' % tmp)
                                except Exception:
                                    msg = 'could not calc indice %s for domain in %s' % (
                                        indice, key)
                                    logger.debug(msg)
                        except Exception:
                            msg = 'could not calc indice %s for key %s and grouping %s' % (
                                indice, key, grouping)
                            logger.debug(msg)
                except Exception:
                    msg = 'could not calc indice %s for key %s' % (indice, key)
                    logger.debug(msg)
        except Exception:
            msg = 'could not calc key %s' % key
            logger.debug(msg)
    logger.info('indice outputs %s ' % outputs)
    return outputs
def method_A(resource=[], start=None, end=None, timeslice=20,
             variable=None, title=None, cmap='seismic'):
    """returns the result

    Ensemble robustness: change signal between the first and last
    *timeslice* years of the common period, with agreement masks derived
    from the inter-model standard deviation, plus a map plot.

    :param resource: list of paths to netCDF files
    :param start: beginning of reference period (if None (default),
                  the first year of the consistent ensemble will be detected)
    :param end: end of comparison period (if None (default),
                the last year of the consistent ensemble will be detected)
    :param timeslice: period length for mean calculation of reference and comparison period
    :param variable: variable name to be detected in the netCDF file. If not set
                     (not recommended), the variable name will be detected
    :param title: str to be used as title for the signal mal
    :param cmap: define the color scheme for signal map plotting
    :return: signal.nc, low_agreement_mask.nc, high_agreement_mask.nc, graphic.png, text.txt
    """
    from os.path import split
    from cdo import Cdo
    cdo = Cdo()
    cdo.forceOutput = True

    try:
        # preparing the resource
        file_dic = sort_by_filename(resource, historical_concatination=True)
        logger.info('file names sorted experimets: %s' % len(file_dic.keys()))
    except Exception as e:
        msg = 'failed to sort the input files'
        logger.exception(msg)
        raise Exception(msg)

    # timemerge for multi-file experiments
    try:
        mergefiles = []
        for key in file_dic.keys():
            if type(file_dic[key]) == list and len(file_dic[key]) > 1:
                input = []
                for i in file_dic[key]:
                    print(i)
                    input.extend([i.replace(' ', '\\\ ')])
                mergefiles.append(
                    cdo.mergetime(input=input, output=key + '_mergetime.nc'))
            else:
                mergefiles.extend(file_dic[key])
        logger.info('datasets merged %s ' % mergefiles)
    except Exception as e:
        msg = 'seltime and mergetime failed %s' % e
        logger.exception(msg)
        raise Exception(e)

    # dataset documentation: record source dataset keys
    try:
        text_src = open('infiles.txt', 'a')
        for key in file_dic.keys():
            text_src.write(key + '\n')
        text_src.close()
    except Exception as e:
        msg = 'failed to write source textfile'
        logger.exception(msg)
        raise Exception(msg)

    # configure reference and compare period
    try:
        # FIX: scan the ensemble whenever EITHER bound is missing; the
        # original only filled en_set while start was None, so start given
        # with end=None raised a NameError on en_set.
        if start is None or end is None:
            st_set = set()
            en_set = set()
            for f in mergefiles:
                print(f)
                times = get_time(f)
                st_set.update([times[0].year])
                en_set.update([times[-1].year])
            if start is None:
                start = max(st_set)
            if end is None:
                end = min(en_set)
        logger.info('Start and End: %s - %s ' % (start, end))
        if start >= end:
            logger.error('ensemble is inconsistent!!! start year is later than end year')
    except Exception as e:
        msg = 'failed to detect start and end times of the ensemble'
        logger.exception(msg)
        raise Exception(msg)

    # set the periodes:
    try:
        start = int(start)
        end = int(end)
        if timeslice is None:
            # default: one third of the common period, at least one year
            timeslice = int((end - start) / 3)
            if timeslice == 0:
                timeslice = 1
        else:
            timeslice = int(timeslice)
        start1 = start
        start2 = start1 + timeslice - 1
        end1 = end - timeslice + 1
        end2 = end
        logger.info('timeslice and periodes set')
    except Exception as e:
        msg = 'failed to set the periodes'
        logger.exception(msg)
        raise Exception(msg)

    # subset every member to the common period
    try:
        files = []
        for i, mf in enumerate(mergefiles):
            files.append(
                cdo.selyear('{0}/{1}'.format(start1, end2),
                            input=[mf.replace(' ', '\ ')],
                            output='file_{0}_.nc'.format(i)))  # python version
        logger.info('timeseries selected from defined start to end year')
    except Exception as e:
        msg = 'seltime and mergetime failed'
        logger.exception(msg)
        raise Exception(msg)

    try:
        # ensemble mean
        nc_ensmean = cdo.ensmean(input=files, output='nc_ensmean.nc')
        logger.info('ensemble mean calculation done')
    except Exception as e:
        msg = 'ensemble mean failed'
        logger.exception(msg)
        raise Exception(msg)

    try:
        # ensemble std
        nc_ensstd = cdo.ensstd(input=files, output='nc_ensstd.nc')
        logger.info('ensemble std and calculation done')
    except Exception as e:
        msg = 'ensemble std or failed'
        logger.exception(msg)
        raise Exception(msg)

    # signal: difference of the end-period mean and start-period mean
    try:
        selyearstart = cdo.selyear('%s/%s' % (start1, start2),
                                   input=nc_ensmean, output='selyearstart.nc')
        selyearend = cdo.selyear('%s/%s' % (end1, end2),
                                 input=nc_ensmean, output='selyearend.nc')
        meanyearst = cdo.timmean(input=selyearstart, output='meanyearst.nc')
        meanyearend = cdo.timmean(input=selyearend, output='meanyearend.nc')
        signal = cdo.sub(input=[meanyearend, meanyearst], output='signal.nc')
        logger.info('Signal calculation done')
    except Exception as e:
        msg = 'calculation of signal failed'
        logger.exception(msg)
        raise Exception(msg)

    # intermodel standard deviation (mean over whole period)
    try:
        std = cdo.timmean(input=nc_ensstd, output='std.nc')
        std2 = cdo.mulc('2', input=std, output='std2.nc')
        logger.info('calculation of internal model std for time period done')
    except Exception as e:
        msg = 'calculation of internal model std failed'
        logger.exception(msg)
        raise Exception(msg)

    # robustness masks from |signal| vs. std / 2*std
    try:
        absolut = cdo.abs(input=signal, output='absolut_signal.nc')
        high_agreement_mask = cdo.gt(
            input=[absolut, std2],
            output='large_change_with_high_model_agreement.nc')
        low_agreement_mask = cdo.lt(
            input=[absolut, std],
            output='small_signal_or_low_agreement_of_models.nc')
        logger.info('high and low mask done')
    except Exception as e:
        msg = 'calculation of robustness mask failed'
        logger.exception(msg)
        raise Exception(msg)

    try:
        if variable is None:
            variable = get_variable(signal)
        logger.info('variable to be plotted: %s' % variable)
        if title is None:
            title = 'Change of %s (difference of mean %s-%s to %s-%s)' % (
                variable, end1, end2, start1, start2)
        graphic = None
        graphic = map_ensembleRobustness(signal, high_agreement_mask,
                                         low_agreement_mask,
                                         variable=variable,
                                         cmap=cmap, title=title)
        logger.info('graphic generated')
    except Exception as e:
        # FIX: the original called msg(...) — msg was a string at this point,
        # so the error handler itself crashed with a TypeError.
        msg = 'graphic generation failed: %s' % e
        logger.debug(msg)
        raise Exception(msg)

    return signal, low_agreement_mask, high_agreement_mask, graphic, text_src
def signal_noise_ratio(resource=[], start=None, end=None, timeslice=20,
                       variable=None, title=None, cmap='seismic'):
    """returns the result

    Signal-to-noise evaluation of an ensemble: the change signal between the
    first and last *timeslice* of the common period is compared (unsigned)
    against the inter-model standard deviation.

    :param resource: list of paths to netCDF files
    :param start: beginning of reference period (if None (default),
                  the first year of the consistent ensemble will be detected)
    :param end: end of comparison period (if None (default),
                the last year of the consistent ensemble will be detected)
    :param timeslice: period length for mean calculation of reference and comparison period
    :param variable: OBSOLETE
    :param title: str to be used as title for the signal mal
    :param cmap: define the color scheme for signal map plotting
    :return: signal.nc, low_agreement_mask.nc, high_agreement_mask.nc, text.txt
    """
    from os.path import split
    from tempfile import mkstemp
    from cdo import Cdo
    cdo = Cdo()
    cdo.forceOutput = True

    # preparing the resource
    try:
        file_dic = sort_by_filename(resource, historical_concatination=True)
        LOGGER.info('file names sorted experimets: %s' % len(file_dic.keys()))
    except Exception:
        msg = 'failed to sort the input files'
        LOGGER.exception(msg)

    # check that all datasets contain the same variable
    try:
        var_name = set()
        for key in file_dic.keys():
            var_name = var_name.union([get_variable(file_dic[key])])
        LOGGER.debug(var_name)
    except Exception:
        LOGGER.exception('failed to get the variable in common')

    if len(var_name) == 1:
        variable = [str(n) for n in var_name][0]
        LOGGER.info('varible %s detected in all members of the ensemble' % variable)
    else:
        raise Exception(
            'none or more than one variables are found in the ensemble members'
        )

    # TODO: drop missfitting grids

    # timemerge for seperate datasets
    try:
        mergefiles = []
        for key in file_dic.keys():
            try:
                if type(file_dic[key]) == list and len(file_dic[key]) > 1:
                    _, nc_merge = mkstemp(dir='.', suffix='.nc')
                    mergefiles.append(
                        cdo.mergetime(input=file_dic[key], output=nc_merge))
                else:
                    mergefiles.extend(file_dic[key])
            except Exception:
                LOGGER.exception('failed to merge files for %s ' % key)
        LOGGER.info('datasets merged %s ' % mergefiles)
    except Exception:
        msg = 'seltime and mergetime failed'
        LOGGER.exception(msg)

    # verify the calendar: find the most common calendar among the members
    cals = []
    n = 0
    for nc in mergefiles:
        cal, unit = get_calendar(nc)
        cals.append(cal)
    for cal in cals:
        m = cals.count(cal)
        if m > n:
            # FIX: the original never updated n, so 'calendar' ended up as the
            # last counted calendar instead of the most common one.
            n = m
            calendar = cal

    # convert members with a deviating calendar to the common one
    for c, nc in enumerate(mergefiles):
        cal, unit = get_calendar(nc)
        print('calendar detected: %s most common: %s' % (cal, calendar))
        if cal != calendar:
            print('calendar changed for %s to %s' % (cal, calendar))
            _, nc_cal = mkstemp(dir='.', suffix='.nc')
            nc_out = cdo.setcalendar('{0}'.format(calendar), input=nc, output=nc_cal)
            mergefiles[c] = nc_cal
            LOGGER.debug('calendar changed for %s' % nc)
        else:
            LOGGER.debug('calendar was %s' % cal)

    # dataset documentation: record source dataset keys
    try:
        text_src = open('infiles.txt', 'a')
        for key in file_dic.keys():
            text_src.write(key + '\n')
        text_src.close()
    except Exception:
        msg = 'failed to write source textfile'
        LOGGER.exception(msg)
        _, text_src = mkstemp(dir='.', suffix='.txt')

    # evaluation
    # configure reference and compare period: clamp start/end to the common
    # time range of all members
    st = set()
    en = set()
    for key in file_dic.keys():
        # TODO: convert 360day calendar
        s, e = get_timerange(file_dic[key])
        st.update([s])
        en.update([e])
    if start is None:
        start = list(st)[-1]
    else:
        if start < list(st)[-1]:
            start = list(st)[-1]
            LOGGER.debug(
                'start was befor the first common timestep, set start to the first common timestep'
            )
    if end is None:
        end = list(en)[0]
    else:
        if end > list(en)[0]:
            end = list(en)[0]
            LOGGER.debug(
                'end was after the last common timestepp, set end to last common timestep '
            )

    from datetime import datetime as dt
    from datetime import timedelta
    # FIX: '%Y%M%d' parsed minutes instead of months — '%Y%m%d' is the
    # correct format for YYYYMMDD strings.
    start = dt.strptime(start, '%Y%m%d')
    end = dt.strptime(end, '%Y%m%d')
    length = end - start

    # set the periodes:
    try:
        if timeslice is None:
            # FIX: was 'lenth' (NameError)
            td = length / 3
        else:
            td = timedelta(days=timeslice)
        if td > length:
            td = length / 3
            LOGGER.debug(
                'timeslice is larger as whole timeseries! set timeslice to third of timeseries'
            )
        start_td = start + td
        end_td = end - td
        LOGGER.info('timeslice and periodes set')
    except Exception:
        msg = 'failed to set the periodes'
        LOGGER.exception(msg)

    # subset every member to the common period
    try:
        files = []
        for i, mf in enumerate(mergefiles):
            files.append(
                cdo.selyear('{0}/{1}'.format(start.year, end.year),
                            input=[mf.replace(' ', '\ ')],
                            output='file_{0}_.nc'.format(i)))  # python version
        LOGGER.info('timeseries selected from defined start to end year')
    except Exception:
        msg = 'seltime and mergetime failed'
        LOGGER.exception(msg)

    try:
        # ensemble mean
        nc_ensmean = cdo.ensmean(input=files, output='nc_ensmean.nc')
        LOGGER.info('ensemble mean calculation done')
    except Exception:
        msg = 'ensemble mean failed'
        LOGGER.exception(msg)

    try:
        # ensemble std
        nc_ensstd = cdo.ensstd(input=files, output='nc_ensstd.nc')
        LOGGER.info('ensemble std and calculation done')
    except Exception:
        msg = 'ensemble std or failed'
        LOGGER.exception(msg)

    # signal: difference between the first and last timeslice means of the
    # ensemble mean
    try:
        selyearstart = cdo.selyear('%s/%s' % (start.year, start_td.year),
                                   input=nc_ensmean,
                                   output='selyearstart.nc')
        selyearend = cdo.selyear('%s/%s' % (end_td.year, end.year),
                                 input=nc_ensmean,
                                 output='selyearend.nc')
        meanyearst = cdo.timmean(input=selyearstart, output='meanyearst.nc')
        meanyearend = cdo.timmean(input=selyearend, output='meanyearend.nc')
        signal = cdo.sub(input=[meanyearend, meanyearst], output='signal.nc')
        LOGGER.info('Signal calculation done')
    except Exception:
        msg = 'calculation of signal failed'
        LOGGER.exception(msg)
        _, signal = mkstemp(dir='.', suffix='.nc')

    # get the intermodel standard deviation (mean over whole period)
    try:
        std = cdo.timmean(input=nc_ensstd, output='std.nc')
        std2 = cdo.mulc('2', input=std, output='std2.nc')
        LOGGER.info('calculation of internal model std for time period done')
    except Exception:
        msg = 'calculation of internal model std failed'
        LOGGER.exception(msg)

    # masks: the raw signal (not its absolute value) is compared against the
    # noise estimate
    try:
        # absolut = cdo.abs(input=signal, output='absolut_signal.nc')  # don't get the sence of this step :-)
        high_agreement_mask = cdo.gt(input=[signal, std2],
                                     output='signal_larger_than_noise.nc')
        low_agreement_mask = cdo.lt(input=[signal, std],
                                    output='signal_smaller_than_noise.nc')
        LOGGER.info('high and low mask done')
    except Exception:
        msg = 'calculation of robustness mask failed'
        LOGGER.exception(msg)
        _, high_agreement_mask = mkstemp(dir='.', suffix='.nc')
        _, low_agreement_mask = mkstemp(dir='.', suffix='.nc')

    return signal, low_agreement_mask, high_agreement_mask, text_src
def calc_indice_simple(resource=[], variable=None, prefix=None, indices=None,
                       polygons=None, mosaik=False, groupings='yr',
                       dir_output=None, dimension_map=None, memory_limit=None):
    """
    Calculates given simple indices for suitable files in the appropriate
    time grouping and polygon.

    :param resource: list of filenames in drs convention (netcdf)
    :param variable: variable name to be selected in the netcdf file
                     (default=None; detected from the first file per dataset)
    :param prefix: output filename prefix; if None, a prefix is derived from
                   the drs key for each dataset/indice/grouping combination
    :param indices: list of indices (default='SU')
    :param polygons: list of polygons (default=None); when set, results are
                     clipped to the polygon(s)
    :param mosaik: if True, combine multiple polygons into one mosaik clip
    :param groupings: indices time aggregation (default='yr')
    :param dir_output: output directory for result files (netcdf)
    :param dimension_map: optional dimension map if different to standard
                          (default=None)
    :param memory_limit: memory limit handed through to ocgis

    :return: list of netcdf files with calculated indices. Files are saved
             into dir_output
    """
    from os.path import exists
    from flyingpigeon import ocgis_module
    from flyingpigeon.subset import clipping
    import uuid

    # normalise scalar arguments to lists
    if not isinstance(resource, list):
        resource = [resource]
    if not isinstance(indices, list):
        indices = [indices]
    if polygons is not None and not isinstance(polygons, list):
        polygons = [polygons]
    if not isinstance(groupings, list):
        groupings = [groupings]

    if dir_output is not None:
        if not exists(dir_output):
            makedirs(dir_output)

    experiments = sort_by_filename(resource)
    outputs = []

    for key in experiments:
        if variable is None:
            variable = get_variable(experiments[key][0])
        try:
            if variable == 'pr':
                # convert precipitation flux (kg m-2 s-1) to mm/day before
                # handing the data to the icclim indice calculation
                calc = 'pr=pr*86400'
                ncs = ocgis_module.call(resource=experiments[key],
                                        variable=variable,
                                        dimension_map=dimension_map,
                                        calc=calc,
                                        memory_limit=memory_limit,
                                        prefix=str(uuid.uuid4()),
                                        dir_output=dir_output,
                                        output_format='nc')
            else:
                ncs = experiments[key]

            for indice in indices:
                logger.info('indice: %s' % indice)
                try:
                    calc = [{'func': 'icclim_' + indice, 'name': indice}]
                    logger.info('calc: %s' % calc)
                    for grouping in groupings:
                        logger.info('grouping: %s' % grouping)
                        try:
                            calc_group = calc_grouping(grouping)
                            logger.info('calc_group: %s' % calc_group)
                            # BUG FIX: derive the prefix into a local name.
                            # The original assigned the generated prefix back
                            # to the `prefix` parameter, so the first
                            # auto-generated name was reused for every later
                            # dataset/indice/grouping (outputs overwrote
                            # each other).
                            if prefix is None:
                                name = key.replace(variable, indice).replace(
                                    '_day_', '_%s_' % grouping)
                            else:
                                name = prefix
                            if polygons is None:
                                try:
                                    tmp = ocgis_module.call(
                                        resource=ncs,
                                        variable=variable,
                                        dimension_map=dimension_map,
                                        calc=calc,
                                        calc_grouping=calc_group,
                                        prefix=name,
                                        dir_output=dir_output,
                                        output_format='nc')
                                    outputs.extend([tmp])
                                except Exception as e:
                                    msg = 'could not calc indice %s for domain in %s' % (indice, key)
                                    logger.exception(msg)
                                    raise Exception(msg)
                            else:
                                try:
                                    tmp = clipping(
                                        resource=ncs,
                                        variable=variable,
                                        dimension_map=dimension_map,
                                        calc=calc,
                                        calc_grouping=calc_group,
                                        prefix=name,
                                        polygons=polygons,
                                        mosaik=mosaik,
                                        dir_output=dir_output,
                                        output_format='nc')
                                    outputs.extend([tmp])
                                except Exception as e:
                                    msg = 'could not calc indice %s for domain in %s' % (indice, key)
                                    logger.exception(msg)
                                    raise Exception(msg)
                            logger.info('indice file calculated')
                        except Exception as e:
                            msg = 'could not calc indice %s for key %s and grouping %s' % (indice, key, grouping)
                            logger.exception(msg)
                            raise Exception(msg)
                except Exception as e:
                    msg = 'could not calc indice %s for key %s' % (indice, key)
                    logger.exception(msg)
                    raise Exception(msg)
        except Exception as e:
            msg = 'could not calc key %s' % key
            logger.exception(msg)
            raise Exception(msg)
    return outputs
def method_A(resource=[], start=None, end=None, timeslice=20,
             variable=None, title=None, cmap='seismic'):
    """returns the result

    :param resource: list of paths to netCDF files
    :param start: beginning of reference period (if None (default),
                  the first year of the consistent ensemble will be detected)
    :param end: end of comparison period (if None (default), the last year
                of the consistent ensemble will be detected)
    :param timeslice: period length for mean calculation of reference and
                      comparison period
    :param variable: variable name to be detected in the netCDF file. If not
                     set (not recommended), the variable name will be detected
    :param title: str to be used as title for the signal map
    :param cmap: define the color scheme for signal map plotting

    :return: signal.nc, low_agreement_mask.nc, high_agreement_mask.nc,
             graphic.png, text.txt
    """
    from cdo import Cdo
    cdo = Cdo()
    cdo.forceOutput = True

    # sort the input files into consistent experiments
    try:
        file_dic = sort_by_filename(resource, historical_concatination=True)
        logger.info('file names sorted experimets: %s' % len(file_dic.keys()))
    except Exception as e:
        msg = 'failed to sort the input files'
        logger.exception(msg)
        raise Exception(msg)

    # merge multi-file experiments into one timeseries per experiment
    try:
        mergefiles = []
        for key in file_dic.keys():
            if type(file_dic[key]) == list and len(file_dic[key]) > 1:
                # escape blanks in paths for the cdo command line
                inputs = []
                for i in file_dic[key]:
                    inputs.extend([i.replace(' ', '\\\ ')])
                mergefiles.append(
                    cdo.mergetime(input=inputs, output=key + '_mergetime.nc'))
            else:
                mergefiles.extend(file_dic[key])
        logger.info('datasets merged %s ' % mergefiles)
    except Exception as e:
        msg = 'seltime and mergetime failed %s' % e
        logger.exception(msg)
        raise Exception(e)

    # write the list of processed experiments to a text file
    try:
        text_src = open('infiles.txt', 'a')
        for key in file_dic.keys():
            text_src.write(key + '\n')
        text_src.close()
    except Exception as e:
        msg = 'failed to write source textfile'
        logger.exception(msg)
        raise Exception(msg)

    # configure reference and compare period
    # BUG FIX: the original only detected the ensemble end year when `start`
    # was None as well, so start=<year>, end=None crashed later on int(None).
    # Start and end are now detected independently.
    try:
        if start is None or end is None:
            st_set = set()
            en_set = set()
            for f in mergefiles:
                times = get_time(f)
                st_set.update([times[0].year])
                en_set.update([times[-1].year])
            if start is None:
                # latest common start year over the ensemble
                start = max(st_set)
            if end is None:
                # earliest common end year over the ensemble
                end = min(en_set)
        logger.info('Start and End: %s - %s ' % (start, end))
        if start >= end:
            logger.error(
                'ensemble is inconsistent!!! start year is later than end year')
    except Exception as e:
        msg = 'failed to detect start and end times of the ensemble'
        logger.exception(msg)
        raise Exception(msg)

    # set the periodes:
    try:
        start = int(start)
        end = int(end)
        if timeslice is None:
            # default to a third of the full period (at least one year)
            timeslice = int((end - start) / 3)
            if timeslice == 0:
                timeslice = 1
        else:
            timeslice = int(timeslice)
        start1 = start
        start2 = start1 + timeslice - 1
        end1 = end - timeslice + 1
        end2 = end
        logger.info('timeslice and periodes set')
    except Exception as e:
        msg = 'failed to set the periodes'
        logger.exception(msg)
        raise Exception(msg)

    # restrict every merged timeseries to the analysis period
    try:
        files = []
        for i, mf in enumerate(mergefiles):
            files.append(
                cdo.selyear('{0}/{1}'.format(start1, end2),
                            input=[mf.replace(' ', '\ ')],
                            output='file_{0}_.nc'.format(i)))  # python version
        logger.info('timeseries selected from defined start to end year')
    except Exception as e:
        msg = 'seltime and mergetime failed'
        logger.exception(msg)
        raise Exception(msg)

    try:
        # ensemble mean
        nc_ensmean = cdo.ensmean(input=files, output='nc_ensmean.nc')
        logger.info('ensemble mean calculation done')
    except Exception as e:
        msg = 'ensemble mean failed'
        logger.exception(msg)
        raise Exception(msg)

    try:
        # ensemble std
        nc_ensstd = cdo.ensstd(input=files, output='nc_ensstd.nc')
        logger.info('ensemble std and calculation done')
    except Exception as e:
        msg = 'ensemble std or failed'
        logger.exception(msg)
        raise Exception(msg)

    # get the signal as difference from the beginning (first years) and
    # end period (last years):
    try:
        selyearstart = cdo.selyear('%s/%s' % (start1, start2),
                                   input=nc_ensmean,
                                   output='selyearstart.nc')
        selyearend = cdo.selyear('%s/%s' % (end1, end2),
                                 input=nc_ensmean,
                                 output='selyearend.nc')
        meanyearst = cdo.timmean(input=selyearstart, output='meanyearst.nc')
        meanyearend = cdo.timmean(input=selyearend, output='meanyearend.nc')
        signal = cdo.sub(input=[meanyearend, meanyearst], output='signal.nc')
        logger.info('Signal calculation done')
    except Exception as e:
        msg = 'calculation of signal failed'
        logger.exception(msg)
        raise Exception(msg)

    # get the intermodel standard deviation (mean over whole period)
    try:
        # std_selyear = cdo.selyear('%s/%s' % (end1,end2), input=nc_ensstd, output='std_selyear.nc')
        # std = cdo.timmean(input = std_selyear, output = 'std.nc')
        std = cdo.timmean(input=nc_ensstd, output='std.nc')
        std2 = cdo.mulc('2', input=std, output='std2.nc')
        logger.info('calculation of internal model std for time period done')
    except Exception as e:
        msg = 'calculation of internal model std failed'
        logger.exception(msg)
        raise Exception(msg)

    # robustness masks: where |signal| exceeds 2*std the models agree on a
    # large change; where |signal| is below std the signal is small or the
    # models disagree
    try:
        absolut = cdo.abs(input=signal, output='absolut_signal.nc')
        high_agreement_mask = cdo.gt(
            input=[absolut, std2],
            output='large_change_with_high_model_agreement.nc')
        low_agreement_mask = cdo.lt(
            input=[absolut, std],
            output='small_signal_or_low_agreement_of_models.nc')
        logger.info('high and low mask done')
    except Exception as e:
        msg = 'calculation of robustness mask failed'
        logger.exception(msg)
        raise Exception(msg)

    try:
        if variable is None:
            variable = get_variable(signal)
        logger.info('variable to be plotted: %s' % variable)
        if title is None:
            title = 'Change of %s (difference of mean %s-%s to %s-%s)' % (
                variable, end1, end2, start1, start2)
        graphic = map_ensembleRobustness(signal, high_agreement_mask,
                                         low_agreement_mask,
                                         variable=variable,
                                         cmap=cmap, title=title)
        logger.info('graphic generated')
    except Exception as e:
        # BUG FIX: the original called msg(...) as a function here, which
        # itself raised NameError/TypeError instead of reporting the error.
        msg = 'graphic generation failed: %s' % e
        logger.debug(msg)
        raise Exception(msg)

    return signal, low_agreement_mask, high_agreement_mask, graphic, text_src
data[str(indice)] = ro.FloatVector(ravel(vals)) dataf = ro.DataFrame(data) predict_gam = mgcv.predict_gam(gam_model, newdata=dataf, type="response", progress="text", newdata_guaranteed=True, na_action=stats.na_pass) prediction = array(predict_gam).reshape(dims) return prediction p = "/home/nils/data/AFR-44/tas/" ncs = [path.join(p, nc) for nc in listdir(p)] ncd = utils.sort_by_filename(ncs) geom = subset.get_geom('CMR') ugid = subset.get_ugid('CMR', geom=geom) # from ocgis import RequestDataset, OcgOperations keys = ncd.keys() print len(keys) ocgis.env.OVERWRITE = True dmap = ocgis.DimensionMap() dmap.set_variable('x', 'lon', dimension='rlon') dmap.set_variable('y', 'lat', dimension='rlat') dmap.set_variable('time', 'time', dimension='time')
def clipping(resource=[], variable=None, dimension_map=None, calc=None,
             output_format='nc', calc_grouping=None, time_range=None,
             time_region=None, historical_concatination=True, prefix=None,
             spatial_wrapping='wrap', polygons=None, mosaik=False,
             dir_output=None, memory_limit=None):
    """ returns list of clipped netCDF files

    possible entries:
    :param resource: list of input netCDF files
    :param variable: variable (string) to be used in netCDF
    :param dimesion_map: specify a dimension map if input netCDF has
                         unconventional dimensions
    :param calc: ocgis calculation argument
    :param calc_grouping: ocgis calculation grouping
    :param historical_concatination: concat files of RCPs with appropriate
                                     historical runs to one timeseries
    :param prefix: prefix for output file name
    :param polygons: list of polygons to be used. If more than 1 in the list,
                     an appropriate mosaik will be clipped
    :param output_format: output_format (default='nc')
    :param dir_output: specify an output location
    """
    from flyingpigeon.utils import get_variable, drs_filename
    from flyingpigeon.ocgis_module import call

    # normalise scalar arguments to lists
    if not isinstance(resource, list):
        resource = [resource]
    if not isinstance(polygons, list):
        polygons = [polygons]
    if prefix is not None and not isinstance(prefix, list):
        prefix = [prefix]

    geoms = set()
    ncs = sort_by_filename(resource,
                           historical_concatination=historical_concatination)
    geom_files = []

    if mosaik is True:
        try:
            nameadd = '_'
            for polygon in polygons:
                geoms.add(get_geom(polygon))
                nameadd = nameadd + '-' + polygon
            if len(geoms) > 1:
                # BUG FIX: the original only logged this error and fell
                # through with `geom`/`ugids` undefined, so every following
                # ocgis call died on a NameError that was swallowed at debug
                # level and an empty list was silently returned.
                msg = ('polygons belong to differnt shapefiles! '
                       'mosaik option is not possible %s' % geoms)
                logger.error(msg)
                raise Exception(msg)
            geom = geoms.pop()
            ugids = get_ugid(polygons=polygons, geom=geom)
        except Exception as e:
            logger.debug('geom identification failed %s ' % e)
            raise
        for i, key in enumerate(ncs.keys()):
            try:
                if variable is None:
                    variable = get_variable(ncs[key])
                    logger.info('variable %s detected in resource' % variable)
                if prefix is None:
                    name = key + nameadd
                else:
                    name = prefix[i]
                geom_file = call(resource=ncs[key], variable=variable,
                                 calc=calc, calc_grouping=calc_grouping,
                                 output_format=output_format,
                                 prefix=name,
                                 geom=geom,
                                 select_ugid=ugids,
                                 time_range=time_range,
                                 time_region=time_region,
                                 spatial_wrapping=spatial_wrapping,
                                 memory_limit=memory_limit,
                                 dir_output=dir_output,
                                 dimension_map=dimension_map)
                geom_files.append(geom_file)
            except Exception as e:
                # best effort: keep clipping the remaining datasets
                msg = 'ocgis calculations failed for %s ' % key
                logger.debug(msg)
    else:
        # clip each polygon separately
        for i, polygon in enumerate(polygons):
            try:
                geom = get_geom(polygon)
                ugid = get_ugid(polygons=polygon, geom=geom)
                for key in ncs.keys():
                    try:
                        if variable is None:
                            variable = get_variable(ncs[key])
                            logger.info('variable %s detected in resource' % variable)
                        if prefix is None:
                            name = key + '_' + polygon
                        else:
                            name = prefix[i]
                        geom_file = call(resource=ncs[key], variable=variable,
                                         calc=calc,
                                         calc_grouping=calc_grouping,
                                         output_format=output_format,
                                         prefix=name,
                                         geom=geom,
                                         select_ugid=ugid,
                                         dir_output=dir_output,
                                         dimension_map=dimension_map,
                                         spatial_wrapping=spatial_wrapping,
                                         memory_limit=memory_limit,
                                         time_range=time_range,
                                         time_region=time_region,
                                         )
                        geom_files.append(geom_file)
                    except Exception as e:
                        msg = 'ocgis calculations failed for %s ' % key
                        logger.debug(msg)
                        raise
            except Exception as e:
                logger.debug('geom identification failed')
                raise
    return geom_files
}).execute() print ops from flyingpigeon.utils import sort_by_filename from flyingpigeon.ocgis_module import call # # kwds = {'percentile': percentile, 'window_width': 5} # calc = [{'func': 'daily_perc', 'name': 'dp', 'kwds': kwds}] # # # ops = OcgOperations(dataset=rd, calc=calc, # output_format='nc', # time_region={'year': [1980, 1990]} # ).execute() datasets = sort_by_filename(resource, historical_concatination=True) results = [] print datasets.keys() for key in datasets.keys(): result = call( resource=datasets[key], output_format='nc', calc=calc, prefix='call_', # time_region={'year': [1995, 2000]} # calc_grouping='year' ) results.extend(result) print result