def calc_flood_alert(self, forecast_filename=None, forecast_dir=None, forecast_pattern=None, threshold_filename=None, threshold_dir=None, threshold_pattern=None, dst_filename=None, dst_dir=None, dst_pattern=None, num_years=None, output_value=None):
    """Resolve the inputs for a flood alert calculation.

    Each input may be supplied either as an explicit filename or as a
    directory + regex pattern pair; when only dir/pattern is given, the
    matching file(s) are looked up via directory_utils.

    :param forecast_filename: explicit forecast file(s); if None, all files
        matching forecast_pattern in forecast_dir are used.
    :param threshold_filename: explicit flood threshold file; if None, the
        first file matching threshold_pattern in threshold_dir is used.
    :param dst_filename, dst_dir, dst_pattern: output destination (consumed
        later in this routine; not visible in this fragment).
    :param num_years, output_value: passed through to the alert calculation
        (not visible in this fragment).
    :raises ValueError: if no matching forecast or threshold file is found.
    """
    logger.info('entering calc_flood_alert')
    if forecast_filename is None:
        # get filenames from pattern and directory
        _forecast_filenames = directory_utils.get_matching_files(
            forecast_dir, forecast_pattern)
        if _forecast_filenames is None:
            raise ValueError(
                'Cannot find matching forecast files in directory')
    else:
        # caller supplied the forecast file(s) directly
        _forecast_filenames = forecast_filename
    if threshold_filename is None:
        # get filename from pattern and directory
        files_list = directory_utils.get_matching_files(
            threshold_dir, threshold_pattern)
        try:
            # use the first match only
            _threshold_filename = files_list[0]
        except IndexError, e:
            raise ValueError(
                'Cannot find matching flood threshold file in directory')
def mask_by_shapefile(self, raster_file, raster_dir, raster_pattern, polygon_file, polygon_dir, polygon_pattern, output_file, output_dir, output_pattern, nodata=False):
    """Mask (clip) a raster with a polygon shapefile.

    Raster and polygon inputs may be given explicitly or located via a
    dir + regex pattern pair; the output filename is generated from the
    raster filename when not given explicitly.

    :param nodata: passed through to raster_utils.mask_by_shapefile.
    :raises ValueError: when an input can't be found or the output
        destination is underspecified.
    :returns: None
    """
    if raster_file is None:
        _file_list = directory_utils.get_matching_files(raster_dir, raster_pattern)
        if _file_list:
            _raster_file = _file_list[0]
        else:
            raise ValueError("No matching raster file found.")
    else:
        _raster_file = raster_file
    if polygon_file is None:
        _file_list = directory_utils.get_matching_files(polygon_dir, polygon_pattern)
        # BUG FIX: this previously tested "is not None", which lets an empty
        # list through and raises IndexError on [0] instead of the intended
        # ValueError.
        if _file_list:
            _polygon_file = _file_list[0]
        else:
            raise ValueError("No matching polygon file or polygon dir/pattern found.")
    else:
        _polygon_file = polygon_file
    if output_file is None:
        if output_dir is None:
            raise ValueError("No output directory provided.")
        if output_pattern is None:
            raise ValueError("No output pattern provided.")
        _output_dir = output_dir
        # derive the output name from the raster name using the patterns
        _output_file = os.path.join(
            _output_dir,
            filename_utils.generate_output_filename(
                os.path.basename(_raster_file), raster_pattern, output_pattern, False))
    else:
        _output_file = output_file
    _gdal_path = self.vp.get('directories', 'gdal_dir')
    raster_utils.mask_by_shapefile(raster_file=_raster_file, polygon_file=_polygon_file,
                                   output_file=_output_file, gdal_path=_gdal_path,
                                   nodata=nodata)
    return None
def mask_by_raster(self, raster_file, raster_dir, raster_pattern, boundary_raster, boundary_raster_dir, boundary_raster_pattern, output_file, output_dir, output_pattern, nodata=False):
    """Mask a raster using a second (boundary) raster.

    Inputs may be given explicitly or located via dir + regex pattern; the
    output filename is generated from the raster filename when not given.

    :param nodata: accepted for interface symmetry with mask_by_shapefile;
        not forwarded (raster_utils.mask_by_raster takes no nodata argument).
    :raises ValueError: when an input can't be found or the output
        destination is underspecified.
    :returns: None
    """
    if raster_file is None:
        _file_list = directory_utils.get_matching_files(raster_dir, raster_pattern)
        if _file_list:
            _raster_file = _file_list[0]
        else:
            raise ValueError("No matching raster file found.")
    else:
        _raster_file = raster_file
    if boundary_raster is None:
        _file_list = directory_utils.get_matching_files(boundary_raster_dir, boundary_raster_pattern)
        # BUG FIX: "is not None" let an empty list through to an IndexError
        # on [0]; test truthiness so the intended ValueError is raised.
        if _file_list:
            _boundary_raster = _file_list[0]
        else:
            raise ValueError("No matching raster boundary file or boundary dir/pattern found.")
    else:
        _boundary_raster = boundary_raster
    if output_file is None:
        if output_dir is None:
            raise ValueError("No output directory provided.")
        if output_pattern is None:
            raise ValueError("No output pattern provided.")
        _output_dir = output_dir
        # derive the output name from the raster name using the patterns
        _output_file = os.path.join(
            _output_dir,
            filename_utils.generate_output_filename(
                os.path.basename(_raster_file), raster_pattern, output_pattern, False))
    else:
        _output_file = output_file
    raster_utils.mask_by_raster(raster_file=_raster_file, mask_file=_boundary_raster,
                                output_file=_output_file)
    return None
class PublishDSLRAreaImpactProduct(PublishableTabularProduct):
    """Publishable tabular product for the Days Since Last Rain (DSLR)
    area-impact table.

    Resolves the product file for the task's start date and sets up the
    database/table metadata needed to publish the impact table.
    """

    def __init__(self, params, vampire_defaults):
        """
        :param params: task parameter dict; must contain 'start_date' (a
            datetime - strftime is used below) and 'end_date'.
        :param vampire_defaults: VampireDefaults-style config object.
        :raises ValueError: if no product file matching the configured
            pattern and start date exists in the product directory.
        """
        logger.debug('Initialising Area Impact product.')
        super(PublishDSLRAreaImpactProduct, self).__init__(params, vampire_defaults)
        self.params = params
        self.vp = vampire_defaults
        self.product_dir = self.vp.get('hazard_impact', 'dslr_output_dir')
        self.product_date = self.params['start_date']
        self.valid_from_date = self.params['start_date']
        self.valid_to_date = self.params['end_date']
        self.database = self.vp.get('database', 'impact_db')
        self.table_name = self.vp.get('database', 'dslr_impact_area_table')
        try:
            self.schema = self.vp.get('database', 'dslr_impact_area_schema')
        except Exception:
            # no product-specific schema configured - fall back to the default
            self.schema = self.vp.get('database', 'default_schema')
        # substitute the product date into the generic filename pattern
        _product_pattern = self.vp.get('hazard_impact', 'dslr_area_pattern')
        _product_pattern = _product_pattern.replace(
            '(?P<year>\d{4}).(?P<month>\d{2}).(?P<day>\d{2})',
            '{0}'.format(self.product_date.strftime('%Y.%m.%d')))
        _product_files = directory_utils.get_matching_files(self.product_dir, _product_pattern)
        if not _product_files:
            # fail with a clear message instead of an IndexError on [0]
            raise ValueError('Cannot find DSLR area impact file matching {0} in {1}'.format(
                _product_pattern, self.product_dir))
        self.product_filename = _product_files[0]
        self.product_name = 'dslr_impact_area'
        self.destination_filename = self.product_filename
        self.ingestion_date = self.product_date
        return
def __init__(self, params, vampire_defaults):
    """Initialise the SPI (Standardised Precipitation Index) publish task.

    Resolves the product file from params['input_dir']/params['input_pattern']
    and, when publishing to geoserver, rewrites the date in the destination
    filename into the YYYYmmdd form geoserver can parse.

    :param params: task parameter dict ('start_date' - a datetime, 'end_date',
        'input_dir', 'input_pattern', optional 'publish_name').
    :param vampire_defaults: VampireDefaults-style config object.
    :raises ValueError: if no file matches the input pattern.
    """
    # message previously said 'MODIS download task' - wrong product
    logger.debug('Initialising SPI product publish task')
    super(PublishSPIProduct, self).__init__(params, vampire_defaults)
    self.product_dir = self.vp.get('CHIRPS_SPI', 'output_dir')
    self.product_date = self.params['start_date']
    self.valid_from_date = self.params['start_date']
    self.valid_to_date = self.params['end_date']
    # BUG FIX: .capitalize() used to be applied to the key literal
    # ('default_interval' -> 'Default_interval'), so the wrong config key was
    # looked up. Capitalize the retrieved interval value instead.
    self.summary = '{0} {1}'.format(
        self.vp.get('CHIRPS_SPI', 'default_interval').capitalize(),
        self.vp.get('CHIRPS_SPI', 'summary'))
    # NOTE(review): sibling publish tasks use
    # get_country_name(vp.get('vampire', 'home_country')); the
    # 'vampire_tmp'/get_country combination here looks like a typo - confirm
    # before changing.
    self.tags = '{0}, {1}'.format(
        self.vp.get('CHIRPS_SPI', 'tags'),
        self.vp.get_country(self.vp.get('vampire_tmp', 'home_country')))
    self.template_file = self.vp.get('CHIRPS_SPI', 'template_file')
    _product_pattern = self.params['input_pattern']
    _product_files = directory_utils.get_matching_files(
        self.params['input_dir'], _product_pattern)
    if not _product_files:
        # fail clearly instead of raising IndexError on [0]
        raise ValueError('Cannot find SPI file matching {0} in {1}'.format(
            _product_pattern, self.params['input_dir']))
    self.product_filename = _product_files[0]
    self.product_name = 'spi'
    self.publish_name = None
    if 'publish_name' in self.params:
        self.publish_name = self.params['publish_name']
    # if using geoserver, the destination filename must carry the date as
    # YYYYmmdd (no full stops) so geoserver can parse it
    self.destination_filename = self.product_filename
    if self.vp.get('vampire', 'gis_server').lower() == 'geoserver':
        self.destination_filename = os.path.basename(self.product_filename)
        new_date = '{0}'.format(self.product_date.strftime('%Y%m%d'))
        # pick the date regex matching the product interval (dots escaped so
        # only the literal dotted date is rewritten)
        _interval = self.vp.get('CHIRPS_SPI', 'default_interval').lower()
        regex = ''
        if _interval == 'monthly':
            regex = r'\d{4}\.\d{2}'
        elif _interval == 'seasonal':
            regex = r'\d{4}\.\d{6}'
        elif _interval == 'dekad':
            regex = r'\d{4}\.\d{2}\.\d{1}'
        # BUG FIX: only substitute when a regex was selected - re.sub with an
        # empty pattern would insert the date between every character.
        if regex:
            self.destination_filename = re.sub(regex, new_date, self.destination_filename)
    self.ingestion_date = self.product_date
    return
def match_projection(self, master_file, master_dir, master_pattern, slave_file, slave_dir, slave_pattern, output_file, output_dir, output_pattern):
    """Reproject the 'slave' raster to match the 'master' raster's projection.

    Files can be given explicitly or located via dir + regex pattern; the
    output filename is generated from the slave filename when not given.
    The output directory is created if needed.

    :raises ValueError: when an input can't be found or the output is
        underspecified.
    :returns: None
    """
    if master_file is None:
        _file_list = directory_utils.get_matching_files(master_dir, master_pattern)
        # BUG FIX: "is not None" let an empty list through to an IndexError
        # on [0]; test truthiness so the intended ValueError is raised.
        if _file_list:
            _master_file = _file_list[0]
        else:
            raise ValueError("No matching master file found.")
    else:
        _master_file = master_file
    if slave_file is None:
        _file_list = directory_utils.get_matching_files(slave_dir, slave_pattern)
        if _file_list:
            _slave_file = _file_list[0]
        else:
            raise ValueError("No matching slave file found.")
    else:
        _slave_file = slave_file
    if output_file is not None:
        _output_file = output_file
        _output_dir = os.path.dirname(_output_file)
    else:
        if output_dir is None:
            raise ValueError("No output directory provided.")
        if output_pattern is None:
            raise ValueError("No output pattern provided.")
        _output_dir = output_dir
        _output_file = os.path.join(
            _output_dir,
            filename_utils.generate_output_filename(
                os.path.basename(_slave_file), slave_pattern, output_pattern, False))
    if not os.path.isdir(_output_dir):
        # need to create output dir
        os.makedirs(_output_dir)
    raster_utils.reproject_image_to_master(_master_file, _slave_file, _output_file)
    return None
def _extract_subset(self, input_dir, output_dir, patterns, subset, subset_name, overwrite = False):
    """Extract one HDF5 subdataset from each matching input file to GeoTIFF.

    :param input_dir: directory searched for input HDF5 files.
    :param output_dir: directory for the GeoTIFF outputs (created if missing).
    :param patterns: two-element sequence (input_regex, output_pattern) used
        to select inputs and name outputs.
    :param subset: unused in this fragment - presumably the subset index;
        TODO confirm against callers.
    :param subset_name: name of the HDF5 subdataset to open.
    :param overwrite: re-extract even when the output already exists.
    :returns: -1 on failure (no inputs, or a GDAL open error).
    """
    logger.info('entering _extract_subset')
    _all_files = directory_utils.get_matching_files(input_dir, patterns[0])
    if not _all_files:
        logger.debug('Extracting subset {0}. No files found in {1} with pattern {2}'.format(
            subset_name, input_dir, patterns[0]))
        logger.info('No files found in ' + input_dir + ', please check directory and try again')
        return -1
    # check output directory exists and create it if not
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    new_files = []
    # target spatial reference: geographic WGS84
    _sr = osr.SpatialReference()
    _sr.ImportFromEPSG(4326)
    for _ifl in _all_files:
        # generate output file
        _nfl = filename_utils.generate_output_filename(os.path.basename(_ifl), patterns[0], patterns[1])
        _ofl = os.path.join(output_dir, _nfl)
        if not os.path.exists(_ofl) or overwrite == True:
            try:
                gdal.SetConfigOption( 'CPL_DEBUG', 'ON' )
                # with UseExceptions() a failed Open raises RuntimeError
                gdal.UseExceptions()
                # GDAL subdataset syntax for an HDF5 variable
                _name_str = 'HDF5:"{0}"://{1}'.format(_ifl, subset_name)
                src_ds = gdal.Open(_name_str)
                _proj = src_ds.GetProjection()
                print _proj
                _geotransform = src_ds.GetGeoTransform()
                print _geotransform
                # NOTE(review): the source geo-referencing is overridden with
                # EPSG:4326 on a fixed 0.1-degree grid anchored at (-180, -90);
                # the positive y pixel size together with the transposed data
                # (.T below) is unusual - confirm output orientation.
                src_ds.SetProjection(_sr.ExportToWkt())
                src_ds.SetGeoTransform([-180, 0.1, 0, -90, 0, 0.1])
                print src_ds.RasterCount
                print src_ds.GetMetadata()
                t = src_ds.RasterXSize  # (assigned but unused)
                _band = src_ds.GetRasterBand(1)
                # transpose so rows/columns match the geotransform above
                _data = _band.ReadAsArray().T
                stats = _band.GetStatistics( True, True )
                if stats is None:
                    # no statistics available - skip this file
                    continue
                logger.debug('[ STATS ] = Minimum=%.3f, Maximum=%.3f, Mean=%.3f, StdDev=%.3f' % (
                    stats[0], stats[1], stats[2], stats[3]))
                ysize,xsize = _data.shape
                tif = gdal.GetDriverByName('GTiff').Create(_ofl, xsize, ysize, eType=gdal.GDT_Float32)
                tif.SetProjection(src_ds.GetProjection())
                tif.SetGeoTransform(list(src_ds.GetGeoTransform()))
                band = tif.GetRasterBand(1)
                band.WriteArray(_data)
                band.FlushCache()
                band.SetNoDataValue(-9999.900390625)
                tif = None # closes file
            except RuntimeError, e:
                logger.debug('Unable to open file')
                return -1
            # NOTE(review): with gdal.UseExceptions() a failed Open raises
            # RuntimeError (caught above), so this None check is effectively
            # dead code.
            if src_ds is None:
                logger.debug('Unable to open file {0}'.format(_ifl))
                raise RuntimeError
            new_files.append(_ofl)
def calc_zonal_statistics(self, raster_file, raster_dir, raster_pattern, polygon_file, polygon_dir, polygon_pattern, zone_field, output_dir, output_file, output_pattern):
    """Compute zonal statistics of a raster over polygon zones.

    Raster and polygon inputs may be given explicitly or located via a
    dir + regex pattern pair; the output table name is generated from the
    raster filename when not given.

    :param zone_field: polygon attribute identifying each zone.
    :raises ValueError: when an input can't be found or the output is
        underspecified.
    :returns: None
    """
    if raster_file is None:
        _file_list = directory_utils.get_matching_files(raster_dir, raster_pattern)
        # BUG FIX: "is not None" let an empty list through to an IndexError
        # on [0]; test truthiness so the intended ValueError is raised.
        if _file_list:
            _raster_file = _file_list[0]
        else:
            raise ValueError("No matching raster file found.")
    else:
        _raster_file = raster_file
    if polygon_file is None:
        _file_list = directory_utils.get_matching_files(polygon_dir, polygon_pattern)
        if _file_list:
            _polygon_file = _file_list[0]
        else:
            raise ValueError("No matching polygon file found.")
    else:
        _polygon_file = polygon_file
    if output_file is None:
        if output_dir is None:
            raise ValueError("No output directory provided.")
        if output_pattern is None:
            raise ValueError("No output pattern provided.")
        _output_dir = output_dir
        _output_file = os.path.join(
            _output_dir,
            filename_utils.generate_output_filename(
                os.path.basename(_raster_file), raster_pattern, output_pattern, False))
    else:
        _output_file = output_file
    calculate_statistics.calc_zonal_statistics(raster_file=_raster_file,
                                               polygon_file=_polygon_file,
                                               zone_field=zone_field,
                                               output_table=_output_file)
    return None
def calculate_impact_poverty(self, impact_file, impact_dir, impact_pattern, impact_field, impact_match_field, poor_file, poor_field, poor_multiplier, poor_match_field, output_file, output_dir, output_pattern, output_field, start_date, end_date):
    """Estimate the number of poor people affected by a hazard impact.

    Joins a population-impact table with poverty-rate data and writes the
    result via impact_analysis.calculate_poverty_impact.

    :param impact_file: impact table; if None it is located via
        impact_dir + impact_pattern.
    :param impact_field: field holding affected population in the impact table.
    :param impact_match_field: join field on the impact side; defaults to the
        home country's 'crop_area_code'.
    :param poor_file, poor_field: poverty table and its rate field.
    :param poor_multiplier: scale applied to the poverty rate (default 0.01,
        i.e. percentage -> fraction).
    :param poor_match_field: join field on the poverty side; defaults to the
        home country's 'admin_3_boundary_area_code'.
    :param output_field: result field name (default 'poor_aff').
    :param start_date, end_date: period stamped into the output.
    :raises ValueError: when inputs or output cannot be resolved.
    :returns: None
    """
    if impact_file is None:
        if impact_pattern is not None:
            _input_files = directory_utils.get_matching_files(impact_dir, impact_pattern)
            if not _input_files:
                # fail clearly instead of raising IndexError on [0]
                raise ValueError("No impact file matching {0} found in {1}".format(
                    impact_pattern, impact_dir))
            _impact_file = os.path.join(impact_dir, _input_files[0])
        else:
            raise ValueError("Hazard raster is not specified")
    else:
        _impact_file = impact_file
    _country_name = self.vp.get_country_name(self.vp.get_home_country())
    if impact_match_field is None:
        _impact_match_field = self.vp.get_country(_country_name)['crop_area_code']
    else:
        _impact_match_field = impact_match_field
    if poor_match_field is None:
        _poor_match_field = self.vp.get_country(_country_name)['admin_3_boundary_area_code']
    else:
        _poor_match_field = poor_match_field
    if output_file is None:
        if output_pattern is not None:
            _input_pattern = self.vp.get('hazard_impact', 'vhi_popn_pattern')
            _output_file = filename_utils.generate_output_filename(
                os.path.basename(_impact_file), _input_pattern, output_pattern)
            _output_file = os.path.join(output_dir, _output_file)
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
        else:
            raise ValueError("No output specified")
    else:
        _output_file = output_file
    _multiplier = 0.01 if poor_multiplier is None else poor_multiplier
    _output_field = 'poor_aff' if output_field is None else output_field
    # NOTE(review): calculate_poverty_impact receives self as its first
    # positional argument - confirm this matches impact_analysis' signature.
    impact_analysis.calculate_poverty_impact(self, popn_impact_file=_impact_file,
                                             popn_impact_field=impact_field,
                                             popn_match_field=_impact_match_field,
                                             poor_file=poor_file, poor_field=poor_field,
                                             poor_match_field=_poor_match_field,
                                             multiplier=_multiplier,
                                             output_file=_output_file,
                                             output_field=_output_field,
                                             start_date=start_date, end_date=end_date)
    return None
def match_day_night_files(self, day_dir, night_dir, output_dir, patterns=None):
    """Pair each day LST file with its night counterpart and average them.

    For every '.tif' day file (optionally filtered by patterns[0]) the
    matching 'LST_Night' file is searched in night_dir and the day/night
    average is written to output_dir with an 'avg' suffix.

    :param day_dir: directory of day LST rasters.
    :param night_dir: directory of night LST rasters.
    :param output_dir: directory for the averaged outputs (created if missing).
    :param patterns: optional sequence whose first element is a regex used to
        filter the day files; when None/empty, all files in day_dir are used.
    :returns: None
    """
    logger.info('entering match_day_night_files')
    nightFiles = set(os.listdir(night_dir))
    # BUG FIX: patterns[0] used to be read unconditionally, so the documented
    # default patterns=None raised a TypeError.
    if patterns and patterns[0]:
        if not os.path.isdir(day_dir):
            # day_dir is actually a file path - search its parent directory
            dayFiles = directory_utils.get_matching_files(
                os.path.dirname(day_dir), patterns[0])
        else:
            dayFiles = directory_utils.get_matching_files(day_dir, patterns[0])
    else:
        dayFiles = list(os.listdir(day_dir))
    # diagnostics previously went to stdout via print; route through logger
    logger.debug("Day files: %s", dayFiles)
    logger.debug("Night files: %s", nightFiles)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    for fl in dayFiles:
        # find matching night file
        d_fl, ext = os.path.splitext(os.path.basename(os.path.normpath(fl)))
        if ext == '.tif':
            d_t = d_fl.rpartition('.')
        # regex for the night twin; the filename prefix is escaped so dots
        # in it match literally rather than as regex wildcards
            _pattern = re.compile('^{0}LST_Night\\w*\\.tif$'.format(
                re.escape(d_t[0] + d_t[1])))
            for n_fl in nightFiles:
                if _pattern.match(n_fl):
                    avg_fl = os.path.join(output_dir, d_t[0] + d_t[1] + 'avg' + ext)
                    day_path = os.path.join(day_dir, d_fl + ext)
                    night_path = os.path.join(night_dir, n_fl)
                    calculate_statistics.calc_average_of_day_night(
                        day_path, night_path, avg_file=avg_fl)
                    break
    logger.info('leaving match_day_night_files')
    return None
def calc_vhi(self, vci_filename=None, vci_dir=None, vci_pattern=None, tci_filename=None, tci_dir=None, tci_pattern=None, dst_filename=None, dst_dir=None, dst_pattern=None):
    """Compute the Vegetation Health Index (VHI) from VCI and TCI inputs.

    Inputs may be passed as explicit filenames or as dir + regex pattern
    pairs. Only the VCI filename resolution is visible in this fragment.

    :raises ValueError: if no file matching vci_pattern is found in vci_dir.
    """
    logger.info('entering calc_vhi')
    if vci_filename is None:
        # get filename from pattern and directory
        files_list = directory_utils.get_matching_files(vci_dir, vci_pattern)
        try:
            # use the first match only
            _vci_filename = files_list[0]
        except IndexError, e:
            raise ValueError('Cannot find matching Vegetation Condition Index file in directory')
def calc_rainfall_anomaly(self, dst_filename=None, cur_filename=None, lta_filename=None, cur_dir=None, lta_dir=None, cur_pattern=None, lta_pattern=None, dst_pattern=None, dst_dir=None ):
    """Compute rainfall anomaly (current rainfall vs. long-term average).

    Inputs may be passed as explicit filenames or as dir + regex pattern
    pairs. Only the current-rainfall filename resolution is visible in this
    fragment.

    :raises ValueError: if no file matching cur_pattern is found in cur_dir.
    """
    logger.info('entering calc_rainfall_anomaly')
    if cur_filename is None:
        # get filename from pattern and directory
        files_list = directory_utils.get_matching_files(cur_dir, cur_pattern)
        try:
            # use the first match only
            cur_filename = files_list[0]
        except IndexError, e:
            raise ValueError('Cannot find matching rainfall file in directory')
def process(self):
    """Mosaic all rasters matching the configured pattern into one output.

    Required params: 'input_dir', 'file_pattern', 'output_dir',
    'output_pattern'. Optional: 'mosaic_method' (defaults to 'MAXIMUM').

    :raises BaseTaskImpl.ConfigFileError: when a required parameter is missing.
    :raises ValueError: when no file matches the input pattern.
    :returns: None
    """
    logger.debug("Mosaic list of rasters")
    if 'input_dir' in self.params:
        _input_dir = self.params['input_dir']
    else:
        raise BaseTaskImpl.ConfigFileError("No input directory 'input_dir' specified.", None)
    if 'file_pattern' in self.params:
        _file_pattern = self.params['file_pattern']
    else:
        raise BaseTaskImpl.ConfigFileError("No file pattern 'file_pattern' specified.", None)
    if 'output_dir' in self.params:
        _output_dir = self.params['output_dir']
    else:
        raise BaseTaskImpl.ConfigFileError("No output directory 'output_dir' specified.", None)
    if 'output_pattern' in self.params:
        _output_pattern = self.params['output_pattern']
    else:
        raise BaseTaskImpl.ConfigFileError("No output pattern 'output_pattern' specified.", None)
    if 'mosaic_method' in self.params:
        _mosaic_method = self.params['mosaic_method']
    else:
        _mosaic_method = 'MAXIMUM'
    _file_list = directory_utils.get_matching_files(_input_dir, _file_pattern)
    if not _file_list:
        raise ValueError("No matching raster file found.")
    # name the mosaic after the first input file
    # (the unused comma-joined file string from the original was removed)
    _base_name = os.path.basename(_file_list[0])
    _output_file = filename_utils.generate_output_filename(
        _base_name, _file_pattern, _output_pattern, False)
    calculate_statistics.mosaic_rasters(_file_list, _output_dir, _output_file, _mosaic_method)
    return None
def __init__(self, params, vampire_defaults):
    """Initialise the flood forecast product publish task.

    Locates the forecast raster from params['input_dir']/params['input_pattern'],
    strips the forecast-day suffix from the published name and normalises the
    embedded date (YYYYmmdd for geoserver, dotted otherwise).

    :param params: task parameter dict ('start_date' - a datetime, 'end_date',
        'input_dir', 'input_pattern', optional 'publish_name').
    :param vampire_defaults: VampireDefaults-style config object.
    :raises ValueError: if no file matches the input pattern.
    """
    # message previously said 'MODIS download task' - wrong product
    logger.debug('Initialising flood forecast publish task')
    super(PublishFloodForecastProduct, self).__init__(params, vampire_defaults)
    self.product_dir = self.vp.get('FLOOD_FORECAST', 'product_dir')
    self.product_date = self.params['start_date']
    self.valid_from_date = self.params['start_date']
    self.valid_to_date = self.params['end_date']
    # BUG FIX: .capitalize() used to be applied to the key literal 'interval'
    # ('Interval'), so the wrong config key was looked up; capitalize the
    # retrieved value instead.
    self.summary = '{0} {1}'.format(
        self.vp.get('FLOOD_FORECAST', 'interval').capitalize(),
        self.vp.get('FLOOD_FORECAST', 'summary'))
    self.tags = '{0}, {1}'.format(
        self.vp.get('FLOOD_FORECAST', 'tags'),
        self.vp.get_country_name(self.vp.get('vampire', 'home_country')))
    self.template_file = self.vp.get('FLOOD_FORECAST', 'template_file')
    _product_pattern = self.params['input_pattern']
    _product_files = directory_utils.get_matching_files(
        self.params['input_dir'], _product_pattern)
    if not _product_files:
        # fail clearly instead of raising IndexError on [0]
        raise ValueError('Cannot find flood forecast file matching {0} in {1}'.format(
            _product_pattern, self.params['input_dir']))
    self.product_filename = _product_files[0]
    self.product_name = 'flood_forecast'
    self.publish_name = None
    if 'publish_name' in self.params:
        self.publish_name = self.params['publish_name']
    # drop the forecast-day suffix (e.g. '_fd003') from the published name
    self.destination_filename = re.sub(r'_fd\d{3}', '',
                                       os.path.basename(self.product_filename))
    # rewrite the embedded date: geoserver needs YYYYmmdd (no full stops),
    # other servers keep the dotted form; dots escaped so only the literal
    # dotted date matches
    regex = r'\d{4}\.\d{2}\.\d{2}'
    if self.vp.get('vampire', 'gis_server').lower() == 'geoserver':
        new_date = '{0}'.format(self.product_date.strftime('%Y%m%d'))
    else:
        new_date = '{0}'.format(self.product_date.strftime('%Y.%m.%d'))
    self.destination_filename = re.sub(regex, new_date, self.destination_filename)
    self.ingestion_date = self.valid_from_date
    return
def __init__(self, params, vampire_defaults):
    """Initialise the Days Since Last Rain (DSLR) publish task.

    Locates the DSLR product raster matching params['input_pattern'] in the
    configured output directory and normalises the date embedded in the
    destination filename (YYYYmmdd for geoserver, dotted otherwise).

    :param params: task parameter dict ('start_date' - a datetime, 'end_date',
        'input_pattern').
    :param vampire_defaults: VampireDefaults-style config object.
    :raises ValueError: if no file matches the input pattern.
    """
    logger.debug('Initialising PublishDSLRProduct task')
    super(PublishDSLRProduct, self).__init__(params, vampire_defaults)
    self.product_dir = self.vp.get('Days_Since_Last_Rain', 'output_dir')
    self.product_date = self.params['start_date']
    self.valid_from_date = self.params['start_date']
    self.valid_to_date = self.params['end_date']
    # BUG FIX: .capitalize() used to be applied to the key literal 'interval'
    # ('Interval'), so the wrong config key was looked up; capitalize the
    # retrieved value instead.
    self.summary = '{0} {1}'.format(
        self.vp.get('Days_Since_Last_Rain', 'interval').capitalize(),
        self.vp.get('Days_Since_Last_Rain', 'summary'))
    self.tags = '{0}, {1}'.format(
        self.vp.get('Days_Since_Last_Rain', 'tags'),
        self.vp.get_country_name(self.vp.get('vampire', 'home_country')))
    self.template_file = self.vp.get('Days_Since_Last_Rain', 'template_file')
    _product_pattern = self.params['input_pattern']
    _product_files = directory_utils.get_matching_files(self.product_dir, _product_pattern)
    if not _product_files:
        # fail clearly instead of raising IndexError on [0]
        raise ValueError('Cannot find DSLR file matching {0} in {1}'.format(
            _product_pattern, self.product_dir))
    self.product_filename = _product_files[0]
    self.product_name = 'days_since_last_rain'
    # rewrite the embedded date: geoserver needs YYYYmmdd (no full stops),
    # other servers keep the dotted form; dots escaped so only the literal
    # dotted date matches
    self.destination_filename = os.path.basename(self.product_filename)
    regex = r'\d{4}\.\d{2}\.\d{2}'
    if self.vp.get('vampire', 'gis_server').lower() == 'geoserver':
        new_date = '{0}'.format(self.product_date.strftime('%Y%m%d'))
    else:
        new_date = '{0}'.format(self.product_date.strftime('%Y.%m.%d'))
    self.destination_filename = re.sub(regex, new_date, self.destination_filename)
    self.ingestion_date = self.valid_from_date
    return
def crop_files(base_path, output_path, bounds, tools_path, patterns=None, overwrite=False, nodata=True):
    """Crop all rasters in base_path matching patterns[0] to the given bounds.

    Gzipped inputs are unzipped before cropping.

    :param base_path: directory searched for input rasters.
    :param output_path: directory the cropped rasters are written to.
    :param bounds: shapefile used as the crop boundary.
    :param tools_path: path to the GDAL binaries.
    :param patterns: two-element sequence (input_regex, output_pattern); the
        output names are generated from patterns[1]. An empty/None first
        element matches all files.
    :param overwrite: re-crop even if the output file already exists.
    :param nodata: passed through to clip_raster_to_shp.
    :returns: list of generated output filenames.
    :raises ValueError: if patterns is not provided.
    """
    # BUG FIX: the default patterns=None used to crash with a TypeError on
    # patterns[0]; fail early with a clear message (patterns[1] is required
    # for output naming below).
    if patterns is None:
        raise ValueError('crop_files requires a (input_pattern, output_pattern) pair')
    _fileslist = []
    # if no input pattern, try all files
    _p = patterns[0] if patterns[0] else '*'
    _all_files = directory_utils.get_matching_files(base_path, _p)
    for ifl in _all_files:
        _f = os.path.basename(ifl)  # was basename(basename(...)) - redundant
        _new_filename = filename_utils.generate_output_filename(
            input_filename=_f, in_pattern=_p, out_pattern=patterns[1],
            ignore_leap_year=False)
        _out_raster = os.path.join(output_path, _new_filename)
        if not os.path.exists(_out_raster) or overwrite:
            # crop file here
            logger.debug("Cropping file: %s", ifl)
            if os.path.splitext(ifl)[1] == '.gz':
                # unzip first; the cropped source is the unzipped file
                directory_utils.unzip_file_list([ifl])
                ifl = ifl[:-3]  # remove .gz from filename
            clip_raster_to_shp(shpfile=bounds, in_raster=ifl, out_raster=_out_raster,
                               gdal_path=tools_path, nodata=nodata)
            # NOTE(review): outputs that already existed (and were skipped)
            # are not included in the returned list - confirm that is the
            # intended contract.
            _fileslist.append(_new_filename)
    return _fileslist
def calc_tci(self, cur_filename=None, cur_dir=None, cur_pattern=None, lst_max_filename=None, lst_max_dir=None, lst_max_pattern=None, lst_min_filename=None, lst_min_dir=None, lst_min_pattern=None, dst_filename=None, dst_dir=None, dst_pattern=None, interval=None):
    """Compute the Temperature Condition Index (TCI) from current LST and
    long-term LST max/min.

    Inputs may be passed as explicit filenames or as dir + regex pattern
    pairs; when several current-temperature files match, they are first
    averaged into a temporary file. Only the input resolution is visible in
    this fragment.

    :raises ValueError: if no matching temperature file is found.
    """
    logger.info('entering calc_tci')
    _temp_file = None
    if dst_dir is None:
        # fall back to the configured VCI product directory
        _dst_dir = self.vp.get('MODIS_VCI', 'vci_product_dir')
    else:
        _dst_dir = dst_dir
    if cur_filename is None:
        # get filename from pattern and directory
        files_list = directory_utils.get_matching_files(cur_dir, cur_pattern)
        try:
            if files_list and len(files_list) > 1:
                # more than one match - average files
                print 'Found more than one matching temperature file in directory - averaging '
                print files_list
                # _fn/_ext are assigned but unused in this fragment
                _fn, _ext = os.path.splitext(
                    os.path.basename(files_list[len(files_list) - 1]))
                # name the averaged temp file after the last match
                _temp_file = os.path.join(
                    _dst_dir, os.path.basename(files_list[len(files_list) - 1]))
                calculate_statistics.calc_average(files_list, _temp_file)
                _cur_filename = _temp_file
            else:
                # single match (IndexError here means no match at all)
                _cur_filename = files_list[0]
        except IndexError, e:
            raise ValueError(
                'Cannot find matching temperature file in directory')
def calc_vci(self, cur_filename=None, cur_dir=None, cur_pattern=None, evi_max_filename=None, evi_max_dir=None, evi_max_pattern=None, evi_min_filename=None, evi_min_dir=None, evi_min_pattern=None, dst_filename=None, dst_dir=None, dst_pattern=None):
    """Compute the Vegetation Condition Index (VCI) from current vegetation
    (e.g. EVI) and long-term EVI max/min.

    Inputs may be passed as explicit filenames or as dir + regex pattern
    pairs. Only the current-vegetation filename resolution is visible in
    this fragment.

    :raises ValueError: if no file matching cur_pattern is found in cur_dir.
    """
    logger.info('entering calc_vci')
    if cur_filename is None:
        # get filename from pattern and directory
        files_list = directory_utils.get_matching_files(
            cur_dir, cur_pattern)
        try:
            # use the first match only
            _cur_filename = files_list[0]
        except IndexError, e:
            raise ValueError(
                'Cannot find matching vegetation file in directory')
class RainfallAnomalyTaskImpl(BaseTaskImpl.BaseTaskImpl):
    """Task implementation for computing rainfall anomaly (current rainfall
    relative to the long-term average).
    """

    def __init__(self, params, vampire_defaults):
        super(RainfallAnomalyTaskImpl, self).__init__(params, vampire_defaults)
        logger.debug('Initialising Rainfall Anomaly')
        return

    def process(self):
        """Validate parameters and trigger the rainfall anomaly calculation.

        Each of the current file, long-term average file and output file can
        be given directly or as a dir + file-pattern pair.

        :raises BaseTaskImpl.ConfigFileError: when neither a file nor a
            pattern is supplied for one of the three inputs.
        """
        logger.debug("Compute monthly rainfall anomaly")
        cur_file = None
        lta_file = None
        out_file = None
        cur_pattern = None
        lta_pattern = None
        output_pattern = None
        cur_dir = None
        lta_dir = None
        output_dir = None
        if 'current_file' in self.params:
            cur_file = self.params['current_file']
        else:
            if 'current_file_pattern' not in self.params:
                raise BaseTaskImpl.ConfigFileError(
                    "No current file 'current_file' or pattern 'current_file_pattern' specified.", None)
            if 'current_dir' in self.params:
                cur_dir = self.params['current_dir']
            cur_pattern = self.params['current_file_pattern']
        if 'longterm_avg_file' in self.params:
            lta_file = self.params['longterm_avg_file']
        else:
            if 'longterm_avg_file_pattern' not in self.params:
                raise BaseTaskImpl.ConfigFileError(
                    "No long term average file 'longterm_avg_file' or pattern 'longterm_avg_file_pattern' specified.", None)
            if 'longterm_avg_dir' in self.params:
                lta_dir = self.params['longterm_avg_dir']
            lta_pattern = self.params['longterm_avg_file_pattern']
        if 'output_file' in self.params:
            out_file = self.params['output_file']
        else:
            # BUG FIX: this previously tested "if not 'output_file_pattern':",
            # which is always False, so a missing output pattern was never
            # reported (it surfaced later as a KeyError instead).
            if 'output_file_pattern' not in self.params:
                raise BaseTaskImpl.ConfigFileError(
                    "No output file 'output_file' or output pattern 'output_file_pattern' specified.", None)
            if 'output_dir' in self.params:
                output_dir = self.params['output_dir']
            output_pattern = self.params['output_file_pattern']
        self.calc_rainfall_anomaly(cur_filename=cur_file, lta_filename=lta_file,
                                   cur_dir=cur_dir, lta_dir=lta_dir,
                                   cur_pattern=cur_pattern, lta_pattern=lta_pattern,
                                   dst_filename=out_file, dst_pattern=output_pattern,
                                   dst_dir=output_dir)

    # Calculate rainfall anomaly given a precipitation file, long-term average
    # and output the result to file. Each input can be given explicitly or as
    # a pattern + directory to search in; the destination filename can be
    # generated from a pattern using parameters from the precipitation file.
    # The actual computation happens in the system-specific (ArcPy or
    # open-source) calc_rainfall_anomaly implementation.
    def calc_rainfall_anomaly(self, dst_filename=None, cur_filename=None, lta_filename=None,
                              cur_dir=None, lta_dir=None, cur_pattern=None, lta_pattern=None,
                              dst_pattern=None, dst_dir=None):
        """Resolve the inputs for the rainfall anomaly computation.

        Only the input resolution is visible in this fragment.

        :raises ValueError: when a pattern matches no file.
        """
        logger.info('entering calc_rainfall_anomaly')
        if cur_filename is None:
            # get filename from pattern and directory
            files_list = directory_utils.get_matching_files(cur_dir, cur_pattern)
            try:
                cur_filename = files_list[0]
            except IndexError:
                raise ValueError('Cannot find matching rainfall file in directory')
        if lta_filename is None:
            # get filename from pattern and directory
            files_list = directory_utils.get_matching_files(lta_dir, lta_pattern)
            try:
                lta_filename = files_list[0]
            except IndexError:
                raise ValueError('Cannot find matching long-term average file.')
def calculate_impact_area(self, hazard_raster, hazard_dir, hazard_pattern, threshold, threshold_direction, boundary, b_field, output_file, output_dir, output_pattern, start_date, end_date, hazard_var='vhi'):
    """Compute the area affected by a hazard within each boundary zone.

    Reclassifies the hazard raster against a threshold, runs zonal statistics
    over the boundary polygons, converts cell sums to hectares and writes a
    table stamped with the reporting period and zone id.

    :param hazard_raster: explicit hazard raster; if None it is located via
        hazard_dir + hazard_pattern.
    :param threshold: reclassification threshold; defaults to the
        '<hazard_var>_threshold' config value. Empty string disables
        reclassification.
    :param threshold_direction: threshold direction; defaults from config,
        falling back to 'LESS_THAN'.
    :param boundary: polygon file defining the zones.
    :param b_field: zone id field in the boundary file.
    :param output_file, output_dir, output_pattern: output table destination.
    :param start_date, end_date: period stamped into the output table.
    :param hazard_var: config key prefix for hazard-specific settings.
    :raises ValueError: when the hazard raster or output cannot be resolved.
    :returns: None
    """
    logger.debug("calculate_impact_area with hazard {0}, hazard dir {1}, hazard pattern {2}".format(
        hazard_raster, hazard_dir, hazard_pattern))
    if threshold is None:
        # get threshold from VampireDefaults
        _threshold = self.vp.get('hazard_impact', '{0}_threshold'.format(hazard_var))
    else:
        _threshold = threshold
    if threshold_direction is None:
        _threshold_direction = self.vp.get('hazard_impact',
                                           '{0}_threshold_direction'.format(hazard_var))
    else:
        _threshold_direction = threshold_direction
    if hazard_raster is None:
        if hazard_pattern is not None:
            _input_files = directory_utils.get_matching_files(hazard_dir, hazard_pattern)
            logger.debug("hazard files: {0}".format(_input_files))
            if not _input_files:
                # fail clearly instead of raising IndexError on [0]
                raise ValueError("No hazard raster matching {0} found in {1}".format(
                    hazard_pattern, hazard_dir))
            _hazard_raster = os.path.join(hazard_dir, _input_files[0])
        else:
            raise ValueError("Hazard raster is not specified")
    else:
        _hazard_raster = hazard_raster
    if output_file is None:
        if output_pattern is not None:
            _input_pattern = self.vp.get('hazard_impact', '{0}_input_pattern'.format(hazard_var))
            _output_file = filename_utils.generate_output_filename(
                os.path.basename(_hazard_raster), _input_pattern, output_pattern)
            _output_file = os.path.join(output_dir, _output_file)
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
        else:
            raise ValueError("No output specified")
    else:
        _output_file = output_file
    logger.debug("Output file: {0}".format(_output_file))
    if _threshold == '':
        # no threshold configured - use the hazard raster as-is
        _reclass_raster = _hazard_raster
    else:
        if _threshold_direction == '':
            _threshold_direction = 'LESS_THAN'
        # reclassify hazard raster to generate mask of all cells beyond threshold
        _reclass_raster = os.path.join(os.path.dirname(_output_file), 'hazard_area_reclass.tif')
        impact_analysis.reclassify_raster(raster=_hazard_raster, threshold=_threshold,
                                          threshold_direction=_threshold_direction,
                                          output_raster=_reclass_raster)
    # calculate impact on boundary (return value was previously bound to an
    # unused 'stats' local)
    calculate_statistics.calc_zonal_statistics(raster_file=_reclass_raster,
                                               polygon_file=boundary,
                                               zone_field=b_field,
                                               output_table=_output_file)
    # convert to hectares
    # TODO: get multiplier from defaults depending on resolution of hazard raster
    _multiplier = float(self.vp.get('hazard_impact', '{0}_area_multiplier'.format(hazard_var)))
    csv_utils.calc_field(table_name=_output_file, new_field='area_aff', cal_field='SUM',
                         multiplier=_multiplier)
    # add start and end date fields and set values
    csv_utils.add_field(table_name=_output_file, new_field='start_date', value=start_date)
    csv_utils.add_field(table_name=_output_file, new_field='end_date', value=end_date)
    csv_utils.copy_field(table_name=_output_file, new_field='kabupaten_id', copy_field=b_field)
    return None
# cur_filename is a list - need to average print 'More than one current file provided - averaging ' print cur_filename _temp_file = '{0}'.format( os.path.join( _dst_dir, os.path.basename(cur_filename[len(cur_filename) - 1]))) calculate_statistics.calc_average(cur_filename, _temp_file) _cur_filename = _temp_file else: _cur_filename = cur_filename _lst_max_filename = lst_max_filename if _lst_max_filename is None: # get filename from pattern and directory files_list = directory_utils.get_matching_files( lst_max_dir, lst_max_pattern) try: _lst_max_filename = files_list[0] except IndexError, e: raise ValueError( 'Cannot find LST long-term maximum file matching {0} in directory {1}' .format(lst_max_pattern, lst_max_dir)) _lst_min_filename = lst_min_filename if _lst_min_filename is None: # get filename from pattern and directory files_list = directory_utils.get_matching_files( lst_min_dir, lst_min_pattern) try: _lst_min_filename = files_list[0] except IndexError, e:
def calculate_impact_popn(self, hazard_raster, hazard_dir, hazard_pattern, threshold, population_raster,
                          boundary, b_field, output_file, output_dir, output_pattern,
                          start_date, end_date, threshold_direction, hazard_var='vhi'):
    """Calculate the population affected by a hazard within each boundary zone.

    Reclassifies the hazard raster against a threshold, masks the population
    raster with the result, runs zonal statistics over *boundary* and writes
    a table with ``popn_aff``, ``start_date``, ``end_date`` and
    ``kabupaten_id`` fields.

    Parameters
    ----------
    hazard_raster : str or None
        Hazard raster filename. If None, the first match of
        *hazard_pattern* in *hazard_dir* is used.
    threshold, threshold_direction
        Reclassification threshold and direction; looked up in the
        ``hazard_impact`` configuration section when None. An empty-string
        threshold skips reclassification.
    population_raster : str or None
        Population raster. When None, the affected population is taken as
        the pixel COUNT of the hazard mask itself rather than the SUM of
        masked population values.
    boundary : str
        Zone polygon file; *b_field* names the zone-id field.
    output_file : str or None
        Output table. If None, built from *output_pattern* in *output_dir*.
    start_date, end_date
        Date values written into the output table.
    hazard_var : str
        Hazard key used for configuration lookups (default ``'vhi'``).

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If no hazard raster or no output destination can be determined.

    NOTE(review): source formatting was collapsed; block structure was
    reconstructed - verify nesting against version history.
    """
    # Fall back to configured threshold/direction when not given explicitly.
    if threshold is None:
        # get threshold from VampireDefaults
        _threshold = self.vp.get('hazard_impact', '{0}_threshold'.format(hazard_var))
    else:
        _threshold = threshold
    if threshold_direction is None:
        _threshold_direction = self.vp.get('hazard_impact', '{0}_threshold_direction'.format(hazard_var))
    else:
        _threshold_direction = threshold_direction
    # Resolve the hazard raster from dir/pattern when no filename is given.
    if hazard_raster is None:
        if hazard_pattern is not None:
            _input_files = directory_utils.get_matching_files(hazard_dir, hazard_pattern)
            _hazard_raster = os.path.join(hazard_dir, _input_files[0])
        else:
            raise ValueError("Hazard raster is not specified")
    else:
        _hazard_raster = hazard_raster
    # Resolve the output table name, creating the output directory if needed.
    if output_file is None:
        if output_pattern is not None:
            _input_pattern = self.vp.get('hazard_impact', '{0}_input_pattern'.format(hazard_var))
            # _input_pattern = self.vp.get('MODIS_VHI', 'vhi_crop_pattern')
            _output_file = filename_utils.generate_output_filename(
                os.path.basename(_hazard_raster), _input_pattern, output_pattern)
            _output_file = os.path.join(output_dir, _output_file)
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
        else:
            raise ValueError("No output specified")
    else:
        _output_file = output_file
    if _threshold == '':
        # No threshold configured - use the hazard raster as-is.
        _reclass_raster = _hazard_raster
    else:
        if _threshold_direction == '':
            _threshold_direction = 'LESS_THAN'
        # reclassify hazard raster to generate mask of all <= threshold
        _reclass_raster = os.path.join(os.path.dirname(_output_file), 'hazard_popn_reclass.tif')
        impact_analysis.reclassify_raster(raster=_hazard_raster, threshold=_threshold,
                                          output_raster=_reclass_raster,
                                          threshold_direction=_threshold_direction)
    if population_raster is None:
        # No population data: count affected pixels directly.
        _hazard_raster = _reclass_raster
    else:
        # calculate population from hazard raster and population raster intersection
        _hazard_raster = os.path.join(os.path.dirname(_output_file), 'hazard_popn.tif')
        impact_analysis.create_mask(raster=population_raster, mask=_reclass_raster,
                                    output_raster=_hazard_raster)
        # impact_analysis.multiply_by_mask(raster=population_raster, mask=_reclass_raster,
        #                                  output_raster=_hazard_raster)
    # calculate impact on boundary
    calculate_statistics.calc_zonal_statistics(raster_file=_hazard_raster, polygon_file=boundary,
                                               zone_field=b_field, output_table=_output_file)
    # add field to table and calculate total for each area
    if population_raster is None:
        csv_utils.calc_field(table_name=_output_file, new_field='popn_aff', cal_field='COUNT', type='LONG')
    else:
        csv_utils.calc_field(table_name=_output_file, new_field='popn_aff', cal_field='SUM', type='LONG')
    # add start and end date fields and set values
    csv_utils.add_field(table_name=_output_file, new_field='start_date', value=start_date)
    csv_utils.add_field(table_name=_output_file, new_field='end_date', value=end_date)
    csv_utils.copy_field(table_name=_output_file, new_field='kabupaten_id', copy_field=b_field)
    return None
def accumulate_data(self, data_dir=None, data_pattern=None, output_dir=None, output_pattern=None,
                    num_days=None, dates=None, overwrite=False):
    """Accumulate daily GFS precipitation files into rolling *num_days* totals.

    Scans *data_dir* for files matching *data_pattern* (which must define a
    ``forecast_hr`` named regex group), treats every file whose forecast
    hour is a multiple of 24 as the end of a forecast day, and once
    *num_days* days are available accumulates the corresponding window of
    files into a new file in *output_dir* via ``self._accumulate_data``.

    Parameters
    ----------
    data_dir : str
        Directory containing the input files. When *dates* is None the
        forecast date is parsed from this directory's basename with its
        trailing two characters (the forecast hour) stripped, as YYYYMMDD.
    data_pattern : str
        Regex matching input files; must define a ``forecast_hr`` group.
    output_dir : str
        Directory where accumulated files are written.
    output_pattern : str
        Output name template; ``{forecast_day}``, ``{year}``, ``{month}``
        and ``{day}`` placeholders are substituted.
    num_days : int
        Size of the accumulation window, in days.
    dates : list of datetime.datetime, optional
        Forecast dates to process; parsed from *data_dir* when None.
    overwrite : bool
        When True, regenerate outputs that already exist.

    Returns
    -------
    list of str
        Basenames of the accumulated output files generated.
    """
    logger.info('entering accumulate_data')
    files_list = []
    if dates is None:
        # get date from data directory name (strip the trailing hour suffix)
        _dir_s = os.path.basename(data_dir)[:-2]
        _dates = [datetime.datetime.strptime(_dir_s, '%Y%m%d')]
    else:
        _dates = dates
    for d in _dates:
        # find all files, sorted so forecast hours are in ascending order
        _allfiles = directory_utils.get_matching_files(data_dir, data_pattern)
        _allfiles.sort()
        _cur_day = 0
        _accum_window_start = 0
        _accum_window_end = 0
        _day_ptrs = []  # index of the last file of each completed forecast day
        _pattern = re.compile(data_pattern)
        for idx, fname in enumerate(_allfiles):
            # get forecast hour from filename
            _base_fname = os.path.basename(fname)
            _result = _pattern.match(_base_fname)
            if _result:
                _forecast_hr = _result.group('forecast_hr')
                if int(_forecast_hr) % 24 == 0:
                    # end of day
                    _cur_day = _cur_day + 1
                    _day_ptrs.append(idx)
                    if _cur_day >= num_days:
                        # accumulate the trailing num_days window ending at this file
                        _output_pattern = output_pattern.replace(
                            '{forecast_day}',
                            ''.join(map(str, range(_cur_day - num_days + 1, _cur_day + 1))))
                        _output_pattern = _output_pattern.replace('{year}', '{0}'.format(d.year))
                        _output_pattern = _output_pattern.replace('{month}', '{0:0>2}'.format(d.month))
                        _output_pattern = _output_pattern.replace('{day}', '{0:0>2}'.format(d.day))
                        _new_filename = filename_utils.generate_output_filename(
                            _base_fname, data_pattern, _output_pattern)
                        if not os.path.exists(os.path.join(output_dir, _new_filename)) or overwrite:
                            self._accumulate_data(
                                _allfiles[_accum_window_start:_accum_window_end + 1],
                                output_dir, _new_filename)
                        # slide the window: start just after the oldest day's last file
                        _accum_window_start = _day_ptrs[_cur_day - num_days] + 1
                        files_list.append(_new_filename)
            # NOTE(review): source formatting was collapsed; this increment is
            # reconstructed as per-file (it mirrors the current file index).
            _accum_window_end = _accum_window_end + 1
    return files_list
except IndexError, e: raise ValueError( 'Cannot find matching rainfall file in directory') if lta_filename is None: # get filename from pattern and diretory files_list = directory_utils.get_matching_files( lta_dir, lta_pattern) try: lta_filename = files_list[0] except IndexError, e: raise ValueError( 'Cannot find matching long-term average file.') if ltsd_filename is None: # get filename from pattern and diretory files_list = directory_utils.get_matching_files( ltsd_dir, ltsd_pattern) try: ltsd_filename = files_list[0] except IndexError, e: raise ValueError( 'Cannot find matching long-term standard deviation file.') if dst_filename is None: if not os.path.exists(dst_dir): os.makedirs(dst_dir) # get new filename from directory and pattern dst_filename = os.path.join( dst_dir, filename_utils.generate_output_filename( os.path.split(cur_filename)[1], cur_pattern, dst_pattern)) precipitation_analysis.calc_standardized_precipitation_index(
class SPITaskImpl(BaseTaskImpl.BaseTaskImpl):
    """ Task implementation for computing the Standardised Precipitation Index (SPI).

    Resolves current rainfall, long-term average and long-term standard
    deviation inputs (as explicit filenames or directory/pattern pairs) from
    the task parameters and delegates the computation to
    precipitation_analysis.

    NOTE(review): source formatting was collapsed and the final method is
    truncated in this view; structure was reconstructed - verify against
    version history.
    """
    def __init__(self, params, vampire_defaults):
        # params: task parameter dict; vampire_defaults: VampireDefaults config object.
        super(SPITaskImpl, self).__init__(params, vampire_defaults)
        logger.debug('Initialising Standardised Precipitation Index')
        return

    def process(self):
        """Resolve SPI inputs/outputs from self.params and run the calculation.

        Each of the current, long-term average and long-term standard
        deviation inputs may be given either as an explicit file or as a
        dir + file-pattern pair; raises BaseTaskImpl.ConfigFileError when
        neither is present.
        """
        logger.debug("Compute standardized precipitation index")
        cur_file = None
        lta_file = None
        ltsd_file = None
        out_file = None
        cur_pattern = None
        lta_pattern = None
        ltsd_pattern = None
        output_pattern = None
        cur_dir = None
        lta_dir = None
        ltsd_dir = None
        output_dir = None
        # Current rainfall input: explicit file, or dir + pattern.
        if 'current_file' in self.params:
            cur_file = self.params['current_file']
        else:
            if not 'current_file_pattern' in self.params:
                raise BaseTaskImpl.ConfigFileError(
                    "No current file 'current_file' or pattern 'current_file_pattern' specified.", None)
            else:
                if 'current_dir' in self.params:
                    cur_dir = self.params['current_dir']
                else:
                    cur_dir = None
                cur_pattern = self.params['current_file_pattern']
        # Long-term average input: explicit file, or dir + pattern.
        if 'longterm_avg_file' in self.params:
            lta_file = self.params['longterm_avg_file']
        else:
            if not 'longterm_avg_file_pattern' in self.params:
                raise BaseTaskImpl.ConfigFileError(
                    "No long term average file 'longterm_avg_file' or pattern 'longterm_avg_file_pattern' specified.", None)
            else:
                if 'longterm_avg_dir' in self.params:
                    lta_dir = self.params['longterm_avg_dir']
                else:
                    lta_dir = None
                lta_pattern = self.params['longterm_avg_file_pattern']
        # Long-term standard deviation input: explicit file, or dir + pattern.
        if 'longterm_sd_file' in self.params:
            ltsd_file = self.params['longterm_sd_file']
        else:
            if not 'longterm_sd_file_pattern' in self.params:
                raise BaseTaskImpl.ConfigFileError(
                    "No long term standard deviation file 'longterm_sd_file' or pattern 'longterm_sd_file_pattern' specified.", None)
            else:
                if 'longterm_sd_dir' in self.params:
                    ltsd_dir = self.params['longterm_sd_dir']
                else:
                    ltsd_dir = None
                ltsd_pattern = self.params['longterm_sd_file_pattern']
        # Output: explicit file, or dir + pattern.
        if 'output_file' in self.params:
            out_file = self.params['output_file']
        else:
            # NOTE(review): as written this condition tests the truthiness of a
            # non-empty string literal and is therefore always False, so the
            # error below can never be raised. The sibling blocks above all use
            # "if not '<key>' in self.params:" - " in self.params" was most
            # likely lost here; confirm against version history before fixing.
            if not 'output_file_pattern':
                raise BaseTaskImpl.ConfigFileError(
                    "No output file 'output_file' or output pattern 'output_file_pattern' specified.", None)
            else:
                if 'output_dir' in self.params:
                    output_dir = self.params['output_dir']
                else:
                    output_dir = None
                output_pattern = self.params['output_file_pattern']
        self.calc_standardized_precipitation_index(cur_filename=cur_file,
                                                   lta_filename=lta_file,
                                                   ltsd_filename=ltsd_file,
                                                   cur_dir=cur_dir, lta_dir=lta_dir, ltsd_dir=ltsd_dir,
                                                   cur_pattern=cur_pattern, lta_pattern=lta_pattern,
                                                   ltsd_pattern=ltsd_pattern,
                                                   dst_filename=out_file,
                                                   dst_pattern=output_pattern,
                                                   dst_dir=output_dir)
        return

    def calc_standardized_precipitation_index(self, dst_filename=None, cur_filename=None, lta_filename=None,
                                              ltsd_filename=None, cur_dir=None, lta_dir=None, ltsd_dir=None,
                                              cur_pattern=None, lta_pattern=None, ltsd_pattern=None,
                                              dst_pattern=None, dst_dir=None):
        """Compute the SPI raster; inputs may be files or dir/pattern pairs.

        Raises ValueError when a required input cannot be located.
        NOTE(review): the remainder of this method is outside the visible
        chunk - it continues past this point in the original file.
        """
        logger.info('entering calc_standardized_precipitation_index')
        if cur_filename is None:
            # get filename from pattern and directory
            files_list = directory_utils.get_matching_files(cur_dir, cur_pattern)
            try:
                cur_filename = files_list[0]
            except IndexError, e:
                raise ValueError('Cannot find matching rainfall file in directory')
        if lta_filename is None:
            # get filename from pattern and diretory
            files_list = directory_utils.get_matching_files(lta_dir, lta_pattern)
            try:
                lta_filename = files_list[0]
            except IndexError, e:
                raise ValueError('Cannot find matching long-term average file.')
def calc_days_since_last_rainfall(self, data_dir, data_pattern, dst_dir, start_date, threshold, max_days):
    """Compute days-since-last-rain products over a trailing *max_days* window.

    Collects daily rainfall rasters from *data_dir* dated within
    [start_date - max_days, start_date) and produces four outputs in
    *dst_dir*: days since last wet day (dslw), days since last dry day
    (dsld), number of wet days, and accumulated rainfall, using output name
    patterns from the 'Days_Since_Last_Rain' configuration section.

    Parameters
    ----------
    data_dir : str
        Directory containing daily rainfall rasters.
    data_pattern : str
        Regex for input files; must define 'year', 'month' and 'day' groups.
    dst_dir : str
        Output directory (created if missing).
    start_date : datetime.date
        End (exclusive) of the analysis window.
    threshold
        Rainfall threshold (mm) distinguishing wet from dry days.
    max_days : int
        Length of the trailing window in days.

    Returns
    -------
    None
    """
    logger.info('entering calc_days_since_last_rainfall')
    # get list of files from start_date back max_days
    files_list = directory_utils.get_matching_files(data_dir, data_pattern)
    raster_list = []
    _r_in = regex.compile(data_pattern)
    for f in files_list:
        _m = _r_in.match(os.path.basename(f))
        # NOTE(review): max_date is loop-invariant and could be hoisted.
        max_date = start_date - datetime.timedelta(days=max_days)
        f_date = datetime.date(int(_m.group('year')), int(_m.group('month')), int(_m.group('day')))
        if max_date <= f_date < start_date:
            raster_list.append(f)
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    def replace_closure(subgroup, replacement, m):
        # Replace the named group's span within the whole match.
        # NOTE(review): implicitly returns None when the group is absent or
        # empty - regex.sub would then substitute an empty/None value; assumed
        # the patterns used here always capture year/month/day.
        if m.group(subgroup) not in [None, '']:
            start = m.start(subgroup)
            end = m.end(subgroup)
            return m.group()[:start] + replacement + m.group()[end:]

    # Build a reference filename carrying start_date's year/month/day, used to
    # derive all four output filenames below.
    _ref_file = regex.sub(
        data_pattern,
        functools.partial(replace_closure, 'year', '{0}'.format(start_date.year)),
        os.path.basename(files_list[0]))
    _ref_file = regex.sub(
        data_pattern,
        functools.partial(replace_closure, 'month', '{0:0>2}'.format(start_date.month)),
        _ref_file)
    _ref_file = regex.sub(
        data_pattern,
        functools.partial(replace_closure, 'day', '{0:0>2}'.format(start_date.day)),
        _ref_file)
    # Days since last wet day.
    dslw_file = self.vp.get('Days_Since_Last_Rain', 'regional_dslr_output_pattern').replace(
        '{max_days}', '{0}d'.format(max_days))
    dslw_file = dslw_file.replace('{threshold}', '{0}mm'.format(threshold))
    dslw_file = os.path.join(
        dst_dir, filename_utils.generate_output_filename(_ref_file, data_pattern, dslw_file))
    # Days since last dry day.
    dsld_file = self.vp.get('Days_Since_Last_Rain', 'regional_dsld_output_pattern').replace(
        '{max_days}', '{0}d'.format(max_days))
    dsld_file = dsld_file.replace('{threshold}', '{0}mm'.format(threshold))
    dsld_file = os.path.join(
        dst_dir, filename_utils.generate_output_filename(_ref_file, data_pattern, dsld_file))
    # Count of wet days in the window.
    num_wet_file = self.vp.get('Days_Since_Last_Rain', 'regional_wet_accum_output_pattern').replace(
        '{max_days}', '{0}d'.format(max_days))
    num_wet_file = num_wet_file.replace('{threshold}', '{0}mm'.format(threshold))
    num_wet_file = os.path.join(
        dst_dir, filename_utils.generate_output_filename(_ref_file, data_pattern, num_wet_file))
    # Accumulated rainfall over the window.
    ra_file = self.vp.get('Days_Since_Last_Rain', 'regional_accum_output_pattern').replace(
        '{max_days}', '{0}d'.format(max_days))
    ra_file = ra_file.replace('{threshold}', '{0}mm'.format(threshold))
    ra_file = os.path.join(
        dst_dir, filename_utils.generate_output_filename(_ref_file, data_pattern, ra_file))
    _temp_dir = self.vp.get('directories', 'temp_dir')
    if not os.path.exists(_temp_dir):
        os.makedirs(_temp_dir)
    precipitation_analysis.days_since_last_rain(
        raster_list=raster_list, dslw_filename=dslw_file, dsld_filename=dsld_file,
        num_wet_days_filename=num_wet_file, rainfall_accum_filename=ra_file,
        temp_dir=_temp_dir, threshold=threshold, max_days=max_days)
    logger.info('leaving calc_days_since_last_rainfall')
    return None
def calc_vhi(self, vci_filename=None, vci_dir=None, vci_pattern=None, tci_filename=None, tci_dir=None, tci_pattern=None, dst_filename=None, dst_dir=None, dst_pattern=None): logger.info('entering calc_vhi') if vci_filename is None: # get filename from pattern and directory files_list = directory_utils.get_matching_files(vci_dir, vci_pattern) try: _vci_filename = files_list[0] except IndexError, e: raise ValueError('Cannot find matching Vegetation Condition Index file in directory') else: _vci_filename = vci_filename if tci_filename is None: # get filename from pattern and directory files_list = directory_utils.get_matching_files(tci_dir, tci_pattern) try: _tci_filename = files_list[0] except IndexError, e: raise ValueError('Cannot find matching Temperature Condition Index file in directory') else: _tci_filename = tci_filename if dst_filename is None: # get new filename from directory and pattern _dst_filename = os.path.join(dst_dir, filename_utils.generate_output_filename( os.path.split(_vci_filename)[1], vci_pattern, dst_pattern)) else: _dst_filename = dst_filename if dst_dir is not None and not os.path.isdir(dst_dir): # destination directory does not exist, create it first os.makedirs(dst_dir)
def _extract_subset(self, input_dir, output_dir, patterns, subset, subset_name, overwrite=False):
    """Extract selected subdatasets from matching rasters into *output_dir*.

    For every file in *input_dir* matching ``patterns[0]``, converts it to
    GeoTIFF (named via ``patterns[1]``), then for multi-subdataset sources
    keeps only the 1-based subdataset indices listed in *subset*, renaming
    kept bands to include the subdataset name and deleting the rest
    (including ``.aux.xml`` sidecars). Single-band sources are renamed to
    include *subset_name*.

    Returns -1 on error (no matching files, or a file that cannot be
    opened).

    NOTE(review): source formatting was collapsed and the tail of this
    method (presumably returning ``new_files``) lies outside this chunk;
    nesting was reconstructed - verify against version history.
    """
    logger.info('entering _extract_subset')
    _all_files = directory_utils.get_matching_files(input_dir, patterns[0])
    if not _all_files:
        logger.debug('Extracting subset {0}. No files found in {1} with pattern {2}'.format(
            subset_name, input_dir, patterns[0]))
        print 'No files found in ' + input_dir + ', please check directory and try again'
        return -1
    # check output directory exists and create it if not
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    new_files = []
    for _ifl in _all_files:
        # generate parameter file
        _nfl = filename_utils.generate_output_filename(os.path.basename(_ifl), patterns[0], patterns[1])
        _ofl = os.path.join(output_dir, _nfl)
        # name the output would have once tagged with the subset name
        _checkfl = "{0}.{1}{2}".format(os.path.splitext(_ofl)[0], subset_name, os.path.splitext(_ofl)[1])
        if not os.path.exists(_checkfl) or overwrite == True:
            try:
                src_ds = gdal.Open(_ifl)
            except RuntimeError, e:
                logger.debug('Unable to open file')
                return -1
            sds = src_ds.GetSubDatasets()
            # if logger: logger.debug("Number of bands: %s",src_ds.RasterCount)
            self._convert_to_tiff(_ifl, _ofl, self.vp.get('directories', 'gdal_dir'))
            if len(sds) == 0:
                # no subdatasets - rename file to include subset name
                _new_name = "{0}.{1}{2}".format(
                    os.path.splitext(os.path.basename(_ofl))[0], subset_name, os.path.splitext(_ofl)[1])
                if not os.path.exists(os.path.join(output_dir, _new_name)) or overwrite:
                    os.rename(_ofl, os.path.join(output_dir, _new_name))
                if not os.path.exists(os.path.join(output_dir, "{0}.aux.xml".format(_new_name))) or overwrite:
                    os.rename("{0}.aux.xml".format(_ofl),
                              os.path.join(output_dir, "{0}.aux.xml".format(_new_name)))
            for idx, sbs in enumerate(sds):
                # if logger: logger.debug("Subdataset: %s", sbs[0])
                # get subset name (without spaces)
                _n = (sbs[0].rsplit(':', 1)[1]).replace(' ', '_')
                # target name tagged with the subdataset name
                _rf = "{0}.{1}{2}".format(
                    os.path.splitext(os.path.basename(_ofl))[0], _n, os.path.splitext(_ofl)[1])
                # conversion output name: zero-padded index, falling back to unpadded
                _cf = "{0}_{1}{2}".format(
                    os.path.splitext(os.path.basename(_ofl))[0], str(idx + 1).zfill(2),
                    os.path.splitext(_ofl)[1])
                if not os.path.exists(os.path.join(output_dir, _cf)) or overwrite:
                    _cf = "{0}_{1}{2}".format(
                        os.path.splitext(os.path.basename(_ofl))[0], str(idx + 1),
                        os.path.splitext(_ofl)[1])
                if idx + 1 not in subset:
                    # remove un-needed files (including .aux & .aux.xml)
                    os.remove(os.path.join(output_dir, _cf))
                    _aux_f = os.path.join(output_dir, "{0}.aux.xml".format(_cf))
                    if os.path.exists(_aux_f):
                        os.remove(_aux_f)
                else:
                    # keep this file - rename with subset name
                    if os.path.exists(os.path.join(output_dir, _rf)):
                        if overwrite:
                            # file already exists....delete first
                            os.remove(os.path.join(output_dir, _rf))
                            os.rename(os.path.join(output_dir, _cf), os.path.join(output_dir, _rf))
                        else:
                            # just remove the new file
                            os.remove(os.path.join(output_dir, _cf))
                    else:
                        os.rename(os.path.join(output_dir, _cf), os.path.join(output_dir, _rf))
                    _aux_f = os.path.join(output_dir, "{0}.aux.xml".format(_cf))
                    if os.path.exists(_aux_f):
                        if os.path.exists(os.path.join(output_dir, "{0}.aux.xml".format(_rf))):
                            # can't rename, delete first
                            os.remove(os.path.join(output_dir, "{0}.aux.xml".format(_rf)))
                        os.rename(_aux_f, os.path.join(output_dir, "{0}.aux.xml".format(_rf)))
                    new_files.append(_rf)
def calc_longterm_stats(self, input_dir, output_dir, product, interval, country=None,
                        input_pattern=None, output_pattern=None, start_date=None, end_date=None,
                        function_list=None):
    """Calculate long-term statistics (e.g. averages) for a MODIS product.

    Groups all matching files in *input_dir* by day-of-year, optionally
    converting 8-day composites into 16-day groups, and computes the
    requested statistics (default ``['AVG']``) per group via
    ``self._calculate_stats``.

    Parameters
    ----------
    input_dir, output_dir : str
        Input raster directory and destination for statistics files.
    product : str
        MODIS product key; used for configuration lookups (patterns,
        native interval).
    interval : str or None
        Desired output interval; when it differs from the product's native
        interval, files are regrouped (only 8-day -> '16-days' supported).
    country : str or None
        'Global' selects global file patterns; anything else (including
        None) selects regional patterns when *input_pattern* is None.
    input_pattern, output_pattern : str or None
        File patterns; looked up from configuration when None.
    start_date, end_date : date, datetime, str or None
        Inclusive date window for input files.
    function_list : list of str, optional
        Statistics to compute (default ``['AVG']``).

    Returns
    -------
    None

    Raises
    ------
    ValueError
        For an unknown product, or when the input pattern has neither a
        month nor a day-of-year group.
    """
    logger.info('entering calc_longterm_stats')
    if output_pattern is None:
        if product == self.vp.get('MODIS', 'vegetation_product'):
            _output_pattern = self.vp.get('MODIS_EVI_Long_Term_Average', 'lta_output_pattern')
        elif product == self.vp.get('MODIS', 'land_surface_temperature_product'):
            _output_pattern = self.vp.get('MODIS_LST_Long_Term_Average', 'lta_output_pattern')
        else:
            # FIX: was a bare `raise` with no active exception (TypeError in Py2)
            raise ValueError("Unknown product '{0}' - cannot determine output pattern".format(product))
    else:
        _output_pattern = output_pattern
    # NOTE(review): _country is never used below - the pattern selection tests
    # the raw `country` argument instead; kept for behaviour compatibility.
    if country is None:
        _country = self.vp.get('vampire_tmp', 'home_country')
    else:
        _country = country
    if input_pattern is None:
        if country == 'Global':
            if product == self.vp.get('MODIS', 'vegetation_product'):
                _input_pattern = self.vp.get('MODIS', 'evi_pattern')
            elif product == self.vp.get('MODIS', 'land_surface_temperature_product'):
                _input_pattern = self.vp.get('MODIS_LST', 'lst_pattern')
            else:
                raise ValueError("Unknown product '{0}' - cannot determine input pattern".format(product))
        else:
            if product == self.vp.get('MODIS', 'vegetation_product'):
                _input_pattern = self.vp.get('MODIS_EVI', 'evi_regional_pattern')
            elif product == self.vp.get('MODIS', 'land_surface_temperature_product'):
                _input_pattern = self.vp.get('MODIS_LST', 'lst_regional_pattern')
            else:
                raise ValueError("Unknown product '{0}' - cannot determine input pattern".format(product))
    else:
        _input_pattern = input_pattern
    # Determine whether the requested interval differs from the product's
    # native interval (requiring regrouping of files).
    _convert_interval = False
    _new_interval = None
    _native_interval = self.vp.get('MODIS_PRODUCTS', '{0}.interval'.format(product))
    if interval is not None:
        if interval != _native_interval:
            _convert_interval = True
            _new_interval = interval
            _interval = _native_interval
        else:
            _interval = interval
    else:
        _interval = _native_interval
    # FIX: previously passed the raw `input_pattern` argument here, which is
    # None whenever the pattern was resolved from configuration above.
    _all_files = directory_utils.get_matching_files(input_dir, _input_pattern)
    # Normalise the date window once, outside the loop.
    # FIX: the original called start_date.date() even when start_date was a
    # plain date (AttributeError), and compared _f_date against a possibly
    # datetime end_date without converting.
    _start = None
    if start_date is not None:
        if isinstance(start_date, datetime.datetime):
            _start = start_date.date()
        elif isinstance(start_date, datetime.date):
            _start = start_date
        else:
            _start = dateutil.parser.parse(start_date).date()
    _end = None
    if end_date is not None:
        if isinstance(end_date, datetime.datetime):
            _end = end_date.date()
        elif isinstance(end_date, datetime.date):
            _end = end_date
        else:
            _end = dateutil.parser.parse(end_date).date()
    _file_list = {}
    _yrs = []
    _doy = []
    for f in _all_files:
        _fname = os.path.basename(f)
        _result = re.match(_input_pattern, _fname)
        # Derive month/day either directly or from a day-of-year group.
        if not 'month' in _result.groupdict():
            if 'dayofyear' in _result.groupdict():
                _dt = datetime.datetime(int(_result.group('year')), 1, 1) + \
                    datetime.timedelta(int(_result.group('dayofyear')) - 1)
                _month = _dt.month
                _day = _dt.day
            else:
                raise ValueError('No month or day of year in file pattern')
        else:
            _month = int(_result.group('month'))
            _day = int(_result.group('day'))
        _f_date = datetime.date(int(_result.group('year')), _month, _day)
        # NOTE(review): behaviour preserved from the original - an out-of-range
        # file stops the scan (`break`, not `continue`), and end_date is only
        # checked when start_date is None; confirm files are date-sorted.
        if _start is not None:
            if _f_date < _start:
                break
        else:
            if _end is not None:
                if _f_date > _end:
                    break
        _yrs.append(_result.group('year'))
        _doy.append(_f_date.timetuple().tm_yday)
        # Group files by day-of-year across all years.
        _file_list.setdefault(_f_date.timetuple().tm_yday, []).append(f)
    _years = set(_yrs)
    _syr = min(_years)
    _eyr = max(_years)
    _num_yrs = str(int(_eyr) - int(_syr))
    # If converting 8-day composites to 16-day groups, merge every pair of
    # day-of-year buckets onto the 16-day boundaries.
    # NOTE: this only works for 8-day to 16-day conversion.
    # TODO: make this generic so it works for other conversions.
    if _convert_interval and _new_interval == '16-days':
        _sorted_doy = sorted(set(_doy))
        _new_file_list = {}
        _new_temp_list = []
        for x in _sorted_doy:
            if (x - 1) % 16 == 0:
                # 16-day boundary: take this bucket plus any buffered off-boundary files
                _new_file_list.setdefault(x, []).extend(_file_list[x])
                _new_file_list.setdefault(x, []).extend(_new_temp_list)
                _new_temp_list = []
            else:
                _new_temp_list.extend(_file_list[x])
        if _new_temp_list:
            # add trailing data to the first bucket (wraps around the year)
            _new_file_list.setdefault(1, []).extend(_new_temp_list)
        _interval = _new_interval
        _file_list = _new_file_list
    # Fill in the output-name template.
    _output_pattern = _output_pattern.replace('{yr_range}', '{0}-{1}'.format(_syr, _eyr))
    _output_pattern = _output_pattern.replace('{num_yrs}', '{0}yrs'.format(_num_yrs))
    _output_pattern = _output_pattern.replace('{subset}', '{0}'.format(_interval.lower()))
    if function_list is None:
        _function_list = ['AVG']
    else:
        _function_list = function_list
    if not os.path.isdir(output_dir):
        # directory doesn't exist....create it first
        os.makedirs(output_dir)
    for d in _file_list:
        fl = _file_list[d]
        newfl = directory_utils.unzip_file_list(fl)
        if len(fl) != 0:
            for func in _function_list:
                _fn_output_pattern = _output_pattern.replace('{statistic}', func.lower())
                newfilename = filename_utils.generate_output_filename(
                    os.path.basename(fl[0]), _input_pattern, _fn_output_pattern)
                self._calculate_stats(newfl, newfilename, output_dir, [func])
    logger.info('leaving calc_longterm_stats')
    return None