def assign_from_product(self, product, subproduct, version):
    product_out_info = querydb.get_product_out_info(
        productcode=product, subproductcode=subproduct, version=version,
        echo=False)
    product_out_info = functions.list_to_element(product_out_info)

    # Assign prod/subprod/version
    sds_metadata['eStation2_product'] = str(product)
    sds_metadata['eStation2_subProduct'] = str(subproduct)
    if isinstance(version, str) or isinstance(version, unicode):
        sds_metadata['eStation2_product_version'] = version
    else:
        sds_metadata['eStation2_product_version'] = 'undefined'

    sds_metadata['eStation2_defined_by'] = product_out_info.defined_by
    sds_metadata['eStation2_category'] = product_out_info.category_id
    sds_metadata['eStation2_descr_name'] = product_out_info.descriptive_name
    sds_metadata['eStation2_description'] = product_out_info.description
    sds_metadata['eStation2_provider'] = product_out_info.provider
    sds_metadata['eStation2_date_format'] = product_out_info.date_format
    sds_metadata['eStation2_frequency'] = product_out_info.frequency_id
    sds_metadata['eStation2_scaling_factor'] = product_out_info.scale_factor
    sds_metadata['eStation2_scaling_offset'] = product_out_info.scale_offset
    sds_metadata['eStation2_unit'] = product_out_info.unit
    sds_metadata['eStation2_nodata'] = product_out_info.nodata
def create_pipeline(starting_sprod):
    # ---------------------------------------------------------------------
    # Define input files
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod,
                                                        mapset, version, ext)
    input_dir = es_constants.processing_dir + \
        functions.set_path_sub_directory(prod, starting_sprod, 'Ingest',
                                         version, mapset)
    starting_files = input_dir + "*" + in_prod_ident

    # Read input product nodata
    in_prod_info = querydb.get_product_out_info(productcode=prod,
                                                subproductcode=starting_sprod,
                                                version=version)
    product_info = functions.list_to_element(in_prod_info)
    in_nodata = product_info.nodata
    print in_nodata

    # ---------------------------------------------------------------------
    # Monthly Average for a given month
    output_sprod = "monavg"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod,
                                                         mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident
    formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir +
                     "{YYYYMM[0]}" + out_prod_ident]

    @active_if(activate_monavg_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def modis_par_monavg(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file,
                "output_format": 'GTIFF', "options": "compress=lzw",
                "input_nodata": in_nodata}
        raster_image_math.do_avg_image(**args)
def get_list_dates_for_dataset(product_code, sub_product_code, version,
                               start_date=None, end_date=None):
    # Manage the dates
    if start_date is not None or end_date is not None:
        # Get the frequency from product table
        product_info = querydb.get_product_out_info(
            productcode=product_code,
            subproductcode=sub_product_code,
            version=version)
        frequency_id = product_info[0].frequency_id
        dateformat = product_info[0].date_format
        cDataset = datasets.Dataset(product_code, sub_product_code, '',
                                    version=version)
        cFrequency = cDataset.get_frequency(frequency_id, dateformat)

        # Build the list of dates
        date_start = cFrequency.extract_date(str(start_date))
        if end_date != '' and end_date is not None:
            date_end = cFrequency.extract_date(str(end_date))
        else:
            date_end = datetime.date.today()
        list_dates = cFrequency.get_internet_dates(
            cFrequency.get_dates(date_start, date_end), '%Y%m%d')
    else:
        list_dates = None

    return list_dates
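# A minimal, self-contained sketch of the date expansion performed above, for
# a plain fixed-step frequency (illustrative only: the real logic lives in the
# Frequency class and also handles dekads, months and 8-day MODIS periods):
import datetime

def list_dates_fixed_step(start_yyyymmdd, end_yyyymmdd, step_days=10):
    # Expand [start, end] into 'YYYYMMDD' strings at a constant step.
    fmt = '%Y%m%d'
    current = datetime.datetime.strptime(start_yyyymmdd, fmt).date()
    date_end = datetime.datetime.strptime(end_yyyymmdd, fmt).date()
    dates = []
    while current <= date_end:
        dates.append(current.strftime(fmt))
        current += datetime.timedelta(days=step_days)
    return dates

# e.g. list_dates_fixed_step('20200101', '20200131') ->
#      ['20200101', '20200111', '20200121', '20200131']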
def Test_get_product_out_info(self):
    product_out = querydb.get_product_out_info(productcode='fewsnet_rfe',
                                               subproductcode='rfe',
                                               version='undefined')
    logger.info("Product OUT info: %s", product_out)
    for row in product_out:
        print row
    self.assertEqual(1, 1)
def Test_get_product_out_info(self):
    product_out = querydb.get_product_out_info(productcode='fewsnet-rfe',
                                               subproductcode='10d',
                                               version='2.0')
    logger.info("Product OUT info: %s", product_out)
    for row in product_out:
        print row
    self.assertEqual(1, 1)
def get_product_out_info(product, subproduct, my_logger):
    try:
        # Get information from 'product' table
        args = {"productcode": product['productcode'],
                "subproductcode": subproduct,
                "version": product['version']}
        product_out_info = querydb.get_product_out_info(**args)
        product_out_info = functions.list_to_element(product_out_info)
        return product_out_info
    except:
        my_logger.error('Error defining output product info')
def __init__(self, product_code, sub_product_code, mapset, version=None,
             from_date=None, to_date=None):
    kwargs = {'productcode': product_code,
              'subproductcode': sub_product_code.lower() if sub_product_code else None}
    if version is not None:
        kwargs['version'] = version
    if from_date:
        self._check_date(from_date)
    if to_date:
        self._check_date(to_date)

    self._db_product = querydb.get_product_out_info(**kwargs)
    if self._db_product is None or self._db_product == []:
        raise NoProductFound(kwargs)
    if isinstance(self._db_product, list):
        self._db_product = self._db_product[0]

    self.mapset = mapset
    self._path = functions.set_path_sub_directory(
        product_code, sub_product_code, self._db_product.product_type,
        version, mapset)
    self.fullpath = os.path.join(es_constants.es2globals['processing_dir'],
                                 self._path)

    self._frequency = Dataset.get_frequency(self._db_product.frequency_id,
                                            self._db_product.date_format)

    if not from_date and self.no_year():
        from_date = datetime.date(datetime.date.today().year, 1, 1)
    if not to_date and self.no_year():
        to_date = datetime.date(datetime.date.today().year, 12, 1)

    self.from_date = from_date or None
    self.to_date = to_date or self._frequency.today()
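# Hypothetical usage of the constructor above (the product/mapset codes are
# illustrative and not guaranteed to exist in a given database):
try:
    dataset = Dataset('vgt-ndvi', 'ndv', 'SPOTV-Africa-1km',
                      version='sv2-pv2.2')
except NoProductFound, e:
    # NoProductFound carries the kwargs that failed to match a product
    print 'No product registered for %s' % e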
def create_permanently_missing_for_dataset(product_code, sub_product_code,
                                           version, mapset_code,
                                           start_date=None, end_date=None):
    # Get the existing dates for the dataset
    product = products.Product(product_code, version=version)
    missing_filenames = product.get_missing_filenames({'product': product_code,
                                                       'version': version})

    # Manage the dates
    if start_date is not None or end_date is not None:
        # Get the frequency from product table
        product_info = querydb.get_product_out_info(
            productcode=product_code,
            subproductcode=sub_product_code,
            version=version)
        frequency_id = product_info[0].frequency_id
        dateformat = product_info[0].date_format
        cDataset = datasets.Dataset(product_code, sub_product_code, '',
                                    version=version)
        cFrequency = cDataset.get_frequency(frequency_id, dateformat)

        # Build the list of dates
        date_start = cFrequency.extract_date(str(start_date))
        if end_date != '' and end_date is not None:
            date_end = cFrequency.extract_date(str(end_date))
        else:
            date_end = datetime.date.today()
        list_dates = cFrequency.get_internet_dates(
            cFrequency.get_dates(date_start, date_end), '%Y%m%d')
    else:
        list_dates = None

    return list_dates
def createSLD(product, version, subproduct, output_file=None):
    if output_file is None:
        output_file = '{0}/{1}_{2}_{3}.sld'.format(tempDir, product, version,
                                                   subproduct)

    # Make sure /data/temp exists
    # Note 1: see http://stackoverflow.com/questions/273192/how-to-check-if-a-directory-exists-and-create-it-if-necessary
    # Note 2: /data/temp should be a variable
    if not os.path.exists(tempDir):
        os.makedirs(tempDir)

    product_legends = querydb.get_product_legends(productcode=product,
                                                  subproductcode=subproduct,
                                                  version=version)

    # Get scale factor
    product_info = querydb.get_product_out_info(productcode=product,
                                                subproductcode=subproduct,
                                                version=version)
    scale_factor = product_info[0].scale_factor

    if hasattr(product_legends, "__len__") and product_legends.__len__() > 0:
        for legend in product_legends:
            # Changes for ES2-85
            legend_dict = legend
            defaultlegend = legend_dict['default_legend']
            # If there is only 1 legend defined, it is the default legend
            # (even if not flagged as such).
            if product_legends.__len__() == 1:
                defaultlegend = True
            if defaultlegend:
                legend_id = legend_dict['legend_id']
                legend_steps = querydb.get_legend_steps(legendid=legend_id)
                legend_name = legend_dict['legend_name']
    else:
        logger.warning('Error: no legend exists for this product. Exit')
        return 1

    num_steps = len(legend_steps)

    # Read the schema from the template
    tree = ET.ElementTree(file=geoserverREST.templ_sld)

    # Modify the schema for that Legend
    # Modify Layer Name
    for child in tree.getiterator():
        if child.tag == 'NamedLayer':
            child.set("Name", product)

    # Modify User Style Title
    for child in tree.getiterator():
        if child.tag == 'UserStyle':
            child.set("Title", legend_name)

    # Find the ColorMap element
    for child in tree.getiterator():
        if child.tag == 'ColorMap':
            ColorMap = child

    num_CME = len(ColorMap)
    # Check there are enough ColorMapEntries for this legend
    if num_steps > num_CME:
        logger.error('Too many legend steps [>255]. Exit')
        return 1

    # Modify the Steps (and remove remaining ones)
    for istep in range(0, num_steps):
        step = legend_steps[istep]
        # Build the RGB color
        color_rgb = step.color_rgb.split(' ')
        r = color_rgb[0]
        g = color_rgb[1]
        b = color_rgb[2]
        color_html = rgb2html(color_rgb)
        to_value = old_div(step.to_step, scale_factor)
        # Modify steps
        ColorMap[istep].set('quantity', str(to_value))
        ColorMap[istep].set('color', color_html)

    for istep in range(num_steps, num_CME):
        del ColorMap[num_steps]

    tree.write(output_file)
    return output_file
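# rgb2html() is used above but not shown here; a minimal sketch of what it
# presumably does, assuming color_rgb is a list of three 0-255 string
# components (as produced by step.color_rgb.split(' ')):
def rgb2html_sketch(color_rgb):
    r, g, b = (int(c) for c in color_rgb)
    return '#%02X%02X%02X' % (r, g, b)

# e.g. rgb2html_sketch(['255', '128', '0']) -> '#FF8000'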
def create_pipeline(input_products, output_product, logfile=None,
                    nrt_products=True, update_stats=False):

    proc_lists = None
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_modis_pp')

    # Set DEFAULTS: all off
    activate_pp_comput = 0         # PP from Chla, SST, Kd490 and PAR
    activate_stats_comput = 0      # Stats computation (inter-annual clim, min, max)
    activate_anomalies_comput = 0  # Anomalies computation (not yet done!!)

    # Switch wrt groups - according to options
    if nrt_products:
        activate_pp_comput = 1     # PP from Chla, SST, Kd490 and PAR
    if update_stats:
        activate_stats_comput = 1
        activate_anomalies_comput = 1

    activate_pp_prod_comput = 1
    activate_pp_stats_clim_comput = 1
    activate_pp_stats_min_comput = 1
    activate_pp_stats_max_comput = 1

    # ---------------------------------------------------------------------
    # Create lists
    # my_date = '20160601'
    my_date = ''
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Parse the arguments and extract the 4 input variables
    if len(input_products) != 4:
        spec_logger.error('Modis PP computation requires 4 inputs. Exit')
        return 1

    found_chla = False
    found_sst = False
    found_par = False
    found_kd490 = False

    for input_product in input_products:

        if re.search('.*chla.*', input_product.productcode):
            found_chla = True
            chla_prod = input_product.productcode
            chla_version = input_product.version
            chla_sprod = input_product.subproductcode
            chla_mapset = input_product.mapsetcode
            chla_prod_ident = functions.set_path_filename_no_date(
                chla_prod, chla_sprod, chla_mapset, chla_version, ext)
            chla_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(chla_prod, chla_sprod,
                                                 'Derived', chla_version,
                                                 chla_mapset)

        if re.search('.*sst.*', input_product.productcode):
            found_sst = True
            sst_prod = input_product.productcode
            sst_version = input_product.version
            sst_sprod = input_product.subproductcode
            sst_mapset = input_product.mapsetcode
            sst_prod_ident = functions.set_path_filename_no_date(
                sst_prod, sst_sprod, sst_mapset, sst_version, ext)
            sst_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(sst_prod, sst_sprod,
                                                 'Derived', sst_version,
                                                 sst_mapset)

        if re.search('.*kd490.*', input_product.productcode):
            found_kd490 = True
            kd490_prod = input_product.productcode
            kd490_version = input_product.version
            kd490_sprod = input_product.subproductcode
            kd490_mapset = input_product.mapsetcode
            kd490_prod_ident = functions.set_path_filename_no_date(
                kd490_prod, kd490_sprod, kd490_mapset, kd490_version, ext)
            kd490_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(kd490_prod, kd490_sprod,
                                                 'Derived', kd490_version,
                                                 kd490_mapset)

        if re.search('.*par.*', input_product.productcode):
            found_par = True
            par_prod = input_product.productcode
            par_version = input_product.version
            par_sprod = input_product.subproductcode
            par_mapset = input_product.mapsetcode
            par_prod_ident = functions.set_path_filename_no_date(
                par_prod, par_sprod, par_mapset, par_version, ext)
            par_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(par_prod, par_sprod,
                                                 'Derived', par_version,
                                                 par_mapset)

    # Check consistency of inputs
    if not found_chla or not found_kd490 or not found_par or not found_sst:
        spec_logger.error('At least one of 4 expected inputs missing. Exit')
        return 1

    if chla_mapset != sst_mapset or chla_mapset != kd490_mapset \
            or chla_mapset != par_mapset:
        spec_logger.error('All 4 input mapsets must be equal. Exit')
        return 1

    # Read input products nodata
    chla_prod_info = querydb.get_product_out_info(productcode=chla_prod,
                                                  subproductcode=chla_sprod,
                                                  version=chla_version)
    chla_product_info = functions.list_to_element(chla_prod_info)
    chla_nodata = chla_product_info.nodata
    chla_frequency = chla_product_info.frequency_id

    sst_prod_info = querydb.get_product_out_info(productcode=sst_prod,
                                                 subproductcode=sst_sprod,
                                                 version=sst_version)
    sst_product_info = functions.list_to_element(sst_prod_info)
    sst_nodata = sst_product_info.nodata

    kd_prod_info = querydb.get_product_out_info(productcode=kd490_prod,
                                                subproductcode=kd490_sprod,
                                                version=kd490_version)
    kd_product_info = functions.list_to_element(kd_prod_info)
    kd_nodata = kd_product_info.nodata

    par_prod_info = querydb.get_product_out_info(productcode=par_prod,
                                                 subproductcode=par_sprod,
                                                 version=par_version)
    par_product_info = functions.list_to_element(par_prod_info)
    par_nodata = par_product_info.nodata

    # Define outputs
    output_nodata = -32767
    old = False

    # NOTE: the prod/mapset/version are taken from the FIRST OUTPUT passed;
    #       the subprod is defined according to the frequency
    output_prod = output_product[0].productcode
    output_version = output_product[0].version
    output_mapset = output_product[0].mapsetcode

    if old:
        # Get the first output -> PP subproduct generated (8daysavg or monavg)
        output_sprod = output_product[0].subproductcode
    else:
        # Define the outputs according to the frequency
        # (method in 'functions' to be created !!)
        if chla_frequency == 'e1month':
            frequency_string = 'monthly'
            output_sprod = 'monavg'
            output_sprod_clim = '1monclim'
            output_sprod_min = '1monmin'
            output_sprod_max = '1monmax'
            sub_product_group = '1monstat'
        elif chla_frequency == 'e1modis8day':
            frequency_string = '8 days'
            output_sprod = '8daysavg'
            activate_pp_stats_clim_comput = 1
            activate_pp_stats_min_comput = 1
            activate_pp_stats_max_comput = 1
            sub_product_group = '8daysstat'
            output_sprod_clim = '8daysclim'
            output_sprod_min = '8daysmin'
            output_sprod_max = '8daysmax'
        else:
            spec_logger.error('Frequency not recognized: %s. Exit!',
                              chla_frequency)
            return

    out_prod_ident = functions.set_path_filename_no_date(
        output_prod, output_sprod, output_mapset, output_version, ext)
    output_subdir = functions.set_path_sub_directory(output_prod, output_sprod,
                                                     'Derived', output_version,
                                                     output_mapset)

    # Fixes ES2-36
    def generate_input_files_pp():
        # Take kd490 as starting point
        kd_files = kd490_input_dir + my_date + "*" + kd490_prod_ident
        input_files = sorted(glob.glob(kd_files))

        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            ancillary_chla = chla_input_dir + mydate + chla_prod_ident
            ancillary_par = par_input_dir + mydate + par_prod_ident
            ancillary_sst = sst_input_dir + mydate + sst_prod_ident

            do_comp = True
            if not os.path.isfile(ancillary_chla):
                do_comp = False
            if not os.path.isfile(ancillary_par):
                do_comp = False
            if not os.path.isfile(ancillary_sst):
                do_comp = False

            if do_comp is True:
                output_file = es_constants.processing_dir + output_subdir + \
                    os.path.sep + mydate + out_prod_ident
                my_inputs = (input_file, ancillary_chla, ancillary_par,
                             ancillary_sst)
                yield (my_inputs, output_file)

    @active_if(activate_pp_comput)
    @files(generate_input_files_pp)
    def modis_pp_comp(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"chla_file": input_file[1], "sst_file": input_file[3],
                "kd_file": input_file[0], "par_file": input_file[2],
                "sst_nodata": sst_nodata, "kd_nodata": kd_nodata,
                "chla_nodata": chla_nodata, "par_nodata": par_nodata,
                "output_file": output_file, "output_nodata": output_nodata,
                "output_format": 'GTIFF', "output_type": None,
                "options": "compress=lzw"}
        raster_image_math.do_compute_primary_production(**args)

    # ---------------------------------------------------------------------
    # Climatology (inter-annual average)
    prod = output_prod
    mapset = output_mapset
    new_input_subprod = output_sprod
    version = output_version
    in_prod_ident = functions.set_path_filename_no_date(
        prod, new_input_subprod, mapset, version, ext)
    in_prod_subdir = functions.set_path_sub_directory(prod, new_input_subprod,
                                                      'Derived', version,
                                                      mapset)
    starting_files = es2_data_dir + in_prod_subdir + "*" + in_prod_ident

    output_sprod_group = proc_lists.proc_add_subprod_group(sub_product_group)
    output_sprod = proc_lists.proc_add_subprod(
        output_sprod_clim,
        sub_product_group,
        final=False,
        descriptive_name='Inter-annual Climatology at ' + frequency_string +
                         ' frequency',
        description='Inter-annual Climatology at ' + frequency_string +
                    ' frequency',
        frequency_id=chla_frequency,
        date_format='MMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident_clim = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_clim = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir_clim +
                     "{MMDD[0]}" + out_prod_ident_clim]

    # Map each 8-day period start date (MMDD in a non-leap year) to its
    # equivalent start date in a leap year (dates after February shift by one
    # day). Shared by the clim/min/max input generators below.
    MMDD_nonleap_dict = {
        '0101': '0101', '0109': '0109', '0117': '0117', '0125': '0125',
        '0202': '0202', '0210': '0210', '0218': '0218', '0226': '0226',
        '0306': '0305', '0314': '0313', '0322': '0321', '0330': '0329',
        '0407': '0406', '0415': '0414', '0423': '0422', '0501': '0430',
        '0509': '0508', '0517': '0516', '0525': '0524', '0602': '0601',
        '0610': '0609', '0618': '0617', '0626': '0625', '0704': '0703',
        '0712': '0711', '0720': '0719', '0728': '0727', '0805': '0804',
        '0813': '0812', '0821': '0820', '0829': '0828', '0906': '0905',
        '0914': '0913', '0922': '0921', '0930': '0929', '1008': '1007',
        '1016': '1015', '1024': '1023', '1101': '1031', '1109': '1108',
        '1117': '1116', '1125': '1124', '1203': '1202', '1211': '1210',
        '1219': '1218', '1227': '1226'}

    # Fixes ES2-304
    def generate_input_files_pp_stats():
        for MMDD_nonleap, MMDD_leap in MMDD_nonleap_dict.items():
            formatter_in_nonleap = es2_data_dir + in_prod_subdir + "*" + \
                MMDD_nonleap + in_prod_ident
            nonleap_files = sorted(glob.glob(formatter_in_nonleap))
            formatter_in_leap = es2_data_dir + in_prod_subdir + "*" + \
                MMDD_leap + in_prod_ident
            leap_files = sorted(glob.glob(formatter_in_leap))
            my_inputs = leap_files + nonleap_files
            input_files_unique = list(set(my_inputs))
            output_file = es_constants.processing_dir + output_subdir_clim + \
                os.path.sep + MMDD_nonleap + out_prod_ident_clim
            yield (input_files_unique, output_file)

    if frequency_string != 'monthly':
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_clim_comput)
        @files(generate_input_files_pp_stats)
        def std_yearly_clim(input_file, output_file):
            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": reduced_list, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw"}
            raster_image_math.do_avg_image(**args)
    else:
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_clim_comput)
        @collate(starting_files, formatter(formatter_in), formatter_out)
        def std_yearly_clim(input_file, output_file):
            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": reduced_list, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw"}
            raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Minimum
    output_sprod = proc_lists.proc_add_subprod(
        output_sprod_min,
        sub_product_group,
        final=False,
        descriptive_name='Inter-annual Minimum at ' + frequency_string +
                         ' frequency',
        description='Inter-annual Minimum at ' + frequency_string +
                    ' frequency',
        frequency_id=chla_frequency,
        date_format='MMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident_min = functions.set_path_filename_no_date(
        prod, output_sprod_min, mapset, version, ext)
    output_subdir_min = functions.set_path_sub_directory(
        prod, output_sprod_min, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir_min +
                     "{MMDD[0]}" + out_prod_ident_min]

    def generate_input_files_pp_stats_min():
        for MMDD_nonleap, MMDD_leap in MMDD_nonleap_dict.items():
            formatter_in_nonleap = es2_data_dir + in_prod_subdir + "*" + \
                MMDD_nonleap + in_prod_ident
            nonleap_files = sorted(glob.glob(formatter_in_nonleap))
            formatter_in_leap = es2_data_dir + in_prod_subdir + "*" + \
                MMDD_leap + in_prod_ident
            leap_files = sorted(glob.glob(formatter_in_leap))
            my_inputs = leap_files + nonleap_files
            input_files_unique = list(set(my_inputs))
            output_file = es_constants.processing_dir + output_subdir_min + \
                os.path.sep + MMDD_nonleap + out_prod_ident_min
            yield (input_files_unique, output_file)

    if frequency_string != 'monthly':
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_min_comput)
        @files(generate_input_files_pp_stats_min)
        def std_yearly_min(input_file, output_file):
            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": reduced_list, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw"}
            raster_image_math.do_min_image(**args)
    else:
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_min_comput)
        @collate(starting_files, formatter(formatter_in), formatter_out)
        def std_yearly_min(input_file, output_file):
            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": reduced_list, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw"}
            raster_image_math.do_min_image(**args)

    # ---------------------------------------------------------------------
    # Maximum
    output_sprod = proc_lists.proc_add_subprod(
        output_sprod_max,
        sub_product_group,
        final=False,
        descriptive_name='Inter-annual Maximum at ' + frequency_string +
                         ' frequency',
        description='Inter-annual Maximum at ' + frequency_string +
                    ' frequency',
        frequency_id=chla_frequency,
        date_format='MMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident_max = functions.set_path_filename_no_date(
        prod, output_sprod_max, mapset, version, ext)
    output_subdir_max = functions.set_path_sub_directory(
        prod, output_sprod_max, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir_max +
                     "{MMDD[0]}" + out_prod_ident_max]

    def generate_input_files_pp_stats_max():
        for MMDD_nonleap, MMDD_leap in MMDD_nonleap_dict.items():
            formatter_in_nonleap = es2_data_dir + in_prod_subdir + "*" + \
                MMDD_nonleap + in_prod_ident
            nonleap_files = sorted(glob.glob(formatter_in_nonleap))
            formatter_in_leap = es2_data_dir + in_prod_subdir + "*" + \
                MMDD_leap + in_prod_ident
            leap_files = sorted(glob.glob(formatter_in_leap))
            my_inputs = leap_files + nonleap_files
            input_files_unique = list(set(my_inputs))
            output_file = es_constants.processing_dir + output_subdir_max + \
                os.path.sep + MMDD_nonleap + out_prod_ident_max
            yield (input_files_unique, output_file)

    if frequency_string != 'monthly':
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_max_comput)
        @files(generate_input_files_pp_stats_max)
        def std_yearly_max(input_file, output_file):
            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": reduced_list, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw"}
            raster_image_math.do_max_image(**args)
    else:
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_max_comput)
        @collate(starting_files, formatter(formatter_in), formatter_out)
        def std_yearly_max(input_file, output_file):
            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": reduced_list, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw"}
            raster_image_math.do_max_image(**args)
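# The MMDD_nonleap_dict used by the stats generators above pairs each 8-day
# period start (MMDD in a non-leap year) with its leap-year equivalent. A
# sketch of how such a mapping can be derived rather than hard-coded (2001
# and 2004 are arbitrary non-leap/leap reference years):
import datetime

def build_mmdd_pairs(step_days=8):
    pairs = {}
    nonleap = datetime.date(2001, 1, 1)
    leap = datetime.date(2004, 1, 1)
    step = datetime.timedelta(days=step_days)
    while nonleap.year == 2001:
        # After February the leap-year period starts fall one day earlier
        pairs[nonleap.strftime('%m%d')] = leap.strftime('%m%d')
        nonleap += step
        leap += step
    return pairs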
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None,
                    proc_lists=None, nrt_products=True, update_stats=False):

    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Set DEFAULTS: all off
    activate_pp_1mon_comput = 0       # 10d stats
    activate_10danomalies_comput = 0  # 10d anomalies
    activate_monthly_comput = 0       # monthly cumulation
    activate_monstats_comput = 0      # monthly stats
    activate_monanomalies_comput = 0  # monthly anomalies

    # Switch wrt groups - according to options
    if nrt_products:
        activate_pp_1mon_comput = 1       # Primary Production Monthly
        activate_monthly_comput = 1       # monthly cumulation
        activate_monanomalies_comput = 1  # monthly anomalies
    if update_stats:
        activate_pp_8dstats_comput = 1    # 10d stats
        activate_pp_monstats_comput = 1   # monthly stats

    # Primary Production Monthly
    # Always true
    # activate_pp_1mon_comput = 1

    # my_date = '20160601'
    my_date = ''
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Primary Productivity from chl-a, sst, kd490 and par data
    # Define inputs
    chla_prod = prod
    chla_version = 'v2013.1'
    chla_prod_ident = functions.set_path_filename_no_date(
        chla_prod, starting_sprod, mapset, chla_version, ext)
    chla_input_dir = es2_data_dir + \
        functions.set_path_sub_directory(chla_prod, starting_sprod, 'Derived',
                                         chla_version, mapset)

    # ---------------------------------------------------------------------
    sst_prod = "modis-sst"
    sst_version = 'v2013.1'
    sst_prod_ident = functions.set_path_filename_no_date(
        sst_prod, starting_sprod, mapset, sst_version, ext)
    sst_input_dir = es2_data_dir + \
        functions.set_path_sub_directory(sst_prod, starting_sprod, 'Derived',
                                         sst_version, mapset)

    # ---------------------------------------------------------------------
    kd_prod = "modis-kd490"
    kd_version = 'v2012.0'
    kd_prod_ident = functions.set_path_filename_no_date(
        kd_prod, starting_sprod, mapset, kd_version, ext)
    kd_input_dir = es2_data_dir + \
        functions.set_path_sub_directory(kd_prod, starting_sprod, 'Derived',
                                         kd_version, mapset)
    kd_files = kd_input_dir + my_date + "*" + kd_prod_ident

    # ---------------------------------------------------------------------
    par_prod = "modis-par"
    par_version = 'v2012.0'
    par_prod_ident = functions.set_path_filename_no_date(
        par_prod, starting_sprod, mapset, par_version, ext)
    par_input_dir = es2_data_dir + \
        functions.set_path_sub_directory(par_prod, starting_sprod, 'Derived',
                                         par_version, mapset)

    # Read input products nodata
    chla_prod_info = querydb.get_product_out_info(productcode=chla_prod,
                                                  subproductcode="monavg",
                                                  version=chla_version)
    chla_product_info = functions.list_to_element(chla_prod_info)
    chla_nodata = chla_product_info.nodata

    sst_prod_info = querydb.get_product_out_info(productcode=sst_prod,
                                                 subproductcode="monavg",
                                                 version=sst_version)
    sst_product_info = functions.list_to_element(sst_prod_info)
    sst_nodata = sst_product_info.nodata

    kd_prod_info = querydb.get_product_out_info(productcode=kd_prod,
                                                subproductcode="monavg",
                                                version=kd_version)
    kd_product_info = functions.list_to_element(kd_prod_info)
    kd_nodata = kd_product_info.nodata

    par_prod_info = querydb.get_product_out_info(productcode=par_prod,
                                                 subproductcode="monavg",
                                                 version=par_version)
    par_product_info = functions.list_to_element(par_prod_info)
    par_nodata = par_product_info.nodata

    # Define outputs
    output_prod = "modis-pp"
    output_sprod = starting_sprod
    out_prod_ident = functions.set_path_filename_no_date(
        output_prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(output_prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    # Starting files: monthly composites
    formatter_kd = "(?P<YYYYMMDD>[0-9]{8})" + kd_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + \
                    "{YYYYMMDD[0]}" + out_prod_ident

    ancillary_sst = sst_input_dir + "{YYYYMMDD[0]}" + sst_prod_ident
    ancillary_par = par_input_dir + "{YYYYMMDD[0]}" + par_prod_ident
    ancillary_chla = chla_input_dir + "{YYYYMMDD[0]}" + chla_prod_ident

    @active_if(activate_pp_1mon_comput)
    @transform(kd_files, formatter(formatter_kd),
               add_inputs(ancillary_chla, ancillary_par, ancillary_sst),
               formatter_out)
    def modis_pp_1mon(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"chla_file": input_file[1], "sst_file": input_file[3],
                "kd_file": input_file[0], "par_file": input_file[2],
                "sst_nodata": sst_nodata, "kd_nodata": kd_nodata,
                "chla_nodata": chla_nodata, "par_nodata": par_nodata,
                "output_file": output_file, "output_nodata": -9999,
                "output_format": 'GTIFF', "output_type": None,
                "options": "compress=lzw"}
        raster_image_math.do_compute_primary_production(**args)
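# The ruffus wiring above extracts YYYYMMDD from each kd490 filename and
# re-uses it to locate the chla/par/sst ancillary files of the same date.
# The plain-regex equivalent of that extraction (the filename below is
# illustrative only):
import re

match = re.search(r'(?P<YYYYMMDD>[0-9]{8})', '20160601_modis-kd490_monavg.tif')
if match:
    print match.group('YYYYMMDD')  # -> '20160601'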
def convert_to_ecoargis(productinfo, startdate, enddate, aggregateinfo,
                        vectorlayer, regionidattr, regionlevel):
    crud_db = crud.CrudDB(schema=es_constants.es2globals['schema_products'])

    productcode = productinfo['productcode']
    subproductcode = productinfo['subproductcode']
    version = productinfo['version']
    mapsetcode = productinfo['mapsetcode']

    product_info = querydb.get_product_out_info(productcode=productcode,
                                                subproductcode=subproductcode,
                                                version=version)
    if product_info.__len__() > 0:
        for row in product_info:
            product_descriptive_name = row.descriptive_name
            product_description = row.description
            product_dateformat = row.date_format
            product_provider = row.provider
    else:
        logger.error('Product does not exist: %s - %s - %s - %s' %
                     (productcode, version, subproductcode, mapsetcode))
        exit()

    from_date = datetime.datetime.strptime(startdate, '%Y-%m-%d').date()
    to_date = datetime.datetime.strptime(enddate, '%Y-%m-%d').date()

    filename, file_extension = os.path.splitext(vectorlayer)
    if file_extension == '.shp':
        driver = ogr.GetDriverByName('ESRI Shapefile')
    elif file_extension == '.geojson':
        driver = ogr.GetDriverByName('GeoJSON')
    else:
        logger.error('Vector layer file is in a wrong format or has a wrong '
                     'extension: %s' % vectorlayer)
        exit()

    vectorlayer = driver.Open(vectorlayer)

    # Get the layer (and its projection, if needed)
    layer = vectorlayer.GetLayer()
    # spatialRef = layer.GetSpatialRef()

    # Check that the region ID attribute exists among the layer fields
    layerDefinition = layer.GetLayerDefn()
    idattr_exists = False
    for i in range(layerDefinition.GetFieldCount()):
        fieldName = layerDefinition.GetFieldDefn(i).GetName()
        if fieldName == regionidattr:
            idattr_exists = True

    if not idattr_exists:
        logger.error('ID Attribute does not exist in vector layer: %s' %
                     regionidattr)
        return

    for feature in layer:
        geom = feature.GetGeometryRef()
        regionid = feature.GetField(regionidattr)

        delete_ecoagrisrec = {
            "productcode": productcode,
            "subproductcode": subproductcode,
            "version": version,
            "mapsetcode": mapsetcode,
            "regionid": regionid,
            'aggregation_type': aggregateinfo['aggregation_type'],
        }
        if crud_db.delete('ecoagris', **delete_ecoagrisrec):
            logger.info('ecoagris record deleted')
        else:
            logger.error('Error deleting ecoagris record')

        timeseries = getTimeseries(productcode, subproductcode, version,
                                   mapsetcode, geom, from_date, to_date,
                                   aggregateinfo)

        # Loop through the timeseries, get each date/value and create a record
        for timeserie in timeseries:
            if timeserie['meanvalue'] not in [None, '']:
                productdate = timeserie['date'].strftime("%Y%m%d")
                if product_dateformat == 'YYYYMMDDHHMM':
                    productdate = timeserie['date'].strftime("%Y%m%d%H%M")
                elif product_dateformat == 'YYYYMMDD':
                    productdate = timeserie['date'].strftime("%Y%m%d")
                elif product_dateformat == 'YYYYMM':
                    productdate = timeserie['date'].strftime("%Y%m")
                elif product_dateformat == 'YYYY':
                    productdate = timeserie['date'].strftime("%Y")
                elif product_dateformat == 'MMDD':
                    productdate = timeserie['date'].strftime("%m%d")

                ecoagris_record = {
                    "productcode": productcode,
                    "subproductcode": subproductcode,
                    "version": version,
                    "mapsetcode": mapsetcode,
                    "product_descriptive_name": product_descriptive_name,
                    "product_description": product_description,
                    "provider": product_provider,
                    "regionid": regionid,
                    "regionlevel": regionlevel,
                    'aggregation_type': aggregateinfo['aggregation_type'],
                    'aggregation_min': aggregateinfo['aggregation_min'],
                    'aggregation_max': aggregateinfo['aggregation_max'],
                    "product_dateformat": product_dateformat,
                    "product_date": productdate,
                    "tsvalue": timeserie['meanvalue']
                }

                # Insert record in DB table ecoagris
                if crud_db.create('ecoagris', ecoagris_record):
                    logger.info('ecoagris record created')
                else:
                    logger.error('Error creating ecoagris record')

    # Return the latest computed record (M.C.)
    return ecoagris_record
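# The if-chain above dispatches the product's date_format to a strftime
# pattern; a compact equivalent (a sketch covering the same formats):
DATE_FORMAT_TO_STRFTIME = {
    'YYYYMMDDHHMM': '%Y%m%d%H%M',
    'YYYYMMDD': '%Y%m%d',
    'YYYYMM': '%Y%m',
    'YYYY': '%Y',
    'MMDD': '%m%d',
}
# productdate = timeserie['date'].strftime(
#     DATE_FORMAT_TO_STRFTIME.get(product_dateformat, '%Y%m%d'))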
def getTimeseries_green(productcode, subproductcode, version, mapsetcode, wkt,
                        start_date, end_date, aggregate):
    # Extract timeseries from a list of files and return as JSON object.
    # It applies to a single dataset (prod/sprod/version/mapset) between 2 dates.
    # Several types of aggregation are foreseen:
    #
    #   mean    : Sum(Xi)/N(Xi)  -> min/max not considered   e.g. Rain
    #   cumulate: Sum(Xi)        -> min/max not considered   e.g. Fire
    #   count   : N(Xi where min < Xi < max)                 e.g. Vegetation anomalies
    #   surface : count * PixelArea                          e.g. Water Bodies
    #   percent : count/Ntot                                 e.g. Vegetation anomalies

    ogr.UseExceptions()
    theGeomWkt = ' '.join(wkt.strip().split())
    geom = Geometry(wkt=str(theGeomWkt), srs=4326)

    # Get Mapset Info
    mapset_info = querydb.get_mapset(mapsetcode=mapsetcode)

    # Compute pixel area by converting degrees to km (lat x long shift)
    pixelArea = abs(mapset_info.pixel_shift_lat) * \
        abs(mapset_info.pixel_shift_long) * 12544.0

    # Get Product Info
    product_info = querydb.get_product_out_info(productcode=productcode,
                                                subproductcode=subproductcode,
                                                version=version)
    if product_info.__len__() > 0:
        scale_factor = 0
        scale_offset = 0
        nodata = 0
        date_format = ''
        for row in product_info:
            scale_factor = row.scale_factor
            scale_offset = row.scale_offset
            nodata = row.nodata
            unit = row.unit
            date_format = row.date_format

    [list_files, dates_list] = getFilesList(productcode, subproductcode,
                                            version, mapsetcode, date_format,
                                            start_date, end_date)

    # Build a dictionary with filenames/dates
    dates_to_files_dict = dict(zip(dates_list, list_files))

    # Generate unique list of files
    unique_list = set(list_files)
    uniqueFilesValues = []

    for infile in unique_list:
        single_result = {'filename': '', 'meanvalue_noscaling': nodata,
                         'meanvalue': None}

        if os.path.isfile(infile):
            try:
                mx = []
                with Raster(infile) as img:
                    # Assign nodata from prod_info
                    img._nodata = nodata
                    with img.clip(geom) as clipped:
                        # Save clipped image (for debug only)
                        # clipped.save('/data/processing/exchange/clipped_test.tif')
                        mx = clipped.array()
                        nodata_array_masked = ma.masked_equal(mx, nodata)
                        merged_mask = ma.mask_or(
                            ma.getmask(mx), ma.getmask(nodata_array_masked))
                        mxnodata = ma.masked_array(ma.getdata(mx), merged_mask)

                if aggregate['aggregation_type'] == 'count' or \
                   aggregate['aggregation_type'] == 'percent' or \
                   aggregate['aggregation_type'] == 'surface':

                    min_val = aggregate['aggregation_min']
                    max_val = aggregate['aggregation_max']
                    # Scale thresholds from physical to digital value
                    min_val_scaled = (min_val - scale_offset) / scale_factor
                    max_val_scaled = (max_val - scale_offset) / scale_factor
                    mxrange = ma.masked_outside(mxnodata, min_val_scaled,
                                                max_val_scaled)

                    if aggregate['aggregation_type'] == 'percent':
                        # 'percent'
                        meanResult = float(mxrange.count()) / \
                            float(mxnodata.count()) * 100
                    elif aggregate['aggregation_type'] == 'surface':
                        # 'surface'
                        meanResult = float(mxrange.count()) * pixelArea
                    else:
                        # 'count'
                        meanResult = float(mxrange.count())

                    # Both results are equal
                    finalvalue = meanResult

                else:
                    # 'mean' or 'cumulate'
                    if mxnodata.count() == 0:
                        meanResult = 0.0
                    else:
                        if aggregate['aggregation_type'] == 'mean':
                            # 'mean'
                            meanResult = mxnodata.mean()
                        else:
                            # 'cumulate'
                            meanResult = mxnodata.sum()

                    # Scale to physical value
                    finalvalue = (meanResult * scale_factor + scale_offset)

                # Assign results
                single_result['filename'] = infile
                single_result['meanvalue_noscaling'] = meanResult
                single_result['meanvalue'] = finalvalue

            except Exception, e:
                logger.debug('ERROR: clipping - %s' % (e))
        else:
            logger.debug('ERROR: raster file does not exist - %s' % infile)

        uniqueFilesValues.append(single_result)

    # Define a dictionary to associate filenames/values
    files_to_values_dict = dict((x['filename'], x['meanvalue'])
                                for x in uniqueFilesValues)

    # Prepare array for result
    resultDatesValues = []

    # Returns a list of 'dates'/'values'
    for mydate in dates_list:
        my_result = {'date': datetime.date.today(), 'meanvalue': nodata}
        # Assign the date
        my_result['date'] = mydate
        # Assign the filename
        my_filename = dates_to_files_dict[mydate]
        # Map from array of Values
        my_result['meanvalue'] = files_to_values_dict[my_filename]
        resultDatesValues.append(my_result)

    return resultDatesValues
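# The digital/physical scaling used above, in both directions:
#   physical = DN * scale_factor + scale_offset
#   DN       = (physical - scale_offset) / scale_factor
# e.g. with scale_factor=0.004 and scale_offset=-0.08 (illustrative values),
# a digital number of 200 maps to 200 * 0.004 - 0.08 = 0.72
def dn_to_physical(dn, scale_factor, scale_offset):
    return dn * scale_factor + scale_offset

def physical_to_dn(value, scale_factor, scale_offset):
    return (value - scale_offset) / scale_factor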
def processing_std_vgt(res_queue, pipeline_run_level=0,
                       pipeline_printout_level=0,
                       pipeline_printout_graph_level=0, prod='',
                       starting_sprod='', mapset='', version='',
                       starting_dates=None, update_stats=False,
                       nrt_products=True, write2file=None, logfile=None,
                       touch_only=False, upsert_db=False):
    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_vgt')

    proc_lists = None
    proc_lists = create_pipeline(prod=prod, starting_sprod=starting_sprod,
                                 mapset=mapset, version=version,
                                 starting_dates=starting_dates,
                                 proc_lists=proc_lists,
                                 update_stats=update_stats,
                                 nrt_products=nrt_products)

    if write2file is not None:
        fwrite_id = open(write2file, 'w')
    else:
        fwrite_id = None

    if upsert_db:
        tasks = pipeline_get_task_names()
        spec_logger.info("Updating DB for the pipeline %s" % tasks[0])
        # Get input product info
        input_product_info = querydb.get_product_out_info(
            allrecs=False, productcode=prod, subproductcode=starting_sprod,
            version=version)
        for my_sprod in proc_lists.list_subprods:
            # my_sprod.print_out()
            status = querydb.update_processing_chain_products(
                prod, version, my_sprod, input_product_info)
        spec_logger.info("Updating DB Done - Exit")
        # return proc_lists

    if pipeline_run_level > 0:
        spec_logger.info("Run the pipeline %s" % 'processing_std_vgt')
        pipeline_run(touch_files_only=touch_only, verbose=pipeline_run_level,
                     logger=spec_logger, log_exceptions=spec_logger,
                     history_file='/eStation2/log/.ruffus_history_{0}_{1}.sqlite'.format(
                         prod, starting_sprod))
        tasks = pipeline_get_task_names()
        spec_logger.info("Run the pipeline %s" % tasks[0])
        spec_logger.info("After running the pipeline %s" % 'processing_std_vgt')

    if pipeline_printout_level > 0:
        pipeline_printout(
            verbose=pipeline_printout_level, output_stream=fwrite_id,
            history_file='/eStation2/log/.ruffus_history_{0}_{1}.sqlite'.format(
                prod, starting_sprod))

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

    if write2file is not None:
        fwrite_id.close()

    # res_queue.put(proc_lists)
    return True
def getTimeseries(productcode, subproductcode, version, mapsetcode, geom,
                  start_date, end_date, aggregate):
    # Extract timeseries from a list of files and return as JSON object.
    # It applies to a single dataset (prod/sprod/version/mapset) between 2 dates.
    # Several types of aggregation are foreseen:
    #
    #   mean    : Sum(Xi)/N(Xi)  -> min/max not considered   e.g. Rain
    #   cumulate: Sum(Xi)        -> min/max not considered   e.g. Fire
    #   count   : N(Xi where min < Xi < max)                 e.g. Vegetation anomalies
    #   surface : count * PixelArea                          e.g. Water Bodies
    #   percent : count/Ntot                                 e.g. Vegetation anomalies
    #   precip  : precipitation volume in m3*1E6             Rain (only)
    #
    # History: 1.0 : Initial release - since 2.0.1 -> now renamed '_green',
    #                from greenwich package
    #          1.1 : Since Feb. 2017, based on a different approach
    #                (gdal.RasterizeLayer instead of greenwich) in order to
    #                solve the issue with MULTIPOLYGON

    ogr.UseExceptions()

    # Get Mapset Info
    mapset_info = querydb.get_mapset(mapsetcode=mapsetcode)

    # Prepare for computing conversion to area: the pixel size at Lat=0 is
    # computed here; the correction to the actual (AVERAGE) latitude is
    # applied below.
    const_d2km = 12364.35
    area_km_equator = abs(float(mapset_info.pixel_shift_lat)) * \
        abs(float(mapset_info.pixel_shift_long)) * const_d2km

    # Get Product Info
    product_info = querydb.get_product_out_info(productcode=productcode,
                                                subproductcode=subproductcode,
                                                version=version)
    if product_info.__len__() > 0:
        # Get info from product_info
        scale_factor = 0
        scale_offset = 0
        nodata = 0
        date_format = ''
        for row in product_info:
            scale_factor = row.scale_factor
            scale_offset = row.scale_offset
            nodata = row.nodata
            date_format = row.date_format
            data_type = row.data_type_id

        # Create an output/temp shapefile, for managing the output layer
        # (really mandatory?? Can be simplified???)
        try:
            tmpdir = tempfile.mkdtemp(prefix=__name__,
                                      suffix='_getTimeseries',
                                      dir=es_constants.base_tmp_dir)
        except:
            logger.error('Cannot create temporary dir ' +
                         es_constants.base_tmp_dir + '. Exit')
            raise NameError('Error in creating tmpdir')

        out_shape = tmpdir + os.path.sep + "output_shape.shp"
        outDriver = ogr.GetDriverByName('ESRI Shapefile')

        # Create the output shapefile
        outDataSource = outDriver.CreateDataSource(out_shape)
        dest_srs = ogr.osr.SpatialReference()
        dest_srs.ImportFromEPSG(4326)
        outLayer = outDataSource.CreateLayer("Layer", dest_srs)
        idField = ogr.FieldDefn("id", ogr.OFTInteger)
        outLayer.CreateField(idField)

        featureDefn = outLayer.GetLayerDefn()
        feature = ogr.Feature(featureDefn)
        feature.SetGeometry(geom)
        feature.SetField("id", 1)
        outLayer.CreateFeature(feature)
        feature = None

        [list_files, dates_list] = getFilesList(productcode, subproductcode,
                                                version, mapsetcode,
                                                date_format, start_date,
                                                end_date)

        # Build a dictionary with filenames/dates
        dates_to_files_dict = dict(list(zip(dates_list, list_files)))

        # Generate unique list of files
        unique_list = set(list_files)
        uniqueFilesValues = []
        geo_mask_created = False

        for infile in unique_list:
            single_result = {'filename': '', 'meanvalue_noscaling': nodata,
                             'meanvalue': None}

            if infile.strip() != '' and os.path.isfile(infile):
                # Open input file
                orig_ds = gdal.Open(infile, gdal.GA_ReadOnly)
                orig_cs = osr.SpatialReference()
                orig_cs.ImportFromWkt(orig_ds.GetProjectionRef())
                orig_geoT = orig_ds.GetGeoTransform()
                x_origin = orig_geoT[0]
                y_origin = orig_geoT[3]
                pixel_size_x = orig_geoT[1]
                pixel_size_y = -orig_geoT[5]
                in_data_type_gdal = conv_data_type_to_gdal(data_type)

                # Create a mask from the geometry, with the same georef as
                # the input file[s]
                if not geo_mask_created:
                    # Read polygon extent and round to raster resolution
                    x_min, x_max, y_min, y_max = outLayer.GetExtent()
                    x_min_round = int(old_div((x_min - x_origin), pixel_size_x)) * pixel_size_x + x_origin
                    x_max_round = (int(old_div((x_max - x_origin), pixel_size_x)) + 1) * pixel_size_x + x_origin
                    y_min_round = (int(old_div((y_min - y_origin), pixel_size_y)) - 1) * pixel_size_y + y_origin
                    y_max_round = int(old_div((y_max - y_origin), pixel_size_y)) * pixel_size_y + y_origin

                    # Create the destination data source
                    x_res = int(round(old_div((x_max_round - x_min_round), pixel_size_x)))
                    y_res = int(round(old_div((y_max_round - y_min_round), pixel_size_y)))

                    # Create mask in memory
                    mem_driver = gdal.GetDriverByName('MEM')
                    mem_ds = mem_driver.Create('', x_res, y_res, 1,
                                               in_data_type_gdal)
                    mask_geoT = [x_min_round, pixel_size_x, 0, y_max_round, 0,
                                 -pixel_size_y]
                    mem_ds.SetGeoTransform(mask_geoT)
                    mem_ds.SetProjection(orig_cs.ExportToWkt())

                    # Create a Layer with '1' for the pixels to be selected
                    gdal.RasterizeLayer(mem_ds, [1], outLayer, burn_values=[1])

                    # Read the polygon-mask
                    band = mem_ds.GetRasterBand(1)
                    geo_values = mem_ds.ReadAsArray()

                    # Create a mask from geo_values (mask-out the '0's)
                    geo_mask = ma.make_mask(geo_values == 0)
                    geo_mask_created = True

                    # Clean/Close objects
                    mem_ds = None
                    mem_driver = None
                    outDriver = None
                    outLayer = None

                # Read data from input file
                x_offset = int(old_div((x_min - x_origin), pixel_size_x))
                y_offset = int(old_div((y_origin - y_max), pixel_size_y))

                band_in = orig_ds.GetRasterBand(1)
                data = band_in.ReadAsArray(x_offset, y_offset, x_res, y_res)

                # Catch the Error ES2-105 (polygon not included in Mapset)
                if data is None:
                    logger.error('ERROR: polygon extends out of file mapset '
                                 'for file: %s' % infile)
                    return []

                # Create a masked array from the data (considering Nodata)
                masked_data = ma.masked_equal(data, nodata)

                # Apply on top of it the geo mask
                mxnodata = ma.masked_where(geo_mask, masked_data)

                if aggregate['aggregation_type'] == 'count' or \
                   aggregate['aggregation_type'] == 'percent' or \
                   aggregate['aggregation_type'] == 'surface' or \
                   aggregate['aggregation_type'] == 'precip':

                    if mxnodata.count() == 0:
                        meanResult = None
                    else:
                        mxrange = mxnodata
                        min_val = aggregate['aggregation_min']
                        max_val = aggregate['aggregation_max']

                        if min_val is not None:
                            # Scale threshold from physical to digital value
                            min_val_scaled = old_div((min_val - scale_offset),
                                                     scale_factor)
                            mxrange = ma.masked_less(mxnodata, min_val_scaled)
                            # See ES2-271
                            if max_val is not None:
                                max_val_scaled = old_div(
                                    (max_val - scale_offset), scale_factor)
                                mxrange = ma.masked_greater(mxrange,
                                                            max_val_scaled)
                        elif max_val is not None:
                            # Scale threshold from physical to digital value
                            max_val_scaled = old_div((max_val - scale_offset),
                                                     scale_factor)
                            mxrange = ma.masked_greater(mxnodata,
                                                        max_val_scaled)

                        if aggregate['aggregation_type'] == 'percent':
                            # 'percent'
                            meanResult = float(mxrange.count()) / \
                                float(mxnodata.count()) * 100
                        elif aggregate['aggregation_type'] == 'surface':
                            # 'surface': estimate 'average' latitude
                            y_avg = (y_min + y_max) / 2.0
                            pixelAvgArea = area_km_equator * \
                                math.cos(old_div(y_avg, 180) * math.pi)
                            meanResult = float(mxrange.count()) * pixelAvgArea
                        elif aggregate['aggregation_type'] == 'precip':
                            # 'precip': estimate 'average' latitude
                            y_avg = (y_min + y_max) / 2.0
                            pixelAvgArea = area_km_equator * \
                                math.cos(old_div(y_avg, 180) * math.pi)
                            n_pixels = mxnodata.count()
                            avg_precip = mxnodata.mean()
                            # Result is in km * km * mm, i.e. 1E3 m*m*m ->
                            # divide by 1E3 to get 1E6 m*m*m
                            meanResult = float(n_pixels) * pixelAvgArea * \
                                avg_precip * 0.001
                        else:
                            # 'count'
                            meanResult = float(mxrange.count())

                    # Both results are equal
                    finalvalue = meanResult

                else:
                    # 'mean' or 'cumulate'
                    if mxnodata.count() == 0:
                        finalvalue = None
                        meanResult = None
                    else:
                        if aggregate['aggregation_type'] == 'mean':
                            # 'mean'
                            meanResult = mxnodata.mean()
                        else:
                            # 'cumulate'
                            meanResult = mxnodata.sum()
                        finalvalue = (meanResult * scale_factor + scale_offset)

                # Assign results
                single_result['filename'] = infile
                single_result['meanvalue_noscaling'] = meanResult
                single_result['meanvalue'] = finalvalue

            else:
                logger.debug('ERROR: raster file does not exist - %s' % infile)

            uniqueFilesValues.append(single_result)

        # Define a dictionary to associate filenames/values
        files_to_values_dict = dict((x['filename'], x['meanvalue'])
                                    for x in uniqueFilesValues)

        # Prepare array for result
        resultDatesValues = []

        # Returns a list of 'dates'/'values'
        for mydate in dates_list:
            my_result = {'date': datetime.date.today(), 'meanvalue': nodata}
            # Assign the date
            my_result['date'] = mydate
            # Assign the filename
            my_filename = dates_to_files_dict[mydate]
            # Map from array of Values
            my_result['meanvalue'] = files_to_values_dict[my_filename]
            resultDatesValues.append(my_result)

        try:
            shutil.rmtree(tmpdir)
        except:
            logger.debug('ERROR: Error in deleting tmpdir. Exit')

        # Return result
        return resultDatesValues
    else:
        logger.debug('ERROR: product not registered in the products table! '
                     '- %s %s %s' % (productcode, subproductcode, version))
        return []
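# The extent-rounding above snaps the polygon bounding box outward to the
# raster grid before rasterizing the mask. The same arithmetic on plain
# floats (a sketch, x-axis only; the y-axis is handled symmetrically):
def snap_extent_to_grid(x_min, x_max, x_origin, pixel_size_x):
    x_min_round = int((x_min - x_origin) / pixel_size_x) * pixel_size_x + x_origin
    x_max_round = (int((x_max - x_origin) / pixel_size_x) + 1) * pixel_size_x + x_origin
    return x_min_round, x_max_round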
def getTimeseries(productcode, subproductcode, version, mapsetcode, wkt, start_date, end_date):
    # Extract a timeseries from a list of files and return it as a JSON-serializable object.
    # It applies to a single dataset (prod/sprod/version/mapset) between two dates.
    ogr.UseExceptions()
    theGeomWkt = ' '.join(wkt.strip().split())
    geom = Geometry(wkt=str(theGeomWkt), srs=4326)

    # Get Product Info
    product_info = querydb.get_product_out_info(productcode=productcode,
                                                subproductcode=subproductcode,
                                                version=version)
    if len(product_info) > 0:
        scale_factor = 0
        scale_offset = 0
        nodata = 0
        date_format = ''
        for row in product_info:
            scale_factor = row.scale_factor
            scale_offset = row.scale_offset
            nodata = row.nodata
            unit = row.unit
            date_format = row.date_format

        [list_files, dates_list] = getFilesList(productcode, subproductcode, version,
                                                mapsetcode, date_format, start_date, end_date)

        # Build a dictionary with filenames/dates
        dates_to_files_dict = dict(zip(dates_list, list_files))

        # Generate unique list of files
        unique_list = set(list_files)
        uniqueFilesValues = []

        for infile in unique_list:
            if os.path.isfile(infile):
                try:
                    single_result = {'filename': '', 'meanvalue_noscaling': nodata, 'meanvalue': nodata}
                    with Raster(infile) as img:
                        # Assign nodata from prod_info
                        img._nodata = nodata
                        with img.clip(geom) as clipped:
                            # Save clipped image (for debug only)
                            # clipped.save(dataset.fullpath+'clipped_'+productfilename)
                            mx = clipped.array()
                            nodata_array_masked = ma.masked_equal(mx, nodata)
                            merged_mask = ma.mask_or(ma.getmask(mx), ma.getmask(nodata_array_masked))
                            mxnodata = ma.masked_array(ma.getdata(mx), merged_mask)

                            if mxnodata.count() == 0:
                                meanResult = 0.0
                            else:
                                meanResult = mxnodata.mean()

                            single_result['filename'] = infile
                            single_result['meanvalue_noscaling'] = meanResult
                            # Scale to physical value
                            finalvalue = (meanResult * scale_factor + scale_offset)
                            single_result['meanvalue'] = finalvalue

                    uniqueFilesValues.append(single_result)
                except Exception, e:
                    logger.debug('ERROR: clipping - %s' % (e))
            else:
                logger.debug('ERROR: raster file does not exist - %s' % infile)

        # Define a dictionary to associate filenames/values
        files_to_values_dict = dict((x['filename'], x['meanvalue']) for x in uniqueFilesValues)

        # Prepare array for result
        resultDatesValues = []

        # Return a list of 'dates'/'values'
        for mydate in dates_list:
            my_result = {'date': datetime.date.today(), 'meanvalue': nodata}
            # Assign the date
            my_result['date'] = mydate
            # Assign the filename
            my_filename = dates_to_files_dict[mydate]
            # Map from array of Values
            my_result['meanvalue'] = files_to_values_dict[my_filename]
            resultDatesValues.append(my_result)

        return resultDatesValues
    else:
        logger.debug('ERROR: product not registered in the products table! - %s %s %s'
                     % (productcode, subproductcode, version))
        return []
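# ---------------------------------------------------------------------
# Hypothetical invocation of getTimeseries() (product/mapset codes, the
# polygon and the dates below are examples only): it returns one
# {'date': ..., 'meanvalue': ...} record per date, averaged over the polygon.
def example_getTimeseries_usage():
    wkt = 'POLYGON((15.5 -17.5, 16.2 -17.5, 16.2 -16.9, 15.5 -16.9, 15.5 -17.5))'
    result = getTimeseries('vgt-ndvi', 'ndv', 'sv2-pv2.1', 'SPOTV-Africa-1km',
                           wkt, '20130101', '20131231')
    for record in result:
        print record['date'], record['meanvalue']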
def loop_processing(dry_run=False, serialize=False, test_one_product=None): # Driver of the process service # Reads configuration from the database # Creates the pipelines for the active processing # Calls the active pipelines with the relevant argument # Arguments: dry_run -> if > 0, it triggers pipeline_printout() rather than pipeline_run() # -> if < 0, it triggers pipeline_printout_graph() rather than pipeline_run() # serialize -> False (default): detach the process and work in parallel # -> True: do NOT detach processes and work in series (mainly for debugging) # Clean dir with locks at restart if os.path.isdir(es_constants.processing_tasks_dir): shutil.rmtree(es_constants.processing_tasks_dir) logger.info("Entering routine %s" % 'loop_processing') functions.check_output_dir(es_constants.processing_tasks_dir) # Read sleep time (used by each processing chain) sleep_time=es_constants.processing_sleep_time_sec while True: logger.debug("Entering infinite loop") # Get all active processing chains from the database. active_processing_chains = querydb.get_active_processing_chains() # Manage dry_run if dry_run: pipeline_run_level = 0 pipeline_printout_level = 3 else: pipeline_run_level = 3 pipeline_printout_level = 0 logger.debug("Pipeline run level: %i" % pipeline_run_level) logger.debug("Pipeline printout level: %i" % pipeline_printout_level) for chain in active_processing_chains: derivation_method = chain.derivation_method # name of the method in the module algorithm = chain.algorithm # name of the .py module mapset = chain.output_mapsetcode process_id = chain.process_id do_processing_singleproduct = False if test_one_product: if process_id != test_one_product: do_processing_singleproduct = True if do_processing_singleproduct: continue # Get input products input_products = querydb.get_processing_chain_products(chain.process_id,type='input') product_code = input_products[0].productcode sub_product_code = input_products[0].subproductcode version = input_products[0].version native_mapset=input_products[0].mapsetcode logger.info("Algorithm %s applied to [%s]/[%s]" % (str(algorithm), str(product_code),str(sub_product_code))) # Get product metadata for output products (from first input) input_product_info = querydb.get_product_out_info(productcode=product_code, subproductcode=sub_product_code, version=version) # Define a standard logfile associated to the processing chain processing_unique_id='ID='+str(process_id)+'_PROD='+product_code+'_METHOD='+derivation_method+'_ALGO='+algorithm logfile='apps.processing.'+processing_unique_id # Case of a 'std_' processing (i.e. 
ruffus with 1 input) -> get all info from 1st INPUT and manage dates if re.search('^std_.*',algorithm): logger.debug("Processing Chain is standard type") # Define dates interval from input product start_date = input_products[0].start_date end_date = input_products[0].end_date # Manage the dates list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code, version, start_date=start_date, end_date=end_date) # Prepare arguments args = {'pipeline_run_level':pipeline_run_level, \ 'pipeline_printout_level':pipeline_printout_level,\ 'starting_sprod': sub_product_code, \ 'prod': product_code, \ 'mapset':mapset,\ 'starting_dates': list_dates,\ 'version':version, 'logfile':logfile} # 'native_mapset':native_mapset} logger.debug('RL:{pipeline_run_level}; PL:{pipeline_printout_level},prod:{prod}, sprod:{starting_sprod},mapset:{mapset},\ dates:{starting_dates},version:{version}'.format(**args)) # Define an id from a combination of fields processing_unique_lock=es_constants.processing_tasks_dir+processing_unique_id+'.lock' # Check the processing chain is not locked if not os.path.isfile(processing_unique_lock): # # Perform sanity check on the output files # processing_base_directory = es_constants.es2globals['processing_dir']+\ # os.path.sep+product_code+\ # os.path.sep+version+\ # os.path.sep+mapset+os.path.sep+'derived' # # proc_functions.clean_corrupted_files(processing_base_directory, dry_run=True) open(processing_unique_lock,'a').close() logger.debug("Unique lock created: % s" % processing_unique_id) # Define the module name and function() module_name = 'processing_'+algorithm function_name = 'processing_'+derivation_method # Enter the module and walk until to the name of the function() to be executed proc_dir = __import__("apps.processing") try: proc_pck = getattr(proc_dir, "processing") except: logger.error("Error in loading module apps.processing.processing") return try: proc_mod = getattr(proc_pck, module_name) except: logger.error("Error in loading module [%s]" % module_name) return try: proc_func= getattr(proc_mod, function_name) except: logger.error("Error in loading algoritm [%s] for module [%s]" % (function_name,module_name)) return # Check serialize option if serialize==False: # Call to the processing pipeline logger.debug("Launching the pipeline") #proc_lists = proc_func(**args) results_queue = Queue() p = Process(target=proc_func, args=(results_queue,), kwargs=args) #p.daemon = True logger.debug("Before starting the process .. %i", p.is_alive()) p.start() logger.debug("After start .. %i", p.is_alive()) #proc_lists=results_queue.get() p.join() logger.debug("After join .. %i", p.is_alive()) # Sleep time to be read from processing time.sleep(float(sleep_time)) logger.debug("Execution finished - remove lock") try: os.remove(processing_unique_lock) except: logger.warning("Lock not removed: %s" % processing_unique_lock) # Do NOT detach process (work in series) else: logger.info("Work in series - do not detach process") results_queue = Queue() proc_lists = proc_func(results_queue, **args) os.remove(processing_unique_lock) time.sleep(float(sleep_time)) else: logger.debug("Lock already exist: %s" % processing_unique_id) # Case of no 'std' (e.g. 
merge processing - or more than 1 input) -> get output products and pass everything to function else: output_products = querydb.get_processing_chain_products(chain.process_id,type='output') # Prepare arguments args = {'pipeline_run_level':pipeline_run_level, 'pipeline_printout_level':pipeline_printout_level, 'input_products': input_products, 'output_product': output_products, 'logfile': logfile} # Define an id from a combination of fields processing_unique_id='ID='+str(process_id)+'_METHOD='+derivation_method+'_ALGO='+algorithm+'.lock' processing_unique_lock=es_constants.processing_tasks_dir+processing_unique_id if not os.path.isfile(processing_unique_lock): logger.debug("Launching processing for ID: %s" % processing_unique_id) open(processing_unique_lock,'a').close() # Define the module name and function() module_name = 'processing_'+algorithm function_name = 'processing_'+derivation_method # Enter the module and walk until to the name of the function() to be executed proc_dir = __import__("apps.processing") proc_pck = getattr(proc_dir, "processing") proc_mod = getattr(proc_pck, module_name) proc_func= getattr(proc_mod, function_name) if re.search('.*merge.*',algorithm): logger.debug("Processing Chain is merge type") # Do NOT detach process (work in series) proc_lists = proc_func(**args) time.sleep(float(sleep_time)) logger.info("Waking-up now, and removing the .lock") os.remove(processing_unique_lock) else: logger.info("Processing Chain is more-inputs type (e.g. modis-pp)") # We have to 'detach' the process for avoiding ruffus exception 'error_duplicate_task_name' results_queue = Queue() p = Process(target=proc_func, args=(results_queue,), kwargs=args) p.start() p.join() # Sleep time to be read from processing time.sleep(float(sleep_time)) logger.debug("Execution finished - remove lock") try: os.remove(processing_unique_lock) except: logger.warning("Lock not removed: %s" % processing_unique_lock) else: logger.debug("Processing already running for ID: %s " % processing_unique_id) logger.info("End of the loop ... wait a while") time.sleep(1)
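# ---------------------------------------------------------------------
# Sketch of the lock-file convention used by loop_processing above (the
# id in the path is illustrative): an empty file per processing chain
# prevents a second instance from being launched. A try/finally, as
# sketched here, would guarantee the release even if the pipeline raises.
def run_with_lock_sketch(run_pipeline):
    import os
    lock = es_constants.processing_tasks_dir + 'ID=1_METHOD=avg_ALGO=std_avg.lock'
    if os.path.isfile(lock):
        return                   # already running
    open(lock, 'a').close()      # acquire
    try:
        run_pipeline()
    finally:
        os.remove(lock)          # release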
def syncGeoserver():
    #
    # Copy the 'relevant' datasets to GeoServer.
    # The selection of datasets is done on the basis of the product.geoserver table.
    #
    # Get the list of all 'relevant' subproducts
    list_active_geoserver = esTools.get_activated_geoserver()

    # Loop over existing sub_products
    for geoserver_sprod in list_active_geoserver:
        # Extract local variables
        my_prod = geoserver_sprod.productcode
        my_subprod = geoserver_sprod.subproductcode
        my_version = geoserver_sprod.version
        start_date = geoserver_sprod.startdate
        end_date = geoserver_sprod.enddate

        logger.info("Working on Product/Subproduct/Version: {0}/{1}/{2}".format(my_prod, my_subprod, my_version))

        # Manage dates from bigint to datetime
        if functions.is_date_yyyymmdd(str(start_date), silent=True):
            date_start = datetime.datetime.strptime(str(start_date), '%Y%m%d').date()
        else:
            date_start = None
        if functions.is_date_yyyymmdd(str(end_date), silent=True):
            date_end = datetime.datetime.strptime(str(end_date), '%Y%m%d').date()
        else:
            date_end = None

        # Get additional product info
        product_info = querydb.get_product_out_info(productcode=my_prod,
                                                    subproductcode=my_subprod,
                                                    version=my_version)
        my_type = product_info[0].product_type
        my_category = product_info[0].category_id

        # Create a Product object (to get mapsets)
        my_product = products.Product(my_prod, version=my_version)
        my_mapsets = my_product.mapsets
        if len(my_mapsets) > 1:
            logger.info('More than 1 mapset exists. Take the first.')
        if len(my_mapsets) == 0:
            logger.warning('No mapset exists. Skip.')
            continue
        my_mapset = my_mapsets[0]

        # Create a Dataset object (to get the file list)
        # If date_start is not set (e.g. for 10davg products) create it w/o dates
        if date_start:
            my_dataset = datasets.Dataset(my_prod, my_subprod, my_mapset, version=my_version,
                                          from_date=date_start, to_date=date_end)
            if my_dataset._frequency.dateformat == 'MMDD':
                logger.warning('Product of type MMDD: date specification not supported. Skip.')
                continue
            file_list = my_dataset.get_filenames_range()
        else:
            my_dataset = datasets.Dataset(my_prod, my_subprod, my_mapset, version=my_version)
            file_list = my_dataset.get_filenames()

        # Check that there is at least 1 file
        if len(file_list) > 0:
            # Check the Workspace exists, or create it
            my_workspace = esTools.setWorkspaceName(my_category, my_prod, my_subprod, my_version, my_mapset,
                                                    nameType=geoserverREST.geoserverWorkspaceName)
            if not geoserverREST.isWorkspace(my_workspace):
                geoserverREST.createWorkspace(my_workspace)

            # Loop over files and upload
            for my_file in file_list:
                my_date = functions.get_date_from_path_full(my_file)
                logger.debug("Working on Product/Subproduct/Version/Mapset/Date: {0}/{1}/{2}/{3}/{4}".format(
                    my_prod, my_subprod, my_version, my_mapset, my_date))
                # Upload the file and register it
                esTools.uploadAndRegisterRaster(my_category, my_prod, my_subprod, my_version, my_mapset,
                                                my_date, my_type, local_data_dir)
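# ---------------------------------------------------------------------
# The bigint-to-date handling above, shown on an example value:
def bigint_to_date_example():
    import datetime
    start_date = 20130221    # dates are stored as bigint YYYYMMDD
    date_start = datetime.datetime.strptime(str(start_date), '%Y%m%d').date()
    print date_start         # -> 2013-02-21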
def create_pipeline(starting_sprod): # --------------------------------------------------------------------- # Define input files in_prod_ident = functions.set_path_filename_no_date( prod, starting_sprod, mapset, version, ext) input_dir = es_constants.processing_dir+ \ functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset) starting_files = input_dir + "*" + in_prod_ident # Read input product nodata in_prod_info = querydb.get_product_out_info(productcode=prod, subproductcode=starting_sprod, version=version) product_info = functions.list_to_element(in_prod_info) in_nodata = product_info.nodata print in_nodata # --------------------------------------------------------------------- # Monthly Average for a given month output_sprod = "monavg" out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) formatter_in = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident formatter_out = [ "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" + out_prod_ident ] @active_if(activate_monavg_comput) @collate(starting_files, formatter(formatter_in), formatter_out) def modis_kd_monavg(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', \ "options": "compress=lzw", "input_nodata": in_nodata} raster_image_math.do_avg_image(**args) # --------------------------------------------------------------------- # Monthly Climatology for all years new_input_subprod = 'monavg' new_in_prod_ident = functions.set_path_filename_no_date( prod, new_input_subprod, mapset, version, ext) new_input_dir = es_constants.processing_dir+ \ functions.set_path_sub_directory(prod, new_input_subprod, 'Derived', version, mapset) new_starting_files = new_input_dir + "*" + new_in_prod_ident output_sprod = "monclim" out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) formatter_in = "[0-9]{4}(?P<MM>[0-9]{2})" + new_in_prod_ident formatter_out = [ "{subpath[0][5]}" + os.path.sep + output_subdir + "{MM[0]}" + out_prod_ident ] @active_if(activate_monclim_comput) @collate(new_starting_files, formatter(formatter_in), formatter_out) def modis_kd_monclim(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', \ "options": "compress=lzw", "input_nodata": in_nodata} raster_image_math.do_avg_image(**args) # --------------------------------------------------------------------- # Monthly Anomaly for a given monthly output_sprod = "monanom" out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) # Starting files + avg formatter_in = "(?P<YYYY>[0-9]{4})(?P<MM>[0-9]{2})" + new_in_prod_ident formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MM[0]}" + out_prod_ident ancillary_sprod = "monclim" ancillary_sprod_ident = functions.set_path_filename_no_date( prod, ancillary_sprod, mapset, version, ext) ancillary_subdir = functions.set_path_sub_directory( prod, 
ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MM[0]}" + ancillary_sprod_ident

    @active_if(activate_monanom_comput)
    @transform(new_starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    def modis_kd_mondiff(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file,
                "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_oper_subtraction(**args)
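# ---------------------------------------------------------------------
# Toy demonstration of how the ruffus formatter regex above pairs each
# monthly average with its climatology (the product identifier below is
# a made-up example):
def formatter_regex_example():
    import re
    ident = '_modis-kd490_monavg_MODIS-Africa-4km_v2013.1.tif'
    fname = '201302' + ident
    m = re.match('(?P<YYYY>[0-9]{4})(?P<MM>[0-9]{2})', fname)
    # '{MM[0]}' in the ancillary pattern resolves to '02', so the input
    # 201302... is subtracted from the '02' monthly climatology.
    print m.group('YYYY'), m.group('MM')   # -> 2013 02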
def test_get_productlayer(self):
    import mapscript

    # Example GetMap parameters (normally read from web.input()); values are test data
    getparams = {'STYLES': u'', 'productcode': u'vgt-fapar', 'legendid': u'99', 'SERVICE': u'WMS',
                 'subproductcode': u'fapar', 'CRS': u'EPSG:4326', 'FORMAT': u'image/png',
                 'REQUEST': u'GetMap', 'HEIGHT': u'1010', 'WIDTH': u'998', 'VERSION': u'1.3.0',
                 'productversion': u'V1.4', 'date': u'20130221', 'mapsetcode': u'SPOTV-Africa-1km',
                 'TRANSPARENT': u'false', 'BBOX': u'15.46875, -17.578125, 16.171875, -16.875'}

    p = Product(product_code=getparams['productcode'], version=getparams['productversion'])
    dataset = p.get_dataset(mapset=getparams['mapsetcode'], sub_product_code=getparams['subproductcode'])

    # Use the requested date if present, otherwise fall back to the most recent one.
    # Note: getparams is a dict, so test membership rather than hasattr()
    if 'date' in getparams:
        filedate = getparams['date']
    else:
        dataset.get_filenames()
        lastdate = dataset.get_dates()[-1].strftime("%Y%m%d")
        filedate = lastdate

    if dataset.no_year():
        filedate = dataset.strip_year(filedate)

    filename = functions.set_path_filename(filedate,
                                           getparams['productcode'],
                                           getparams['subproductcode'],
                                           getparams['mapsetcode'],
                                           getparams['productversion'],
                                           '.tif')
    productfile = dataset.fullpath + filename

    mapscript.msIO_installStdoutToBuffer()

    projlib = es_constants.proj4_lib_dir
    errorfile = es_constants.log_dir + "/mapserver_error.log"

    owsrequest = mapscript.OWSRequest()

    inputparams = getparams  # web.input()
    for k, v in inputparams.iteritems():
        print k + ':' + v
        owsrequest.setParameter(k.upper(), v)

    filenamenoextention = functions.set_path_filename(filedate,
                                                      getparams['productcode'],
                                                      getparams['subproductcode'],
                                                      getparams['mapsetcode'],
                                                      getparams['productversion'],
                                                      '')
    owsrequest.setParameter("LAYERS", filenamenoextention)

    productmap = 
mapscript.mapObj(es_constants.template_mapfile) productmap.setConfigOption("PROJ_LIB", projlib) productmap.setConfigOption("MS_ERRORFILE", errorfile) productmap.maxsize = 4096 outputformat_png = mapscript.outputFormatObj('GD/PNG', 'png') outputformat_png.setOption("INTERLACE", "OFF") productmap.appendOutputFormat(outputformat_png) #outputformat_gd = mapscript.outputFormatObj('GD/GIF', 'gif') #productmap.appendOutputFormat(outputformat_gd) productmap.selectOutputFormat('png') productmap.debug = mapscript.MS_TRUE productmap.status = mapscript.MS_ON productmap.units = mapscript.MS_DD coords = map(float, inputparams['BBOX'].split(",")) print coords llx = coords[0] lly = coords[1] urx = coords[2] ury = coords[3] print llx, lly, urx, ury productmap.setExtent(llx, lly, urx, ury) # -26, -35, 60, 38 # productmap.setExtent(-26, -35, 60, 38) # epsg must be in lowercase because in unix/linux systems the proj filenames are lowercase! # epsg = "+init=epsg:3857" # epsg = "+init=" + inputparams.CRS.lower() # CRS = "EPSG:4326" epsg = inputparams['CRS'].lower() # CRS = "EPSG:4326" productmap.setProjection(epsg) w = int(inputparams['WIDTH']) h = int(inputparams['HEIGHT']) productmap.setSize(w, h) # General web service information productmap.setMetaData("WMS_TITLE", "Product description") productmap.setMetaData("WMS_SRS", inputparams['CRS'].lower()) # productmap.setMetaData("WMS_SRS", "epsg:3857") productmap.setMetaData("WMS_ABSTRACT", "A Web Map Service returning eStation2 raster layers.") productmap.setMetaData("WMS_ENABLE_REQUEST", "*") # necessary!! product_info = querydb.get_product_out_info(productcode=inputparams['productcode'], subproductcode=inputparams['subproductcode'], version=inputparams['productversion']) if hasattr(product_info, "__len__") and product_info.__len__() > 0: for row in product_info: scale_factor = row.scale_factor scale_offset = row.scale_offset nodata = row.nodata legend_info = querydb.get_legend_info(legendid=inputparams['legendid']) if hasattr(legend_info, "__len__") and legend_info.__len__() > 0: for row in legend_info: minstep = int((row.min_value - scale_offset)/scale_factor) #int(row.min_value*scale_factor+scale_offset) maxstep = int((row.max_value - scale_offset)/scale_factor) # int(row.max_value*scale_factor+scale_offset) realminstep = int((row.realminstep - scale_offset)/scale_factor) realmaxstep = int((row.realmaxstep - scale_offset)/scale_factor) minstepwidth = int((row.minstepwidth - scale_offset)/scale_factor) maxstepwidth = int((row.maxstepwidth - scale_offset)/scale_factor) totwidth = int((row.totwidth - scale_offset)/scale_factor) totsteps = row.totsteps # maxstep = 255 processing_scale = 'SCALE='+str(minstep)+','+str(maxstep) # min(legend_step.from_step) max(legend_step.to_step) example: 'SCALE=-7000,10000' minbuckets = 256 maxbuckets = 10000 num_buckets = maxbuckets if minstepwidth > 0: num_buckets = round(totwidth / minstepwidth, 0) if num_buckets < minbuckets: num_buckets = minbuckets elif num_buckets > maxbuckets: num_buckets = 0 # num_buckets = 10000 if num_buckets > 0: processing_buckets = 'SCALE_BUCKETS='+str(num_buckets) # nodata = -32768 # get this value from the table products.product processing_novalue = '' if nodata is not None and minstep <= nodata < maxstep: processing_novalue = 'NODATA='+str(nodata) layer = mapscript.layerObj(productmap) layer.name = filenamenoextention layer.type = mapscript.MS_LAYER_RASTER layer.status = mapscript.MS_ON # MS_DEFAULT layer.data = productfile # layer.setProjection("+init=epsg:4326") layer.setProjection("epsg:4326") 
layer.dump = mapscript.MS_TRUE # scale & buckets if num_buckets > 0: layer.setProcessing(processing_scale) layer.setProcessing(processing_buckets) if processing_novalue != '': layer.setProcessing(processing_novalue) legend_steps = querydb.get_legend_steps(legendid=inputparams['legendid']) if hasattr(legend_steps, "__len__") and legend_steps.__len__() > 0: stepcount = 0 for step in legend_steps: stepcount += 1 min_step = int((step.from_step - scale_offset)/scale_factor) max_step = int((step.to_step - scale_offset)/scale_factor) colors = map(int, (color.strip() for color in step.color_rgb.split(" ") if color.strip())) if stepcount == legend_steps.__len__(): # For the last step use <= max_step expression_string = '([pixel] >= '+str(min_step)+' and [pixel] <= '+str(max_step)+')' else: expression_string = '([pixel] >= '+str(min_step)+' and [pixel] < '+str(max_step)+')' # define class object and style layerclass = mapscript.classObj(layer) layerclass.name = layer.name+'_'+str(stepcount) layerclass.setExpression(expression_string) style = mapscript.styleObj(layerclass) style.color.setRGB(colors[0], colors[1], colors[2]) result_map_file = es_constants.apps_dir+'/analysis/MAP_result.map' # if os.path.isfile(result_map_file): # os.remove(result_map_file) productmap.save(result_map_file) image = productmap.draw() image.save(es_constants.apps_dir+'/analysis/'+filenamenoextention+'.png') contents = productmap.OWSDispatch(owsrequest) content_type = mapscript.msIO_stripStdoutBufferContentType() content = mapscript.msIO_getStdoutBufferBytes() #web.header = "Content-Type","%s; charset=utf-8"%content_type # web.header('Content-type', 'image/png') #web.header('Content-transfer-encoding', 'binary') # return content self.assertEquals(True, True)
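# ---------------------------------------------------------------------
# The two directions of the scaling used above (factor/offset values are
# examples): legend thresholds are stored in physical units and have to be
# converted back to raw pixel values before being compared with [pixel].
def scaling_example():
    scale_factor = 0.004
    scale_offset = 0.0
    def to_physical(digital):
        return digital * scale_factor + scale_offset
    def to_digital(physical):
        # rounded here; the code above truncates with int()
        return int(round((physical - scale_offset) / scale_factor))
    print to_digital(0.5)    # physical threshold -> raw pixel value (125)
    print to_physical(125)   # raw pixel value -> physical value (0.5)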
def loop_processing(dry_run=False, serialize=False): # Driver of the process service # Reads configuration from the database # Creates the pipelines for the active processing # Calls the active pipelines with the relevant argument # Arguments: dry_run -> if > 0, it triggers pipeline_printout() rather than pipeline_run() # -> if < 0, it triggers pipeline_printout_graph() rather than pipeline_run() # serialize -> False (default): detach the process and work in parallel # -> True: do NOT detach processes and work in series (mainly for debugging) # Clean dir with locks if os.path.isdir(es_constants.processing_tasks_dir): shutil.rmtree(es_constants.processing_tasks_dir) logger.info("Entering routine %s" % 'loop_processing') echo_query = False functions.check_output_dir(es_constants.processing_tasks_dir) while True: logger.debug("Entering infinite loop") # Get all active processing chains from the database. active_processing_chains = querydb.get_active_processing_chains() # Manage dry_run if dry_run: pipeline_run_level = 0 pipeline_printout_level = 3 else: pipeline_run_level = 3 pipeline_printout_level = 0 for chain in active_processing_chains: logger.debug("Processing Chain N.:%s" % str(chain.process_id)) derivation_method = chain.derivation_method # name of the method in the module algorithm = chain.algorithm # name of the .py module mapset = chain.output_mapsetcode process_id = chain.process_id # Get input products input_products = querydb.get_processing_chain_products(chain.process_id,type='input') product_code = input_products[0].productcode sub_product_code = input_products[0].subproductcode version = input_products[0].version # Get product metadata for output products (from first input) input_product_info = querydb.get_product_out_info(productcode=product_code, subproductcode=sub_product_code, version=version) # Case of a 'std_' (i.e. ruffus with 1 input) processing -> get all info from 1st INPUT and manage dates if re.search('^std_.*',algorithm): start_date = input_products[0].start_date end_date = input_products[0].end_date # Manage the dates list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code, version, start_date=start_date, end_date=end_date) # Prepare arguments args = {'pipeline_run_level':pipeline_run_level, \ 'pipeline_printout_level':pipeline_printout_level,\ 'starting_sprod': sub_product_code, \ 'prod': product_code, \ 'mapset':mapset,\ 'starting_dates': list_dates,\ 'version':version} # Case of no 'std' (e.g. 
merge processing) -> get output products and pass everything to function else: output_products = querydb.get_processing_chain_products(chain.process_id,type='output') # Prepare arguments args = {'pipeline_run_level':pipeline_run_level, \ 'pipeline_printout_level':pipeline_printout_level,\ 'input_products': input_products, \ 'output_product': output_products} # Define an id from a combination of fields processing_unique_id='ID='+str(process_id)+'_METHOD='+derivation_method+'_ALGO='+algorithm+'.lock' processing_unique_lock=es_constants.processing_tasks_dir+processing_unique_id if not os.path.isfile(processing_unique_lock): logger.debug("Launching processing for ID: %s" % processing_unique_id) open(processing_unique_lock,'a').close() # Define the module name and function() module_name = 'processing_'+algorithm function_name = 'processing_'+derivation_method # Enter the module and walk until to the name of the function() to be executed proc_dir = __import__("apps.processing") proc_pck = getattr(proc_dir, "processing") proc_mod = getattr(proc_pck, module_name) proc_func= getattr(proc_mod, function_name) # Fork and call the std_precip 'generic' processing if serialize==False: pid = os.fork() if pid == 0: # Here I'm the child process -> call to the processing pipeline proc_lists = proc_func(**args) # Upsert database upsert_database(process_id, product_code, version, mapset, proc_lists, input_product_info) # Simulate longer processing (TEMP) logger.info("Going to sleep for a while - to be removed") time.sleep(2) logger.info("Waking-up now, and removing the .lock") os.remove(processing_unique_lock) sys.exit(0) else: # Here I'm the parent process -> just go on .. pass # Do NOT detach process (work in series) else: proc_lists = proc_func(**args) logger.info("Going to sleep for a while - to be removed") # Upsert database upsert_database(process_id, product_code, version, mapset, proc_lists, input_product_info) time.sleep(2) logger.info("Waking-up now, and removing the .lock") os.remove(processing_unique_lock) else: logger.debug("Processing already running for ID: %s " % processing_unique_id) # logger.info("End of the loop ... wait a while") time.sleep(5)
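# ---------------------------------------------------------------------
# Minimal sketch of the fork/detach pattern used above (POSIX only): the
# child runs the pipeline and must exit explicitly, otherwise it would
# fall back into the scheduling loop; the parent continues immediately
# (children are not reaped here, which is also the behaviour above).
def fork_sketch(run_pipeline):
    import os, sys
    pid = os.fork()
    if pid == 0:
        run_pipeline()   # child process
        sys.exit(0)
    else:
        pass             # parent: go on with the next chain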
def create_pipeline(starting_sprod): # --------------------------------------------------------------------- # Define input files in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext) input_dir = es_constants.processing_dir+ \ functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset) starting_files = input_dir+"*"+in_prod_ident # Read input product nodata in_prod_info = querydb.get_product_out_info(productcode=prod, subproductcode=starting_sprod, version=version) product_info = functions.list_to_element(in_prod_info) in_nodata = product_info.nodata print in_nodata # --------------------------------------------------------------------- # Monthly Average for a given month output_sprod="monavg" out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) formatter_in="(?P<YYYYMM>[0-9]{6})[0-9]{2}"+in_prod_ident formatter_out=["{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYYMM[0]}"+out_prod_ident] @active_if(activate_monavg_comput) @collate(starting_files, formatter(formatter_in),formatter_out) def modis_chla_monavg(input_file, output_file): output_file = functions.list_to_element(output_file) out_filename=os.path.basename(output_file) str_date=out_filename[0:6] expected_ndays=functions.get_number_days_month(str_date) functions.check_output_dir(os.path.dirname(output_file)) current_ndays=len(input_file) if expected_ndays != current_ndays: logger.info('Missing days for period: %s. Skip' % str_date) else: args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', \ "options": "compress=lzw", "input_nodata": in_nodata} raster_image_math.do_avg_image(**args) # --------------------------------------------------------------------- # Monthly Climatology for all years new_input_subprod='monavg' new_in_prod_ident= functions.set_path_filename_no_date(prod, new_input_subprod, mapset, version, ext) new_input_dir = es_constants.processing_dir+ \ functions.set_path_sub_directory(prod, new_input_subprod, 'Derived', version, mapset) new_starting_files = new_input_dir+"*"+new_in_prod_ident output_sprod="monclim" out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) formatter_in="[0-9]{4}(?P<MM>[0-9]{2})"+new_in_prod_ident formatter_out=["{subpath[0][5]}"+os.path.sep+output_subdir+"{MM[0]}"+out_prod_ident] @active_if(activate_monclim_comput) @collate(new_starting_files, formatter(formatter_in),formatter_out) def modis_chla_monclim(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', \ "options": "compress=lzw", "input_nodata": in_nodata} raster_image_math.do_avg_image(**args) # --------------------------------------------------------------------- # Monthly Anomaly for a given monthly output_sprod="monanom" out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset,version, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) # Starting files + avg formatter_in="(?P<YYYY>[0-9]{4})(?P<MM>[0-9]{2})"+new_in_prod_ident formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYY[0]}{MM[0]}"+out_prod_ident 
    ancillary_sprod = "monclim"
    ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MM[0]}" + ancillary_sprod_ident

    @active_if(activate_monanom_comput)
    @transform(new_starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    def modis_chla_mondiff(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file,
                "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_oper_subtraction(**args)
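# ---------------------------------------------------------------------
# Toy version of the day-count guard in modis_chla_monavg above, using
# only the stdlib (file names are examples):
def day_count_guard_example():
    import calendar
    str_date = '201302'
    year, month = int(str_date[0:4]), int(str_date[4:6])
    expected_ndays = calendar.monthrange(year, month)[1]   # 28 for 2013-02
    input_file = ['20130201_f.tif', '20130202_f.tif']      # files collated by ruffus
    if expected_ndays != len(input_file):
        print 'Missing days for period: %s. Skip' % str_date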
def processing_merge(pipeline_run_level=0, pipeline_printout_level=0, input_products='', output_product='', mapset=''): es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep # Do some checks on the integrity of the inputs # Manage output_product data out_product_code = output_product[0].productcode out_sub_product_code = output_product[0].subproductcode out_version = output_product[0].version out_mapset = output_product[0].mapsetcode out_subdir = functions.set_path_sub_directory(out_product_code, out_sub_product_code, 'Ingest', out_version, out_mapset) out_prod_ident = functions.set_path_filename_no_date( out_product_code, out_sub_product_code, out_mapset, out_version, ext) out_dir = es2_data_dir + out_subdir # Check the output product directory functions.check_output_dir(out_dir) # Loop over the input products: for input in input_products: # Extract info from input product product_code = input.productcode sub_product_code = input.subproductcode version = input.version start_date = input.start_date end_date = input.end_date product_info = querydb.get_product_out_info( productcode=product_code, subproductcode=sub_product_code, version=version) prod_type = product_info[0].product_type in_subdir = functions.set_path_sub_directory(product_code, sub_product_code, prod_type, version, out_mapset) in_prod_ident = functions.set_path_filename_no_date( out_product_code, out_sub_product_code, out_mapset, version, ext) # Create the list of dates -> returns empty if start==end==None list_dates = proc_functions.get_list_dates_for_dataset( product_code, sub_product_code, version, start_date=start_date, end_date=end_date) # If list_dates == None, look at all existing files if list_dates is None: print 'To be Done !!!' # Otherwise, build list of files from list of dates else: for my_date in list_dates: in_file_path = es2_data_dir + in_subdir + my_date + in_prod_ident out_file_path = out_dir + my_date + out_prod_ident # Create the link print in_file_path print out_file_path functions.create_sym_link(in_file_path, out_file_path, force=False)
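# ---------------------------------------------------------------------
# Sketch of what the merge loop produces (paths are illustrative, and
# functions.create_sym_link is assumed to behave like os.symlink with an
# overwrite flag): each date of the input dataset is exposed under the
# output product tree as a symbolic link, so no pixel data is duplicated.
def merge_link_sketch():
    import os
    in_file_path = '/data/processing/prod-a/v1.0/mapset/tif/sprod/20130221_ident.tif'
    out_file_path = '/data/processing/prod-b/v1.0/mapset/tif/sprod/20130221_ident.tif'
    if not os.path.lexists(out_file_path):
        os.symlink(in_file_path, out_file_path)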
def create_pipeline(starting_sprod): # --------------------------------------------------------------------- # Define input files: Chla is the 'driver', sst,kd and par 'ancillary inputs' chla_prod = "modis-chla" chla_prod_ident = functions.set_path_filename_no_date( chla_prod, starting_sprod, mapset, version, ext) chla_input_dir = es_constants.processing_dir+ \ functions.set_path_sub_directory(chla_prod, starting_sprod, 'Derived', version, mapset) #chla_files = chla_input_dir+"2014*"+chla_prod_ident # --------------------------------------------------------------------- sst_prod = "modis-sst" sst_prod_ident = functions.set_path_filename_no_date( sst_prod, starting_sprod, mapset, version, ext) sst_input_dir = es_constants.processing_dir+ \ functions.set_path_sub_directory(sst_prod, starting_sprod, 'Derived', version, mapset) # --------------------------------------------------------------------- kd_prod = "modis-kd490" kd_prod_ident = functions.set_path_filename_no_date( kd_prod, starting_sprod, mapset, version, ext) kd_input_dir = es_constants.processing_dir+ \ functions.set_path_sub_directory(kd_prod, starting_sprod, 'Derived', version, mapset) kd_files = kd_input_dir + "*" + kd_prod_ident # --------------------------------------------------------------------- par_prod = "modis-par" par_prod_ident = functions.set_path_filename_no_date( par_prod, starting_sprod, mapset, version, ext) par_input_dir = es_constants.processing_dir+ \ functions.set_path_sub_directory(par_prod, starting_sprod, 'Derived', version, mapset) # Read input product nodata chla_prod_info = querydb.get_product_out_info(productcode=chla_prod, subproductcode="chla-day", version=version) chla_product_info = functions.list_to_element(chla_prod_info) chla_nodata = chla_product_info.nodata sst_prod_info = querydb.get_product_out_info(productcode=sst_prod, subproductcode="sst-day", version=version) sst_product_info = functions.list_to_element(sst_prod_info) sst_nodata = sst_product_info.nodata kd_prod_info = querydb.get_product_out_info(productcode=kd_prod, subproductcode="kd490-day", version=version) kd_product_info = functions.list_to_element(kd_prod_info) kd_nodata = kd_product_info.nodata par_prod_info = querydb.get_product_out_info(productcode=par_prod, subproductcode="par-day", version=version) par_product_info = functions.list_to_element(par_prod_info) par_nodata = par_product_info.nodata # --------------------------------------------------------------------- # Monthly Primary Productivity from chl-a, sst, kd490 and par monthly data output_sprod = "1mon" out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) # Starting files monthly composites formatter_kd = "(?P<YYYYMM>[0-9]{6})" + kd_prod_ident formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" + out_prod_ident ancillary_sst = sst_input_dir + "{YYYYMM[0]}" + sst_prod_ident ancillary_par = par_input_dir + "{YYYYMM[0]}" + par_prod_ident ancillary_chla = chla_input_dir + "{YYYYMM[0]}" + chla_prod_ident @active_if(activate_pp_1mon_comput) @transform(kd_files, formatter(formatter_kd), add_inputs(ancillary_chla, ancillary_par, ancillary_sst), formatter_out) def modis_pp_1mon(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"chla_file": input_file[1], "sst_file": input_file[3], "kd_file": input_file[0],"par_file": 
input_file[2], \
                "chla_nodata": chla_nodata, "sst_nodata": sst_nodata, "kd_nodata": kd_nodata, \
                "par_nodata": par_nodata, "output_file": output_file, "output_nodata": -9999, \
                "output_format": 'GTIFF', "output_type": None, "options": "compress=lzw"}
        raster_image_math.do_compute_primary_production(**args)
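# ---------------------------------------------------------------------
# Order of the inputs delivered by add_inputs() above, illustrated with
# placeholder names: the matched 'driver' file (kd) comes first, and the
# ancillaries follow in the order they were given to add_inputs(), which
# is why the args dictionary indexes input_file[0]..input_file[3].
def add_inputs_order_example():
    input_file = ('kd.tif', 'chla.tif', 'par.tif', 'sst.tif')
    chla_file, par_file, sst_file = input_file[1], input_file[2], input_file[3]
    print chla_file, par_file, sst_file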
def create_pipeline(input_products, output_product, logfile=None, nrt_products=True, update_stats=False):

    proc_lists = None
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_modis_pp')

    # Set DEFAULTS: all off
    activate_pp_comput = 0          # PP from Chla, SST, Kd490 and PAR
    activate_stats_comput = 0       # Stats computation (inter-annual clim, min, max)
    activate_anomalies_comput = 0   # Anomalies computation (not yet done!!)

    # Switch wrt groups - according to options
    if nrt_products:
        activate_pp_comput = 1      # PP from Chla, SST, Kd490 and PAR

    if update_stats:
        activate_stats_comput = 1
        activate_anomalies_comput = 1

    activate_pp_prod_comput = 1
    activate_pp_stats_clim_comput = 0
    activate_pp_stats_min_comput = 0
    activate_pp_stats_max_comput = 0

    # ---------------------------------------------------------------------
    # Create lists
    # my_date='20160601'
    my_date = ''
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Parse the arguments and extract the 4 input variables
    if len(input_products) != 4:
        spec_logger.error('Modis PP computation requires 4 inputs. Exit')
        return 1

    found_chla = False
    found_sst = False
    found_par = False
    found_kd490 = False

    for input_product in input_products:

        if re.search('.*chla.*', input_product.productcode):
            found_chla = True
            chla_prod = input_product.productcode
            chla_version = input_product.version
            chla_sprod = input_product.subproductcode
            chla_mapset = input_product.mapsetcode
            chla_prod_ident = functions.set_path_filename_no_date(chla_prod, chla_sprod, chla_mapset,
                                                                  chla_version, ext)
            chla_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(chla_prod, chla_sprod, 'Derived', chla_version, chla_mapset)

        if re.search('.*sst.*', input_product.productcode):
            found_sst = True
            sst_prod = input_product.productcode
            sst_version = input_product.version
            sst_sprod = input_product.subproductcode
            sst_mapset = input_product.mapsetcode
            sst_prod_ident = functions.set_path_filename_no_date(sst_prod, sst_sprod, sst_mapset,
                                                                 sst_version, ext)
            sst_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(sst_prod, sst_sprod, 'Derived', sst_version, sst_mapset)

        if re.search('.*kd490.*', input_product.productcode):
            found_kd490 = True
            kd490_prod = input_product.productcode
            kd490_version = input_product.version
            kd490_sprod = input_product.subproductcode
            kd490_mapset = input_product.mapsetcode
            kd490_prod_ident = functions.set_path_filename_no_date(kd490_prod, kd490_sprod, kd490_mapset,
                                                                   kd490_version, ext)
            kd490_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(kd490_prod, kd490_sprod, 'Derived', kd490_version, kd490_mapset)

        if re.search('.*par.*', input_product.productcode):
            found_par = True
            par_prod = input_product.productcode
            par_version = input_product.version
            par_sprod = input_product.subproductcode
            par_mapset = input_product.mapsetcode
            par_prod_ident = functions.set_path_filename_no_date(par_prod, par_sprod, par_mapset,
                                                                 par_version, ext)
            par_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(par_prod, par_sprod, 'Derived', par_version, par_mapset)

    # Check consistency of inputs
    if not found_chla or not found_kd490 or not found_par or not found_sst:
        spec_logger.error('At least one of 4 expected inputs missing. Exit')
        return 1

    if chla_mapset != sst_mapset or chla_mapset != kd490_mapset or chla_mapset != par_mapset:
        spec_logger.error('All 4 input mapsets must be equal. Exit')
        return 1

    # Read input product nodata
    chla_prod_info = querydb.get_product_out_info(productcode=chla_prod,
                                                  subproductcode=chla_sprod,
                                                  version=chla_version)
    chla_product_info = functions.list_to_element(chla_prod_info)
    chla_nodata = chla_product_info.nodata

    sst_prod_info = querydb.get_product_out_info(productcode=sst_prod,
                                                 subproductcode=sst_sprod,
                                                 version=sst_version)
    sst_product_info = functions.list_to_element(sst_prod_info)
    sst_nodata = sst_product_info.nodata

    kd_prod_info = querydb.get_product_out_info(productcode=kd490_prod,
                                                subproductcode=kd490_sprod,
                                                version=kd490_version)
    kd_product_info = functions.list_to_element(kd_prod_info)
    kd_nodata = kd_product_info.nodata

    par_prod_info = querydb.get_product_out_info(productcode=par_prod,
                                                 subproductcode=par_sprod,
                                                 version=par_version)
    par_product_info = functions.list_to_element(par_prod_info)
    par_nodata = par_product_info.nodata

    # Define outputs
    output_nodata = -32767

    # Get the first output -> PP subproduct generated (8daysavg or monavg)
    output_prod = output_product[0].productcode
    output_sprod = output_product[0].subproductcode
    output_version = output_product[0].version
    output_mapset = output_product[0].mapsetcode

    out_prod_ident = functions.set_path_filename_no_date(output_prod, output_sprod,
                                                         output_mapset, output_version, ext)
    output_subdir = functions.set_path_sub_directory(output_prod, output_sprod, 'Derived',
                                                     output_version, output_mapset)

    # Fixes ES2-36
    def generate_input_files_pp():
        # Take kd490 as starting point
        kd_files = kd490_input_dir + my_date + "*" + kd490_prod_ident
        input_files = sorted(glob.glob(kd_files))

        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            ancillary_chla = chla_input_dir + mydate + chla_prod_ident
            ancillary_par = par_input_dir + mydate + par_prod_ident
            ancillary_sst = sst_input_dir + mydate + sst_prod_ident

            # Generate a job only if all 4 inputs exist for that date
            do_comp = True
            if not os.path.isfile(ancillary_chla):
                do_comp = False
            if not os.path.isfile(ancillary_par):
                do_comp = False
            if not os.path.isfile(ancillary_sst):
                do_comp = False

            if do_comp is True:
                output_file = es_constants.processing_dir + output_subdir + os.path.sep + mydate + out_prod_ident
                my_inputs = (input_file, ancillary_chla, ancillary_par, ancillary_sst)
                yield (my_inputs, output_file)

    @active_if(activate_pp_comput)
    @files(generate_input_files_pp)
    def modis_pp_comp(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"chla_file": input_file[1], "sst_file": input_file[3], "kd_file": input_file[0],
                "par_file": input_file[2],
                "chla_nodata": chla_nodata, "sst_nodata": sst_nodata, "kd_nodata": kd_nodata,
                "par_nodata": par_nodata, "output_file": output_file, "output_nodata": output_nodata,
                "output_format": 'GTIFF', "output_type": None, "options": "compress=lzw"}
        raster_image_math.do_compute_primary_production(**args)
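# ---------------------------------------------------------------------
# Toy version of the generator-driven wiring above: with @files, ruffus
# calls the generator when the pipeline is run and creates one job per
# (inputs, output) tuple yielded, so jobs only exist for dates where all
# four inputs are present on disk (dates and names below are examples).
def generate_jobs_example():
    for mydate in ['201301', '201302']:
        my_inputs = ('kd_%s.tif' % mydate, 'chla_%s.tif' % mydate,
                     'par_%s.tif' % mydate, 'sst_%s.tif' % mydate)
        yield (my_inputs, 'pp_%s.tif' % mydate)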