def create_pipeline(starting_sprod):
    """Build the processing pipeline computing the monthly average of the
    MODIS PAR product.

    :param starting_sprod: sub-product code of the ingested input product.

    Relies on module-level context: prod, mapset, version, ext,
    activate_monavg_comput and the ruffus decorators.
    """
    # ---------------------------------------------------------------------
    # Define input files
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)

    input_dir = es_constants.processing_dir + \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

    # Wildcard covering every date of the ingested product
    starting_files = input_dir + "*" + in_prod_ident

    # Read input product nodata
    in_prod_info = querydb.get_product_out_info(productcode=prod,
                                                subproductcode=starting_sprod,
                                                version=version)
    product_info = functions.list_to_element(in_prod_info)
    in_nodata = product_info.nodata

    # print() form is valid under both Python 2 and Python 3
    # (the original bare print statement is Python-2-only syntax)
    print(in_nodata)

    # ---------------------------------------------------------------------
    # Monthly Average for a given month
    output_sprod = "monavg"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Group input files by year+month (YYYYMM) taken from the filename date
    formatter_in = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident
    formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" + out_prod_ident]

    @active_if(activate_monavg_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def modis_par_monavg(input_file, output_file):
        # Average all daily files of one month into a single GTIFF
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw",
                "input_nodata": in_nodata}
        raster_image_math.do_avg_image(**args)
def processing_merge(pipeline_run_level=0, pipeline_printout_level=0,
                     input_products='', output_product='', mapset=''):
    """Merge several input products into one output product by creating
    symbolic links from the input files into the output product directory.

    :param pipeline_run_level: unused here (kept for interface compatibility).
    :param pipeline_printout_level: unused here (kept for interface compatibility).
    :param input_products: iterable of input product records (productcode,
        subproductcode, version, start_date, end_date attributes).
    :param output_product: one-element sequence with the output product record.
    :param mapset: unused here (the output record's mapsetcode is used instead).
    :returns: (list_subprods, list_subprod_groups) from a dummy ProcLists.
    """
    # Dummy return arguments
    proc_lists = functions.ProcLists()
    list_subprods = proc_lists.list_subprods
    list_subprod_groups = proc_lists.list_subprod_groups

    es2_data_dir = es_constants.processing_dir + os.path.sep

    # Do some checks on the integrity of the inputs

    # Manage output_product data
    out_product_code = output_product[0].productcode
    out_sub_product_code = output_product[0].subproductcode
    out_version = output_product[0].version
    out_mapset = output_product[0].mapsetcode

    out_subdir = functions.set_path_sub_directory(out_product_code, out_sub_product_code,
                                                  'Ingest', out_version, out_mapset)
    out_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code,
                                                         out_mapset, out_version, ext)
    out_dir = es2_data_dir + out_subdir

    # Check the output product directory
    functions.check_output_dir(out_dir)

    # Loop over the input products:
    # (loop variable renamed from 'input' to avoid shadowing the builtin)
    for input_product in input_products:

        # Extract info from input product
        product_code = input_product.productcode
        sub_product_code = input_product.subproductcode
        version = input_product.version
        start_date = input_product.start_date
        end_date = input_product.end_date
        product_info = querydb.get_product_out_info_connect(productcode=product_code,
                                                            subproductcode=sub_product_code,
                                                            version=version)
        prod_type = product_info[0].product_type

        in_subdir = functions.set_path_sub_directory(product_code, sub_product_code,
                                                     prod_type, version, out_mapset)
        # NOTE(review): the input identifier is built from the OUTPUT product
        # codes, not product_code/sub_product_code — confirm this is intended
        # (it only works if input and output share the same filename pattern).
        in_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code,
                                                            out_mapset, version, ext)

        # Create the list of dates -> returns empty if start==end==None
        list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code,
                                                               version,
                                                               start_date=start_date,
                                                               end_date=end_date)
        # If list_dates == None, look at all existing files
        if list_dates is None:
            # print() form is valid under both Python 2 and Python 3
            print('To be Done !!!')
        # Otherwise, build list of files from list of dates
        else:
            for my_date in list_dates:
                in_file_path = es2_data_dir + in_subdir + my_date + in_prod_ident
                out_file_path = out_dir + my_date + out_prod_ident

                # Create the link
                functions.create_sym_link(in_file_path, out_file_path, force=False)

    return list_subprods, list_subprod_groups
def hide_some_files(self, productcode, version, subproductcode, type, mapset, dates):
    """Move some product files to the temporary directory - for generating
    a product request.

    :param productcode: product code of the files to hide.
    :param version: product version.
    :param subproductcode: sub-product code.
    :param type: product type ('Ingest'/'Derived'); parameter name kept for
        interface compatibility although it shadows the builtin.
    :param mapset: mapset code.
    :param dates: iterable of date strings prefixing the filenames.
    """
    source_dir = es_constants.es2globals['processing_dir'] + \
                 functions.set_path_sub_directory(productcode, subproductcode, type, version, mapset)
    target_dir = es_constants.es2globals['base_tmp_dir']

    for date in dates:
        filename = date + functions.set_path_filename_no_date(productcode, subproductcode,
                                                              mapset, version, '.tif')
        fullpath = source_dir + os.path.sep + filename
        fullpath_dest = target_dir + os.path.sep + filename
        try:
            os.rename(source_dir + filename, fullpath_dest)
        except OSError:
            # Narrowed from a bare except: only OS-level failures (missing
            # file, permissions) are expected here; a bare except would also
            # swallow KeyboardInterrupt/SystemExit.
            logger.error('Error in moving file %s' % fullpath)
def __init__(self, product_code, sub_product_code, mapset, version=None, from_date=None, to_date=None):
    """Initialize the dataset from the product catalogue.

    Looks up the (sub-)product in the database, resolves its directory path
    and frequency, and sets the default date range.

    :param product_code: product code.
    :param sub_product_code: sub-product code (lower-cased for the DB lookup).
    :param mapset: mapset code.
    :param version: optional product version (omitted from the query if None).
    :param from_date: optional start date (validated by self._check_date).
    :param to_date: optional end date (validated by self._check_date).
    :raises NoProductFound: if the DB query returns nothing.
    """
    # Sub-product codes are stored lower-case in the database
    kwargs = {'productcode': product_code,
              'subproductcode': sub_product_code.lower() if sub_product_code else None}
    if not version is None:
        kwargs['version'] = version
    # Validate the optional date boundaries before hitting the database
    if from_date:
        self._check_date(from_date)
    if to_date:
        self._check_date(to_date)
    self._db_product = querydb.get_product_out_info(**kwargs)
    if self._db_product is None or self._db_product == []:
        raise NoProductFound(kwargs)
    # The query may return a list; keep only the first record
    if isinstance(self._db_product, list):
        self._db_product = self._db_product[0]
    self.mapset = mapset
    self._path = functions.set_path_sub_directory(product_code, sub_product_code,
                                                  self._db_product.product_type, version, mapset)
    self.fullpath = os.path.join(es_constants.es2globals['processing_dir'], self._path)
    #self._db_frequency = querydb.db.frequency.get(self._db_product.frequency_id)
    #self._db_frequency = querydb.get_frequency(self._db_product.frequency_id)
    #if self._db_frequency is None:
    #    raise NoFrequencyFound(self._db_product)
    #self._frequency = Frequency(value=self._db_frequency.frequency,
    #                            unit=self._db_frequency.time_unit,
    #                            frequency_type=self._db_frequency.frequency_type,
    #                            dateformat=self._db_product.date_format)
    self._frequency = Dataset.get_frequency(self._db_product.frequency_id,
                                            self._db_product.date_format)
    # Products whose dates carry no year default to the current calendar year
    # (Jan 1st .. Dec 1st)
    if not from_date and self.no_year():
        from_date = datetime.date(datetime.date.today().year, 1, 1)
    if not to_date and self.no_year():
        to_date = datetime.date(datetime.date.today().year, 12, 1)
    self.from_date = from_date or None
    # If no end date was given, fall back to the frequency's notion of "today"
    self.to_date = to_date or self._frequency.today()
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None,
                    update_stats=False, nrt_products=True):
    """Build the processing pipeline for dekadal (10-day) fire-count products:
    10dcount plus its statistics (avg/min/max) and (commented-out) anomalies.

    :param prod: product code.
    :param starting_sprod: sub-product code of the ingested input.
    :param mapset: mapset code.
    :param version: product version.
    :param starting_dates: optional explicit list of input dates; if None,
        a wildcard over the whole Ingest directory is used.
    :param proc_lists: optional ProcLists accumulator (created if None).
    :param update_stats: activates the 10d statistics group.
    :param nrt_products: activates the near-real-time products group.
    :returns: the (possibly newly created) proc_lists.
    """
    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Set DEFAULTS: all off
    activate_10dstats_comput = 0            # 10d stats
    activate_10danomalies_comput = 0        # 10d anomalies

    # switch wrt groups - according to options
    if nrt_products:
        activate_10dcount_comput = 1        # 10d anomalies
        activate_10danomalies_comput = 1    # monthly anomalies

    if update_stats:
        activate_10dstats_comput = 1        # 10d stats

    # switch wrt single products: not to be changed !!
    # (note: activate_10dcount_comput is unconditionally re-set to 1 here,
    # overriding the nrt_products branch above)
    activate_10dcount_comput = 1            # 10d count
    activate_10dcountavg_comput = 1
    activate_10dcountmin_comput = 1
    activate_10dcountmax_comput = 1
    activate_10ddiff_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)

    #logger.debug('Base data directory is: %s' % es2_data_dir)
    input_dir = es2_data_dir + \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = input_dir + "*" + in_prod_ident

    # ---------------------------------------------------------------------
    # 10dcount
    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount")
    output_sprod = proc_lists.proc_add_subprod("10dcount", "10dcount", final=False,
                                               descriptive_name='10d Count',
                                               description='Fire Count for dekad',
                                               frequency_id='e1dekad',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)
    out_prod_ident_10dcount = functions.set_path_filename_no_date(prod, output_sprod,
                                                                  mapset, version, ext)
    output_subdir_10dcount = functions.set_path_sub_directory(prod, output_sprod,
                                                              'Derived', version, mapset)

    def generate_parameters_10dcount():
        """Yield (file_list, output_file) pairs, one per complete dekad
        (the current dekad is excluded)."""
        # Look for all input files in input_dir, and sort them
        # NOTE(review): glob.glob is called on starting_files, which is a
        # LIST when starting_dates is given — confirm that case is handled
        # by the caller or never occurs here.
        input_files = glob.glob(starting_files)
        dekad_list = []
        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)
        dekad_list = sorted(dekad_list)

        # Compute the 'julian' dakad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        for dekad in dekad_list:
            # Exclude the current dekad (it is still incomplete)
            if dekad != dekad_now:
                file_list = []
                my_dekad_str = functions.conv_dekad_2_date(dekad)
                # Collect every daily file belonging to this dekad
                for input_file in input_files:
                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
                    mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd[0:8])
                    if mydekad_nbr == dekad:
                        file_list.append(input_file)

                output_file = es_constants.processing_dir + output_subdir_10dcount + \
                              os.path.sep + my_dekad_str + out_prod_ident_10dcount
                yield (file_list, output_file)

    @active_if(activate_10dcount_comput)
    @files(generate_parameters_10dcount)
    def std_precip_10dcount(input_file, output_file):
        # Cumulate the daily fire counts of one dekad into a single GTIFF
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_cumulate(**args)

    # ---------------------------------------------------------------------
    # 10dcountavg
    starting_files_10dcount = es_constants.processing_dir + output_subdir_10dcount + \
                              "*" + out_prod_ident_10dcount
    output_sprod_group = proc_lists.proc_add_subprod_group("10dstats")
    output_sprod = proc_lists.proc_add_subprod("10dcountavg", "10dstats", final=False,
                                               descriptive_name='10d Fire Average',
                                               description='Average fire for dekad',
                                               frequency_id='e1dekad',
                                               date_format='MMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Group per-year dekad files by MMDD (same dekad across all years)
    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dcountavg_comput)
    @collate(starting_files_10dcount, formatter(formatter_in), formatter_out)
    def std_precip_10davg(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            'output_type': 'Float32',
            'input_nodata': -32767
        }
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # 10dcountmin
    output_sprod = proc_lists.proc_add_subprod("10dcountmin", "10dstats", final=False,
                                               descriptive_name='10d Fire Minimum',
                                               description='Minimum Fire for dekad',
                                               frequency_id='e1dekad',
                                               date_format='MMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dcountmin_comput)
    @collate(starting_files_10dcount, formatter(formatter_in), formatter_out)
    def std_precip_10dmin(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_min_image(**args)

    # ---------------------------------------------------------------------
    # 10dcountmax
    # NOTE(review): the description below says 'rainfall' in a fire-count
    # product — looks like a copy-paste from the precipitation pipeline;
    # confirm before changing the stored metadata string.
    output_sprod = proc_lists.proc_add_subprod("10dcountmax", "10dstats", final=False,
                                               descriptive_name='10d Maximum',
                                               description='Maximum rainfall for dekad',
                                               frequency_id='e1dekad',
                                               date_format='MMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dcountmax_comput)
    @collate(starting_files_10dcount, formatter(formatter_in), formatter_out)
    def std_precip_10dmax(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_max_image(**args)

    # # ---------------------------------------------------------------------
    # # 10dDiff
    # output_sprod_group=proc_lists.proc_add_subprod_group("10danomalies")
    # output_sprod=proc_lists.proc_add_subprod("10dcountdiff", "10danomalies", final=False,
    #                                          descriptive_name='10d Absolute Difference',
    #                                          description='10d Absolute Difference vs. LTA',
    #                                          frequency_id='e1dekad',
    #                                          date_format='YYYYMMDD',
    #                                          masked=False,
    #                                          timeseries_role='10d',
    #                                          active_default=True)
    # out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    # output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset)
    #
    # # Starting files + avg
    # formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident
    # formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident
    #
    # ancillary_sprod = "10davg"
    # ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
    # ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived',version, mapset)
    # ancillary_input="{subpath[0][5]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident
    #
    # @follows(std_precip_10davg)
    # @active_if(activate_10danomalies_comput, activate_10ddiff_comput)
    # @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    # def std_precip_10ddiff(input_file, output_file):
    #
    #     output_file = functions.list_to_element(output_file)
    #     functions.check_output_dir(os.path.dirname(output_file))
    #     args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
    #     raster_image_math.do_oper_subtraction(**args)

    return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None,
                    nrt_products=True, logger=None):
    """Build the processing pipeline computing the 3-day average of the
    daily chlorophyll (chl) product.

    :param prod: product code.
    :param starting_sprod: sub-product code of the ingested input.
    :param mapset: mapset code.
    :param version: product version.
    :param starting_dates: optional explicit list of input dates; if None,
        a wildcard over the whole Ingest directory is used.
    :param proc_lists: optional ProcLists accumulator (created if None).
    :param nrt_products: unused here (kept for interface compatibility).
    :param logger: optional logger; debug output is skipped when None
        (previously logger.debug was called unconditionally, raising
        AttributeError when the argument was omitted).
    :returns: the (possibly newly created) proc_lists.
    """
    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Set DEFAULTS: all ON
    activate_3davg_comput = 1
    activate_1monavg_comput = 1

    sds_meta = metadata.SdsMetadata()
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files (chl)
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)
    input_dir = es2_data_dir + functions.set_path_sub_directory(prod, starting_sprod,
                                                                'Ingest', version, mapset)
    starting_files = input_dir + "*" + in_prod_ident

    # ----------------------------------------------------------------------------------------------------------------
    # 1 . 3davg
    # 3 Day average of the 1 day Chl, re-projected on target mapset
    output_sprod = proc_lists.proc_add_subprod("3dayavg", prod, final=False,
                                               descriptive_name='3day Avg',
                                               description='3 day Average',
                                               frequency_id='e1day',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    prod_ident_3davg = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    subdir_3davg = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Use a specific function, to skip the current day
    def generate_parameters_3davg():
        """Yield ([day, day+1, day+2], output_file) triples for every day
        whose two following daily files also exist."""
        # Look for all input files in input_dir, and sort them
        if starting_dates is not None:
            input_files = [input_dir + my_date + in_prod_ident for my_date in starting_dates]
        else:
            input_files = glob.glob(input_dir + "*" + in_prod_ident)

        if logger is not None:
            logger.debug("starting_files %s" % input_files)

        # Create unique, sorted list of all days (YYYYMMDD strings)
        day_list = []
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            if mydate_yyyymmdd not in day_list:
                day_list.append(mydate_yyyymmdd)
        day_list = sorted(day_list)

        for myday in day_list:
            # Pick the file(s) matching this day
            input_file = [s for s in input_files if myday in s]
            file_list = []
            basename = os.path.basename(input_file[0])
            # Date is in format YYYYMMDD
            mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
            yyyy = int(mydate_yyyymmdd[0:4])
            mm = int(mydate_yyyymmdd[4:6])
            dd = int(mydate_yyyymmdd[6:8])

            # Both following days must exist, otherwise skip this window
            day2 = datetime(yyyy, mm, dd) + timedelta(1)
            day2_filepath = input_dir + day2.strftime('%Y%m%d') + in_prod_ident
            if not functions.is_file_exists_in_path(day2_filepath):
                continue

            day3 = datetime(yyyy, mm, dd) + timedelta(2)
            day3_filepath = input_dir + day3.strftime('%Y%m%d') + in_prod_ident
            if not functions.is_file_exists_in_path(day3_filepath):
                continue

            file_list.append(input_file[0])
            file_list.append(day2_filepath)
            file_list.append(day3_filepath)

            output_file = es_constants.processing_dir + subdir_3davg + os.path.sep + \
                          mydate_yyyymmdd + prod_ident_3davg
            file_list = sorted(file_list)
            # Check here the number of missing files (for optimization)
            if len(file_list) == 3:
                yield (file_list, output_file)

    @active_if(activate_3davg_comput)
    @files(generate_parameters_3davg)
    def compute_3dayavg(input_file, output_file):
        # No-data value is read from the first input file's metadata
        no_data = int(sds_meta.get_nodata_value(input_file[0]))
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "",
            "input_nodata": no_data,
            "output_nodata": no_data
        }
        raster_image_math.do_avg_image(**args)

    return proc_lists
activate_1monstd = 1 # for Group 3.d (monthly_anomalies) -> TB Done activate_1monsndvi = 1 activate_1monandvi = 1 activate_1monvci = 1 activate_1monicn = 1 # --------------------------------------------------------------------- # Define input files (NDV) starting_sprod = 'ndv' in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, ext) logger.debug('Base data directory is: %s' % es_constants.processing_dir) input_dir = es_constants.processing_dir+ \ functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset) logger.debug('Input data directory is: %s' % input_dir) starting_files = input_dir+"*"+in_prod_ident logger.debug('Starting files wild card is: %s' % starting_files) # --------------------------------------------------------------------- # 1.a 10Day non-filtered Stats # --------------------------------------------------------------------- # --------------------------------------------------------------------- # NDV avg x dekad (i.e. avg_dekad) output_sprod = "10davg" prod_ident_10davg = functions.set_path_filename_no_date(prod, output_sprod, mapset, ext) subdir_10davg = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None):
    """Build the processing pipeline for the MODIS chlorophyll-a monthly
    statistics: monthly average (monavg), monthly climatology (monclim)
    and monthly anomaly (monanom, off by default).

    :param prod: product code.
    :param starting_sprod: sub-product code of the ingested input.
    :param mapset: mapset code.
    :param version: product version.
    :param starting_dates: unused here (my_date is hard-set to None below).
    :param proc_lists: optional ProcLists accumulator (created if None).
    :returns: the (possibly newly created) proc_lists.
    """
    my_date = None
    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # 1. 10d prod stats
    activate_monavg_comput = 1
    activate_monclim_comput = 1
    activate_monanom_comput = 0

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)
    input_dir = es2_data_dir + functions.set_path_sub_directory(prod, starting_sprod,
                                                                'Ingest', version, mapset)

    # my_date is always None here, so the wildcard branch is always taken
    if my_date is not None:
        starting_files = input_dir + my_date + "*" + in_prod_ident
    else:
        starting_files = input_dir + "*" + in_prod_ident

    # ---------------------------------------------------------------------
    # Monthly Average for a given month
    # NOTE(review): date_format 'YYYMMMMDD' (here and below) looks like a typo
    # for 'YYYYMMDD' — confirm against the subproduct catalogue before changing.
    output_sprod_group = proc_lists.proc_add_subprod_group("monstats")
    output_sprod = proc_lists.proc_add_subprod("monavg", "monstats", final=False,
                                               descriptive_name='Monthly average',
                                               description='Chla Monthly average',
                                               frequency_id='',
                                               date_format='YYYMMMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Group daily files by year+month (YYYYMM)
    formatter_in = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" + out_prod_ident
    ]

    @active_if(activate_monavg_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def modis_chla_monavg(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        out_filename = os.path.basename(output_file)
        str_date = out_filename[0:6]
        # expected/current day counts are computed but the completeness check
        # below is currently disabled
        expected_ndays = functions.get_number_days_month(str_date)
        functions.check_output_dir(os.path.dirname(output_file))
        current_ndays = len(input_file)
        # if expected_ndays != current_ndays:
        #     logger.info('Missing days for period: %s. Skip' % str_date)
        # else:
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Monthly Climatology for all years
    output_sprod = proc_lists.proc_add_subprod("monclim", "monstats", final=False,
                                               descriptive_name='Monthly climatology',
                                               description='Chla Monthly climatology',
                                               frequency_id='',
                                               date_format='YYYMMMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    # Climatology is derived from the monthly averages produced above
    new_input_subprod = 'monavg'
    new_in_prod_ident = functions.set_path_filename_no_date(prod, new_input_subprod,
                                                            mapset, version, ext)
    new_input_dir = es2_data_dir + functions.set_path_sub_directory(prod, new_input_subprod,
                                                                    'Derived', version, mapset)
    new_starting_files = new_input_dir + "*" + new_in_prod_ident

    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Group monthly averages by month (MM) across all years
    formatter_in = "[0-9]{4}(?P<MM>[0-9]{2})" + new_in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MM[0]}" + out_prod_ident
    ]

    @active_if(activate_monclim_comput)
    @collate(new_starting_files, formatter(formatter_in), formatter_out)
    def modis_chla_monclim(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', \
                "options": "compress=lzw"}
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Monthly Anomaly for a given monthly
    output_sprod = proc_lists.proc_add_subprod("monanom", "monstats", final=False,
                                               descriptive_name='Monthly anomaly',
                                               description='Chla Monthly anomaly',
                                               frequency_id='',
                                               date_format='YYYMMMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MM>[0-9]{2})" + new_in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + \
                    "{YYYY[0]}{MM[0]}" + out_prod_ident

    # The climatology of the same month is attached as ancillary input
    ancillary_sprod = "monclim"
    ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod,
                                                                mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod,
                                                        'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + \
                      "{MM[0]}" + ancillary_sprod_ident

    @active_if(activate_monanom_comput)
    @transform(new_starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    def modis_chla_mondiff(input_file, output_file):
        # Anomaly = monthly average minus monthly climatology
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_oper_subtraction(**args)

    return proc_lists
def create_pipeline(starting_sprod): # --------------------------------------------------------------------- # Define input files in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, ext) input_dir = es_constants.processing_dir+ \ functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset) starting_files = input_dir+"*"+in_prod_ident # --------------------------------------------------------------------- # Average output_sprod="10davg" out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out=["{subpath[0][4]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident] @active_if(activate_fewsnet_rfe_comput, activate_10d_comput, activate_10davg_comput) @collate(starting_files, formatter(formatter_in),formatter_out) def fewsnet_10davg(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_avg_image(**args) # --------------------------------------------------------------------- # Minimum output_sprod="10dmin" out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out=["{subpath[0][4]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident] @active_if(activate_fewsnet_rfe_comput, activate_10d_comput, activate_10dmin_comput) @collate(starting_files, formatter(formatter_in),formatter_out) def fewsnet_10dmin(input_file, output_file): output_file = functions.list_to_element(output_file) 
functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_min_image(**args) # --------------------------------------------------------------------- # Maximum output_sprod="10dmax" out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out=["{subpath[0][4]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident] @active_if(activate_fewsnet_rfe_comput, activate_10d_comput, activate_10dmax_comput) @collate(starting_files, formatter(formatter_in),formatter_out) def fewsnet_10dmax(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_max_image(**args) # --------------------------------------------------------------------- # 10dDiff output_sprod="10ddiff" out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) # Starting files + avg formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out="{subpath[0][4]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident ancillary_sprod = "10davg" ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, ext) ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived',version, mapset) ancillary_input="{subpath[0][4]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident @follows(fewsnet_10davg) @active_if(activate_fewsnet_rfe_comput, activate_10d_comput, 
activate_10ddiff_comput) @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out) def fewsnet_10ddiff(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_oper_subtraction(**args) # --------------------------------------------------------------------- # 10dAvgPerc output_sprod="10davgperc" out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) # Starting files + avg formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out="{subpath[0][4]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident ancillary_sprod = "10davg" ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, ext) ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived',version, mapset) ancillary_input="{subpath[0][4]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident @follows(fewsnet_10ddiff) @active_if(activate_fewsnet_rfe_comput, activate_10d_comput, activate_10davgperc_comput) @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out) def fewsnet_10davgperc(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_oper_division_perc(**args) # --------------------------------------------------------------------- # 10dperc output_sprod="10dperc" out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, ext) output_subdir = 
functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) # Starting files + avg formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out="{subpath[0][4]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident ancillary_sprod = "10davg" ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, ext) ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version, mapset) ancillary_input="{subpath[0][4]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident @follows(fewsnet_10davg) @active_if(activate_fewsnet_rfe_comput, activate_10d_comput, activate_10dperc_comput) @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out) def fewsnet_10dperc(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file[0], "avg_file": input_file[1], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_compute_perc_diff_vs_avg(**args) # --------------------------------------------------------------------- # 10dnp output_sprod="10dnp" out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) # Starting files + min + max formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out="{subpath[0][4]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident ancillary_sprod_1 = "10dmin" ancillary_sprod_ident_1 = functions.set_path_filename_no_date(prod, ancillary_sprod_1, mapset, ext) ancillary_subdir_1 = functions.set_path_sub_directory(prod, ancillary_sprod_1, 'Derived',version, mapset) ancillary_input_1="{subpath[0][4]}"+os.path.sep+ancillary_subdir_1+"{MMDD[0]}"+ancillary_sprod_ident_1 ancillary_sprod_2 = "10dmax" 
ancillary_sprod_ident_2 = functions.set_path_filename_no_date(prod, ancillary_sprod_2, mapset, ext) ancillary_subdir_2 = functions.set_path_sub_directory(prod, ancillary_sprod_2, 'Derived',version, mapset) ancillary_input_2="{subpath[0][4]}"+os.path.sep+ancillary_subdir_2+"{MMDD[0]}"+ancillary_sprod_ident_2 @follows(fewsnet_10dmin, fewsnet_10dmax) @active_if(activate_fewsnet_rfe_comput, activate_10d_comput, activate_10dnp_comput) @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input_1, ancillary_input_2), formatter_out) def fewsnet_10dnp(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file[0], "min_file": input_file[1],"max_file": input_file[2], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_make_vci(**args) # --------------------------------------------------------------------- # 1moncum output_sprod="1moncum" out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) # inputs: files from same months formatter_in="(?P<YYYYMM>[0-9]{6})(?P<DD>[0-9]{2})"+in_prod_ident formatter_out="{subpath[0][4]}"+os.path.sep+output_subdir+"{YYYYMM[0]}"+'01'+out_prod_ident # @follows(fewsnet_10davg) @active_if(activate_fewsnet_rfe_comput, activate_1month_comput, activate_1moncum_comput) @collate(starting_files, formatter(formatter_in), formatter_out) def fewsnet_1moncum(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file,"output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_cumulate(**args) # --------------------------------------------------------------------- # Monthly Average 
new_input_subprod='1moncum' in_prod_ident= functions.set_path_filename_no_date(prod, new_input_subprod, mapset, ext) output_sprod='1monavg' out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out=["{subpath[0][4]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident] @active_if(activate_fewsnet_rfe_comput, activate_1month_comput, activate_1monavg_comput) @collate(fewsnet_1moncum, formatter(formatter_in),formatter_out) def fewsnet_1monavg(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_avg_image(**args) # --------------------------------------------------------------------- # Monthly Minimum output_sprod="1monmin" out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out=["{subpath[0][4]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident] @active_if(activate_fewsnet_rfe_comput, activate_1month_comput, activate_1monmin_comput) @collate(fewsnet_1moncum, formatter(formatter_in),formatter_out) def fewsnet_1monmin(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_min_image(**args) # --------------------------------------------------------------------- # Monthly Maximum output_sprod="1monmax" out_prod_ident = 
functions.set_path_filename_no_date(prod, output_sprod, mapset, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) reg_ex_in="[0-9]{4}([0-9]{4})"+in_prod_ident formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out=["{subpath[0][4]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident] @active_if(activate_fewsnet_rfe_comput, activate_1month_comput, activate_1monmax_comput) @collate(fewsnet_1moncum, formatter(formatter_in),formatter_out) def fewsnet_1monmax(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_max_image(**args) # --------------------------------------------------------------------- # 1monDiff output_sprod="1mondiff" out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) # inputs # Starting files + avg formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out="{subpath[0][4]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident ancillary_sprod = "1monavg" ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, ext) ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version, mapset) ancillary_input="{subpath[0][4]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident @follows(fewsnet_1monavg) @active_if(activate_fewsnet_rfe_comput, activate_1month_comput, activate_1mondiff_comput) @transform(fewsnet_1moncum, formatter(formatter_in), add_inputs(ancillary_input), formatter_out) def fewsnet_1mondiff(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) 
args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_oper_subtraction(**args) # --------------------------------------------------------------------- # 1monperc output_sprod="1monperc" out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) # inputs # Starting files + avg formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out="{subpath[0][4]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident ancillary_sprod = "1monavg" ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, ext) ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived',version, mapset) ancillary_input="{subpath[0][4]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident @follows(fewsnet_1monavg) @active_if(activate_fewsnet_rfe_comput, activate_1month_comput, activate_1monperc_comput) @transform(fewsnet_1moncum, formatter(formatter_in), add_inputs(ancillary_input), formatter_out) def fewsnet_1monperc(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file[0], "avg_file": input_file[1], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_compute_perc_diff_vs_avg(**args) # --------------------------------------------------------------------- # 1monnp output_sprod="1monnp" out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) # Starting files + min + max formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident 
formatter_out="{subpath[0][4]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident ancillary_sprod_1 = "1monmin" ancillary_sprod_ident_1 = functions.set_path_filename_no_date(prod, ancillary_sprod_1, mapset, ext) ancillary_subdir_1 = functions.set_path_sub_directory(prod, ancillary_sprod_1, 'Derived',version, mapset) ancillary_input_1="{subpath[0][4]}"+os.path.sep+ancillary_subdir_1+"{MMDD[0]}"+ancillary_sprod_ident_1 ancillary_sprod_2 = "1monmax" ancillary_sprod_ident_2 = functions.set_path_filename_no_date(prod, ancillary_sprod_2, mapset, ext) ancillary_subdir_2 = functions.set_path_sub_directory(prod, ancillary_sprod_2, 'Derived',version, mapset) ancillary_input_2="{subpath[0][4]}"+os.path.sep+ancillary_subdir_2+"{MMDD[0]}"+ancillary_sprod_ident_2 @follows(fewsnet_1monmin, fewsnet_1monmax) @active_if(activate_fewsnet_rfe_comput, activate_1month_comput, activate_1monnp_comput) @transform(fewsnet_1moncum, formatter(formatter_in), add_inputs(ancillary_input_1, ancillary_input_2), formatter_out) def fewsnet_1monnp(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file[0], "min_file": input_file[1],"max_file": input_file[2], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_make_vci(**args)
def create_pipeline(input_products, output_product, logfile=None, nrt_products=True, update_stats=False):
    """Build the ruffus pipeline computing MODIS Primary Production (PP).

    PP is derived from 4 inputs (chla, sst, kd490 and par), matched by
    productcode inside ``input_products``; on top of the PP product the
    inter-annual statistics (climatology, minimum, maximum) are generated.

    :param input_products: the 4 input datasets (chla, sst, kd490, par);
                           order-independent.
    :param output_product: list whose first element defines the output
                           product code / version / mapset.
    :param logfile: optional logfile name passed to the logger.
    :param nrt_products: if True, activate the near-real-time PP computation.
    :param update_stats: if True, activate the inter-annual statistics.
    :return: 1 on invalid arguments (wrong number of inputs, missing input,
             inconsistent mapsets); None otherwise (the ruffus tasks are
             registered as a side effect of the decorators).
    """
    proc_lists = functions.ProcLists()
    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_modis_pp')

    # Set DEFAULTS: all off
    activate_pp_comput = 0          # PP from Chla, SST, Kd490 and PAR
    activate_stats_comput = 0       # Stats computation (inter-annual clim, min, max)
    activate_anomalies_comput = 0   # Anomalies computation (not yet done!!)

    # Switch wrt groups - according to options
    if nrt_products:
        activate_pp_comput = 1      # PP from Chla, SST, Kd490 and PAR
    if update_stats:
        activate_stats_comput = 1
        activate_anomalies_comput = 1

    # Per-subproduct switches (all on by default)
    activate_pp_prod_comput = 1
    activate_pp_stats_clim_comput = 1
    activate_pp_stats_min_comput = 1
    activate_pp_stats_max_comput = 1

    # ---------------------------------------------------------------------
    # Create lists
    # my_date='20160601'
    my_date = ''
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Parse the arguments and extract the 4 input variables
    if len(input_products) != 4:
        spec_logger.error('Modis PP computation requires 4 inputs. Exit')
        return 1

    found_chla = False
    found_sst = False
    found_par = False
    found_kd490 = False

    # The four inputs are recognized by their productcode; for each one the
    # file-name ident and the input directory are pre-computed.
    for input_product in input_products:
        if re.search('.*chla.*', input_product.productcode):
            found_chla = True
            chla_prod = input_product.productcode
            chla_version = input_product.version
            chla_sprod = input_product.subproductcode
            chla_mapset = input_product.mapsetcode
            chla_prod_ident = functions.set_path_filename_no_date(
                chla_prod, chla_sprod, chla_mapset, chla_version, ext)
            chla_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(chla_prod, chla_sprod, 'Derived', chla_version, chla_mapset)
        if re.search('.*sst.*', input_product.productcode):
            found_sst = True
            sst_prod = input_product.productcode
            sst_version = input_product.version
            sst_sprod = input_product.subproductcode
            sst_mapset = input_product.mapsetcode
            sst_prod_ident = functions.set_path_filename_no_date(
                sst_prod, sst_sprod, sst_mapset, sst_version, ext)
            sst_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(sst_prod, sst_sprod, 'Derived', sst_version, sst_mapset)
        if re.search('.*kd490.*', input_product.productcode):
            found_kd490 = True
            kd490_prod = input_product.productcode
            kd490_version = input_product.version
            kd490_sprod = input_product.subproductcode
            kd490_mapset = input_product.mapsetcode
            kd490_prod_ident = functions.set_path_filename_no_date(
                kd490_prod, kd490_sprod, kd490_mapset, kd490_version, ext)
            kd490_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(kd490_prod, kd490_sprod, 'Derived', kd490_version, kd490_mapset)
        if re.search('.*par.*', input_product.productcode):
            found_par = True
            par_prod = input_product.productcode
            par_version = input_product.version
            par_sprod = input_product.subproductcode
            par_mapset = input_product.mapsetcode
            par_prod_ident = functions.set_path_filename_no_date(
                par_prod, par_sprod, par_mapset, par_version, ext)
            par_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(par_prod, par_sprod, 'Derived', par_version, par_mapset)

    # Check consistency of inputs
    if not found_chla or not found_kd490 or not found_par or not found_sst:
        spec_logger.error('At least one of 4 expected inputs missing. Exit')
        return 1
    if chla_mapset != sst_mapset or chla_mapset != kd490_mapset or chla_mapset != par_mapset:
        spec_logger.error('All 4 input mapset must be equals. Exit')
        return 1

    # Read input product nodata (the chla frequency drives the output naming)
    chla_prod_info = querydb.get_product_out_info(productcode=chla_prod,
                                                  subproductcode=chla_sprod,
                                                  version=chla_version)
    chla_product_info = functions.list_to_element(chla_prod_info)
    chla_nodata = chla_product_info.nodata
    chla_frequency = chla_product_info.frequency_id

    sst_prod_info = querydb.get_product_out_info(productcode=sst_prod,
                                                 subproductcode=sst_sprod,
                                                 version=sst_version)
    sst_product_info = functions.list_to_element(sst_prod_info)
    sst_nodata = sst_product_info.nodata

    kd_prod_info = querydb.get_product_out_info(productcode=kd490_prod,
                                                subproductcode=kd490_sprod,
                                                version=kd490_version)
    kd_product_info = functions.list_to_element(kd_prod_info)
    kd_nodata = kd_product_info.nodata

    par_prod_info = querydb.get_product_out_info(productcode=par_prod,
                                                 subproductcode=par_sprod,
                                                 version=par_version)
    par_product_info = functions.list_to_element(par_prod_info)
    par_nodata = par_product_info.nodata

    # Define outputs
    output_nodata = -32767

    # NOTE: the prod/mapset/version are taken from the FIRST OUTPUT passed;
    # the subproduct names are derived from the chla frequency.
    output_prod = output_product[0].productcode
    output_version = output_product[0].version
    output_mapset = output_product[0].mapsetcode

    if chla_frequency == 'e1month':
        frequency_string = 'monthly'
        output_sprod = 'monavg'
        output_sprod_clim = '1monclim'
        output_sprod_min = '1monmin'
        output_sprod_max = '1monmax'
        sub_product_group = '1monstat'
    elif chla_frequency == 'e1modis8day':
        frequency_string = '8 days'
        output_sprod = '8daysavg'
        activate_pp_stats_clim_comput = 1
        activate_pp_stats_min_comput = 1
        activate_pp_stats_max_comput = 1
        sub_product_group = '8daysstat'
        output_sprod_clim = '8daysclim'
        output_sprod_min = '8daysmin'
        output_sprod_max = '8daysmax'
    else:
        spec_logger.error('Frequency not recognized: %s. Exit!', chla_frequency)
        return

    out_prod_ident = functions.set_path_filename_no_date(
        output_prod, output_sprod, output_mapset, output_version, ext)
    output_subdir = functions.set_path_sub_directory(output_prod, output_sprod, 'Derived',
                                                     output_version, output_mapset)

    # Fixes ES2-36
    def generate_input_files_pp():
        # Take kd490 as starting point; for each kd490 date the three
        # ancillary inputs (chla, par, sst) must all exist, otherwise the
        # date is skipped.
        kd_files = kd490_input_dir + my_date + "*" + kd490_prod_ident
        input_files = sorted(glob.glob(kd_files))
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            ancillary_chla = chla_input_dir + mydate + chla_prod_ident
            ancillary_par = par_input_dir + mydate + par_prod_ident
            ancillary_sst = sst_input_dir + mydate + sst_prod_ident
            do_comp = True
            if not os.path.isfile(ancillary_chla):
                do_comp = False
            if not os.path.isfile(ancillary_par):
                do_comp = False
            if not os.path.isfile(ancillary_sst):
                do_comp = False
            if do_comp is True:
                output_file = es_constants.processing_dir + output_subdir + os.path.sep + mydate + out_prod_ident
                # Tuple order is fixed: (kd, chla, par, sst) - see indexing
                # in modis_pp_comp below.
                my_inputs = (input_file, ancillary_chla, ancillary_par, ancillary_sst)
                yield (my_inputs, output_file)

    @active_if(activate_pp_comput)
    @files(generate_input_files_pp)
    def modis_pp_comp(input_file, output_file):
        # input_file is the (kd, chla, par, sst) tuple yielded above
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"chla_file": input_file[1], "sst_file": input_file[3],
                "kd_file": input_file[0], "par_file": input_file[2],
                "sst_nodata": sst_nodata, "kd_nodata": kd_nodata,
                "chla_nodata": chla_nodata, "par_nodata": par_nodata,
                "output_file": output_file, "output_nodata": output_nodata,
                "output_format": 'GTIFF', "output_type": None,
                "options": "compress=lzw"}
        raster_image_math.do_compute_primary_production(**args)

    # ---------------------------------------------------------------------
    # Inter-annual statistics: the PP product just generated becomes the input
    prod = output_prod
    mapset = output_mapset
    new_input_subprod = output_sprod
    version = output_version
    in_prod_ident = functions.set_path_filename_no_date(
        prod, new_input_subprod, mapset, version, ext)
    in_prod_subdir = functions.set_path_sub_directory(prod, new_input_subprod, 'Derived', version, mapset)
    starting_files = es2_data_dir + in_prod_subdir + "*" + in_prod_ident

    # Mapping from the non-leap-year MMDD labels of the 8-day compositing
    # grid to the corresponding leap-year labels: after 28 Feb the 8-day
    # periods of a leap year start one day earlier, so both spellings must
    # be collected when stacking across years (fixes ES2-304).
    # Defined ONCE and shared by the clim/min/max generators (was duplicated
    # three times).
    MMDD_nonleap_dict = {
        '0101': '0101', '0109': '0109', '0117': '0117', '0125': '0125',
        '0202': '0202', '0210': '0210', '0218': '0218', '0226': '0226',
        '0306': '0305', '0314': '0313', '0322': '0321', '0330': '0329',
        '0407': '0406', '0415': '0414', '0423': '0422', '0501': '0430',
        '0509': '0508', '0517': '0516', '0525': '0524', '0602': '0601',
        '0610': '0609', '0618': '0617', '0626': '0625', '0704': '0703',
        '0712': '0711', '0720': '0719', '0728': '0727', '0805': '0804',
        '0813': '0812', '0821': '0820', '0829': '0828', '0906': '0905',
        '0914': '0913', '0922': '0921', '0930': '0929', '1008': '1007',
        '1016': '1015', '1024': '1023', '1101': '1031', '1109': '1108',
        '1117': '1116', '1125': '1124', '1203': '1202', '1211': '1210',
        '1219': '1218', '1227': '1226'
    }

    def _make_stats_inputs(stats_subdir, stats_prod_ident):
        # Generator factory for the ruffus @files decorator: for every MMDD
        # slot, glob all years' files (both leap and non-leap spellings),
        # de-duplicate them, and yield the (inputs, output) pair.
        def _generator():
            for MMDD_nonleap, MMDD_leap in MMDD_nonleap_dict.items():
                formatter_in_nonleap = es2_data_dir + in_prod_subdir + "*" + MMDD_nonleap + in_prod_ident
                nonleap_files = sorted(glob.glob(formatter_in_nonleap))
                formatter_in_leap = es2_data_dir + in_prod_subdir + "*" + MMDD_leap + in_prod_ident
                leap_files = sorted(glob.glob(formatter_in_leap))
                my_inputs = leap_files + nonleap_files
                input_files_unique = list(set(my_inputs))
                output_file = es_constants.processing_dir + stats_subdir + os.path.sep + MMDD_nonleap + stats_prod_ident
                yield (input_files_unique, output_file)
        return _generator

    # ---------------------------------------------------------------------
    # Climatology (inter-annual average)
    output_sprod_group = proc_lists.proc_add_subprod_group(sub_product_group)
    output_sprod = proc_lists.proc_add_subprod(
        output_sprod_clim,
        sub_product_group,
        final=False,
        descriptive_name='Inter-annual Climatology at ' + frequency_string + ' frequency',
        description='Inter-annual Climatology at ' + frequency_string + ' frequency',
        frequency_id=chla_frequency,
        date_format='MMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    out_prod_ident_clim = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_clim = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)
    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_clim + "{MMDD[0]}" + out_prod_ident_clim
    ]

    generate_input_files_pp_stats = _make_stats_inputs(output_subdir_clim, out_prod_ident_clim)

    # For the 8-day frequency the leap/non-leap pairing requires the custom
    # generator; for monthly data a plain @collate on MMDD is enough.
    if frequency_string != 'monthly':
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_clim_comput)
        @files(generate_input_files_pp_stats)
        def std_yearly_clim(input_file, output_file):
            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": reduced_list, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw"}
            raster_image_math.do_avg_image(**args)
    else:
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_clim_comput)
        @collate(starting_files, formatter(formatter_in), formatter_out)
        def std_yearly_clim(input_file, output_file):
            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": reduced_list, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw"}
            raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Inter-annual Minimum
    output_sprod = proc_lists.proc_add_subprod(
        output_sprod_min,
        sub_product_group,
        final=False,
        descriptive_name='Inter-annual Minimum at ' + frequency_string + ' frequency',
        description='Inter-annual Minimum at ' + frequency_string + ' frequency',
        frequency_id=chla_frequency,
        date_format='MMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    out_prod_ident_min = functions.set_path_filename_no_date(
        prod, output_sprod_min, mapset, version, ext)
    output_subdir_min = functions.set_path_sub_directory(
        prod, output_sprod_min, 'Derived', version, mapset)
    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_min + "{MMDD[0]}" + out_prod_ident_min
    ]

    generate_input_files_pp_stats_min = _make_stats_inputs(output_subdir_min, out_prod_ident_min)

    if frequency_string != 'monthly':
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_min_comput)
        @files(generate_input_files_pp_stats_min)
        def std_yearly_min(input_file, output_file):
            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": reduced_list, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw"}
            raster_image_math.do_min_image(**args)
    else:
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_min_comput)
        @collate(starting_files, formatter(formatter_in), formatter_out)
        def std_yearly_min(input_file, output_file):
            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": reduced_list, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw"}
            raster_image_math.do_min_image(**args)

    # ---------------------------------------------------------------------
    # Inter-annual Maximum
    output_sprod = proc_lists.proc_add_subprod(
        output_sprod_max,
        sub_product_group,
        final=False,
        descriptive_name='Inter-annual Maximum at ' + frequency_string + ' frequency',
        description='Inter-annual Maximum at ' + frequency_string + ' frequency',
        frequency_id=chla_frequency,
        date_format='MMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    out_prod_ident_max = functions.set_path_filename_no_date(
        prod, output_sprod_max, mapset, version, ext)
    output_subdir_max = functions.set_path_sub_directory(
        prod, output_sprod_max, 'Derived', version, mapset)
    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_max + "{MMDD[0]}" + out_prod_ident_max
    ]

    generate_input_files_pp_stats_max = _make_stats_inputs(output_subdir_max, out_prod_ident_max)

    if frequency_string != 'monthly':
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_max_comput)
        @files(generate_input_files_pp_stats_max)
        def std_yearly_max(input_file, output_file):
            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": reduced_list, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw"}
            raster_image_math.do_max_image(**args)
    else:
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_max_comput)
        @collate(starting_files, formatter(formatter_in), formatter_out)
        def std_yearly_max(input_file, output_file):
            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": reduced_list, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw"}
            raster_image_math.do_max_image(**args)
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None):
    """Build the ruffus pipeline deriving SST fronts (raster + shapefile).

    Two tasks are registered: front detection on the ingested SST rasters,
    and conversion of the resulting front rasters to ESRI shapefiles via
    gdal_polygonize.

    :param prod: input product code (e.g. 'modis-sst', 'pml-modis-sst',
                 'slstr-sst') - selects the front-detection parameters.
    :param starting_sprod: input sub-product code (the ingested SST).
    :param mapset: mapset code of the input/output datasets.
    :param version: product version.
    :param starting_dates: unused here (kept for interface compatibility).
    :param proc_lists: optional ProcLists accumulator; created if None.
    :return: the ProcLists object describing the generated sub-products.
    """
    my_date = None

    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    activate_front_detection = 1
    activate_shapefile_conversion = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files (SST)
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)
    input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, starting_sprod, 'Ingest', version, mapset)
    if my_date:
        starting_files = input_dir + my_date + "*" + in_prod_ident
    else:
        starting_files = input_dir + "*" + in_prod_ident

    # ---------------------------------------------------------------------
    # 1. Define and customize parameters
    #    (values concluded during the Marine Workshop Oct 2018)
    # ---------------------------------------------------------------------
    # Defaults, applied to any product without a specific tuning below
    # (this is the tuning used for 'pml-modis-sst').
    default_parameters = {
        'histogramWindowStride': 16,
        'histogramWindowSize': 32,
        'minTheta': 0.76,
        'minPopProp': 0.25,
        'minPopMeanDifference': 20,  # Temperature: 0.45 deg (multiply by 100 !!)
        'minSinglePopCohesion': 0.60,
        'minImageValue': 1,
        'minThreshold': 1
    }
    # Higher-sensitivity tuning shared by 'modis-sst' and 'slstr-sst':
    # a smaller window stride detects more fronts; 'minPopProp' is
    # deliberately omitted so the routine's internal default applies.
    high_sensitivity_parameters = {
        'histogramWindowStride': 8,
        'histogramWindowSize': 32,
        'minTheta': 0.76,
        'minPopMeanDifference': 25,  # Temperature: 0.45 deg (multiply by 100 !!)
        'minSinglePopCohesion': 0.60,
        'minImageValue': 1,
        'minThreshold': 1
    }
    if prod in ('modis-sst', 'slstr-sst'):
        parameters = high_sensitivity_parameters
    else:
        parameters = default_parameters

    # ---------------------------------------------------------------------
    # SST Fronts (raster)
    output_sprod_group = proc_lists.proc_add_subprod_group("fronts")
    output_sprod = proc_lists.proc_add_subprod(
        "sst-fronts",
        "fronts",
        final=False,
        descriptive_name='SST Fronts',
        description='Sea Surface Temperature Fronts',
        frequency_id='',
        date_format='YYYMMMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    prod_ident_fronts = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    subdir_fronts = functions.set_path_sub_directory(prod, output_sprod, 'Derived',
                                                     version, mapset)
    formatter_in = "(?P<YYYYMMDD>[0-9]{8})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + subdir_fronts + "{YYYYMMDD[0]}" + prod_ident_fronts
    ]

    @active_if(activate_front_detection)
    @transform(starting_files, formatter(formatter_in), formatter_out)
    def sst_fronts_detection(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # NOTE(review): "compress = lzw" (with spaces) differs from the
        # "compress=lzw" used by every other task - confirm the receiving
        # routine tolerates the spaces before normalizing it.
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress = lzw",
            "parameters": parameters
        }
        raster_image_math.do_detect_sst_fronts(**args)
        print('Done with raster')

    # ---------------------------------------------------------------------
    # SST Fronts (shapefile)
    input_subprod_fronts = "sst-fronts"
    in_prod_ident_fronts = functions.set_path_filename_no_date(
        prod, input_subprod_fronts, mapset, version, ext)
    input_dir_fronts = es2_data_dir + functions.set_path_sub_directory(
        prod, input_subprod_fronts, 'Derived', version, mapset)
    starting_files_fronts = input_dir_fronts + "*" + in_prod_ident_fronts

    output_sprod = proc_lists.proc_add_subprod(
        "sst-fronts-shp",
        "fronts",
        final=False,
        descriptive_name='SST Fronts',
        description='Sea Surface Temperature Fronts (shape)',
        frequency_id='',
        date_format='YYYMMMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    prod_ident_fronts_shp = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, '.shp')
    subdir_fronts_shp = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)
    formatter_in = "(?P<YYYYMMDD>[0-9]{8})" + in_prod_ident_fronts
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + subdir_fronts_shp + "{YYYYMMDD[0]}" + prod_ident_fronts_shp
    ]

    @active_if(activate_shapefile_conversion)
    @transform(starting_files_fronts, formatter(formatter_in), formatter_out)
    def sst_shapefile_conversion(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        # Check if the (final) output file already exists - and delete it
        # (all shapefile side-car files: .shp/.shx/.dbf/...)
        if os.path.isfile(output_file):
            files = glob.glob(output_file.replace('.shp', '.*'))
            for my_file in files:
                os.remove(my_file)
        # Create a temporary output file (see also ES2-33)
        try:
            tmpdir = tempfile.mkdtemp(prefix=__name__,
                                      suffix='_' + os.path.basename(output_file),
                                      dir=es_constants.base_tmp_dir)
        except Exception:
            raise NameError('Error in creating tmpdir')
        # Convert to shapefile in the tmpdir, then move into place
        tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)
        functions.check_output_dir(os.path.dirname(tmp_output_file))
        command = es_constants.es2globals['gdal_polygonize'] + ' ' + input_file + ' ' \
            + tmp_output_file + ' -nomask -f "ESRI Shapefile"'
        os.system(command)
        # Check output dir exists (see ES2-285)
        functions.check_output_dir(os.path.dirname(output_file))
        # Move and remove tmpdir
        files = glob.glob(tmp_output_file.replace('.shp', '.*'))
        for my_file in files:
            os.rename(
                my_file,
                os.path.dirname(output_file) + os.path.sep + os.path.basename(my_file))
        shutil.rmtree(tmpdir)

    return proc_lists
def do_ratio(self):
    """Define the ratio (current/average) sub-product and its ruffus patterns.

    Registers either a dekadal ("10dratio") or a monthly ("1monratio")
    sub-product, depending on self.frequency, and stores on self the
    formatter_in/formatter_out regex/templates plus the ancillary-input
    pattern(s) that the pipeline task defined elsewhere will consume.

    Reads:  self.frequency, self.prod, self.mapset, self.version, self.ext,
            self.subproduct_code, self.starting_sprod, self.in_prod_ident,
            self.input_subprod_monthly, self.in_prod_ident_monthly
    Writes: self.formatter_in, self.formatter_out and, per branch,
            self.ancillary_input_1 (10d) or self.ancillary_input (monthly).
    """
    # ---------------------------------------------------------------------
    # 10dratio
    if self.frequency == '10d':
        # Register the dekadal ratio sub-product in the processing lists.
        output_sprod = self.proc_lists.proc_add_subprod(
            "10dratio",
            "10anomalies",
            final=False,
            descriptive_name='10d Ratio ' + self.subproduct_code,
            description='10d Ratio (curr/avg) ' + self.subproduct_code,
            frequency_id='e1dekad',
            date_format='YYYYMMDD',
            masked=False,
            timeseries_role=self.starting_sprod,  #'10d',
            # display_index=8,
            active_default=True)

        out_prod_ident = functions.set_path_filename_no_date(
            self.prod, output_sprod, self.mapset, self.version, self.ext)
        output_subdir = functions.set_path_sub_directory(
            self.prod, output_sprod, 'Derived', self.version, self.mapset)

        # Starting files + min + max
        # Input regex captures year and month/day separately so the average
        # (climatology, keyed on MMDD only) can be matched per date.
        self.formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + self.in_prod_ident
        # NOTE(review): here formatter_out is a plain string, while the
        # monthly branch below wraps it in a list — confirm the consuming
        # ruffus task accepts both forms.
        self.formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

        # Ancillary input: the 10-day average ('10davg') for the same MMDD.
        ancillary_sprod_1 = "10davg"
        ancillary_sprod_ident_1 = functions.set_path_filename_no_date(
            self.prod, ancillary_sprod_1, self.mapset, self.version, self.ext)
        ancillary_subdir_1 = functions.set_path_sub_directory(
            self.prod, ancillary_sprod_1, 'Derived', self.version, self.mapset)
        self.ancillary_input_1 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_1 + "{MMDD[0]}" + ancillary_sprod_ident_1
    else:
        # Monthly ratio branch (any frequency other than '10d').
        output_sprod = self.proc_lists.proc_add_subprod(
            "1monratio",
            "filtered_anomalies",
            final=False,
            descriptive_name='Monthly Ratio',
            description='Monthly Ratio (curr/avg)',
            frequency_id='e1month',
            date_format='YYYYMMDD',
            masked=False,
            timeseries_role=self.input_subprod_monthly,  # '10d',
            # display_index=118,
            active_default=True)

        prod_ident_ratio_linearx2 = functions.set_path_filename_no_date(
            self.prod, output_sprod, self.mapset, self.version, self.ext)
        subdir_ratio_linearx2 = functions.set_path_sub_directory(
            self.prod, output_sprod, 'Derived', self.version, self.mapset)

        # Monthly inputs are matched against the monthly product identifier.
        self.formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + self.in_prod_ident_monthly
        self.formatter_out = [
            "{subpath[0][5]}" + os.path.sep + subdir_ratio_linearx2 + "{YYYY[0]}{MMDD[0]}" + prod_ident_ratio_linearx2
        ]

        # Ancillary input: the monthly average ('1monavg') for the same MMDD.
        # NOTE(review): this branch stores the pattern in self.ancillary_input
        # (no suffix), whereas the 10d branch uses self.ancillary_input_1 —
        # verify the downstream task reads the matching attribute.
        ancillary_sprod = "1monavg"
        ancillary_sprod_ident = functions.set_path_filename_no_date(
            self.prod, ancillary_sprod, self.mapset, self.version, self.ext)
        ancillary_subdir = functions.set_path_sub_directory(
            self.prod, ancillary_sprod, 'Derived', self.version, self.mapset)
        self.ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident
def create_pipeline(starting_sprod):
    """Build the ruffus pipeline computing monthly Primary Productivity.

    The monthly PP is derived from four MODIS monthly composites:
    chl-a, SST, kd490 and PAR. kd490 acts as the 'driver' dataset for
    ruffus matching; the other three are attached as ancillary inputs.

    NOTE(review): relies on module-level names (prod, mapset, version, ext,
    activate_pp_1mon_comput, es_constants, querydb, functions,
    raster_image_math) defined outside this function — confirm at call site.

    :param starting_sprod: sub-product code shared by all four input products.
    """
    # ---------------------------------------------------------------------
    # Define input files: Chla is the 'driver', sst,kd and par 'ancillary inputs'

    chla_prod = "modis-chla"
    chla_prod_ident = functions.set_path_filename_no_date(
        chla_prod, starting_sprod, mapset, version, ext)
    chla_input_dir = es_constants.processing_dir + \
        functions.set_path_sub_directory(chla_prod, starting_sprod, 'Derived', version, mapset)
    #chla_files = chla_input_dir+"2014*"+chla_prod_ident

    # ---------------------------------------------------------------------
    sst_prod = "modis-sst"
    sst_prod_ident = functions.set_path_filename_no_date(
        sst_prod, starting_sprod, mapset, version, ext)
    sst_input_dir = es_constants.processing_dir + \
        functions.set_path_sub_directory(sst_prod, starting_sprod, 'Derived', version, mapset)

    # ---------------------------------------------------------------------
    kd_prod = "modis-kd490"
    kd_prod_ident = functions.set_path_filename_no_date(
        kd_prod, starting_sprod, mapset, version, ext)
    kd_input_dir = es_constants.processing_dir + \
        functions.set_path_sub_directory(kd_prod, starting_sprod, 'Derived', version, mapset)
    # kd490 is the driver: its files seed the @transform below.
    kd_files = kd_input_dir + "*" + kd_prod_ident

    # ---------------------------------------------------------------------
    par_prod = "modis-par"
    par_prod_ident = functions.set_path_filename_no_date(
        par_prod, starting_sprod, mapset, version, ext)
    par_input_dir = es_constants.processing_dir + \
        functions.set_path_sub_directory(par_prod, starting_sprod, 'Derived', version, mapset)

    # Read input product nodata (one DB query per product)
    chla_prod_info = querydb.get_product_out_info(productcode=chla_prod,
                                                  subproductcode="chla-day",
                                                  version=version)
    chla_product_info = functions.list_to_element(chla_prod_info)
    chla_nodata = chla_product_info.nodata

    sst_prod_info = querydb.get_product_out_info(productcode=sst_prod,
                                                 subproductcode="sst-day",
                                                 version=version)
    sst_product_info = functions.list_to_element(sst_prod_info)
    sst_nodata = sst_product_info.nodata

    kd_prod_info = querydb.get_product_out_info(productcode=kd_prod,
                                                subproductcode="kd490-day",
                                                version=version)
    kd_product_info = functions.list_to_element(kd_prod_info)
    kd_nodata = kd_product_info.nodata

    par_prod_info = querydb.get_product_out_info(productcode=par_prod,
                                                 subproductcode="par-day",
                                                 version=version)
    par_product_info = functions.list_to_element(par_prod_info)
    par_nodata = par_product_info.nodata

    # ---------------------------------------------------------------------
    # Monthly Primary Productivity from chl-a, sst, kd490 and par monthly data
    output_sprod = "1mon"
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                    'Derived', version,
                                                    mapset)

    # Starting files monthly composites: capture YYYYMM from the kd filename
    # and reuse it to locate the matching ancillary composites.
    formatter_kd = "(?P<YYYYMM>[0-9]{6})" + kd_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" + out_prod_ident

    ancillary_sst = sst_input_dir + "{YYYYMM[0]}" + sst_prod_ident
    ancillary_par = par_input_dir + "{YYYYMM[0]}" + par_prod_ident
    ancillary_chla = chla_input_dir + "{YYYYMM[0]}" + chla_prod_ident

    @active_if(activate_pp_1mon_comput)
    @transform(kd_files, formatter(formatter_kd),
               add_inputs(ancillary_chla, ancillary_par, ancillary_sst),
               formatter_out)
    def modis_pp_1mon(input_file, output_file):
        # input_file order (from add_inputs): [kd, chla, par, sst]
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # NOTE(review): chla_nodata is computed above but never passed to
        # do_compute_primary_production — confirm whether the helper expects
        # a "chla_nodata" argument or derives it internally.
        args = {"chla_file": input_file[1], "sst_file": input_file[3],
                "kd_file": input_file[0], "par_file": input_file[2],
                "sst_nodata": sst_nodata, "kd_nodata": kd_nodata,
                "par_nodata": par_nodata, "output_file": output_file,
                "output_nodata": -9999, "output_format": 'GTIFF',
                "output_type": None, "options": "compress=lzw"}
        raster_image_math.do_compute_primary_production(**args)
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None):
    """Build the ruffus pipeline of cumulated rainfall products.

    Derived products registered/computed: 10d, 1mon, 3mon, 6mon and 1year
    cumulates, each produced by raster_image_math.do_cumulate over the daily
    input files, with a per-period tolerance for missing days.

    :param prod:            product code (e.g. the rainfall product).
    :param starting_sprod:  input sub-product code (daily ingested files).
    :param mapset:          mapset code of the input/output files.
    :param version:         product version.
    :param starting_dates:  optional explicit list of dates to process;
                            when None all files in the input dir are used.
    :param proc_lists:      optional ProcLists accumulator; created if None.
    :return: the (possibly newly created) ProcLists with all sub-products.
    """
    # ---------------------------------------------------------------------
    # Create lists to store definition of the derived products, and their
    # groups
    # ---------------------------------------------------------------------
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # ---------------------------------------------------------------------
    # Define and assign the flags to control the individual derived products
    # and the groups. NOT to be changed by the User
    # ---------------------------------------------------------------------
    # Set DEFAULTS: all off
    activate_10dcumul_comput = 1   # 10d cumul
    activate_1moncum_comput = 1    # 1mon cumul
    activate_3moncum_comput = 1    # 3mon cumul
    activate_6moncum_comput = 1    # 6mon cumul
    activate_1yearcum_comput = 1   # 1year cumul

    # Conversion scale factor (from 0.01 of daily to 1.0 of all other products)
    scale_factor_conv = 0.01

    # switch wrt groups - according to options
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files from the starting_sprod and starting_dates arguments
    # ---------------------------------------------------------------------
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    #logger.debug('Base data directory is: %s' % es2_data_dir)
    input_dir = es2_data_dir + \
        functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

    if starting_dates is not None:
        # Keep only the dates for which the ingested file actually exists.
        starting_files = []
        for my_date in starting_dates:
            if os.path.isfile(input_dir + my_date + in_prod_ident):
                starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        # Wildcard: all ingested files for this sub-product.
        starting_files = input_dir + "*" + in_prod_ident

    # Look for all input files in input_dir, and sort them
    if starting_dates is not None:
        input_files = starting_files
    else:
        input_files = glob.glob(starting_files)

    # ---------------------------------------------------------------------
    # Derived product: 10dcumul
    # ---------------------------------------------------------------------
    output_sprod_group = proc_lists.proc_add_subprod_group("cumul")
    output_sprod = proc_lists.proc_add_subprod(
        "10d",
        "cumul",
        final=False,
        descriptive_name='10d Precipitation',
        description='Precipitation for dekad',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='Initial',
        active_default=True)

    out_prod_ident_10dcount = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_10dcount = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_10dcumul():
        # Yields (file_list, output_file) pairs, one per complete dekad.
        dekad_list = []
        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)
        dekad_list = sorted(dekad_list)

        # Compute the 'julian' dakad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        for dekad in dekad_list:
            # Exclude the current dekad (still incomplete)
            if dekad != dekad_now:
                file_list = []
                my_dekad_str = functions.conv_dekad_2_date(dekad)
                expected_days = functions.day_per_dekad(my_dekad_str)
                for input_file in input_files:
                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    mydekad_nbr = functions.conv_date_2_dekad(
                        mydate_yyyymmdd[0:8])
                    if mydekad_nbr == dekad:
                        file_list.append(input_file)
                # NOTE(review): output path uses es_constants.processing_dir
                # while inputs use es2globals['processing_dir'] — confirm the
                # two are the same location.
                output_file = es_constants.processing_dir + output_subdir_10dcount + os.path.sep + my_dekad_str + out_prod_ident_10dcount
                # Tolerate at most 1 missing day in the dekad.
                if len(file_list) >= expected_days - 1:
                    yield (file_list, output_file)
                else:
                    # NOTE(review): 'filed' is presumably a typo for 'files'
                    # (left unchanged here: runtime string).
                    print('Too many missing filed for dekad {0}'.format(
                        my_dekad_str))

    @active_if(activate_10dcumul_comput)
    @files(generate_parameters_10dcumul)
    def std_precip_10dcumul(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file,
                "output_format": 'GTIFF', "options": "compress=lzw",
                "scale_factor": scale_factor_conv}
        raster_image_math.do_cumulate(**args)

    # ---------------------------------------------------------------------
    # Derived product: 1moncum
    # ---------------------------------------------------------------------
    output_sprod = proc_lists.proc_add_subprod(
        "1mon",
        "cumul",
        final=False,
        descriptive_name='Monthly Precipitation',
        description='Precipitation for a month',
        frequency_id='e1month',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='Initial',
        active_default=True)

    out_prod_ident_1moncum = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_1moncum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_1moncum():
        # Yields (file_list, output_file) pairs, one per complete month.
        month_list = []
        # Create unique list of all months (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mymonth_yyyymm = str(mydate)[0:6]
            if mymonth_yyyymm not in month_list:
                month_list.append(mymonth_yyyymm)
        month_list = sorted(month_list)

        # Compute the 'julian' dakad for the current day
        today = datetime.date.today()
        today_mon = today.strftime('%Y%m')

        for month in month_list:
            # Exclude the current month (still incomplete)
            if month != today_mon:
                file_list = []
                # Days in month = 20 (first two dekads) + days of last dekad.
                exp_days_last_dk = functions.day_per_dekad(month + '21')
                expected_days = int(exp_days_last_dk) + 20
                for input_file in input_files:
                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    mydate_yyyymm = mydate_yyyymmdd[0:6]
                    if mydate_yyyymm == month:
                        file_list.append(input_file)
                output_file = es_constants.processing_dir + output_subdir_1moncum + os.path.sep + month + '01' + out_prod_ident_1moncum
                # Tolerate at most 3 missing days in the month.
                if len(file_list) >= expected_days - 3:
                    yield (file_list, output_file)
                else:
                    print('Too many missing filed for month {0}'.format(month))

    @active_if(activate_1moncum_comput)
    @files(generate_parameters_1moncum)
    def std_precip_1moncum(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file,
                "output_format": 'GTIFF', "options": "compress=lzw",
                "scale_factor": scale_factor_conv}
        raster_image_math.do_cumulate(**args)

    # ---------------------------------------------------------------------
    # Derived product: 3moncum
    # ---------------------------------------------------------------------
    output_sprod = proc_lists.proc_add_subprod(
        "3mon",
        "cumul",
        final=False,
        descriptive_name='3 Months Precipitation',
        description='Precipitation for 3 months',
        frequency_id='e1month',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='Initial',
        active_default=True)

    out_prod_ident_3moncum = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_3moncum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_3moncum():
        # Rolling 3-month window ending at each complete month.
        n_mon = 3
        max_missing = 9
        month_list = []
        # Create unique list of all months (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mymonth_yyyymm = str(mydate)[0:6]
            if mymonth_yyyymm not in month_list:
                month_list.append(mymonth_yyyymm)
        month_list = sorted(month_list)

        # Compute the current month
        today = datetime.date.today()
        today_mon = today.strftime('%Y%m')

        for month in month_list:
            # Exclude the current month (still incomplete)
            if month != today_mon:
                file_list = []
                # Compute first - last date for current interval
                first_day_this_month = datetime.date(int(month[0:4]),
                                                     int(month[4:6]), 1)
                first_day_next_month = first_day_this_month + relativedelta(
                    months=+1)
                first_day_2_month_before = first_day_this_month + relativedelta(
                    months=-n_mon + 1)
                delta_3mon = first_day_next_month - first_day_2_month_before
                expected_days = delta_3mon.days

                for input_file in input_files:
                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    mydate = datetime.date(int(mydate_yyyymmdd[0:4]),
                                           int(mydate_yyyymmdd[4:6]),
                                           int(mydate_yyyymmdd[6:8]))
                    if first_day_2_month_before <= mydate < first_day_next_month:
                        file_list.append(input_file)
                if len(file_list) >= expected_days - max_missing:
                    output_file = es_constants.processing_dir + output_subdir_3moncum + os.path.sep + month + '01' + out_prod_ident_3moncum
                    yield (file_list, output_file)
                else:
                    print(
                        'Too many missing filed for 3moncum, period until: {0}'
                        .format(month))

    @active_if(activate_3moncum_comput)
    @files(generate_parameters_3moncum)
    def std_precip_3moncum(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file,
                "output_format": 'GTIFF', "options": "compress=lzw",
                "scale_factor": scale_factor_conv}
        raster_image_math.do_cumulate(**args)

    # ---------------------------------------------------------------------
    # Derived product: 6moncum
    # ---------------------------------------------------------------------
    output_sprod = proc_lists.proc_add_subprod(
        "6mon",
        "cumul",
        final=False,
        descriptive_name='6 Months Precipitation',
        description='Precipitation for 6 months',
        frequency_id='e1month',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='Initial',
        active_default=True)

    out_prod_ident_6moncum = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_6moncum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_6moncum():
        # Rolling 6-month window ending at each complete month.
        n_mon = 6
        max_missing = 18
        month_list = []
        # Create unique list of all months (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mymonth_yyyymm = str(mydate)[0:6]
            if mymonth_yyyymm not in month_list:
                month_list.append(mymonth_yyyymm)
        month_list = sorted(month_list)

        # Compute the current month
        today = datetime.date.today()
        today_mon = today.strftime('%Y%m')

        for month in month_list:
            # Exclude the current month (still incomplete)
            if month != today_mon:
                file_list = []
                # Compute first - last date for current interval
                first_day_this_month = datetime.date(int(month[0:4]),
                                                     int(month[4:6]), 1)
                first_day_next_month = first_day_this_month + relativedelta(
                    months=+1)
                first_day_2_month_before = first_day_this_month + relativedelta(
                    months=-n_mon + 1)
                delta_3mon = first_day_next_month - first_day_2_month_before
                expected_days = delta_3mon.days

                for input_file in input_files:
                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    mydate = datetime.date(int(mydate_yyyymmdd[0:4]),
                                           int(mydate_yyyymmdd[4:6]),
                                           int(mydate_yyyymmdd[6:8]))
                    if first_day_2_month_before <= mydate < first_day_next_month:
                        file_list.append(input_file)
                if len(file_list) >= expected_days - max_missing:
                    output_file = es_constants.processing_dir + output_subdir_6moncum + os.path.sep + month + '01' + out_prod_ident_6moncum
                    yield (file_list, output_file)
                else:
                    print(
                        'Too many missing filed for 6moncum, period until: {0}'
                        .format(month))

    @active_if(activate_6moncum_comput)
    @files(generate_parameters_6moncum)
    def std_precip_6moncum(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file,
                "output_format": 'GTIFF', "options": "compress=lzw",
                "scale_factor": scale_factor_conv}
        raster_image_math.do_cumulate(**args)

    # ---------------------------------------------------------------------
    # Derived product: 1yearcum
    # ---------------------------------------------------------------------
    output_sprod = proc_lists.proc_add_subprod(
        "1year",
        "cumul",
        final=False,
        descriptive_name='Yearly Precipitation',
        description='Precipitation for 1 year',
        frequency_id='e1month',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='Initial',
        active_default=True)

    out_prod_ident_1yearcum = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_1yearcum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_1yearcum():
        # Rolling 12-month window ending at each complete month.
        n_mon = 12
        max_missing = 35
        month_list = []
        # Create unique list of all months (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mymonth_yyyymm = str(mydate)[0:6]
            if mymonth_yyyymm not in month_list:
                month_list.append(mymonth_yyyymm)
        month_list = sorted(month_list)

        # Compute the current month
        today = datetime.date.today()
        today_mon = today.strftime('%Y%m')

        for month in month_list:
            # Exclude the current month (still incomplete)
            if month != today_mon:
                file_list = []
                # Compute first - last date for current interval
                first_day_this_month = datetime.date(int(month[0:4]),
                                                     int(month[4:6]), 1)
                first_day_next_month = first_day_this_month + relativedelta(
                    months=+1)
                first_day_2_month_before = first_day_this_month + relativedelta(
                    months=-n_mon + 1)
                delta_3mon = first_day_next_month - first_day_2_month_before
                expected_days = delta_3mon.days

                for input_file in input_files:
                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    mydate = datetime.date(int(mydate_yyyymmdd[0:4]),
                                           int(mydate_yyyymmdd[4:6]),
                                           int(mydate_yyyymmdd[6:8]))
                    if first_day_2_month_before <= mydate < first_day_next_month:
                        file_list.append(input_file)
                if len(file_list) >= expected_days - max_missing:
                    output_file = es_constants.processing_dir + output_subdir_1yearcum + os.path.sep + month + '01' + out_prod_ident_1yearcum
                    yield (file_list, output_file)
                else:
                    print(
                        'Too many missing filed for 1yearcum, period until: {0}'
                        .format(month))

    @active_if(activate_1yearcum_comput)
    @files(generate_parameters_1yearcum)
    def std_precip_1yearcum(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file,
                "output_format": 'GTIFF', "options": "compress=lzw",
                "scale_factor": scale_factor_conv}
        raster_image_math.do_cumulate(**args)

    # End of pipeline definition
    return proc_lists
def processing_merge(pipeline_run_level=0, pipeline_printout_level=0,
                     input_products='', output_product='', mapset=''):
    """Merge several input products into one output product via symlinks.

    For every input product, lists its dataset dates and creates a symbolic
    link from each input file into the output product's 'Ingest' directory,
    so the merged product exposes all inputs under a single identifier.

    :param pipeline_run_level:      unused here (kept for API compatibility).
    :param pipeline_printout_level: unused here (kept for API compatibility).
    :param input_products:  iterable of objects with productcode,
                            subproductcode, version, start_date, end_date.
    :param output_product:  one-element sequence describing the target
                            product (productcode, subproductcode, version,
                            mapsetcode).
    :param mapset:          unused here (the output product's mapset is used).
    """
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # Do some checks on the integrity of the inputs

    # Manage output_product data
    out_product_code = output_product[0].productcode
    out_sub_product_code = output_product[0].subproductcode
    out_version = output_product[0].version
    out_mapset = output_product[0].mapsetcode

    out_subdir = functions.set_path_sub_directory(out_product_code,
                                                  out_sub_product_code,
                                                  'Ingest', out_version,
                                                  out_mapset)
    out_prod_ident = functions.set_path_filename_no_date(
        out_product_code, out_sub_product_code, out_mapset, out_version, ext)
    out_dir = es2_data_dir + out_subdir

    # Check the output product directory
    functions.check_output_dir(out_dir)

    # Loop over the input products
    # (loop variable renamed from 'input' to avoid shadowing the builtin)
    for input_product in input_products:

        # Extract info from input product
        product_code = input_product.productcode
        sub_product_code = input_product.subproductcode
        version = input_product.version
        start_date = input_product.start_date
        end_date = input_product.end_date
        product_info = querydb.get_product_out_info(
            productcode=product_code,
            subproductcode=sub_product_code,
            version=version)
        prod_type = product_info[0].product_type

        in_subdir = functions.set_path_sub_directory(product_code,
                                                     sub_product_code,
                                                     prod_type, version,
                                                     out_mapset)
        # NOTE(review): the input ident is built from the OUTPUT product
        # codes (matches the sibling implementation earlier in this file) —
        # confirm this is intended so the links carry the output naming.
        in_prod_ident = functions.set_path_filename_no_date(
            out_product_code, out_sub_product_code, out_mapset, version, ext)

        # Create the list of dates -> returns empty if start==end==None
        list_dates = proc_functions.get_list_dates_for_dataset(
            product_code,
            sub_product_code,
            version,
            start_date=start_date,
            end_date=end_date)

        # If list_dates == None, look at all existing files
        if list_dates is None:
            print('To be Done !!!')
        # Otherwise, build list of files from list of dates
        else:
            for my_date in list_dates:
                in_file_path = es2_data_dir + in_subdir + my_date + in_prod_ident
                out_file_path = out_dir + my_date + out_prod_ident

                # Create the link
                print(in_file_path)
                print(out_file_path)
                functions.create_sym_link(in_file_path, out_file_path,
                                          force=False)
def setFileDir(dataRoot, product, subproduct, version, mapset, productType): subdir = functions.set_path_sub_directory(product, subproduct, productType, version, mapset) fileDir = '{0}/{1}'.format(dataRoot, subdir) return fileDir
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, list_subprods=None, update_stats=False, nrt_products=True): # --------------------------------------------------------------------- # Create lists proc_lists = functions.ProcLists() if list_subprods is not None: list_subprods = proc_lists.list_subprods list_subprod_groups = proc_lists.list_subprod_groups # Set DEFAULTS: all off activate_10danomalies_comput = 0 # 10d anomalies activate_monthly_comput = 0 # monthly cumulation activate_monanomalies_comput = 0 # monthly anomalies activate_10dstats_comput = 0 # 10d stats activate_monstats_comput = 0 # 1mon stats # switch wrt groups - according to options if nrt_products: activate_10danomalies_comput = 1 # 10d anomalies activate_monthly_comput = 1 # monthly cumulation activate_monanomalies_comput = 1 # monthly anomalies if update_stats: activate_10dstats_comput = 1 # 10d stats activate_monstats_comput = 1 # 1mon stats # switch wrt single products: not to be changed !! 
activate_10davg_comput = 1 activate_10dmin_comput = 1 activate_10dmax_comput = 1 activate_10ddiff_comput = 1 activate_10dperc_comput = 1 activate_10dnp_comput = 1 activate_1moncum_comput = 1 activate_1monavg_comput = 1 activate_1monmin_comput = 1 activate_1monmax_comput = 1 activate_1mondiff_comput = 1 activate_1monperc_comput = 1 activate_1monnp_comput = 1 es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep # --------------------------------------------------------------------- # Define input files in_prod_ident = functions.set_path_filename_no_date( prod, starting_sprod, mapset, version, ext) logger.debug('Base data directory is: %s' % es2_data_dir) input_dir = es2_data_dir+ \ functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset) logger.debug('Input data directory is: %s' % input_dir) if starting_dates is not None: starting_files = [] for my_date in starting_dates: starting_files.append(input_dir + my_date + in_prod_ident) else: starting_files = input_dir + "*" + in_prod_ident logger.debug('Starting files wild card is: %s' % starting_files) # --------------------------------------------------------------------- # Average output_sprod_group = proc_lists.proc_add_subprod_group("10dstats") output_sprod = proc_lists.proc_add_subprod("10davg", "10dstats", False, True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident formatter_out = [ "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident ] @active_if(activate_10dstats_comput, activate_10davg_comput) @collate(starting_files, formatter(formatter_in), formatter_out) def std_precip_10davg(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": 
input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw" } raster_image_math.do_avg_image(**args) # --------------------------------------------------------------------- # Minimum output_sprod = proc_lists.proc_add_subprod("10dmin", "10dstats", False, True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident formatter_out = [ "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident ] @active_if(activate_10dstats_comput, activate_10dmin_comput) @collate(starting_files, formatter(formatter_in), formatter_out) def std_precip_10dmin(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw" } raster_image_math.do_min_image(**args) # --------------------------------------------------------------------- # Maximum output_sprod = proc_lists.proc_add_subprod("10dmax", "10dstats", False, True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident formatter_out = [ "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident ] @active_if(activate_10dstats_comput, activate_10dmax_comput) @collate(starting_files, formatter(formatter_in), formatter_out) def std_precip_10dmax(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw" } 
# NOTE(review): this span is the tail of a larger pipeline-builder function;
# prod, mapset, version, ext, proc_lists, starting_files, in_prod_ident and
# the activate_* switches are all defined earlier, outside this view.
raster_image_math.do_max_image(**args)

# ---------------------------------------------------------------------
# 10dDiff: dekad value minus the multi-year dekad average (absolute anomaly)
output_sprod_group = proc_lists.proc_add_subprod_group("10anomalies")
output_sprod = proc_lists.proc_add_subprod("10ddiff", "10anomalies", False, True)
out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

# Starting files + avg
formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

ancillary_sprod = "10davg"
ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version, mapset)
# Ancillary match is on MMDD only: one climatological average file per dekad.
ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

@follows(std_precip_10davg)
@active_if(activate_10danomalies_comput, activate_10ddiff_comput)
@transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
def std_precip_10ddiff(input_file, output_file):
    # input_file is [current dekad file, 10davg file] because of add_inputs;
    # presumably do_oper_subtraction accepts the 2-element list -- TODO confirm.
    output_file = functions.list_to_element(output_file)
    functions.check_output_dir(os.path.dirname(output_file))
    args = {"input_file": input_file, "output_file": output_file,
            "output_format": 'GTIFF', "options": "compress=lzw"}
    raster_image_math.do_oper_subtraction(**args)

# ---------------------------------------------------------------------
# 10dperc: percent difference of the dekad vs. the multi-year dekad average
output_sprod = proc_lists.proc_add_subprod("10dperc", "10anomalies", False, True)
out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

# Starting files + avg
formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

ancillary_sprod = "10davg"
ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version, mapset)
ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

@follows(std_precip_10davg)
@active_if(activate_10danomalies_comput, activate_10dperc_comput)
@transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
def std_precip_10dperc(input_file, output_file):
    # Here the pair from add_inputs IS split into separate arguments
    # (unlike std_precip_10ddiff above).
    output_file = functions.list_to_element(output_file)
    functions.check_output_dir(os.path.dirname(output_file))
    args = {"input_file": input_file[0], "avg_file": input_file[1],
            "output_file": output_file,
            "output_format": 'GTIFF', "options": "compress=lzw"}
    raster_image_math.do_compute_perc_diff_vs_avg(**args)

# ---------------------------------------------------------------------
# 10dnp: normalized position of the dekad between historical min and max
output_sprod = proc_lists.proc_add_subprod("10dnp", "10anomalies", False, True)
out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

# Starting files + min + max
formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

ancillary_sprod_1 = "10dmin"
ancillary_sprod_ident_1 = functions.set_path_filename_no_date(prod, ancillary_sprod_1, mapset, version, ext)
ancillary_subdir_1 = functions.set_path_sub_directory(prod, ancillary_sprod_1, 'Derived', version, mapset)
ancillary_input_1 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_1 + "{MMDD[0]}" + ancillary_sprod_ident_1

ancillary_sprod_2 = "10dmax"
ancillary_sprod_ident_2 = functions.set_path_filename_no_date(prod, ancillary_sprod_2, mapset, version, ext)
ancillary_subdir_2 = functions.set_path_sub_directory(prod, ancillary_sprod_2, 'Derived', version, mapset)
ancillary_input_2 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_2 + "{MMDD[0]}" + ancillary_sprod_ident_2

@follows(std_precip_10dmin, std_precip_10dmax)
@active_if(activate_10danomalies_comput, activate_10dnp_comput)
@transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input_1, ancillary_input_2), formatter_out)
def std_precip_10dnp(input_file, output_file):
    # Computed with the VCI routine: (x - min) / (max - min).
    output_file = functions.list_to_element(output_file)
    functions.check_output_dir(os.path.dirname(output_file))
    args = {"input_file": input_file[0], "min_file": input_file[1],
            "max_file": input_file[2], "output_file": output_file,
            "output_format": 'GTIFF', "options": "compress=lzw"}
    raster_image_math.do_make_vci(**args)

# ---------------------------------------------------------------------
# 1moncum: cumulate the three dekads of each month into one monthly file
output_sprod_group = proc_lists.proc_add_subprod_group("monthly")
output_sprod = proc_lists.proc_add_subprod("1moncum", "monthly", False, True)
out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

# inputs: files from same months; output is stamped on day '01' of the month
formatter_in = "(?P<YYYYMM>[0-9]{6})(?P<DD>[0-9]{2})" + in_prod_ident
formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" + '01' + out_prod_ident

# @follows(std_precip_10davg)
@active_if(activate_monthly_comput, activate_1moncum_comput)
@collate(starting_files, formatter(formatter_in), formatter_out)
def std_precip_1moncum(input_file, output_file):
    output_file = functions.list_to_element(output_file)
    functions.check_output_dir(os.path.dirname(output_file))
    args = {"input_file": input_file, "output_file": output_file,
            "output_format": 'GTIFF', "options": "compress=lzw"}
    raster_image_math.do_cumulate(**args)

# ---------------------------------------------------------------------
# Monthly Average
# NOTE: in_prod_ident is re-bound here -- every task below matches the
# 1moncum outputs, no longer the original starting files.
new_input_subprod = '1moncum'
in_prod_ident = functions.set_path_filename_no_date(prod, new_input_subprod, mapset, version, ext)
output_sprod_group = proc_lists.proc_add_subprod_group("monstat")
output_sprod = proc_lists.proc_add_subprod("1monavg", "monstat", False, True)
out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident]

@follows(std_precip_1moncum)
@active_if(activate_monstats_comput, activate_1monavg_comput)
@collate(std_precip_1moncum, formatter(formatter_in), formatter_out)
def std_precip_1monavg(input_file, output_file):
    output_file = functions.list_to_element(output_file)
    functions.check_output_dir(os.path.dirname(output_file))
    args = {"input_file": input_file, "output_file": output_file,
            "output_format": 'GTIFF', "options": "compress=lzw"}
    raster_image_math.do_avg_image(**args)

# ---------------------------------------------------------------------
# Monthly Minimum
output_sprod = proc_lists.proc_add_subprod("1monmin", "monstat", False, True)
out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident]

@follows(std_precip_1moncum)
@active_if(activate_monstats_comput, activate_1monmin_comput)
@collate(std_precip_1moncum, formatter(formatter_in), formatter_out)
def std_precip_1monmin(input_file, output_file):
    output_file = functions.list_to_element(output_file)
    functions.check_output_dir(os.path.dirname(output_file))
    args = {"input_file": input_file, "output_file": output_file,
            "output_format": 'GTIFF', "options": "compress=lzw"}
    raster_image_math.do_min_image(**args)

# ---------------------------------------------------------------------
# Monthly Maximum
output_sprod = proc_lists.proc_add_subprod("1monmax", "monstat", False, True)
out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

# NOTE(review): reg_ex_in is never used -- candidate for removal.
reg_ex_in = "[0-9]{4}([0-9]{4})" + in_prod_ident
formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident]

@follows(std_precip_1moncum)
@active_if(activate_monstats_comput, activate_1monmax_comput)
@collate(std_precip_1moncum, formatter(formatter_in), formatter_out)
def std_precip_1monmax(input_file, output_file):
    output_file = functions.list_to_element(output_file)
    functions.check_output_dir(os.path.dirname(output_file))
    args = {"input_file": input_file, "output_file": output_file,
            "output_format": 'GTIFF', "options": "compress=lzw"}
    raster_image_math.do_max_image(**args)

# ---------------------------------------------------------------------
# 1monDiff: monthly cumulate minus the multi-year monthly average
output_sprod_group = proc_lists.proc_add_subprod_group("monanomalies")
output_sprod = proc_lists.proc_add_subprod("1mondiff", "monanomalies", False, True)
out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

# inputs
# Starting files + avg
formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

ancillary_sprod = "1monavg"
ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version, mapset)
ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

@follows(std_precip_1monavg)
@active_if(activate_monanomalies_comput, activate_1mondiff_comput)
@transform(std_precip_1moncum, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
def std_precip_1mondiff(input_file, output_file):
    # As in std_precip_10ddiff, the [t0, avg] pair is passed whole as
    # "input_file" -- presumably do_oper_subtraction handles it; TODO confirm.
    output_file = functions.list_to_element(output_file)
    functions.check_output_dir(os.path.dirname(output_file))
    args = {"input_file": input_file, "output_file": output_file,
            "output_format": 'GTIFF', "options": "compress=lzw"}
    raster_image_math.do_oper_subtraction(**args)

# ---------------------------------------------------------------------
# 1monperc: monthly percent difference vs. the multi-year monthly average
output_sprod = proc_lists.proc_add_subprod("1monperc", "monanomalies", False, True)
out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

# inputs
# Starting files + avg
formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

ancillary_sprod = "1monavg"
ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version, mapset)
ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

@follows(std_precip_1monavg)
@active_if(activate_monanomalies_comput, activate_1monperc_comput)
@transform(std_precip_1moncum, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
def std_precip_1monperc(input_file, output_file):
    output_file = functions.list_to_element(output_file)
    functions.check_output_dir(os.path.dirname(output_file))
    args = {"input_file": input_file[0], "avg_file": input_file[1],
            "output_file": output_file,
            "output_format": 'GTIFF', "options": "compress=lzw"}
    raster_image_math.do_compute_perc_diff_vs_avg(**args)

# ---------------------------------------------------------------------
# 1monnp: normalized position of the month between historical min and max
output_sprod = proc_lists.proc_add_subprod("1monnp", "monanomalies", False, True)
out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

# Starting files + min + max
formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

ancillary_sprod_1 = "1monmin"
ancillary_sprod_ident_1 = functions.set_path_filename_no_date(prod, ancillary_sprod_1, mapset, version, ext)
ancillary_subdir_1 = functions.set_path_sub_directory(prod, ancillary_sprod_1, 'Derived', version, mapset)
ancillary_input_1 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_1 + "{MMDD[0]}" + ancillary_sprod_ident_1

ancillary_sprod_2 = "1monmax"
ancillary_sprod_ident_2 = functions.set_path_filename_no_date(prod, ancillary_sprod_2, mapset, version, ext)
ancillary_subdir_2 = functions.set_path_sub_directory(prod, ancillary_sprod_2, 'Derived', version, mapset)
ancillary_input_2 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_2 + "{MMDD[0]}" + ancillary_sprod_ident_2

@follows(std_precip_1monmin, std_precip_1monmax)
@active_if(activate_monanomalies_comput, activate_1monnp_comput)
@transform(std_precip_1moncum, formatter(formatter_in), add_inputs(ancillary_input_1, ancillary_input_2), formatter_out)
def std_precip_1monnp(input_file, output_file):
    output_file = functions.list_to_element(output_file)
    functions.check_output_dir(os.path.dirname(output_file))
    args = {"input_file": input_file[0], "min_file": input_file[1],
            "max_file": input_file[2], "output_file": output_file,
            "output_format": 'GTIFF', "options": "compress=lzw"}
    raster_image_math.do_make_vci(**args)
def create_pipeline(starting_sprod):
    """
    Build the ruffus tasks deriving monthly products from *starting_sprod*:
    monavg (monthly average), monclim (per-month climatology over all years)
    and monanom (month minus its climatology).

    Relies on module-level context: prod, mapset, version, ext, the
    activate_* switches, and the functions/querydb/es_constants helpers.
    """
    # ---------------------------------------------------------------------
    # Define input files
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)
    input_dir = es_constants.processing_dir + \
        functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)
    starting_files = input_dir + "*" + in_prod_ident

    # Read input product nodata
    in_prod_info = querydb.get_product_out_info(productcode=prod,
                                                subproductcode=starting_sprod,
                                                version=version)
    product_info = functions.list_to_element(in_prod_info)
    in_nodata = product_info.nodata
    # (removed a leftover debug `print in_nodata` -- library code should not
    #  write to stdout; use a logger if tracing is needed)

    # ---------------------------------------------------------------------
    # Monthly Average for a given month
    output_sprod = "monavg"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident
    formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" + out_prod_ident]

    @active_if(activate_monavg_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def modis_kd_monavg(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', \
                "options": "compress=lzw", "input_nodata": in_nodata}
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Monthly Climatology for all years
    new_input_subprod = 'monavg'
    new_in_prod_ident = functions.set_path_filename_no_date(prod, new_input_subprod, mapset, version, ext)
    new_input_dir = es_constants.processing_dir + \
        functions.set_path_sub_directory(prod, new_input_subprod, 'Derived', version, mapset)
    new_starting_files = new_input_dir + "*" + new_in_prod_ident

    output_sprod = "monclim"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Group all years by month (MM) of the monavg outputs
    formatter_in = "[0-9]{4}(?P<MM>[0-9]{2})" + new_in_prod_ident
    formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir + "{MM[0]}" + out_prod_ident]

    @active_if(activate_monclim_comput)
    @collate(new_starting_files, formatter(formatter_in), formatter_out)
    def modis_kd_monclim(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # NOTE(review): input_nodata here is the nodata of the ORIGINAL
        # product, applied to derived monavg files -- confirm they share it.
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', \
                "options": "compress=lzw", "input_nodata": in_nodata}
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Monthly Anomaly for a given monthly
    output_sprod = "monanom"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MM>[0-9]{2})" + new_in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MM[0]}" + out_prod_ident

    ancillary_sprod = "monclim"
    ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version, mapset)
    # Climatology matched on MM only: one file per calendar month
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MM[0]}" + ancillary_sprod_ident

    @active_if(activate_monanom_comput)
    @transform(new_starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    def modis_kd_mondiff(input_file, output_file):
        # input_file is [monavg file, monclim file] (add_inputs)
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file,
                "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_oper_subtraction(**args)
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None):
    """
    Build the ruffus pipeline computing the rain-onset product from the
    10-day (dekad) rainfall sub-product *starting_sprod*.

    starting_dates : optional list of date strings restricting the inputs;
                     otherwise all matching files on disk are used.
    proc_lists     : optional ProcLists accumulator (created if None).
    """
    # Definitions: season boundaries expressed as MMDD dekad stamps.
    # The season wraps over the year end (0901 -> 0421).
    start_season = '0901'
    second_dekad = '0911'
    end_season = '0421'

    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files (10d)
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)
    input_dir = es2_data_dir + functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = glob.glob(input_dir + "*" + in_prod_ident)

    # ---------------------------------------------------------------------
    # Define output files (onset)
    output_sprod = proc_lists.proc_add_subprod("rain-onset", "none", final=False,
                                               descriptive_name='Rain Onset',
                                               description='Rainfall Start of the season',
                                               frequency_id='e1dekad',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    prod_ident_onset = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    subdir_onset = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_onset():
        """
        Generator yielding (input_files, output_file) tuples for @files.
        Two cases:
          do_proc == 1 -> second dekad of the season: onset is (re)initialized
                          from [t0, t0-1] only.
          do_proc == 2 -> any later in-season dekad: needs [t0, t0-1, t0-2]
                          plus the previous onset output.
        """
        starting_files.sort()
        for file_t0 in starting_files:
            # Get current date
            date_t0 = functions.get_date_from_path_full(file_t0)
            # Check if we are in the seasonal range [start < current <= end]
            dekad_t0 = int(date_t0[4:])
            dekad_start = int(start_season)
            dekad_second = int(second_dekad)
            dekad_end = int(end_season)
            # Initialize processing to 0
            do_proc = 0
            in_season = False
            # Check we are within the season -> do_proc
            # (second branch handles a season wrapping over the year end)
            if dekad_start < dekad_end:
                if dekad_t0 > dekad_start and dekad_t0 <= dekad_end:
                    in_season = True
            else:
                if dekad_t0 > dekad_start or dekad_t0 <= dekad_end:
                    in_season = True

            if in_season and (dekad_t0 == dekad_second):
                do_proc = 1
            if in_season and (dekad_t0 != dekad_second):
                do_proc = 2

            if do_proc:
                output_file = es2_data_dir + subdir_onset + str(date_t0) + prod_ident_onset
                # Get files at t-1 and t-2 (if they exist)
                previous_files = functions.previous_files(file_t0)
                # Check if at least one previous file has been identified
                if do_proc == 1:
                    # Check at least 1 previous file exist
                    if len(previous_files) < 1:
                        print('Error Case 1: no any previous file')
                    else:
                        # Pass two arguments (t0 and t0-1)
                        input_files = [file_t0, previous_files[0]]
                        yield (input_files, output_file)
                elif do_proc == 2:
                    error = False
                    # Check 2 previous files exist
                    if len(previous_files) < 2:
                        print('Error Case 2: a previous file is missing')
                        error = True
                    # Look for previous output
                    previous_outputs = functions.previous_files(output_file)
                    if len(previous_outputs) < 1:
                        print('Error Case 2: the previous output is missing')
                        error = True
                    # Pass four arguments (t0, t0-1, t0-2 and output-1)
                    if not error:
                        previous_output = previous_outputs[0]
                        if os.path.isfile(previous_output):
                            input_files = [file_t0, previous_files[0], previous_files[1], previous_output]
                            yield (input_files, output_file)

    @active_if(activate_onset_comput)
    @files(generate_parameters_onset)
    def rain_onset(input_files, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # Need to define the current_dekad number, wrt begin of season
        current_date = functions.get_date_from_path_full(output_file)
        current_dekad = current_date[4:]
        dekad_number = functions.dekad_nbr_in_season(current_dekad, start_season)
        # Call the function
        # NOTE(review): "compress = lzw" (with spaces) differs from the
        # "compress=lzw" used by sibling pipelines -- confirm the consumer
        # of 'options' tolerates the spaces.
        args = {"input_file": input_files, "output_file": output_file,
                'input_nodata': None, 'output_nodata': None,
                'output_type': 'Int16', "output_format": 'GTIFF',
                "options": "compress = lzw", 'current_dekad': dekad_number}
        raster_image_math.do_rain_onset(**args)
# for Group 3.d (monthly_anomalies) -> TB Done activate_1monsndvi = 1 activate_1monandvi = 1 activate_1monvci = 1 activate_1monicn = 1 # --------------------------------------------------------------------- # Define input files (NDV) starting_sprod = 'ndv' in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, ext) logger.debug('Base data directory is: %s' % es_constants.processing_dir) input_dir = es_constants.processing_dir+ \ functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset) logger.debug('Input data directory is: %s' % input_dir) starting_files = input_dir + "*" + in_prod_ident logger.debug('Starting files wild card is: %s' % starting_files) # --------------------------------------------------------------------- # 1.a 10Day non-filtered Stats # --------------------------------------------------------------------- # --------------------------------------------------------------------- # NDV avg x dekad (i.e. avg_dekad) output_sprod = "10davg" prod_ident_10davg = functions.set_path_filename_no_date( prod, output_sprod, mapset, ext) subdir_10davg = functions.set_path_sub_directory(prod, output_sprod, 'Derived',
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None):
    """
    Build the ruffus pipeline computing the chl-a gradient raster product.

    Parameters mirror the sibling pipeline builders; returns the (possibly
    newly created) ProcLists accumulator.

    NOTE(review): starting_dates is accepted for interface compatibility but
    is currently IGNORED -- the original code guarded the date filter with a
    local `my_date = None` that was always falsy. TODO: confirm whether
    per-date filtering (as in the other create_pipeline variants) is wanted.
    """
    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    activate_gradient_computation = 1

    sds_meta = metadata.SdsMetadata()
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files (chla)
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)
    input_dir = es2_data_dir + functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)
    starting_files = input_dir + "*" + in_prod_ident

    # Detection parameters (histogram window size/stride, thresholds, ...)
    # are left at the defaults built into do_compute_chla_gradient.

    # ---------------------------------------------------------------------
    # Chla Gradient (raster)
    output_sprod_group = proc_lists.proc_add_subprod_group("gradient")
    output_sprod = proc_lists.proc_add_subprod("gradient", "gradient", final=False,
                                               descriptive_name='Gradient',
                                               description='Gradient',
                                               frequency_id='',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)

    prod_ident_gradient = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    subdir_gradient = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)
    formatter_in = "(?P<YYYYMMDD>[0-9]{8})" + in_prod_ident
    formatter_out = ["{subpath[0][5]}" + os.path.sep + subdir_gradient + "{YYYYMMDD[0]}" + prod_ident_gradient]

    @active_if(activate_gradient_computation)
    @transform(starting_files, formatter(formatter_in), formatter_out)
    def gradient_computation(input_file, output_file):
        # nodata is read from the input file's own metadata
        no_data = int(sds_meta.get_nodata_value(input_file))
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "nodata": no_data,
                "output_format": 'GTIFF', "options": "compress = lzw"}
        raster_image_math.do_compute_chla_gradient(**args)
        print ('Done with raster')

    return proc_lists
def do_standardized_prod(self):
    """
    Register the standardized anomaly (z-score) sub-product and prepare the
    ruffus wiring attributes for it.

    Depending on self.frequency ('10d' vs monthly) this selects either the
    10dzscore (from 10ddiff / 10dstd) or the 1monzscore (from 1mondiff /
    1monstd) variant. The two original branches were near-duplicates; they
    are folded into one shared tail driven by branch-selected constants.

    Side effects (the method's contract -- unchanged):
      * adds the z-score sub-product to self.proc_lists
      * sets self.starting_files_10ddiff OR self.starting_files_mondiff
      * sets self.formatter_in, self.formatter_out, self.ancillary_input
    """
    if self.frequency == '10d':
        zscore_code = "10dzscore"
        descriptive_name = '10d Standardized ' + self.subproduct_code
        description = 'Z Score, Standardized ' + self.subproduct_code
        frequency_id = 'e1dekad'
        timeseries_role = self.starting_sprod  # '10d'
        diff_sprod = "10ddiff"   # input: absolute anomaly
        std_sprod = "10dstd"     # ancillary: standard deviation
    else:
        zscore_code = "1monzscore"
        descriptive_name = 'Monthly Standardized ' + self.subproduct_code
        description = 'Z Score, Monthly Standardized ' + self.subproduct_code
        frequency_id = 'e1month'
        timeseries_role = self.input_subprod_monthly
        diff_sprod = "1mondiff"
        std_sprod = "1monstd"

    output_sprod = self.proc_lists.proc_add_subprod(
        zscore_code, "filtered_anomalies", final=False,
        descriptive_name=descriptive_name,
        description=description,
        frequency_id=frequency_id,
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role=timeseries_role,
        active_default=True)

    prod_ident_zscore = functions.set_path_filename_no_date(
        self.prod, output_sprod, self.mapset, self.version, self.ext)
    subdir_zscore = functions.set_path_sub_directory(
        self.prod, output_sprod, 'Derived', self.version, self.mapset)

    # Input: the difference (anomaly) sub-product
    in_prod_ident_diff = functions.set_path_filename_no_date(
        self.prod, diff_sprod, self.mapset, self.version, self.ext)
    input_dir_diff = self.es2_data_dir + \
        functions.set_path_sub_directory(self.prod, diff_sprod, 'Derived', self.version, self.mapset)
    starting_files_diff = input_dir_diff + "*" + in_prod_ident_diff
    # Preserve the original per-branch attribute names (read by callers).
    if self.frequency == '10d':
        self.starting_files_10ddiff = starting_files_diff
    else:
        self.starting_files_mondiff = starting_files_diff

    self.formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident_diff
    self.formatter_out = "{subpath[0][5]}" + os.path.sep + subdir_zscore + "{YYYY[0]}{MMDD[0]}" + prod_ident_zscore

    # Ancillary: the standard-deviation sub-product, matched on MMDD only
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        self.prod, std_sprod, self.mapset, self.version, self.ext)
    ancillary_subdir = functions.set_path_sub_directory(
        self.prod, std_sprod, 'Derived', self.version, self.mapset)
    self.ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident
def reproject_output(input_file, native_mapset_id, target_mapset_id,
                     output_dir=None, version=None, logger=None):
    """
    Re-project an eStation2 raster file onto *target_mapset_id* and write it,
    with regenerated metadata, under the standard output tree.

    input_file        : full path of the file to re-project (must exist).
    native_mapset_id  : mapset of the input, or 'default' to read the
                        geo-reference from the file itself.
    target_mapset_id  : mapset to re-project onto.
    output_dir        : root of the output tree (defaults to processing_dir).
    version           : force a product version; otherwise taken from the
                        input file's metadata.
    logger            : optional logger (created if None).

    Returns the output filename, or 1 if the input file does not exist.
    """
    # Check logger
    if logger is None:
        logger = log.my_logger(__name__)
    # Check output dir
    if output_dir is None:
        output_dir = es_constants.es2globals['processing_dir']
    logger.debug("Entering routine %s for file %s" % ('reproject_output', input_file))
    ext = es_constants.ES2_OUTFILE_EXTENSION
    # Test the file/files exists
    # NOTE(review): returns the int 1 on error while returning a string on
    # success -- callers must not treat the result uniformly.
    if not os.path.isfile(input_file):
        logger.error('Input file: %s does not exist' % input_file)
        return 1
    # Instance metadata object (for output_file)
    sds_meta_out = metadata.SdsMetadata()
    # Read metadata from input_file
    sds_meta_in = metadata.SdsMetadata()
    sds_meta_in.read_from_file(input_file)
    # Extract info from input file
    str_date = sds_meta_in.get_item('eStation2_date')
    product_code = sds_meta_in.get_item('eStation2_product')
    sub_product_code = sds_meta_in.get_item('eStation2_subProduct')
    # 22.06.2017 Add the option to force the version
    if version is None:
        version = sds_meta_in.get_item('eStation2_product_version')
    # Define output filename
    sub_dir = sds_meta_in.get_item('eStation2_subdir')
    # Fix a bug for 10davg-linearx2 metadata - and make method more robust
    # NOTE(review): if sub_dir matches neither pattern, product_type stays
    # unbound and the set_path_sub_directory() call below raises NameError.
    if re.search('.*derived.*', sub_dir):
        product_type = 'Derived'
    elif re.search('.*tif.*', sub_dir):
        product_type = 'Ingest'
    # product_type = functions.get_product_type_from_subdir(sub_dir)
    out_prod_ident = functions.set_path_filename_no_date(
        product_code, sub_product_code, target_mapset_id, version, ext)
    output_subdir = functions.set_path_sub_directory(product_code, sub_product_code,
                                                     product_type, version, target_mapset_id)
    output_file = output_dir + \
        output_subdir + \
        str_date + \
        out_prod_ident
    # make sure output dir exists
    output_dir = os.path.split(output_file)[0]
    functions.check_output_dir(output_dir)

    # -------------------------------------------------------------------------
    # Manage the geo-referencing associated to input file
    # -------------------------------------------------------------------------
    # NOTE(review): opened with GA_Update so SetProjection below can modify
    # the INPUT file in place -- confirm that side effect is intended.
    orig_ds = gdal.Open(input_file, gdal.GA_Update)
    # Read the data type
    band = orig_ds.GetRasterBand(1)
    out_data_type_gdal = band.DataType

    if native_mapset_id != 'default':
        native_mapset = MapSet()
        native_mapset.assigndb(native_mapset_id)
        orig_cs = osr.SpatialReference(wkt=native_mapset.spatial_ref.ExportToWkt())
        # Complement orig_ds info (necessary to Re-project)
        try:
            #orig_ds.SetGeoTransform(native_mapset.geo_transform)
            orig_ds.SetProjection(orig_cs.ExportToWkt())
        except:
            # NOTE(review): bare except -- deliberately best-effort here
            logger.debug('Cannot set the geo-projection .. Continue')
    else:
        try:
            # Read geo-reference from input file
            orig_cs = osr.SpatialReference()
            orig_cs.ImportFromWkt(orig_ds.GetProjectionRef())
        except:
            # NOTE(review): bare except; if this fires, orig_cs may be unset
            logger.debug('Cannot read geo-reference from file .. Continue')

    # TODO-M.C.: add a test on the mapset-id in DB table !
    trg_mapset = MapSet()
    trg_mapset.assigndb(target_mapset_id)
    logger.debug('Target Mapset is: %s' % target_mapset_id)

    # -------------------------------------------------------------------------
    # Generate the output file
    # -------------------------------------------------------------------------
    # Prepare output driver
    out_driver = gdal.GetDriverByName(es_constants.ES2_OUTFILE_FORMAT)
    logger.debug('Doing re-projection to target mapset: %s' % trg_mapset.short_name)
    # Get target SRS from mapset
    out_cs = trg_mapset.spatial_ref
    out_size_x = trg_mapset.size_x
    out_size_y = trg_mapset.size_y

    # Create target in memory
    mem_driver = gdal.GetDriverByName('MEM')
    # Assign mapset to dataset in memory
    mem_ds = mem_driver.Create('', out_size_x, out_size_y, 1, out_data_type_gdal)
    mem_ds.SetGeoTransform(trg_mapset.geo_transform)
    mem_ds.SetProjection(out_cs.ExportToWkt())

    # Apply Reproject-Image to the memory-driver
    orig_wkt = orig_cs.ExportToWkt()
    res = gdal.ReprojectImage(orig_ds, mem_ds, orig_wkt, out_cs.ExportToWkt(),
                              es_constants.ES2_OUTFILE_INTERP_METHOD)
    logger.debug('Re-projection to target done.')

    # Read from the dataset in memory
    out_data = mem_ds.ReadAsArray()
    # Write to output_file
    trg_ds = out_driver.CreateCopy(output_file, mem_ds, 0, [es_constants.ES2_OUTFILE_OPTIONS])
    trg_ds.GetRasterBand(1).WriteArray(out_data)

    # -------------------------------------------------------------------------
    # Assign Metadata to the ingested file
    # -------------------------------------------------------------------------
    # Close dataset (GDAL flushes on dereference)
    trg_ds = None
    sds_meta_out.assign_es2_version()
    sds_meta_out.assign_mapset(target_mapset_id)
    sds_meta_out.assign_from_product(product_code, sub_product_code, version)
    sds_meta_out.assign_date(str_date)
    sds_meta_out.assign_subdir_from_fullpath(output_dir)
    sds_meta_out.assign_comput_time_now()
    # Copy the same input files as in the non-reprojected input
    file_list = sds_meta_in.get_item('eStation2_input_files')
    sds_meta_out.assign_input_files(file_list)
    # Write metadata to file
    sds_meta_out.write_to_file(output_file)
    # Return the filename
    return output_file
def create_pipeline(prod,
                    starting_sprod,
                    mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None,
                    update_stats=False,
                    nrt_products=True):
    """Register the Ruffus tasks for the 10-day statistics and anomaly products.

    Builds, as nested Ruffus-decorated closures, the tasks computing the
    10-day average/min/max climatology and the anomaly products (absolute
    difference, percent difference, normalized anomaly, ratio) from the
    ingested `starting_sprod` files.  Product descriptions refer to DMP
    (dry matter productivity) — presumably this module is DMP-specific;
    confirm against the module header (outside this view).

    :param prod:            product code used to build file paths/idents.
    :param starting_sprod:  sub-product code of the ingested input.
    :param mapset:          mapset code of input and outputs.
    :param version:         product version string.
    :param starting_dates:  optional list of date strings; when given, the
                            input is restricted to exactly those files.
    :param proc_lists:      optional ProcLists accumulator; created if None.
    :param update_stats:    when True, activates the 10d stats group.
    :param nrt_products:    when True, activates the 10d anomalies group.
    :return: the (possibly newly created) ProcLists with all sub-products added.
    """
    # ---------------------------------------------------------------------
    #   Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # switch wrt groups - according to options
    # DEFAULT: ALL off
    activate_10dstats_comput = 0  # 10d stats
    activate_10danomalies_comput = 0  # 10d anomalies
    activate_monthly_comput = 0  # monthly cumulation
    activate_monstats_comput = 0  # monthly stats
    activate_monanomalies_comput = 0  # monthly anomalies

    if nrt_products:
        activate_monthly_comput = 0  # monthly cumulation
        activate_monanomalies_comput = 0  # monthly anomalies
        activate_10danomalies_comput = 1  # 2.d

    if update_stats:
        activate_10dstats_comput = 1  # 10d stats
        activate_monstats_comput = 0  # monthly stats

    # switch wrt single products: not to be changed !!
    # NOTE(review): the activate_1mon* flags below are never referenced in
    # this function — no monthly tasks are defined here.  Possibly leftovers
    # from a template; confirm before removing.
    activate_10davg_comput = 1
    activate_10dmin_comput = 1
    activate_10dmax_comput = 1
    activate_10ddiff_comput = 1
    activate_10dperc_comput = 1
    activate_10dnp_comput = 0
    activate_10dratio_comput = 1
    activate_1moncum_comput = 1
    activate_1monavg_comput = 1
    activate_1monmin_comput = 1
    activate_1monmax_comput = 1
    activate_1mondiff_comput = 1
    activate_1monperc_comput = 1
    activate_1monnp_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    #logger.debug('Base data directory is: %s' % es2_data_dir)
    input_dir = es2_data_dir + \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

    # Either an explicit list of files (one per requested date) or a glob
    # pattern matching all ingested dates — Ruffus accepts both forms.
    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = input_dir + "*" + in_prod_ident

    # ---------------------------------------------------------------------
    # Average
    output_sprod_group = proc_lists.proc_add_subprod_group("10dstats")
    output_sprod = proc_lists.proc_add_subprod(
        "10davg",
        "10dstats",
        final=False,
        descriptive_name='10d Average',
        description='Average dry matter productivity for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    # Collate all years for the same MMDD (dekad) into one climatology file.
    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" +
        out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10davg_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def std_dmp_10davg(input_file, output_file):
        # Stats exclude the current (incomplete) year from the input set.
        reduced_list = exclude_current_year(input_file)
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": reduced_list,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Minimum
    output_sprod = proc_lists.proc_add_subprod(
        "10dmin",
        "10dstats",
        final=False,
        descriptive_name='10d Minimum',
        description='Minimum DMP for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" +
        out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dmin_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def std_dmp_10dmin(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        reduced_list = exclude_current_year(input_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": reduced_list,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_min_image(**args)

    # ---------------------------------------------------------------------
    # Maximum
    output_sprod = proc_lists.proc_add_subprod(
        "10dmax",
        "10dstats",
        final=False,
        descriptive_name='10d Maximum',
        description='Maximum DMP for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" +
        out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dmax_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def std_dmp_10dmax(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        reduced_list = exclude_current_year(input_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": reduced_list,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_max_image(**args)

    # ---------------------------------------------------------------------
    # 10dDiff
    # NOTE(review): the anomalies group is registered as "10anomalies" while
    # the activation flag is named activate_10danomalies_comput — confirm the
    # group code against the DB conventions.
    output_sprod_group = proc_lists.proc_add_subprod_group("10anomalies")
    output_sprod = proc_lists.proc_add_subprod(
        "10ddiff",
        "10anomalies",
        final=False,
        descriptive_name='10d Absolute Difference',
        description='10d Absolute Difference vs. LTA',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    # Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    # Pair each dated input with the 10davg climatology of the same dekad.
    ancillary_sprod = "10davg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    @active_if(activate_10danomalies_comput, activate_10ddiff_comput)
    @transform(starting_files, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    def std_dmp_10ddiff(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # input_file is the [current, avg] pair produced by add_inputs.
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_oper_subtraction(**args)

    # ---------------------------------------------------------------------
    # 10dperc
    output_sprod = proc_lists.proc_add_subprod(
        "10dperc",
        "10anomalies",
        final=False,
        descriptive_name='10d Percent Difference',
        description='10d Percent Difference vs. LTA',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    # Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "10davg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    @follows(std_dmp_10davg)
    @active_if(activate_10danomalies_comput, activate_10dperc_comput)
    @transform(starting_files, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    def std_dmp_10dperc(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file[0],
            "avg_file": input_file[1],
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_compute_perc_diff_vs_avg(**args)

    # ---------------------------------------------------------------------
    # 10dnp
    output_sprod = proc_lists.proc_add_subprod(
        "10dnp",
        "10anomalies",
        final=False,
        descriptive_name='10d Normalized Anomaly',
        description='10d Normalized Anomaly',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    # Starting files + min + max
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod_1 = "10dmin"
    ancillary_sprod_ident_1 = functions.set_path_filename_no_date(
        prod, ancillary_sprod_1, mapset, version, ext)
    ancillary_subdir_1 = functions.set_path_sub_directory(
        prod, ancillary_sprod_1, 'Derived', version, mapset)
    ancillary_input_1 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_1 + "{MMDD[0]}" + ancillary_sprod_ident_1

    ancillary_sprod_2 = "10dmax"
    ancillary_sprod_ident_2 = functions.set_path_filename_no_date(
        prod, ancillary_sprod_2, mapset, version, ext)
    ancillary_subdir_2 = functions.set_path_sub_directory(
        prod, ancillary_sprod_2, 'Derived', version, mapset)
    ancillary_input_2 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_2 + "{MMDD[0]}" + ancillary_sprod_ident_2

    @active_if(activate_10danomalies_comput, activate_10dnp_comput)
    @transform(starting_files, formatter(formatter_in),
               add_inputs(ancillary_input_1, ancillary_input_2),
               formatter_out)
    def std_dmp_10dnp(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # Normalized anomaly is computed with the VCI formula:
        # (curr - min) / (max - min).
        args = {
            "input_file": input_file[0],
            "min_file": input_file[1],
            "max_file": input_file[2],
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_make_vci(**args)

    # ---------------------------------------------------------------------
    # 10dratio
    output_sprod = proc_lists.proc_add_subprod(
        "10dratio",
        "10anomalies",
        final=False,
        descriptive_name='10d Ratio',
        description='10d Ratio (curr/avg)',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    # Starting files + min + max
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod_1 = "10davg"
    ancillary_sprod_ident_1 = functions.set_path_filename_no_date(
        prod, ancillary_sprod_1, mapset, version, ext)
    ancillary_subdir_1 = functions.set_path_sub_directory(
        prod, ancillary_sprod_1, 'Derived', version, mapset)
    ancillary_input_1 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_1 + "{MMDD[0]}" + ancillary_sprod_ident_1

    @active_if(activate_10danomalies_comput, activate_10dratio_comput)
    @transform(starting_files, formatter(formatter_in),
               add_inputs(ancillary_input_1), formatter_out)
    def std_dmp_10dratio(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # NOTE(review): "compress = lzw" (with spaces) differs from the
        # "compress=lzw" used by every other task — verify the option string
        # is still parsed correctly downstream.
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress = lzw"
        }
        raster_image_math.do_oper_division_perc(**args)

    return proc_lists
def create_pipeline(prod,
                    starting_sprod,
                    mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None,
                    logger=None):
    """Register the Ruffus task computing the monthly average product.

    Collates all daily files of each YYYYMM into a 'monavg' output dated
    YYYYMM01; the current (incomplete) month is skipped at execution time.

    :param prod:            product code used to build file paths/idents.
    :param starting_sprod:  sub-product code of the ingested input.
    :param mapset:          mapset code of input and output.
    :param version:         product version string.
    :param starting_dates:  optional list of date strings restricting the
                            input files (same convention as the sibling
                            pipelines).
    :param proc_lists:      optional ProcLists accumulator; created if None.
    :param logger:          optional logger for the skip-current-month notice.
    :return: the (possibly newly created) ProcLists.
    """
    # ---------------------------------------------------------------------
    #   Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    sds_meta = metadata.SdsMetadata()
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod,
                                                        mapset, version, ext)
    input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, starting_sprod, 'Ingest', version, mapset)

    # Fix: honour the starting_dates argument (it was previously ignored —
    # a local 'my_date = None' made the date-restricted branch unreachable).
    # Same convention as the other create_pipeline functions in this package.
    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = input_dir + "*" + in_prod_ident

    # ---------------------------------------------------------------------
    # Monthly Average for a given month
    output_sprod_group = proc_lists.proc_add_subprod_group("monstats")
    output_sprod = proc_lists.proc_add_subprod(
        "monavg",
        "monstats",
        final=False,
        descriptive_name='Monthly average',
        description='Monthly average',
        frequency_id='',
        date_format='YYYYMMDD',  # fix: was the 9-char typo 'YYYMMMMDD'
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)
    # Group all YYYYMMDD inputs of the same YYYYMM; date the output YYYYMM01.
    formatter_in = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" + '01' + out_prod_ident

    @collate(starting_files, formatter(formatter_in), formatter_out)
    def compute_monavg(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        out_filename = os.path.basename(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # No-data value is read from the first input's metadata.
        no_data = int(sds_meta.get_nodata_value(input_file[0]))
        str_date = out_filename[0:6]
        today = datetime.date.today()
        today_yyyymm = today.strftime('%Y%m')
        # expected_ndays=functions.get_number_days_month(str_date)
        # current_ndays=len(input_file)
        if str_date == today_yyyymm:
            # Fix: guard against logger=None (the parameter's default),
            # which previously raised AttributeError here.
            if logger is not None:
                logger.info(
                    'Do not perform computation for current month {0}. Skip'.
                    format(str_date))
        else:
            args = {
                "input_file": input_file,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "input_nodata": no_data,
                "options": "compress=lzw"
            }
            raster_image_math.do_avg_image(**args)

    return proc_lists
def create_pipeline(prod,
                    starting_sprod,
                    native_mapset,
                    target_mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None):
    """Register the Ruffus tasks for the LSA-SAF ET products.

    Three chained products: the 10-day average per 30-minute slot (on the
    native mapset), the 10-day cumulate in mm (re-projected to the target
    mapset), and the monthly cumulate.

    :param prod:            product code.
    :param starting_sprod:  sub-product code of the ingested input.
    :param native_mapset:   mapset of the ingested data (no re-projection).
    :param target_mapset:   mapset of the re-projected derived products.
    :param version:         product version string.
    :param starting_dates:  optional list of date strings restricting input.
    :param proc_lists:      optional ProcLists accumulator; created if None.
    :return: the (possibly newly created) ProcLists.
    """
    # ---------------------------------------------------------------------
    #   Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Set DEFAULTS: all ON
    activate_10d30min_comput = 1
    activate_10dcum_comput = 1
    activate_1moncum_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, native_mapset, version, ext)

    input_dir = es2_data_dir + \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, native_mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = input_dir + "*" + in_prod_ident

    # ---------------------------------------------------------------------
    # Dekad average for every 30min (mm/h)
    # NOTE: this product is compute w/o re-projection, i.e. on the 'native' mapset
    output_sprod_group = proc_lists.proc_add_subprod_group("lsasaf-et")
    output_sprod = proc_lists.proc_add_subprod(
        "10d30min",
        "lsasaf-et",
        final=False,
        descriptive_name='10day Average over 30 min',
        description='10day Average computed for every 30 min',
        frequency_id='e30minute',  # Is it OK ???????
        date_format='YYYYMMDDHHMM',
        masked=False,
        timeseries_role='',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, native_mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     native_mapset)

    def generate_parameters_10d30min():
        """Yield ([input files], output file) per (dekad, 30-min slot) pair,
        skipping the still-running current dekad."""
        #   Look for all input files in input_dir, and sort them
        input_files = glob.glob(starting_files)
        dekad_list = []

        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)
        dekad_list = sorted(dekad_list)

        # Compute the 'julian' dakad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        # Generate the list of 30 min time in a day
        timelist = [
            datetime.time(h, m).strftime("%H%M")
            for h, m in itertools.product(xrange(0, 24), xrange(0, 60, 30))
        ]

        for time in timelist:
            files_for_time = glob.glob(input_dir + os.path.sep + '*' + time +
                                       in_prod_ident)
            for dekad in dekad_list:
                # Exclude the current dekad
                if dekad != dekad_now:
                    file_list = []
                    my_dekad_str = functions.conv_dekad_2_date(dekad)
                    output_file = es_constants.processing_dir + output_subdir + os.path.sep + my_dekad_str + time + out_prod_ident
                    for myfile in files_for_time:
                        basename = os.path.basename(myfile)
                        mydate_yyyymmdd = functions.get_date_from_path_filename(
                            basename)
                        mydekad_nbr = functions.conv_date_2_dekad(
                            mydate_yyyymmdd[0:8])
                        if mydekad_nbr == dekad:
                            file_list.append(myfile)
                    yield (file_list, output_file)

    @active_if(activate_10d30min_comput)
    @files(generate_parameters_10d30min)
    def lsasaf_etp_10d30min(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', \
                "options": "compress=lzw", "input_nodata": -32768}
        raster_image_math.do_avg_image(**args)

    # Do also the house-keeping, by deleting the files older than 6 months
    # NOTE(review): "lsasaf-et" is the sub-product GROUP code, not a
    # sub-product code — verify remove_old_files expects the starting
    # sub-product here (the LST pipeline passes starting_sprod).
    number_months_keep = 6
    remove_old_files(prod, "lsasaf-et", version, native_mapset, 'Ingest',
                     number_months_keep)

    # ----------------------------------------------------------------------------------------------------------------
    # 10 day Cumulate (mm)
    # NOTE: this product is compute with re-projection, i.e. on the 'target' mapset
    output_sprod = proc_lists.proc_add_subprod(
        "10dcum",
        "lsasaf-et",
        final=False,
        descriptive_name='10day Cumulate',
        description='10day Cumulate in mm',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident_10dcum = functions.set_path_filename_no_date(
        prod, "10dcum", target_mapset, version, ext)
    output_subdir_10dcum = functions.set_path_sub_directory(
        prod, "10dcum", 'Derived', version, target_mapset)

    #   Define input files
    in_prod_10dcum = '10d30min'
    in_prod_ident_10dcum = functions.set_path_filename_no_date(
        prod, in_prod_10dcum, native_mapset, version, ext)

    input_dir_10dcum = es_constants.processing_dir + \
                       functions.set_path_sub_directory(prod, in_prod_10dcum, 'Derived', version, native_mapset)

    starting_files_10dcum = input_dir_10dcum + "*" + in_prod_ident_10dcum

    # Collate the 48 half-hourly dekad averages of one date into one cumulate.
    formatter_in = "(?P<YYYYMMDD>[0-9]{8})[0-9]{4}" + in_prod_ident_10dcum
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_10dcum +
        "{YYYYMMDD[0]}" + out_prod_ident_10dcum
    ]

    @follows(lsasaf_etp_10d30min)
    @active_if(activate_10dcum_comput)
    @collate(starting_files_10dcum, formatter(formatter_in), formatter_out)
    def lsasaf_etp_10dcum(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        # Get the number of days of that dekad
        basename = os.path.basename(output_file)
        mydate = functions.get_date_from_path_filename(basename)
        nbr_days_dekad = functions.day_per_dekad(mydate)
        # Compute the correcting factor: we sum-up all 48 30min cycles and:
        # Divide by 2 (mm/h -> mm)
        # Multiply by number of days
        # Divide by 100, so that the scale factor changes from 0.0001 (30min) to 0.01
        factor = float(nbr_days_dekad) * 0.005
        functions.check_output_dir(os.path.dirname(output_file))
        # Cumulate into a temp dir, re-project to the target mapset, clean up.
        tmpdir = tempfile.mkdtemp(prefix=__name__,
                                  suffix='',
                                  dir=es_constants.base_tmp_dir)
        tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)
        args = {
            "input_file": input_file,
            "output_file": tmp_output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "scale_factor": factor,
            "input_nodata": -32768
        }
        raster_image_math.do_cumulate(**args)
        reproject_output(tmp_output_file, native_mapset, target_mapset)
        shutil.rmtree(tmpdir)

    # Do also the house-keeping, by deleting the files older than 6 months
    # NOTE(review): "10d30min-et" does not match any sub-product declared in
    # this pipeline (the derived sub-product is "10d30min") — likely a typo;
    # confirm against the products table.
    number_months_keep = 6
    remove_old_files(prod, "10d30min-et", version, native_mapset, 'Derived',
                     number_months_keep)

    # ----------------------------------------------------------------------------------------------------------------
    # 1moncum
    output_sprod = proc_lists.proc_add_subprod(
        "1moncum",
        "lsasaf-et",
        final=False,
        descriptive_name='1mon Cumulate',
        description='Monthly Cumulate in mm',
        frequency_id='e1month',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    output_sprod = '1moncum'
    out_prod_ident_1moncum = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset, version, ext)
    output_subdir_1moncum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, target_mapset)

    # Input files (10-day cumulates on the target mapset)
    in_prod_1moncum = '10dcum'
    in_prod_ident_1moncum = functions.set_path_filename_no_date(
        prod, in_prod_1moncum, target_mapset, version, ext)
    input_dir_1moncum = es_constants.processing_dir + \
                        functions.set_path_sub_directory(prod, in_prod_1moncum, 'Derived', version, target_mapset)
    starting_files_1moncum = input_dir_1moncum + "*" + in_prod_ident_1moncum

    # Group the three dekads of each YYYYMM; output is dated YYYYMM01.
    formatter_in_1moncum = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident_1moncum
    formatter_out_1moncum = "{subpath[0][5]}" + os.path.sep + output_subdir_1moncum + "{YYYYMM[0]}" + '01' + out_prod_ident_1moncum

    # @follows(lsasaf_etp_10dcum)
    @active_if(activate_1moncum_comput)
    @collate(starting_files_1moncum, formatter(formatter_in_1moncum),
             formatter_out_1moncum)
    def lsasaf_etp_1moncum(input_file, output_file):
        #
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "input_nodata": -32768
        }
        raster_image_math.do_cumulate(**args)

    return proc_lists
def create_pipeline(prod,
                    starting_sprod,
                    native_mapset,
                    target_mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None):
    """Register the Ruffus tasks for the LSA-SAF LST products.

    Products: daily maximum (re-projected to target mapset), 10-day maximum
    of the daily maxima, 10-day maximum per 15-minute slot (native mapset),
    and 10-day minimum of the 15-minute maxima (re-projected).

    :param prod:            product code.
    :param starting_sprod:  sub-product code of the ingested 'lst' input.
    :param native_mapset:   mapset of the ingested data.
    :param target_mapset:   mapset of the re-projected derived products.
    :param version:         product version string.
    :param starting_dates:  optional list of date strings restricting input.
    :param proc_lists:      optional ProcLists accumulator; created if None.
    :return: the (possibly newly created) ProcLists.
    """
    # Create Logger
    logger = log.my_logger('log.lst')

    # ---------------------------------------------------------------------
    #   Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Set DEFAULTS: all ON
    activate_1dmax_comput = 1
    activate_10dmax_comput = 1
    activate_10d15min_comput = 1
    activate_10dmin_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files ('lst' subproduct)
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, native_mapset, version, ext)

    input_dir = es2_data_dir + \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, native_mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = input_dir + "*" + in_prod_ident

    logger.info("starting_files %s" % starting_files)

    # ----------------------------------------------------------------------------------------------------------------
    # 1dmax
    # Daily maximum from 15min lst, re-projected on target mapset
    output_sprod = proc_lists.proc_add_subprod("1dmax",
                                               "lsasaf-lst",
                                               final=False,
                                               descriptive_name='1d Maximum',
                                               description='Daily Maximum',
                                               frequency_id='e1day',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    output_sprod = '1dmax'
    out_prod_ident_1dmax = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset, version, ext)
    output_subdir_1dmax = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, target_mapset)

    # Collate all HHMM slots of one YYYYMMDD into a single daily output.
    formatter_in_1dmax = "(?P<YYYYMMDD>[0-9]{8})[0-9]{4}" + in_prod_ident
    formatter_out_1dmax = "{subpath[0][5]}" + os.path.sep + output_subdir_1dmax + "{YYYYMMDD[0]}" + out_prod_ident_1dmax

    #
    @active_if(activate_1dmax_comput)
    @collate(starting_files, formatter(formatter_in_1dmax),
             formatter_out_1dmax)
    def lsasaf_lst_1dmax(input_file, output_file):
        #
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # Compute into a temp dir, re-project to the target mapset, clean up.
        tmpdir = tempfile.mkdtemp(prefix=__name__,
                                  suffix='',
                                  dir=es_constants.base_tmp_dir)
        tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)
        args = {
            "input_file": input_file,
            "output_file": tmp_output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "input_nodata": -32768
        }
        raster_image_math.do_max_image(**args)
        reproject_output(tmp_output_file, native_mapset, target_mapset)
        shutil.rmtree(tmpdir)

    # ----------------------------------------------------------------------------------------------------------------
    # 10dmax
    # 10 Day maximum from daily max, on target mapset
    output_sprod = proc_lists.proc_add_subprod("10dmax",
                                               "lsasaf-lst",
                                               final=False,
                                               descriptive_name='10d Maximum',
                                               description='10d Maximum',
                                               frequency_id='e1dekad',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    output_sprod_10dmax = '10dmax'
    out_prod_ident_10dmax = functions.set_path_filename_no_date(
        prod, output_sprod_10dmax, target_mapset, version, ext)
    output_subdir_10dmax = functions.set_path_sub_directory(
        prod, output_sprod_10dmax, 'Derived', version, target_mapset)
    #
    #   Define input files
    in_prod_10dmax = '1dmax'
    in_prod_ident_10dmax = functions.set_path_filename_no_date(
        prod, in_prod_10dmax, target_mapset, version, ext)
    #
    input_dir_10dmax = es_constants.processing_dir + \
                       functions.set_path_sub_directory(prod, in_prod_10dmax, 'Derived', version, target_mapset)
    #
    starting_files_10dmax = input_dir_10dmax + "*" + in_prod_ident_10dmax

    #
    def generate_parameters_10dmax():
        """Yield ([daily-max files], output file) per dekad, skipping the
        still-running current dekad."""
        #   Look for all input files in input_dir, and sort them
        input_files = glob.glob(starting_files_10dmax)
        dekad_list = []
        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)
        dekad_list = sorted(dekad_list)
        # Compute the 'julian' dakad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        for dekad in dekad_list:
            # Exclude the current dekad
            if dekad != dekad_now:
                file_list = []
                my_dekad_str = functions.conv_dekad_2_date(dekad)
                for input_file in input_files:
                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    mydekad_nbr = functions.conv_date_2_dekad(
                        mydate_yyyymmdd[0:8])
                    if mydekad_nbr == dekad:
                        file_list.append(input_file)
                output_file = es_constants.processing_dir + output_subdir_10dmax + os.path.sep + my_dekad_str + out_prod_ident_10dmax
                yield (file_list, output_file)

    #
    @active_if(activate_10dmax_comput)
    @files(generate_parameters_10dmax)
    def lsasaf_lst_10dmax(input_file, output_file):
        #
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "input_nodata": -32768
        }
        raster_image_math.do_max_image(**args)

    # ---------------------------------------------------------------------
    # Dekad maximum for every 15min
    # NOTE: this product is compute w/o re-projection, i.e. on the 'native' mapset
    output_sprod = proc_lists.proc_add_subprod(
        "10d15min",
        "lsasaf-lst",
        final=False,
        descriptive_name='10day Maximum over 15 min',
        description='10day Maximum computed for every 15 min',
        frequency_id='e15minute',  # Is it OK ???????
        date_format='YYYYMMDDHHMM',
        masked=False,
        timeseries_role='',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, native_mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     native_mapset)

    def generate_parameters_10d15min():
        """Yield ([input files], output file) per (dekad, 15-min slot),
        skipping the current dekad and slots with fewer than 9 files."""
        #   Look for all input files in input_dir, and sort them
        input_files = glob.glob(starting_files)
        dekad_list = []
        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)
        dekad_list = sorted(dekad_list)
        # Compute the 'julian' dakad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        # Generate the list of 30 min time in a day
        # NOTE(review): comment says 30 min but the step below is 15 min.
        timelist = [
            datetime.time(h, m).strftime("%H%M")
            for h, m in itertools.product(xrange(0, 24), xrange(0, 60, 15))
        ]

        for time in timelist:
            files_for_time = glob.glob(input_dir + os.path.sep + '*' + time +
                                       in_prod_ident)
            for dekad in dekad_list:
                # Exclude the current dekad
                if dekad != dekad_now:
                    file_list = []
                    my_dekad_str = functions.conv_dekad_2_date(dekad)
                    output_file = es_constants.processing_dir + output_subdir + os.path.sep + my_dekad_str + time + out_prod_ident
                    for myfile in files_for_time:
                        basename = os.path.basename(myfile)
                        mydate_yyyymmdd = functions.get_date_from_path_filename(
                            basename)
                        mydekad_nbr = functions.conv_date_2_dekad(
                            mydate_yyyymmdd[0:8])
                        if mydekad_nbr == dekad:
                            file_list.append(myfile)
                    # Require a minimum number of files per dekad/slot.
                    if len(file_list) > 8:
                        yield (file_list, output_file)

    @active_if(activate_10d15min_comput)
    @files(generate_parameters_10d15min)
    def lsasaf_lst_10d15min(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', \
                "options": "compress=lzw", "input_nodata": -32768}
        raster_image_math.do_max_image(**args)

    # Do also the house-keeping, by deleting the files older than 6 months
    number_months_keep = 6
    remove_old_files(prod, starting_sprod, version, native_mapset, 'Ingest',
                     number_months_keep)

    # ----------------------------------------------------------------------------------------------------------------
    # 10 day minimum (mm)
    # NOTE: this product is compute with re-projection, i.e. on the 'target' mapset
    # NOTE(review): "10dmin" is registered under group "lsasaf-et" although
    # this is the LST pipeline — looks like copy-paste from the ET module;
    # confirm the intended group code ("lsasaf-lst"?).
    output_sprod = proc_lists.proc_add_subprod(
        "10dmin",
        "lsasaf-et",
        final=False,
        descriptive_name='10day Minimum',
        description='10day minimum',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident_10dmin = functions.set_path_filename_no_date(
        prod, "10dmin", target_mapset, version, ext)
    output_subdir_10dmin = functions.set_path_sub_directory(
        prod, "10dmin", 'Derived', version, target_mapset)

    #   Define input files
    in_prod_10dmin = '10d15min'
    in_prod_ident_10dmin = functions.set_path_filename_no_date(
        prod, in_prod_10dmin, native_mapset, version, ext)

    input_dir_10dmin = es_constants.processing_dir + \
                       functions.set_path_sub_directory(prod, in_prod_10dmin, 'Derived', version, native_mapset)

    starting_files_10dmin = input_dir_10dmin + "*" + in_prod_ident_10dmin

    # Collate all HHMM slots of one dekad date into a single minimum.
    formatter_in = "(?P<YYYYMMDD>[0-9]{8})[0-9]{4}" + in_prod_ident_10dmin
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_10dmin +
        "{YYYYMMDD[0]}" + out_prod_ident_10dmin
    ]

    @follows(lsasaf_lst_10d15min)
    @active_if(activate_10dmin_comput)
    @collate(starting_files_10dmin, formatter(formatter_in), formatter_out)
    def lsasaf_lst_10dmin(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        # Get the number of days of that dekad
        # NOTE(review): mydate is computed but never used below.
        basename = os.path.basename(output_file)
        mydate = functions.get_date_from_path_filename(basename)
        functions.check_output_dir(os.path.dirname(output_file))
        # Compute into a temp dir, re-project to the target mapset, clean up.
        tmpdir = tempfile.mkdtemp(prefix=__name__,
                                  suffix='',
                                  dir=es_constants.base_tmp_dir)
        tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)
        args = {
            "input_file": input_file,
            "output_file": tmp_output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "input_nodata": -32768
        }
        raster_image_math.do_min_image(**args)
        reproject_output(tmp_output_file, native_mapset, target_mapset)
        shutil.rmtree(tmpdir)

    # Do also the house-keeping, by deleting the files older than 6 months
    number_months_keep = 6
    remove_old_files(prod, '10d15min', version, native_mapset, 'Ingest',
                     number_months_keep)

    return proc_lists
def create_pipeline(prod, starting_sprod, native_mapset, target_mapset, version,
                    starting_dates=None, proc_lists=None, day_time=None, logger=None):
    """
    Define the processing chain for the MSG-MPE rainfall product.

    Two ruffus tasks are created:
      - 1dcum:  daily cumulate of the 15-min MPE inputs, re-projected from
                the native mapset onto the target mapset;
      - 10dcum: dekadal (10-day) cumulate computed from the daily cumulates.

    Arguments:
        prod, starting_sprod: product/subproduct codes of the input ('mpe').
        native_mapset, target_mapset: input and output mapset codes.
        version: product version.
        starting_dates: optional explicit list of input dates (YYYYMMDDhhmm);
                        when None, all files in the input directory are used.
        proc_lists: optional ProcLists accumulator (created when None).
        day_time: optional 'hhmm' string; when given, a 'day' is the 24h
                  window starting at that time instead of at midnight.
        logger: logger instance used by the parameter generators.

    Returns the ProcLists object describing the generated sub-products.
    """

    # Test flag (to save non-projected cumulated products)
    test_mode = False

    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Set DEFAULTS: all ON
    activate_1dcum_comput = 1
    activate_10dcum_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files ('mpe' subproduct)
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, native_mapset, version, ext)

    input_dir = es2_data_dir + \
        functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, native_mapset)

    # ----------------------------------------------------------------------------------------------------------------
    # 1dcum
    # Daily cumulate of the 15 min MPE, re-projected on target mapset
    # FIX: the subproduct was registered as "1dmax" although all identifiers,
    # the descriptive_name ('1d Cumulate') and the produced files use '1dcum'.
    output_sprod = proc_lists.proc_add_subprod("1dcum", "msg-mpe", final=False,
                                               descriptive_name='1d Cumulate',
                                               description='Daily Cumulate',
                                               frequency_id='e1day',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)

    output_sprod = '1dcum'
    out_prod_ident_1dcum = functions.set_path_filename_no_date(prod, output_sprod, target_mapset, version, ext)
    output_subdir_1dcum = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, target_mapset)

    # Use a specific function, to skip the current day
    def generate_parameters_1dcum():
        # Look for all input files in input_dir, and sort them
        if starting_dates is not None:
            input_files = []
            for my_date in starting_dates:
                input_files.append(input_dir + my_date + in_prod_ident)
        else:
            starting_files = input_dir + "*" + in_prod_ident
            input_files = glob.glob(starting_files)

        logger.debug("starting_files %s" % input_files)

        day_list = []

        # Create unique list of all days (as 'YYYYMMDD' strings)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            if mydate_yyyymmdd not in day_list:
                day_list.append(mydate_yyyymmdd)

        day_list = sorted(day_list)

        # Today's date, used below to exclude the (still incomplete) current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')

        # 'hhmm' offset of the day window (0 -> midnight-aligned days).
        # FIX: int(day_time) was previously evaluated unconditionally when
        # building output_file, raising TypeError whenever day_time is None.
        hhmm_offset = 0 if day_time is None else int(day_time)

        for myday in day_list:
            # Exclude the current day
            if myday != today_str:
                file_list = []
                for input_file in input_files:
                    basename = os.path.basename(input_file)
                    # Date is in format YYYYMMDDhhmm
                    mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
                    if day_time is None:
                        # Append files for myday
                        if mydate_yyyymmdd[0:8] == myday:
                            file_list.append(input_file)
                    else:
                        # Append files in time range myday+hhmm |-| (myday+1)+hhmm
                        if int(mydate_yyyymmdd) >= int(myday) * 10000 + int(day_time) and \
                           int(mydate_yyyymmdd) < (int(myday) + 1) * 10000 + int(day_time):
                            file_list.append(input_file)

                output_file = es_constants.processing_dir + output_subdir_1dcum + os.path.sep + \
                    str(int(myday) * 10000 + hhmm_offset) + out_prod_ident_1dcum
                file_list = sorted(file_list)
                # Check here the number of missing files (for optimization):
                # 15-min slots give 96 files/day -> accept up to ~10 missing.
                if len(file_list) > 86:
                    yield (file_list, output_file)

    # @active_if(activate_1dcum_comput)
    @files(generate_parameters_1dcum)
    def msg_mpe_1dcum(input_file, output_file):
        # output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        tmpdir = tempfile.mkdtemp(prefix=__name__, suffix='', dir=es_constants.base_tmp_dir)
        tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)
        # Divide by 10 to pass from 0.01 to 0.1 as scale factor for 1d cum
        factor = 0.1
        args = {"input_file": input_file,
                "output_file": tmp_output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw",
                "scale_factor": factor,
                "input_nodata": -32768}
        raster_image_math.do_cumulate(**args)
        reproject_output(tmp_output_file, native_mapset, target_mapset)

        # Copy the non-reprojected file for validation, only in test_mode
        if test_mode:
            msg_proj_dir = es_constants.processing_dir + functions.set_path_sub_directory(
                prod, '1dcum', 'Derived', version, native_mapset)
            functions.check_output_dir(msg_proj_dir)
            shutil.copy(tmp_output_file, msg_proj_dir + os.path.sep)

        shutil.rmtree(tmpdir)

    # ----------------------------------------------------------------------------------------------------------------
    # 10 day Cumulate (mm)
    output_sprod = proc_lists.proc_add_subprod("10dcum", "msg-mpe", final=False,
                                               descriptive_name='10day Cumulate',
                                               description='10day Cumulate in mm',
                                               frequency_id='e1dekad',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)

    out_prod_ident_10dcum = functions.set_path_filename_no_date(prod, "10dcum", target_mapset, version, ext)
    output_subdir_10dcum = functions.set_path_sub_directory(prod, "10dcum", 'Derived', version, target_mapset)

    # Define input files (the daily cumulates, already on the target mapset)
    in_prod_10dcum = '1dcum'
    in_prod_ident_10dcum = functions.set_path_filename_no_date(prod, in_prod_10dcum, target_mapset, version, ext)
    input_dir_10dcum = es_constants.processing_dir + \
        functions.set_path_sub_directory(prod, in_prod_10dcum, 'Derived', version, target_mapset)
    starting_files_10dcum = input_dir_10dcum + "*" + in_prod_ident_10dcum

    def generate_parameters_10dcum():
        # Look for all input files in input_dir, and sort them
        input_files = glob.glob(starting_files_10dcum)
        dekad_list = []

        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)

        dekad_list = sorted(dekad_list)

        # Compute the 'julian' dekad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        for dekad in dekad_list:
            # Exclude the current dekad
            if dekad != dekad_now:
                file_list = []
                my_dekad_str = functions.conv_dekad_2_date(dekad)
                for input_file in input_files:
                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
                    mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd[0:8])
                    if mydekad_nbr == dekad:
                        file_list.append(input_file)

                output_file = es_constants.processing_dir + output_subdir_10dcum + os.path.sep + \
                    my_dekad_str + out_prod_ident_10dcum
                yield (file_list, output_file)

    @follows(msg_mpe_1dcum)
    @active_if(activate_10dcum_comput)
    @files(generate_parameters_10dcum)
    def msg_mpe_10dcum(input_file, output_file):
        # A dekad has 8-11 days: require at least 9 daily files
        if len(input_file) > 8:
            output_file = functions.list_to_element(output_file)
            # Get the number of days of that dekad (currently informational only)
            basename = os.path.basename(output_file)
            mydate = functions.get_date_from_path_filename(basename)
            nbr_days_dekad = functions.day_per_dekad(mydate)
            factor = 1.0
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": input_file,
                    "output_file": output_file,
                    "output_format": 'GTIFF',
                    "options": "compress=lzw",
                    "scale_factor": factor,
                    "input_nodata": -32768}
            raster_image_math.do_cumulate(**args)
        else:
            logger.warning('More than 2 files missing for output {0}: Skip'.format(os.path.basename(output_file)))

    return proc_lists
def create_pipeline(prod, starting_sprod, native_mapset, version,
                    starting_dates=None, proc_lists=None, logger=None, mapset=None):
    """
    Define the processing chain computing seasonal cumulations of a product.

    Two ruffus tasks are created:
      - seas-cum-of-<sprod>: cumulation over a whole season (start_season ->
        end_season), re-projected on 'mapset' and masked with the crop mask;
      - cum-of-<sprod>: running cumulation from the start of the season up to
        each in-season date, re-projected on 'mapset' (not masked).

    Arguments:
        prod, starting_sprod: product/subproduct codes of the input.
        native_mapset: mapset of the input files.
        version: product version.
        starting_dates: optional explicit list of input dates; when None, all
                        files in the input directory are used.
        proc_lists: optional ProcLists accumulator (created when None).
        logger: logger instance used by the parameter generators.
        mapset: output mapset; defaults to native_mapset.

    Returns the ProcLists object describing the generated sub-products.
    """

    # Definitions: season boundaries as 'MMDD' strings; the season crosses
    # the end of the year (Sep 01 -> Apr 21).
    start_season = '0901'
    end_season = '0421'
    # NOTE(review): hard-coded crop-mask path — presumably a temporary
    # location; verify before deployment.
    agriculture_mask = '/data/temp/AGRIC_MASK.tif'

    # Manage mapset
    if mapset is None:
        mapset = native_mapset

    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Activation flags (both active by default)
    activate_seas_cum_comput = 1  # season cumulation
    activate_cum_comput = 1  # season cumulation

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, native_mapset, version, ext)

    input_dir = es2_data_dir + \
        functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, native_mapset)

    # Either build the file list from the explicit dates, or glob the dir
    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = glob.glob(input_dir + "*" + in_prod_ident)

    # ---------------------------------------------------------------------
    # 3.a NDVI linearx2 Season Cumulation masked using Crop Mask
    # ---------------------------------------------------------------------

    # Define output subproduct
    out_sub_prod_name = 'seas-cum-of-' + starting_sprod
    output_sprod_group = proc_lists.proc_add_subprod_group("seas_cum_prods")
    output_sprod = proc_lists.proc_add_subprod(out_sub_prod_name, "seas_cum_prods", final=True,
                                               descriptive_name='Season Cumulation for ' + out_sub_prod_name,
                                               description='Season Cumulation for ' + out_sub_prod_name,
                                               frequency_id='e1year',
                                               date_format='YYYYMMDD',
                                               masked=True,
                                               timeseries_role='',
                                               active_default=True)

    # Generate prod_identifier (_fewsnet-rfe_seas-cum-of-10d_FEWSNET-Africa-8km_2.0.tif) and subdir
    prod_ident_seas_cum = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    subdir_ident_seas_cum = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_seas_cum():
        # Yield one (input_files, output_file) pair per complete season.
        starting_files.sort()

        # Convert from string to int (for comparison)
        dekad_start = int(start_season)
        dekad_end = int(end_season)

        # Loop over all input files
        for file_t0 in starting_files:
            # Get current date (in format '19980901')
            date_t0 = functions.get_date_from_path_full(file_t0)
            # Extract from date-string the dekad/year as integer
            dekad_t0 = int(date_t0[4:])
            year2 = int(date_t0[0:4])

            # Check if season goes across two years -> define year1/2
            # (year1 = year of the season start, year2 = year of file_t0)
            if dekad_start < dekad_end:
                if dekad_t0 >= dekad_start and dekad_t0 <= dekad_end:
                    year1 = year2
            else:
                if dekad_t0 > dekad_start or dekad_t0 <= dekad_end:
                    year1 = year2 - 1

            # Detect the end of the season and trigger processing
            if dekad_t0 == dekad_end:
                # Define output filename
                output_file = es2_data_dir + subdir_ident_seas_cum + str(year2) + end_season + prod_ident_seas_cum
                # Get list of dates from start of season to end of season
                list_dates = proc_functions.get_list_dates_for_dataset(prod, starting_sprod, version,
                                                                       start_date=str(year1) + start_season,
                                                                       end_date=str(year2) + end_season)
                input_files = []
                missing_file = False
                for ldate in list_dates:
                    # Append the file to list if it exists ...
                    if os.path.isfile(input_dir + ldate + in_prod_ident):
                        input_files.append(input_dir + ldate + in_prod_ident)
                    # ... otherwise raise a warning and break
                    else:
                        logger.warning('Missing file for date {0}. Season not computed.'.format(ldate))
                        missing_file = True
                        break
                # Only seasons with a complete set of inputs are yielded
                if not missing_file:
                    yield (input_files, output_file)

    @active_if(activate_seas_cum_comput)
    @files(generate_parameters_seas_cum)
    # Function to do actual computation from inputs to output
    def seas_cum(input_files, output_file):
        # Ensure out subdirectory exists
        functions.check_output_dir(os.path.dirname(output_file))
        # If output_file is a list, force it to a string
        output_file = functions.list_to_element(output_file)
        # Prepare temporary working directory for intermediate results
        tmpdirpath = tempfile.mkdtemp()
        # Cumulated but not masked output
        tmp_output_file = tmpdirpath + os.path.sep + os.path.basename(output_file)
        # Temp file in the final projection (mapset)
        tmp_reproj_file = tmpdirpath + os.path.sep + 'my_temp_reprojected_output.tif'

        # Call the function for cumulating
        args = {"input_file": input_files,
                "output_file": tmp_output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw"}
        raster_image_math.do_cumulate(**args)

        # Re-project the cumulated output from native_mapset onto mapset
        # raster_image_math.do_reproject(agriculture_mask, tmp_reproj_file, 'SPOTV-SADC-1km', mapset)
        raster_image_math.do_reproject(tmp_output_file, tmp_reproj_file, native_mapset, mapset)

        # Call the function for masking (keep 0 where mask == 0)
        args = {"input_file": tmp_reproj_file,
                "mask_file": agriculture_mask,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw",
                "mask_value": 0,
                "out_value": 0}
        raster_image_math.do_mask_image(**args)

        # Remove temp directory
        shutil.rmtree(tmpdirpath)

    # ---------------------------------------------------------------------
    # 3.b Season Cumulation from start of season to current dekad till end of season
    # ---------------------------------------------------------------------

    # Define output subproduct
    out_sub_prod_name = 'cum-of-' + starting_sprod
    output_sprod_group = proc_lists.proc_add_subprod_group("cum_prods")
    output_sprod = proc_lists.proc_add_subprod(out_sub_prod_name, "cum_prods", final=True,
                                               descriptive_name='Cumulation for ' + out_sub_prod_name,
                                               description='Cumulation for ' + out_sub_prod_name,
                                               frequency_id='e1dekad',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)

    # Generate prod_identifier (_fewsnet-rfe_cum-of-10d_FEWSNET-Africa-8km_2.0.tif) and subdir
    prod_ident_cum = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    subdir_ident_cum = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_cum():
        # Yield one (input_files, output_file) pair per in-season date.
        starting_files.sort()

        # Convert from string to int (for comparison)
        dekad_start = int(start_season)
        dekad_end = int(end_season)

        # Loop over all input files
        for file_t0 in starting_files:
            # Get current date (in format '19980901')
            date_t0 = functions.get_date_from_path_full(file_t0)
            # Extract from date-string the dekad/year as integer
            dekad_t0 = int(date_t0[4:])
            year_t0 = int(date_t0[0:4])

            in_season = False
            # Check if season goes across two years -> define year of
            # the season start (year_sos) and whether file_t0 is in-season
            if dekad_start < dekad_end:
                if dekad_t0 >= dekad_start and dekad_t0 <= dekad_end:
                    year_sos = year_t0
                    in_season = True
            else:
                if dekad_t0 >= dekad_start:
                    year_sos = year_t0
                    in_season = True
                if dekad_t0 <= dekad_end:
                    year_sos = year_t0 - 1
                    in_season = True

            # Trigger processing for every in-season date
            if in_season:
                # Define output filename
                output_file = es2_data_dir + subdir_ident_cum + date_t0 + prod_ident_cum
                # Get list of dates from start of season to current date
                list_dates = proc_functions.get_list_dates_for_dataset(prod, starting_sprod, version,
                                                                       start_date=str(year_sos) + start_season,
                                                                       end_date=date_t0)
                input_files = []
                missing_file = False
                for ldate in list_dates:
                    # Append the file to list if it exists ...
                    if os.path.isfile(input_dir + ldate + in_prod_ident):
                        input_files.append(input_dir + ldate + in_prod_ident)
                    # ... otherwise raise a warning and break
                    else:
                        logger.warning('Missing file for date {0}. Season not computed.'.format(ldate))
                        missing_file = True
                        break
                # Only cumulations with a complete set of inputs are yielded
                if not missing_file:
                    yield (input_files, output_file)

    @active_if(activate_cum_comput)
    @files(generate_parameters_cum)
    # Function to do actual computation from inputs to output
    def cum(input_files, output_file):
        # Ensure out subdirectory exists
        functions.check_output_dir(os.path.dirname(output_file))
        # If output_file is a list, force it to a string
        output_file = functions.list_to_element(output_file)
        # Prepare temporary working directory for intermediate results
        tmpdirpath = tempfile.mkdtemp()
        # Cumulated but not re-projected output
        tmp_output_file = tmpdirpath + os.path.sep + os.path.basename(output_file)

        # Call the function for cumulating
        args = {"input_file": input_files,
                "output_file": tmp_output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw"}
        raster_image_math.do_cumulate(**args)

        # Re-project directly onto the final output file (no masking here)
        # raster_image_math.do_reproject(agriculture_mask, tmp_reproj_file, 'SPOTV-SADC-1km', mapset)
        raster_image_math.do_reproject(tmp_output_file, output_file, native_mapset, mapset)

        # Remove temp directory
        shutil.rmtree(tmpdirpath)

    return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None, update_stats=False, nrt_products=True): # --------------------------------------------------------------------- # Create lists if proc_lists is None: proc_lists = functions.ProcLists() # Set DEFAULTS: all off activate_10danomalies_comput=0 # 10d anomalies activate_monthly_comput=0 # monthly cumulation activate_monanomalies_comput=0 # monthly anomalies activate_10dstats_comput=0 # 10d stats activate_monstats_comput=0 # 1mon stats # switch wrt groups - according to options if nrt_products: activate_10danomalies_comput=1 # 10d anomalies activate_monthly_comput=1 # monthly cumulation activate_monanomalies_comput=1 # monthly anomalies if update_stats: activate_10dstats_comput= 1 # 10d stats activate_monstats_comput=1 # 1mon stats # switch wrt single products: not to be changed !! activate_10davg_comput=1 activate_10dmin_comput=1 activate_10dmax_comput=1 activate_10ddiff_comput=1 activate_10dperc_comput=1 activate_10dnp_comput=1 activate_1moncum_comput=1 activate_1monavg_comput=1 activate_1monmin_comput=1 activate_1monmax_comput=1 activate_1mondiff_comput=1 activate_1monperc_comput=1 activate_1monnp_comput=1 es2_data_dir = es_constants.es2globals['processing_dir']+os.path.sep # --------------------------------------------------------------------- # Define input files in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext) logger.debug('Base data directory is: %s' % es2_data_dir) input_dir = es2_data_dir+ \ functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset) logger.debug('Input data directory is: %s' % input_dir) if starting_dates is not None: starting_files = [] for my_date in starting_dates: starting_files.append(input_dir+my_date+in_prod_ident) else: starting_files=input_dir+"*"+in_prod_ident logger.debug('Starting files wild card is: %s' % starting_files) # 
--------------------------------------------------------------------- # Average output_sprod_group=proc_lists.proc_add_subprod_group("10dstats") output_sprod=proc_lists.proc_add_subprod("10davg", "10dstats", final=False, descriptive_name='10d Average', description='Average rainfall for dekad', frequency_id='e1dekad', date_format='MMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out=["{subpath[0][5]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident] @active_if(activate_10dstats_comput, activate_10davg_comput) @collate(starting_files, formatter(formatter_in),formatter_out) def std_precip_10davg(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_avg_image(**args) # --------------------------------------------------------------------- # Minimum output_sprod=proc_lists.proc_add_subprod("10dmin", "10dstats", final=False, descriptive_name='10d Minimum', description='Minimum rainfall for dekad', frequency_id='e1dekad', date_format='MMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out=["{subpath[0][5]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident] @active_if(activate_10dstats_comput, activate_10dmin_comput) @collate(starting_files, formatter(formatter_in),formatter_out) def std_precip_10dmin(input_file, 
output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_min_image(**args) # --------------------------------------------------------------------- # Maximum output_sprod=proc_lists.proc_add_subprod("10dmax", "10dstats", final=False, descriptive_name='10d Maximum', description='Maximum rainfall for dekad', frequency_id='e1dekad', date_format='MMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out=["{subpath[0][5]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident] @active_if(activate_10dstats_comput, activate_10dmax_comput) @collate(starting_files, formatter(formatter_in),formatter_out) def std_precip_10dmax(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_max_image(**args) # --------------------------------------------------------------------- # 10dDiff output_sprod_group=proc_lists.proc_add_subprod_group("10anomalies") output_sprod=proc_lists.proc_add_subprod("10ddiff", "10anomalies", final=False, descriptive_name='10d Absolute Difference', description='10d Absolute Difference vs. 
LTA', frequency_id='e1dekad', date_format='YYYYMMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) # Starting files + avg formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident ancillary_sprod = "10davg" ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext) ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived',version, mapset) ancillary_input="{subpath[0][5]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident @follows(std_precip_10davg) @active_if(activate_10danomalies_comput, activate_10ddiff_comput) @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out) def std_precip_10ddiff(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_oper_subtraction(**args) # --------------------------------------------------------------------- # 10dperc output_sprod=proc_lists.proc_add_subprod("10dperc", "10anomalies", final=False, descriptive_name='10d Percent Difference', description='10d Percent Difference vs. 
LTA', frequency_id='e1dekad', date_format='YYYYMMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) # Starting files + avg formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident ancillary_sprod = "10davg" ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext) ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version, mapset) ancillary_input="{subpath[0][5]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident @follows(std_precip_10davg) @active_if(activate_10danomalies_comput, activate_10dperc_comput) @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out) def std_precip_10dperc(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file[0], "avg_file": input_file[1], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_compute_perc_diff_vs_avg(**args) # --------------------------------------------------------------------- # 10dnp output_sprod=proc_lists.proc_add_subprod("10dnp", "10anomalies", final=False, descriptive_name='10d Normalized Anomaly', description='10d Normalized Anomaly', frequency_id='e1dekad', date_format='YYYYMMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) # Starting files + min + max 
formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident ancillary_sprod_1 = "10dmin" ancillary_sprod_ident_1 = functions.set_path_filename_no_date(prod, ancillary_sprod_1, mapset, version, ext) ancillary_subdir_1 = functions.set_path_sub_directory(prod, ancillary_sprod_1, 'Derived',version, mapset) ancillary_input_1="{subpath[0][5]}"+os.path.sep+ancillary_subdir_1+"{MMDD[0]}"+ancillary_sprod_ident_1 ancillary_sprod_2 = "10dmax" ancillary_sprod_ident_2 = functions.set_path_filename_no_date(prod, ancillary_sprod_2, mapset, version, ext) ancillary_subdir_2 = functions.set_path_sub_directory(prod, ancillary_sprod_2, 'Derived',version, mapset) ancillary_input_2="{subpath[0][5]}"+os.path.sep+ancillary_subdir_2+"{MMDD[0]}"+ancillary_sprod_ident_2 @follows(std_precip_10dmin, std_precip_10dmax) @active_if(activate_10danomalies_comput, activate_10dnp_comput) @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input_1, ancillary_input_2), formatter_out) def std_precip_10dnp(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file[0], "min_file": input_file[1],"max_file": input_file[2], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_make_vci(**args) # --------------------------------------------------------------------- # 1moncum output_sprod_group=proc_lists.proc_add_subprod_group("monthly") output_sprod=proc_lists.proc_add_subprod("1moncum", "monthly", final=False, descriptive_name='Monthly Cumulate', description='Monthly Cumulate Precipitation', frequency_id='e1month', date_format='YYYYMMDD', masked=False, timeseries_role='1mon', active_default=True) out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext) output_subdir = 
functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) # inputs: files from same months formatter_in="(?P<YYYYMM>[0-9]{6})(?P<DD>[0-9]{2})"+in_prod_ident formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYYMM[0]}"+'01'+out_prod_ident # @follows(std_precip_10davg) @active_if(activate_monthly_comput, activate_1moncum_comput) @collate(starting_files, formatter(formatter_in), formatter_out) def std_precip_1moncum(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file,"output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_cumulate(**args) # --------------------------------------------------------------------- # Monthly Average new_input_subprod='1moncum' in_prod_ident= functions.set_path_filename_no_date(prod, new_input_subprod, mapset, version, ext) output_sprod_group=proc_lists.proc_add_subprod_group("monstat") output_sprod=proc_lists.proc_add_subprod("1monavg", "monstat", final=False, descriptive_name='Monthly Average', description='Monthly Average Precipitation', frequency_id='e1month', date_format='MMDD', masked=False, timeseries_role='1mon', active_default=True) out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out=["{subpath[0][5]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident] @follows(std_precip_1moncum) @active_if(activate_monstats_comput, activate_1monavg_comput) @collate(std_precip_1moncum, formatter(formatter_in),formatter_out) def std_precip_1monavg(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, 
"output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_avg_image(**args) # --------------------------------------------------------------------- # Monthly Minimum output_sprod=proc_lists.proc_add_subprod("1monmin", "monstat",final=False, descriptive_name='Monthly Minimum', description='Monthly Minimum Precipitation', frequency_id='e1month', date_format='MMDD', masked=False, timeseries_role='1mon', active_default=True) out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out=["{subpath[0][5]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident] @follows(std_precip_1moncum) @active_if(activate_monstats_comput, activate_1monmin_comput) @collate(std_precip_1moncum, formatter(formatter_in),formatter_out) def std_precip_1monmin(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_min_image(**args) # --------------------------------------------------------------------- # Monthly Maximum output_sprod=proc_lists.proc_add_subprod("1monmax", "monstat",final=False, descriptive_name='Monthly Maximum', description='Monthly Maximum Precipitation', frequency_id='e1month', date_format='MMDD', masked=False, timeseries_role='1mon', active_default=True) out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) reg_ex_in="[0-9]{4}([0-9]{4})"+in_prod_ident formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out=["{subpath[0][5]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident] 
@follows(std_precip_1moncum) @active_if(activate_monstats_comput, activate_1monmax_comput) @collate(std_precip_1moncum, formatter(formatter_in),formatter_out) def std_precip_1monmax(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_max_image(**args) # --------------------------------------------------------------------- # 1monDiff output_sprod_group=proc_lists.proc_add_subprod_group("monanomalies") output_sprod=proc_lists.proc_add_subprod("1mondiff", "monanomalies", final=False, descriptive_name='Monthly Absolute Difference', description='Monthly Absolute Difference Precipitation', frequency_id='e1month', date_format='YYYYMMDD', masked=False, timeseries_role='1mon', active_default=True) out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) # inputs # Starting files + avg formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident ancillary_sprod = "1monavg" ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext) ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version, mapset) ancillary_input="{subpath[0][5]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident @follows(std_precip_1monavg) @active_if(activate_monanomalies_comput, activate_1mondiff_comput) @transform(std_precip_1moncum, formatter(formatter_in), add_inputs(ancillary_input), formatter_out) def std_precip_1mondiff(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = 
{"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_oper_subtraction(**args) # --------------------------------------------------------------------- # 1monperc output_sprod=proc_lists.proc_add_subprod("1monperc", "monanomalies", final=False, descriptive_name='Monthly Percent Difference', description='Monthly Percent Difference Precipitation', frequency_id='e1month', date_format='YYYYMMDD', masked=False, timeseries_role='1mon', active_default=True) out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) # inputs # Starting files + avg formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident ancillary_sprod = "1monavg" ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext) ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived',version, mapset) ancillary_input="{subpath[0][5]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident @follows(std_precip_1monavg) @active_if(activate_monanomalies_comput, activate_1monperc_comput) @transform(std_precip_1moncum, formatter(formatter_in), add_inputs(ancillary_input), formatter_out) def std_precip_1monperc(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file[0], "avg_file": input_file[1], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_compute_perc_diff_vs_avg(**args) # --------------------------------------------------------------------- # 1monnp output_sprod=proc_lists.proc_add_subprod("1monnp", "monanomalies", final=False, descriptive_name='Monthly Normalized 
Anomaly', description='Monthly Normalized Anomaly Precipitation', frequency_id='e1month', date_format='YYYYMMDD', masked=False, timeseries_role='1mon', active_default=True) out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset) # Starting files + min + max formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident ancillary_sprod_1 = "1monmin" ancillary_sprod_ident_1 = functions.set_path_filename_no_date(prod, ancillary_sprod_1, mapset, version, ext) ancillary_subdir_1 = functions.set_path_sub_directory(prod, ancillary_sprod_1, 'Derived',version, mapset) ancillary_input_1="{subpath[0][5]}"+os.path.sep+ancillary_subdir_1+"{MMDD[0]}"+ancillary_sprod_ident_1 ancillary_sprod_2 = "1monmax" ancillary_sprod_ident_2 = functions.set_path_filename_no_date(prod, ancillary_sprod_2, mapset, version, ext) ancillary_subdir_2 = functions.set_path_sub_directory(prod, ancillary_sprod_2, 'Derived',version, mapset) ancillary_input_2="{subpath[0][5]}"+os.path.sep+ancillary_subdir_2+"{MMDD[0]}"+ancillary_sprod_ident_2 @follows(std_precip_1monmin, std_precip_1monmax) @active_if(activate_monanomalies_comput, activate_1monnp_comput) @transform(std_precip_1moncum, formatter(formatter_in), add_inputs(ancillary_input_1, ancillary_input_2), formatter_out) def std_precip_1monnp(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file[0], "min_file": input_file[1],"max_file": input_file[2], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"} raster_image_math.do_make_vci(**args) return proc_lists
def create_pipeline(prod,
                    starting_sprod,
                    mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None,
                    logger=None):
    """
    Build the ruffus pipeline deriving 8-day averages and monthly statistics
    (average, climatology, anomaly) from a daily ingested product.

    Args:
        prod: product code of the input product.
        starting_sprod: sub-product code of the ingested input.
        mapset: mapset code.
        version: product version string.
        starting_dates: accepted for signature compatibility but not used here
            (date filtering relies on my_date, which stays None below).
        proc_lists: optional functions.ProcLists accumulator; a new one is
            created when None.
        proc_lists: see above.
        logger: logger used by modis_monavg when skipping the current month.
            NOTE(review): defaults to None but is dereferenced unconditionally
            in that branch — would raise AttributeError; confirm callers
            always pass a logger.

    Returns:
        proc_lists, with the derived sub-products/groups registered.
    """
    my_date = None

    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Activation flags for the individual derived products.
    # 8d cumul
    activate_8dayavg_comput = 1
    # monthly
    activate_monavg_comput = 1
    activate_monclim_comput = 0
    activate_monanom_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, starting_sprod, 'Ingest', version, mapset)

    # my_date is always None here, so the glob matches all dates.
    if my_date is not None:
        starting_files = input_dir + my_date + "*" + in_prod_ident
    else:
        starting_files = input_dir + "*" + in_prod_ident

    # ---------------------------------------------------------------------
    # 8-days Average
    output_sprod_group_8day = proc_lists.proc_add_subprod_group("8days")
    # NOTE(review): date_format 'YYYMMMMDD' (9 chars) looks like a typo for
    # 'YYYYMMDD' — confirm against the sub-product registry.
    output_sprod_8day = proc_lists.proc_add_subprod(
        "8daysavg",
        "8days",
        final=False,
        descriptive_name='8Day average',
        description='8Day average',
        frequency_id='',
        date_format='YYYMMMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    out_prod_ident_8day = functions.set_path_filename_no_date(
        prod, output_sprod_8day, mapset, version, ext)
    output_subdir_8day = functions.set_path_sub_directory(
        prod, output_sprod_8day, 'Derived', version, mapset)

    def generate_parameters_8days():
        # Yield (input_file_list, output_file) pairs, one per complete
        # 8-day period found on disk (the current period is excluded).
        years_periods_list = []
        # Look for all input files in input_dir
        input_files = glob.glob(starting_files)
        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydate_year = str(mydate)[0:4]
            period_nbr = functions.conv_date_2_8days(mydate_yyyymmdd)
            if (mydate_year, period_nbr) not in years_periods_list:
                years_periods_list.append((mydate_year, period_nbr))

        periods_sorted = sorted(years_periods_list)

        # Compute the 'julian' dakad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        year_now = today.strftime('%Y')
        period_now = functions.conv_date_2_8days(today_str)

        # Generate one task per (year, period) found above
        for year, period in periods_sorted:
            # Exclude the current dekad
            if period != period_now or year != year_now:
                file_list = []
                # First day-of-year of the 8-day period, zero-padded
                jdoy_period = "{0:03d}".format(1 + 8 * (int(period) - 1))
                mmdd_period = functions.conv_date_yyyydoy_2_yyyymmdd(
                    year + jdoy_period)
                output_file = es_constants.processing_dir + output_subdir_8day + os.path.sep + mmdd_period + out_prod_ident_8day

                for myfile in input_files:
                    basename = os.path.basename(myfile)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    mydate_year = mydate_yyyymmdd[0:4]
                    period_nbr = functions.conv_date_2_8days(
                        mydate_yyyymmdd[0:8])
                    if period_nbr == period and mydate_year == year:
                        file_list.append(myfile)

                # Special case of last period of the year: add few days of next year
                if period == 46:
                    next_year = "{0:04d}".format(int(year) + 1)
                    # Leap years need one fewer day from the next year to
                    # complete the 8-day window
                    if calendar.isleap(int(year)):
                        add_days = ('0101', '0102', '0103')
                    else:
                        add_days = ('0101', '0102', '0103', '0104')
                    for day in add_days:
                        date = next_year + day
                        matches = [
                            x for x in input_files
                            if fnmatch.fnmatch(x, '*{0}*'.format(date))
                        ]
                        # Fixes ES2-35 (see YouTrack)
                        if len(matches) > 0:
                            file_list.append(matches[0])
                yield (sorted(file_list), output_file)

    @active_if(activate_8dayavg_comput)
    @files(generate_parameters_8days)
    def modis_8dayavg(input_file, output_file):
        # Average all files of one 8-day period into a single GTIFF.
        output_file = functions.list_to_element(output_file)
        out_filename = os.path.basename(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Monthly Average for a given month
    output_sprod_group = proc_lists.proc_add_subprod_group("monstats")
    output_sprod = proc_lists.proc_add_subprod(
        "monavg",
        "monstats",
        final=False,
        descriptive_name='Monthly average',
        description='Monthly average',
        frequency_id='',
        date_format='YYYMMMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    # Group daily files by year+month; output is dated to the 1st of the month
    formatter_in = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" + '01' + out_prod_ident

    @active_if(activate_monavg_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def modis_monavg(input_file, output_file):
        # Average all daily files of one month; the (incomplete) current
        # month is skipped.
        output_file = functions.list_to_element(output_file)
        out_filename = os.path.basename(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        str_date = out_filename[0:6]
        today = datetime.date.today()
        today_yyyymm = today.strftime('%Y%m')

        #expected_ndays=functions.get_number_days_month(str_date)
        #current_ndays=len(input_file)
        if str_date == today_yyyymm:
            # NOTE(review): crashes if logger is None (default) — see docstring.
            logger.info(
                'Do not perform computation for current month {0}. Skip'.
                format(str_date))
        else:
            args = {
                "input_file": input_file,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw"
            }
            raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Monthly Climatology for all years
    output_sprod = proc_lists.proc_add_subprod(
        "monclim",
        "monstats",
        final=False,
        descriptive_name='Monthly climatology',
        description='Monthly climatology',
        frequency_id='',
        date_format='YYYMMMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    # Input switches to the monavg product computed above
    new_input_subprod = 'monavg'
    new_in_prod_ident = functions.set_path_filename_no_date(
        prod, new_input_subprod, mapset, version, ext)
    new_input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, new_input_subprod, 'Derived', version, mapset)
    new_starting_files = new_input_dir + "*" + new_in_prod_ident

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)
    # Group monthly averages across years by MMDD
    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + new_in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident

    @active_if(activate_monclim_comput)
    @collate(new_starting_files, formatter(formatter_in), formatter_out)
    def modis_chla_monclim(input_file, output_file):
        # Average the same calendar month across all years.
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', \
                "options": "compress=lzw"}
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Monthly Anomaly for a given monthly
    output_sprod = proc_lists.proc_add_subprod(
        "monanom",
        "monstats",
        final=False,
        descriptive_name='Monthly anomaly',
        description='Monthly anomaly',
        frequency_id='',
        date_format='YYYMMMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    # Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + new_in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    # Ancillary input: the climatology for the matching MMDD
    ancillary_sprod = "monclim"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    @active_if(activate_monanom_comput)
    @transform(new_starting_files, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    def modis_chla_mondiff(input_file, output_file):
        # Anomaly = monthly average minus climatology (subtraction).
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_oper_subtraction(**args)

    return proc_lists
def processing_merge(pipeline_run_level=0,
                     pipeline_printout_level=0,
                     input_products='',
                     output_product='',
                     mapset='',
                     logfile=None):
    """
    Merge several input products into one output product by creating, for
    each date of each input, a symbolic link under the output product tree.

    Args:
        pipeline_run_level: accepted for interface compatibility; not used here.
        pipeline_printout_level: accepted for interface compatibility; not used here.
        input_products: iterable of records, each exposing productcode,
            subproductcode, version, start_date and end_date.
        output_product: one-element sequence describing the merged product
            (productcode, subproductcode, version, mapsetcode).
        mapset: accepted for interface compatibility; the output mapset is
            taken from output_product instead.
        logfile: when set, progress is also written through log.my_logger.

    Returns:
        (list_subprods, list_subprod_groups) from the ProcLists accumulator.
    """
    if logfile:
        spec_logger = log.my_logger(logfile)
        spec_logger.info("Entering routine %s" % 'processing_merge')

    # Dummy return arguments
    proc_lists = functions.ProcLists()
    list_subprods = proc_lists.list_subprods
    list_subprod_groups = proc_lists.list_subprod_groups

    es2_data_dir = es_constants.processing_dir + os.path.sep

    # Do some checks on the integrity of the inputs

    # Manage output_product data
    out_product_code = output_product[0].productcode
    out_sub_product_code = output_product[0].subproductcode
    out_version = output_product[0].version
    out_mapset = output_product[0].mapsetcode

    out_subdir = functions.set_path_sub_directory(out_product_code, out_sub_product_code, 'Ingest', out_version, out_mapset)
    out_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code, out_mapset, out_version, ext)
    out_dir = es2_data_dir + out_subdir

    # Check the output product directory
    functions.check_output_dir(out_dir)

    # Fill the processing list -> some fields to be taken by innput products
    output_sprod_group = proc_lists.proc_add_subprod_group("merged")
    output_sprod = proc_lists.proc_add_subprod(out_sub_product_code, "merged", final=False,
                                               descriptive_name='undefined',
                                               description='undefined',
                                               frequency_id='e1dekad',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)

    # Loop over the input products:
    for input in input_products:

        # Extract info from input product
        product_code = input.productcode
        sub_product_code = input.subproductcode
        version = input.version
        start_date = input.start_date
        end_date = input.end_date
        product_info = querydb.get_product_out_info_connect(productcode=product_code,
                                                            subproductcode=sub_product_code,
                                                            version=version)
        prod_type = product_info[0].product_type

        in_subdir = functions.set_path_sub_directory(product_code, sub_product_code, prod_type, version, out_mapset)
        # NOTE(review): in_prod_ident is built from the OUTPUT product/subproduct
        # codes (with the input version) while in_subdir uses the INPUT codes.
        # If input files are named after their own product codes this lookup
        # would never match — confirm whether this mix is intentional (the same
        # pattern appears elsewhere in this file).
        in_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code, out_mapset, version, ext)

        # Create the list of dates -> returns empty if start==end==None
        list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code, version,
                                                               start_date=start_date, end_date=end_date)
        # If list_dates == None, look at all existing files
        if list_dates is None:
            print ('To be Done !!!')
        # Otherwise, build list of files from list of dates
        else:
            for my_date in list_dates:
                in_file_path = es2_data_dir + in_subdir + my_date + in_prod_ident
                out_file_path = out_dir + my_date + out_prod_ident

                # Create the link
                status = functions.create_sym_link(in_file_path, out_file_path, force=False)
                if status == 0 and logfile:
                    spec_logger.info("Merged file %s created" % out_file_path)

    return list_subprods, list_subprod_groups
def do_standard_deviation(self):
    """
    Register the standard-deviation derived sub-product and prepare the
    ruffus formatter strings (self.formatter_in / self.formatter_out) plus
    the ancillary average input (self.ancillary_input), for either the
    dekadal ('10d') or the monthly flavour of the product.
    """
    dekadal = self.frequency == '10d'

    # Branch-specific registration and inputs
    if dekadal:
        sprod = self.proc_lists.proc_add_subprod(
            "10dstd",
            "10dstats",
            final=False,
            descriptive_name='10d Standard deviation ' + self.subproduct_code,
            description='Standard deviation ' + self.subproduct_code,
            frequency_id='e1dekad',
            date_format='MMDD',
            masked=False,
            timeseries_role=self.starting_sprod,  # '10d'
            # display_index=5,
            active_default=True)
        source_ident = self.in_prod_ident
        avg_sprod = "10davg"
    else:
        sprod = self.proc_lists.proc_add_subprod(
            "1monstd",
            "monthly_stats",
            final=False,
            descriptive_name='Monthly Standard deviation ' + self.subproduct_code,
            description='Monthly Standard deviation ' + self.subproduct_code,
            frequency_id='e1month',
            date_format='MMDD',
            masked=False,
            timeseries_role=self.input_subprod_monthly,  # '10d'
            # display_index=115,
            active_default=True)
        source_ident = self.in_prod_ident_monthly
        avg_sprod = "1monavg"

    # Common output path pieces
    out_ident = functions.set_path_filename_no_date(
        self.prod, sprod, self.mapset, self.version, self.ext)
    out_subdir = functions.set_path_sub_directory(
        self.prod, sprod, 'Derived', self.version, self.mapset)

    # Group inputs across years by MMDD
    self.formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + source_ident
    out_template = "{subpath[0][5]}" + os.path.sep + out_subdir + "{MMDD[0]}" + out_ident
    # The dekadal variant historically wraps the output template in a list
    self.formatter_out = [out_template] if dekadal else out_template

    # Ancillary input: the matching multi-annual average
    avg_ident = functions.set_path_filename_no_date(
        self.prod, avg_sprod, self.mapset, self.version, self.ext)
    avg_subdir = functions.set_path_sub_directory(
        self.prod, avg_sprod, 'Derived', self.version, self.mapset)
    self.ancillary_input = "{subpath[0][5]}" + os.path.sep + avg_subdir + "{MMDD[0]}" + avg_ident
def create_pipeline(prod,
                    starting_sprod,
                    mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None):
    """
    Build the ruffus pipeline computing cumulated precipitation products
    (currently only the 3-month cumul; the 6-month variant is kept as
    commented-out reference code).

    Args:
        prod: product code of the input product.
        starting_sprod: sub-product code of the derived input.
        mapset: mapset code.
        version: product version string.
        starting_dates: optional list of YYYYMMDD strings restricting the
            input files; when None all files on disk are used.
        proc_lists: optional functions.ProcLists accumulator; created when None.

    Returns:
        proc_lists, with the derived sub-products/groups registered.
    """
    # ---------------------------------------------------------------------
    # Create lists to store definition of the derived products, and their
    # groups
    # ---------------------------------------------------------------------
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # ---------------------------------------------------------------------
    # Define and assign the flags to control the individual derived products
    # and the groups. NOT to be changed by the User
    # ---------------------------------------------------------------------

    # Set DEFAULTS: all off
    activate_cumul_comput = 1  # cumulated products
    activate_spi_comput = 1  # spi indicators

    # Set DEFAULTS: all off
    activate_cumul_3mon_comput = 1  # cumulated product 3mon
    activate_cumul_6mon_comput = 1  # cumulated product 6mon
    activate_cumul_1year_comput = 1  # cumulated product 1year

    activate_spi_1mon_comput = 1  # spi indicator 1mon
    activate_spi_3mon_comput = 1  # spi indicator 3mon
    activate_spi_6mon_comput = 1  # spi indicator 6mon
    activate_spi_1year_comput = 1  # spi indicator 1year

    # switch wrt groups - according to options

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files from the starting_sprod and starting_dates arguments
    # ---------------------------------------------------------------------
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    #logger.debug('Base data directory is: %s' % es2_data_dir)
    input_dir = es2_data_dir+ \
                functions.set_path_sub_directory(prod, starting_sprod, 'Derived', version, mapset)

    # Explicit dates -> build a list of existing files; otherwise a glob pattern
    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            if os.path.isfile(input_dir + my_date + in_prod_ident):
                starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = input_dir + "*" + in_prod_ident

    # Look for all input files in input_dir, and sort them
    if starting_dates is not None:
        input_files = starting_files
    else:
        input_files = glob.glob(starting_files)

    # ---------------------------------------------------------------------
    # Cumulated products - 3mon
    # ---------------------------------------------------------------------
    output_sprod_group = proc_lists.proc_add_subprod_group("cumul")
    output_sprod = proc_lists.proc_add_subprod(
        "3mon",
        "cumul",
        final=False,
        descriptive_name='3-monthly Precipitation',
        description='Precipitation for 3 months',
        frequency_id='e3month',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    out_prod_ident_3moncum = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_3moncum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_3moncum():
        # Yield (input_file_list, output_file) pairs, one per 3-month window
        # ending at each available date.

        # Number of months to consider
        n_mon = 3
        dates_list = []

        # Extract and sort all dates
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            dates_list.append(mydate_yyyymmdd)

        dates_list = sorted(dates_list)

        # loop from the 'n_mon'-1 date to the last date - this is the period end-limit
        # NOTE(review): range(..., len(dates_list) - 1) excludes the most
        # recent date from ever being a period end — confirm whether that is
        # intentional (e.g. to skip a possibly incomplete month) or an
        # off-by-one.
        for date_index in range(n_mon - 1, len(dates_list) - 1):

            mydate = dates_list[date_index]
            prev_date = dates_list[date_index - n_mon + 1]
            file_list = []
            # Get month-date and
            m_1 = datetime.date(int(mydate[0:4]), int(mydate[4:6]), 1)
            m_2 = datetime.date(int(prev_date[0:4]), int(prev_date[4:6]), 1)
            delta = m_1 - m_2
            # Check there are no missing month, i.e. tot_delta < 155 days
            if delta.days <= (31 * (n_mon - 1)):
                for curr_index in range(0, n_mon):
                    curr_date = dates_list[date_index - curr_index]
                    if os.path.isfile(input_dir + curr_date + in_prod_ident):
                        file_list.append(input_dir + curr_date + in_prod_ident)

                output_file = es_constants.processing_dir + output_subdir_3moncum + os.path.sep + mydate + out_prod_ident_3moncum
                yield (file_list, output_file)
            else:
                print(
                    'At least 1 month is missing for period ending {0}'.format(
                        mydate))

    @active_if(activate_cumul_3mon_comput)
    @files(generate_parameters_3moncum)
    def std_precip_3moncum(input_file, output_file):
        # Sum the three monthly files into one cumulated GTIFF.
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_cumulate(**args)

    # # ---------------------------------------------------------------------
    # # Cumulated products - 6mon
    # # ---------------------------------------------------------------------
    #
    # output_sprod_group=proc_lists.proc_add_subprod_group("cumul")
    # output_sprod=proc_lists.proc_add_subprod("6mon", "cumul", final=False,
    #                                          descriptive_name='3-monthly Precipitation',
    #                                          description='Precipitation for 3 months',
    #                                          frequency_id='e3month',
    #                                          date_format='YYYYMMDD',
    #                                          masked=False,
    #                                          timeseries_role='',
    #                                          active_default=True)
    #
    # out_prod_ident_6moncum = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    # output_subdir_6moncum = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset)
    #
    # def generate_parameters_6moncum():
    #
    #     # Number of months to consider
    #     n_mon = 6
    #     dates_list = []
    #
    #     # Extract and sort all dates
    #     for input_file in input_files:
    #         basename=os.path.basename(input_file)
    #         mydate=functions.get_date_from_path_filename(basename)
    #         mydate_yyyymmdd=str(mydate)[0:8]
    #         dates_list.append(mydate_yyyymmdd)
    #
    #     dates_list = sorted(dates_list)
    #     # loop from the 'n_mon'-1 date to the last date - this is the period end-limit
    #     for date_index in range(n_mon-1,len(dates_list)-1):
    #
    #         mydate = dates_list[date_index]
    #         prev_date = dates_list[date_index-n_mon+1]
    #         file_list = []
    #         # Get month-date and
    #         m_1 = datetime.date(int(mydate[0:4]),int(mydate[4:6]),1)
    #         m_2 = datetime.date(int(prev_date[0:4]),int(prev_date[4:6]),1)
    #         delta = m_1 - m_2
    #         # Check there are no missing month, i.e. tot_delta < 155 days
    #         if delta.days <=(31*(n_mon-1)):
    #             for curr_index in range(0,n_mon):
    #                 curr_date = dates_list[date_index-curr_index]
    #                 if os.path.isfile(input_dir+curr_date+in_prod_ident):
    #                     file_list.append(input_dir+curr_date+in_prod_ident)
    #
    #             output_file=es_constants.processing_dir+output_subdir_6moncum+os.path.sep+mydate+out_prod_ident_6moncum
    #             yield (file_list, output_file)
    #         else:
    #             print 'At least 1 month is missing for period ending {0}'.format(mydate)
    #
    # @active_if(activate_cumul_6mon_comput)
    # @files(generate_parameters_6moncum)
    # def std_precip_6moncum(input_file, output_file):
    #
    #     output_file = functions.list_to_element(output_file)
    #     functions.check_output_dir(os.path.dirname(output_file))
    #     args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
    #     raster_image_math.do_cumulate(**args)

    # End of pipeline definition
    return proc_lists
def do_normalized_anomaly(self):
    """
    Register the normalized-anomaly derived sub-product and prepare the
    ruffus formatter strings (self.formatter_in / self.formatter_out) plus
    the two ancillary inputs (self.ancillary_input_1 = historical minimum,
    self.ancillary_input_2 = historical maximum), for either the dekadal
    ('10d') or the monthly flavour of the product.
    """
    dekadal = self.frequency == '10d'

    # Branch-specific registration and inputs
    if dekadal:
        sprod = self.proc_lists.proc_add_subprod(
            "10dna",
            "10anomalies",
            final=False,
            descriptive_name='10d Normalized Anomaly ' + self.subproduct_code,
            description='10d Normalized Anomaly ' + self.subproduct_code,
            frequency_id='e1dekad',
            date_format='YYYYMMDD',
            masked=False,
            timeseries_role=self.starting_sprod,  # '10d'
            # display_index=9,
            active_default=True)
        source_ident = self.in_prod_ident
        min_sprod, max_sprod = "10dmin", "10dmax"
    else:
        sprod = self.proc_lists.proc_add_subprod(
            "1monna",
            "monanomalies",
            final=False,
            descriptive_name='Monthly Normalized Anomaly ' + self.subproduct_code,
            description='Monthly Normalized Anomaly ' + self.subproduct_code,
            frequency_id='e1month',
            date_format='YYYYMMDD',
            masked=False,
            timeseries_role=self.input_subprod_monthly,  # '10d'
            # display_index=119,
            active_default=True)
        source_ident = self.in_prod_ident_monthly
        min_sprod, max_sprod = "1monmin", "1monmax"

    # Common output path pieces
    out_ident = functions.set_path_filename_no_date(
        self.prod, sprod, self.mapset, self.version, self.ext)
    out_subdir = functions.set_path_sub_directory(
        self.prod, sprod, 'Derived', self.version, self.mapset)

    # Inputs: starting files plus historical min and max
    self.formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + source_ident
    self.formatter_out = "{subpath[0][5]}" + os.path.sep + out_subdir + "{YYYY[0]}{MMDD[0]}" + out_ident

    min_ident = functions.set_path_filename_no_date(
        self.prod, min_sprod, self.mapset, self.version, self.ext)
    min_subdir = functions.set_path_sub_directory(
        self.prod, min_sprod, 'Derived', self.version, self.mapset)
    self.ancillary_input_1 = "{subpath[0][5]}" + os.path.sep + min_subdir + "{MMDD[0]}" + min_ident

    max_ident = functions.set_path_filename_no_date(
        self.prod, max_sprod, self.mapset, self.version, self.ext)
    max_subdir = functions.set_path_sub_directory(
        self.prod, max_sprod, 'Derived', self.version, self.mapset)
    self.ancillary_input_2 = "{subpath[0][5]}" + os.path.sep + max_subdir + "{MMDD[0]}" + max_ident
def push_data_ftp(dry_run=False,
                  user=None,
                  psw=None,
                  url=None,
                  trg_dir=None,
                  masked=True):
    """
    Synchronize data towards an ftp server (only for JRC).
    It replaces, since the new srv-ies-ftp.jrc.it ftp is set, the bash
    script mirror_to_ftp.sh.

    Configuration: it looks at all 'non-masked' products and pushes them.
    For the mapsets, find what is in the filesystem, and push only the
    'largest'. It uses a command like:

      lftp -e "mirror -RLe <src> <trg>;exit" -u user:psw sftp://host""

    Args:
        dry_run: accepted for interface compatibility; currently unused.
        user, psw, url, trg_dir: ftp credentials/target; any left as None is
            taken from config.server_ftp.
        masked: passed to querydb.get_subproduct to select datasets.

    Returns:
        1 if the ftp configuration module cannot be imported, 0 otherwise.
    """
    spec_logger = log.my_logger('apps.es2system.push_data_ftp')

    try:
        from config import server_ftp
    # Narrowed from a bare `except:` — only a missing/broken config module
    # should abort; other errors must surface.
    except ImportError:
        logger.warning('Configuration file for ftp sync not found. Exit')
        return 1

    if user is None:
        user = server_ftp.server['user']
    if psw is None:
        psw = server_ftp.server['psw']
    if url is None:
        url = server_ftp.server['url']
    if trg_dir is None:
        trg_dir = server_ftp.server['data_dir']

    # Ad-hoc file for the lftp command output (beside the standard logger);
    # only referenced by the commented-out command variant below.
    logfile = es_constants.es2globals['log_dir'] + 'push_data_ftp.log'

    logger.debug("Entering routine %s" % 'push_data_ftp')

    # Loop over 'not-masked' products
    products = querydb.get_products(masked=False)

    # produts=products[21:23]   # test a subset
    for row in products:
        prod_dict = functions.row2dict(row)
        productcode = prod_dict['productcode']
        version = prod_dict['version']

        spec_logger.info('Working on product {}/{}'.format(
            productcode, version))

        # Check if it is in the list of 'exclusions' defined in ./config/server_ftp.py
        key = '{}/{}'.format(productcode, version)
        skip = key in server_ftp.exclusions
        if skip:
            logger.debug('Do not sync for {}/{}'.format(productcode, version))

        p = Product(product_code=productcode, version=version)
        all_prod_mapsets = p.mapsets
        all_prod_subproducts = p.subproducts

        # Check there is at least one mapset and one subproduct
        if len(all_prod_mapsets) > 0 and len(
                all_prod_subproducts) > 0 and not skip:

            # In case of several mapsets, check if there is a 'larger' one
            if len(all_prod_mapsets) > 1:
                mapset_to_use = []
                for my_mapset in all_prod_mapsets:
                    mapset_info = querydb.get_mapset(mapsetcode=my_mapset,
                                                     allrecs=False)
                    if hasattr(mapset_info, "mapsetcode"):
                        my_mapobj = MapSet()
                        my_mapobj.assigndb(my_mapset)
                        larger_mapset = my_mapobj.get_larger_mapset()
                        if larger_mapset is not None:
                            if larger_mapset not in mapset_to_use:
                                mapset_to_use.append(larger_mapset)
                        else:
                            if my_mapset not in mapset_to_use:
                                mapset_to_use.append(my_mapset)
            else:
                mapset_to_use = all_prod_mapsets

            # Loop over existing mapset
            for mapset in mapset_to_use:
                all_mapset_datasets = p.get_subproducts(mapset=mapset)

                # Loop over existing subproducts
                for subproductcode in all_mapset_datasets:
                    # Get info - and ONLY for NOT masked products
                    dataset_info = querydb.get_subproduct(
                        productcode=productcode,
                        version=version,
                        subproductcode=subproductcode,
                        masked=masked)  # -> TRUE means only NOT masked sprods

                    if dataset_info is not None:
                        dataset_dict = functions.row2dict(dataset_info)
                        dataset_dict['mapsetcode'] = mapset

                        logger.debug('Working on {}/{}/{}/{}'.format(
                            productcode, version, mapset, subproductcode))

                        subdir = functions.set_path_sub_directory(
                            productcode, subproductcode,
                            dataset_dict['product_type'], version, mapset)

                        # NOTE(review): data_dir is not defined in this
                        # function — presumably a module-level constant
                        # (the processing dir); confirm.
                        source = data_dir + subdir
                        target = trg_dir + subdir

                        # NOTE(review): command is run through a shell; a psw
                        # containing shell metacharacters would break/inject.
                        # Consider subprocess.run([...], shell=False).
                        # command = 'lftp -e "mirror -RLe {} {};exit" -u {}:{} {}"" >> {}'.format(source,target,user,psw,url,logfile)
                        command = 'lftp -e "mirror -RLe {} {};exit" -u {}:{} {}"" >> /dev/null'.format(
                            source, target, user, psw, url)
                        logger.debug("Executing %s" % command)
                        spec_logger.info(
                            'Working on mapset/subproduct {}/{} \n'.format(
                                mapset, subproductcode))

                        try:
                            status = os.system(command)
                            if status:
                                logger.error("Error in executing %s" % command)
                                spec_logger.error("Error in executing %s" %
                                                  command)
                        # Narrowed from a bare `except:` so Ctrl-C still works.
                        except Exception:
                            logger.error(
                                'Error in executing command: {}'.format(
                                    command))
                            spec_logger.error(
                                'Error in executing command: {}'.format(
                                    command))

    # Explicit success code, consistent with the error path above
    # (backward-compatible: callers testing truthiness see falsy either way).
    return 0
def create_pipeline(starting_sprod):
    """Register the ruffus tasks deriving modis-chla monthly products.

    Builds three chained sub-products from the ingested daily files:
    monthly average (monavg), monthly climatology over all years (monclim)
    and monthly anomaly vs. climatology (monanom).

    NOTE(review): relies on module-level names defined elsewhere in this
    file: prod, mapset, version, ext, logger, the activate_* switches and
    the ruffus decorators (active_if, collate, transform, formatter,
    add_inputs).

    :param starting_sprod: sub-product code of the ingested input dataset.
    """
    # ---------------------------------------------------------------------
    # Define input files
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)

    input_dir = es_constants.processing_dir+ \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

    # Glob pattern matching all ingested files for this sub-product
    starting_files = input_dir+"*"+in_prod_ident

    # Read input product nodata
    in_prod_info = querydb.get_product_out_info(productcode=prod, subproductcode=starting_sprod, version=version)
    product_info = functions.list_to_element(in_prod_info)
    in_nodata = product_info.nodata

    # Was a bare Python-2 debug `print`; route through the module logger
    # instead (lazy %-style args, no stdout pollution).
    logger.debug('Input nodata for %s/%s is: %s', prod, starting_sprod, in_nodata)

    # ---------------------------------------------------------------------
    # Monthly Average for a given month
    output_sprod = "monavg"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Group daily files by their YYYYMM prefix (day digits are ignored)
    formatter_in = "(?P<YYYYMM>[0-9]{6})[0-9]{2}"+in_prod_ident
    formatter_out = ["{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYYMM[0]}"+out_prod_ident]

    @active_if(activate_monavg_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def modis_chla_monavg(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        out_filename = os.path.basename(output_file)
        str_date = out_filename[0:6]
        # Only compute the average when the month is complete
        expected_ndays = functions.get_number_days_month(str_date)
        functions.check_output_dir(os.path.dirname(output_file))
        current_ndays = len(input_file)
        if expected_ndays != current_ndays:
            logger.info('Missing days for period: %s. Skip' % str_date)
        else:
            args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF',
                    "options": "compress=lzw", "input_nodata": in_nodata}
            raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Monthly Climatology for all years
    new_input_subprod = 'monavg'
    new_in_prod_ident = functions.set_path_filename_no_date(prod, new_input_subprod, mapset, version, ext)
    new_input_dir = es_constants.processing_dir+ \
                    functions.set_path_sub_directory(prod, new_input_subprod, 'Derived', version, mapset)
    new_starting_files = new_input_dir+"*"+new_in_prod_ident

    output_sprod = "monclim"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Group monthly averages by month-of-year (MM), across all years
    formatter_in = "[0-9]{4}(?P<MM>[0-9]{2})"+new_in_prod_ident
    formatter_out = ["{subpath[0][5]}"+os.path.sep+output_subdir+"{MM[0]}"+out_prod_ident]

    @active_if(activate_monclim_comput)
    @collate(new_starting_files, formatter(formatter_in), formatter_out)
    def modis_chla_monclim(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF',
                "options": "compress=lzw", "input_nodata": in_nodata}
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Monthly Anomaly for a given monthly
    output_sprod = "monanom"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MM>[0-9]{2})"+new_in_prod_ident
    formatter_out = "{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYY[0]}{MM[0]}"+out_prod_ident

    # The matching climatology (same MM) is added as second input
    ancillary_sprod = "monclim"
    ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}"+os.path.sep+ancillary_subdir+"{MM[0]}"+ancillary_sprod_ident

    @active_if(activate_monanom_comput)
    @transform(new_starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    def modis_chla_mondiff(input_file, output_file):
        # anomaly = monthly average - climatology (subtraction of the two inputs)
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF',
                "options": "compress=lzw"}
        raster_image_math.do_oper_subtraction(**args)
def create_pipeline(starting_sprod):
    """Register the ruffus task computing monthly Primary Productivity (PP).

    Combines four MODIS monthly composites -- chl-a (the ruffus 'driver'),
    sst, kd490 and par -- into one 1mon PP product via
    raster_image_math.do_compute_primary_production.

    NOTE(review): relies on module-level names defined elsewhere in this
    file: prod, mapset, version, ext, activate_pp_1mon_comput and the
    ruffus decorators (active_if, transform, formatter, add_inputs).

    :param starting_sprod: sub-product code of the monthly input datasets.
    """
    # ---------------------------------------------------------------------
    # Define input files: Chla is the 'driver', sst,kd and par 'ancillary inputs'
    chla_prod="modis-chla"
    chla_prod_ident = functions.set_path_filename_no_date(chla_prod, starting_sprod, mapset, version, ext)
    chla_input_dir = es_constants.processing_dir+ \
                     functions.set_path_sub_directory(chla_prod, starting_sprod, 'Derived', version, mapset)
    #chla_files = chla_input_dir+"2014*"+chla_prod_ident
    # ---------------------------------------------------------------------
    sst_prod="modis-sst"
    sst_prod_ident = functions.set_path_filename_no_date(sst_prod, starting_sprod, mapset, version, ext)
    sst_input_dir = es_constants.processing_dir+ \
                    functions.set_path_sub_directory(sst_prod, starting_sprod, 'Derived', version, mapset)
    # ---------------------------------------------------------------------
    kd_prod="modis-kd490"
    kd_prod_ident = functions.set_path_filename_no_date(kd_prod, starting_sprod, mapset, version, ext)
    kd_input_dir = es_constants.processing_dir+ \
                   functions.set_path_sub_directory(kd_prod, starting_sprod, 'Derived', version, mapset)
    # kd490 is the only globbed input; the other three are resolved per-date
    # through the formatter templates below.
    kd_files = kd_input_dir+"*"+kd_prod_ident
    # ---------------------------------------------------------------------
    par_prod="modis-par"
    par_prod_ident = functions.set_path_filename_no_date(par_prod, starting_sprod, mapset, version, ext)
    par_input_dir = es_constants.processing_dir+ \
                    functions.set_path_sub_directory(par_prod, starting_sprod, 'Derived', version, mapset)

    # Read input product nodata (one query per input product)
    chla_prod_info = querydb.get_product_out_info(productcode=chla_prod, subproductcode="chla-day", version=version)
    chla_product_info = functions.list_to_element(chla_prod_info)
    chla_nodata = chla_product_info.nodata

    sst_prod_info = querydb.get_product_out_info(productcode=sst_prod, subproductcode="sst-day", version=version)
    sst_product_info = functions.list_to_element(sst_prod_info)
    sst_nodata = sst_product_info.nodata

    kd_prod_info = querydb.get_product_out_info(productcode=kd_prod, subproductcode="kd490-day", version=version)
    kd_product_info = functions.list_to_element(kd_prod_info)
    kd_nodata = kd_product_info.nodata

    par_prod_info = querydb.get_product_out_info(productcode=par_prod, subproductcode="par-day", version=version)
    par_product_info = functions.list_to_element(par_prod_info)
    par_nodata = par_product_info.nodata

    # ---------------------------------------------------------------------
    # Monthly Primary Productivity from chl-a, sst, kd490 and par monthly data
    output_sprod="1mon"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset,version, ext)
    output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset)

    # Starting files monthly composites
    formatter_kd="(?P<YYYYMM>[0-9]{6})"+kd_prod_ident
    formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYYMM[0]}"+out_prod_ident

    # Same-month ancillary inputs, matched on the YYYYMM captured above
    ancillary_sst = sst_input_dir+"{YYYYMM[0]}"+sst_prod_ident
    ancillary_par = par_input_dir+"{YYYYMM[0]}"+par_prod_ident
    ancillary_chla = chla_input_dir+"{YYYYMM[0]}"+chla_prod_ident

    @active_if(activate_pp_1mon_comput)
    @transform(kd_files, formatter(formatter_kd), add_inputs(ancillary_chla, ancillary_par, ancillary_sst), formatter_out)
    def modis_pp_1mon(input_file, output_file):
        # add_inputs order makes input_file = [kd, chla, par, sst]
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # NOTE(review): chla_nodata is computed above but never passed here,
        # while sst/kd/par nodata are -- confirm whether
        # do_compute_primary_production should also receive chla_nodata.
        args = {"chla_file": input_file[1], "sst_file": input_file[3], "kd_file": input_file[0],"par_file": input_file[2], \
                "sst_nodata": sst_nodata, "kd_nodata": kd_nodata,\
                "par_nodata": par_nodata, "output_file": output_file, "output_nodata": -9999, "output_format": 'GTIFF',\
                "output_type": None, "options": "compress=lzw"}
        raster_image_math.do_compute_primary_production(**args)
def do_percent_difference(self):
    """Prepare the Percent Difference sub-product (vs. long-term average).

    Registers either the dekadal (10dperc) or the monthly (1monperc)
    sub-product, depending on self.frequency, and stores the ruffus
    patterns on the instance: self.formatter_in, self.formatter_out and
    self.ancillary_input (the matching 10davg/1monavg average file).
    """
    # Branch-specific registration and inputs; the path/pattern building
    # that follows is identical for both frequencies.
    if self.frequency == '10d':
        registered_sprod = self.proc_lists.proc_add_subprod(
            "10dperc",
            "10anomalies",
            final=False,
            descriptive_name='10d Percent Difference ' + self.subproduct_code,
            description='10d Percent Difference vs. LTA',
            frequency_id='e1dekad',
            date_format='YYYYMMDD',
            masked=False,
            timeseries_role=self.starting_sprod,
            active_default=True)
        driver_ident = self.in_prod_ident
        average_sprod = "10davg"
    else:
        registered_sprod = self.proc_lists.proc_add_subprod(
            "1monperc",
            "filtered_anomalies",
            final=False,
            descriptive_name='Monthly Percent Difference',
            description='Monthly Percent Difference',
            frequency_id='e1month',
            date_format='YYYYMMDD',
            masked=False,
            timeseries_role=self.input_subprod_monthly,
            active_default=True)
        driver_ident = self.in_prod_ident_monthly
        average_sprod = "1monavg"

    registered_ident = functions.set_path_filename_no_date(
        self.prod, registered_sprod, self.mapset, self.version, self.ext)
    registered_subdir = functions.set_path_sub_directory(
        self.prod, registered_sprod, 'Derived', self.version, self.mapset)

    # Driver files carry a YYYY + MMDD date; output keeps the full date.
    self.formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + driver_ident
    self.formatter_out = "{subpath[0][5]}" + os.path.sep + registered_subdir + "{YYYY[0]}{MMDD[0]}" + registered_ident

    # Ancillary input: the long-term average for the same MMDD (year-agnostic).
    average_ident = functions.set_path_filename_no_date(
        self.prod, average_sprod, self.mapset, self.version, self.ext)
    average_subdir = functions.set_path_sub_directory(
        self.prod, average_sprod, 'Derived', self.version, self.mapset)
    self.ancillary_input = "{subpath[0][5]}" + os.path.sep + average_subdir + "{MMDD[0]}" + average_ident