def create_pipeline(prod, starting_sprod, native_mapset, version,
                    starting_dates=None, proc_lists=None, logger=None,
                    mapset=None):
    """Build the ruffus pipeline for seasonal/running cumulation products.

    Two derived sub-products are registered and computed from the dekadal
    ingest files of ``starting_sprod``:

    * ``seas-cum-of-<sprod>`` — one cumulation per season (start 09/01 to
      end 04/21), masked with the agriculture mask.
    * ``cum-of-<sprod>`` — running cumulation from start-of-season up to
      each dekad inside the season (not masked).

    Args:
        prod: product code.
        starting_sprod: input sub-product code (dekadal ingest data).
        native_mapset: mapset of the input files.
        version: product version string.
        starting_dates: optional explicit list of input dates; when None,
            all matching files on disk are used.
        proc_lists: optional functions.ProcLists to append to.
        logger: logger used for missing-file warnings.
            NOTE(review): defaults to None, but the generators below call
            ``logger.warning`` unconditionally — confirm callers always pass one.
        mapset: output mapset; defaults to ``native_mapset``.

    Returns:
        The (possibly newly created) functions.ProcLists with the two
        sub-products registered.
    """
    # Definitions: season boundaries as MMDD strings and the crop mask path.
    start_season = '0901'
    end_season = '0421'
    agriculture_mask = '/data/temp/AGRIC_MASK.tif'

    # Manage mapset: output defaults to the native (input) mapset.
    if mapset is None:
        mapset = native_mapset

    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Activation flags (NOTE: both are enabled here despite the historical
    # "all off" wording — both branches of the pipeline run by default).
    activate_seas_cum_comput = 1  # season cumulation
    activate_cum_comput = 1  # season cumulation

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files: filename suffix (no date) and ingest directory.
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, native_mapset, version, ext)

    input_dir = es2_data_dir+ \
        functions.set_path_sub_directory(prod, starting_sprod, 'Ingest',
                                         version, native_mapset)

    # Either build file paths from the explicit date list, or glob the dir.
    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = glob.glob(input_dir + "*" + in_prod_ident)

    # ---------------------------------------------------------------------
    # 3.a NDVI linearx2 Season Cumulation masked using Crop Mask
    # ---------------------------------------------------------------------

    # Define output subproduct
    out_sub_prod_name = 'seas-cum-of-' + starting_sprod

    output_sprod_group = proc_lists.proc_add_subprod_group("seas_cum_prods")
    output_sprod = proc_lists.proc_add_subprod(
        out_sub_prod_name,
        "seas_cum_prods",
        final=True,
        descriptive_name='Season Cumulation for ' + out_sub_prod_name,
        description='Season Cumulation for ' + out_sub_prod_name,
        frequency_id='e1year',
        date_format='YYYYMMDD',
        masked=True,
        timeseries_role='',
        active_default=True)

    # Generate prod_identifier (e.g. _fewsnet-rfe_seas-cum-of-10d_FEWSNET-Africa-8km_2.0.tif)
    # and output sub-directory.
    prod_ident_seas_cum = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    subdir_ident_seas_cum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_seas_cum():
        """Yield (input_files, output_file) for each complete season.

        A season is triggered when a file dated at the end-of-season dekad
        is found; the season is skipped if any dekad in it is missing.
        """
        starting_files.sort()

        # Convert from string to int (for comparison)
        dekad_start = int(start_season)
        dekad_end = int(end_season)

        # Loop over all input files
        for file_t0 in starting_files:

            # Get current date (in format '19980901')
            date_t0 = functions.get_date_from_path_full(file_t0)

            # Extract from date-string the dekad/year as integer
            dekad_t0 = int(date_t0[4:])
            year2 = int(date_t0[0:4])

            # Check if season goes across two years -> define year1/2.
            # NOTE(review): year1 stays unbound for files outside the season;
            # it is only read below when dekad_t0 == dekad_end (in-season),
            # so this is presumably safe — confirm with real date coverage.
            if dekad_start < dekad_end:
                if dekad_t0 >= dekad_start and dekad_t0 <= dekad_end:
                    year1 = year2
            else:
                if dekad_t0 > dekad_start or dekad_t0 <= dekad_end:
                    year1 = year2 - 1

            # Detect the end of the season and trigger processing
            if dekad_t0 == dekad_end:

                # Define output filename
                output_file = es2_data_dir + subdir_ident_seas_cum + str(
                    year2) + end_season + prod_ident_seas_cum

                # Get list of dates from start of season to end of season
                list_dates = proc_functions.get_list_dates_for_dataset(
                    prod,
                    starting_sprod,
                    version,
                    start_date=str(year1) + start_season,
                    end_date=str(year2) + end_season)
                input_files = []
                missing_file = False
                for ldate in list_dates:
                    # Append the file to list if it exists ...
                    if os.path.isfile(input_dir + ldate + in_prod_ident):
                        input_files.append(input_dir + ldate + in_prod_ident)
                    # ... otherwise raise a warning and break
                    else:
                        logger.warning(
                            'Missing file for date {0}. Season not computed.'.
                            format(ldate))
                        missing_file = True
                        break
                if not missing_file:
                    yield (input_files, output_file)

    @active_if(activate_seas_cum_comput)
    @files(generate_parameters_seas_cum)
    # Function to do actual computation from inputs to output
    def seas_cum(input_files, output_file):
        """Cumulate one season of files, reproject, then apply the crop mask."""
        # Ensure out subdirectory exists
        functions.check_output_dir(os.path.dirname(output_file))

        # If output_file it is a list, force to a string
        output_file = functions.list_to_element(output_file)

        # Prepare temporary working directory for intermediate results
        tmpdirpath = tempfile.mkdtemp()

        # Cumulated but not masked output
        tmp_output_file = tmpdirpath + os.path.sep + os.path.basename(
            output_file)

        # Temp mask in the final projection (mapset)
        tmp_reproj_file = tmpdirpath + os.path.sep + 'my_temp_reprojected_output.tif'

        # Call the function for cumulating
        args = {
            "input_file": input_files,
            "output_file": tmp_output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_cumulate(**args)

        # Create from the original mask a new one, by using raster_image_math.do_reprojection()
        # and save it as a temporary mask
        # raster_image_math.do_reproject(agriculture_mask, tmp_reproj_file, 'SPOTV-SADC-1km', mapset)
        raster_image_math.do_reproject(tmp_output_file, tmp_reproj_file,
                                       native_mapset, mapset)

        # Call the function for masking (mask value 0 -> output value 0)
        args = {
            "input_file": tmp_reproj_file,
            "mask_file": agriculture_mask,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "mask_value": 0,
            "out_value": 0
        }
        raster_image_math.do_mask_image(**args)

        # Remove temp directory
        shutil.rmtree(tmpdirpath)

    # ---------------------------------------------------------------------
    # 3.b Cumulation from start of season to current dekad (running total)
    # ---------------------------------------------------------------------

    # Define output subproduct
    out_sub_prod_name = 'cum-of-' + starting_sprod

    output_sprod_group = proc_lists.proc_add_subprod_group("cum_prods")
    output_sprod = proc_lists.proc_add_subprod(
        out_sub_prod_name,
        "cum_prods",
        final=True,
        descriptive_name='Cumulation for ' + out_sub_prod_name,
        description='Cumulation for ' + out_sub_prod_name,
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    # Generate prod_identifier (e.g. _fewsnet-rfe_cum-of-10d_FEWSNET-Africa-8km_2.0.tif)
    # and output sub-directory.
    prod_ident_cum = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    subdir_ident_cum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_cum():
        """Yield (input_files, output_file) for every in-season dekad.

        For each input file inside the season, the output cumulates all
        dekads from start-of-season up to that file's date; skipped if any
        intermediate file is missing.
        """
        starting_files.sort()

        # Convert from string to int (for comparison)
        dekad_start = int(start_season)
        dekad_end = int(end_season)

        # Loop over all input files
        for file_t0 in starting_files:

            # Get current date (in format '19980901')
            date_t0 = functions.get_date_from_path_full(file_t0)

            # Extract from date-string the dekad/year as integer
            dekad_t0 = int(date_t0[4:])
            year_t0 = int(date_t0[0:4])
            in_season = False

            # Check if season goes across two years -> define start-of-season year
            if dekad_start < dekad_end:
                if dekad_t0 >= dekad_start and dekad_t0 <= dekad_end:
                    year_sos = year_t0
                    in_season = True
            else:
                if dekad_t0 >= dekad_start:
                    year_sos = year_t0
                    in_season = True
                if dekad_t0 <= dekad_end:
                    year_sos = year_t0 - 1
                    in_season = True

            # Any in-season dekad triggers processing up to that dekad
            if in_season:

                # Define output filename
                output_file = es2_data_dir + subdir_ident_cum + date_t0 + prod_ident_cum

                # Get list of dates from start of season to current dekad
                list_dates = proc_functions.get_list_dates_for_dataset(
                    prod,
                    starting_sprod,
                    version,
                    start_date=str(year_sos) + start_season,
                    end_date=date_t0)
                input_files = []
                missing_file = False
                for ldate in list_dates:
                    # Append the file to list if it exists ...
                    if os.path.isfile(input_dir + ldate + in_prod_ident):
                        input_files.append(input_dir + ldate + in_prod_ident)
                    # ... otherwise raise a warning and break
                    else:
                        logger.warning(
                            'Missing file for date {0}. Season not computed.'.
                            format(ldate))
                        missing_file = True
                        break
                if not missing_file:
                    yield (input_files, output_file)

    @active_if(activate_cum_comput)
    @files(generate_parameters_cum)
    # Function to do actual computation from inputs to output
    def cum(input_files, output_file):
        """Cumulate all dekads up to the current one and reproject (no mask)."""
        # Ensure out subdirectory exists
        functions.check_output_dir(os.path.dirname(output_file))

        # If output_file it is a list, force to a string
        output_file = functions.list_to_element(output_file)

        # Prepare temporary working directory for intermediate results
        tmpdirpath = tempfile.mkdtemp()

        # Cumulated but not masked output
        tmp_output_file = tmpdirpath + os.path.sep + os.path.basename(
            output_file)

        # Call the function for cumulating
        args = {
            "input_file": input_files,
            "output_file": tmp_output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_cumulate(**args)

        # Create from the original mask a new one, by using raster_image_math.do_reprojection()
        # and save it as a temporary mask
        # raster_image_math.do_reproject(agriculture_mask, tmp_reproj_file, 'SPOTV-SADC-1km', mapset)
        raster_image_math.do_reproject(tmp_output_file, output_file,
                                       native_mapset, mapset)

        # Remove temp directory
        shutil.rmtree(tmpdirpath)

    return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version,
                    starting_dates=None, proc_lists=None, nrt_products=True,
                    update_stats=False):
    """Build the ruffus pipeline computing MODIS Primary Production (modis-pp).

    Monthly primary production is derived from four monthly composites:
    chl-a (the input ``prod``), modis-sst, modis-kd490 and modis-par.
    Matching files are paired by their YYYYMMDD date via ruffus formatters.

    Args:
        prod: chl-a product code used as the driving input.
        starting_sprod: sub-product code shared by all four inputs (monthly).
        mapset: common mapset of inputs and output.
        version: output (modis-pp) version.
        starting_dates: unused here; kept for signature uniformity with the
            other pipelines.
        proc_lists: optional functions.ProcLists.
        nrt_products: when True, activates the monthly PP computation.
        update_stats: when True, sets stats activation flags.
            NOTE(review): the flags set here (activate_pp_8dstats_comput,
            activate_pp_monstats_comput) are never read in this visible
            block — confirm whether stats tasks were removed or live elsewhere.
    """
    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Set DEFAULTS: all off
    activate_pp_1mon_comput = 0  # 10d stats
    activate_10danomalies_comput = 0  # 10d anomalies
    activate_monthly_comput = 0  # monthly cumulation
    activate_monstats_comput = 0  # monthly stats
    activate_monanomalies_comput = 0  # monthly anomalies

    # switch wrt groups - according to options
    if nrt_products:
        activate_pp_1mon_comput = 1  # Primary Production Monthly
        activate_monthly_comput = 1  # monthly cumulation
        activate_monanomalies_comput = 1  # monthly anomalies

    if update_stats:
        activate_pp_8dstats_comput = 1  # 10d stats
        activate_pp_monstats_comput = 1  # monthly stats

    # Primary Production Monthly
    # Always true
    #activate_pp_1mon_comput = 1

    #my_date='20160601'
    # Empty date prefix -> match all dates in the kd glob below.
    my_date = ''
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Primary Productivity from chl-a, sst, kd490 and par data
    # Define inputs: per-product filename suffix and Derived directory.
    chla_prod = prod
    chla_version = 'v2013.1'
    chla_prod_ident = functions.set_path_filename_no_date(
        chla_prod, starting_sprod, mapset, chla_version, ext)
    chla_input_dir = es2_data_dir+ \
        functions.set_path_sub_directory(chla_prod, starting_sprod, 'Derived',
                                         chla_version, mapset)

    # ---------------------------------------------------------------------
    sst_prod = "modis-sst"
    sst_version = 'v2013.1'
    sst_prod_ident = functions.set_path_filename_no_date(
        sst_prod, starting_sprod, mapset, sst_version, ext)
    sst_input_dir = es2_data_dir+ \
        functions.set_path_sub_directory(sst_prod, starting_sprod, 'Derived',
                                         sst_version, mapset)

    # ---------------------------------------------------------------------
    kd_prod = "modis-kd490"
    kd_version = 'v2012.0'
    kd_prod_ident = functions.set_path_filename_no_date(
        kd_prod, starting_sprod, mapset, kd_version, ext)
    kd_input_dir = es2_data_dir+ \
        functions.set_path_sub_directory(kd_prod, starting_sprod, 'Derived',
                                         kd_version, mapset)
    # kd490 is the pipeline's driving input: one task per kd file.
    kd_files = kd_input_dir + my_date + "*" + kd_prod_ident

    # ---------------------------------------------------------------------
    par_prod = "modis-par"
    par_version = 'v2012.0'
    par_prod_ident = functions.set_path_filename_no_date(
        par_prod, starting_sprod, mapset, par_version, ext)
    par_input_dir = es2_data_dir+ \
        functions.set_path_sub_directory(par_prod, starting_sprod, 'Derived',
                                         par_version, mapset)

    # Read input product nodata values from the product database.
    chla_prod_info = querydb.get_product_out_info(productcode=chla_prod,
                                                  subproductcode="monavg",
                                                  version=chla_version)
    chla_product_info = functions.list_to_element(chla_prod_info)
    chla_nodata = chla_product_info.nodata

    sst_prod_info = querydb.get_product_out_info(productcode=sst_prod,
                                                 subproductcode="monavg",
                                                 version=sst_version)
    sst_product_info = functions.list_to_element(sst_prod_info)
    sst_nodata = sst_product_info.nodata

    kd_prod_info = querydb.get_product_out_info(productcode=kd_prod,
                                                subproductcode="monavg",
                                                version=kd_version)
    kd_product_info = functions.list_to_element(kd_prod_info)
    kd_nodata = kd_product_info.nodata

    par_prod_info = querydb.get_product_out_info(productcode=par_prod,
                                                 subproductcode="monavg",
                                                 version=par_version)
    par_product_info = functions.list_to_element(par_prod_info)
    par_nodata = par_product_info.nodata

    # Define outputs
    output_prod = "modis-pp"
    output_sprod = starting_sprod
    out_prod_ident = functions.set_path_filename_no_date(
        output_prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(output_prod,
                                                     output_sprod, 'Derived',
                                                     version, mapset)

    # Starting files monthly composites: ruffus formatter patterns pairing
    # the kd date (YYYYMMDD) with the same-dated sst/par/chla files.
    formatter_kd = "(?P<YYYYMMDD>[0-9]{8})" + kd_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMMDD[0]}" + out_prod_ident

    ancillary_sst = sst_input_dir + "{YYYYMMDD[0]}" + sst_prod_ident
    ancillary_par = par_input_dir + "{YYYYMMDD[0]}" + par_prod_ident
    ancillary_chla = chla_input_dir + "{YYYYMMDD[0]}" + chla_prod_ident

    @active_if(activate_pp_1mon_comput)
    @transform(kd_files, formatter(formatter_kd),
               add_inputs(ancillary_chla, ancillary_par, ancillary_sst),
               formatter_out)
    def modis_pp_1mon(input_file, output_file):
        """Compute one month of primary production from the 4 paired inputs.

        input_file order (from add_inputs): [0]=kd, [1]=chla, [2]=par, [3]=sst.
        """
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"chla_file": input_file[1], "sst_file": input_file[3],
                "kd_file": input_file[0], "par_file": input_file[2], \
                "sst_nodata": sst_nodata, "kd_nodata": kd_nodata,
                "chla_nodata": chla_nodata,\
                "par_nodata": par_nodata, "output_file": output_file,
                "output_nodata": -9999, "output_format": 'GTIFF',\
                "output_type": None, "options": "compress=lzw"}
        raster_image_math.do_compute_primary_production(**args)
def create_pipeline(prod, starting_sprod, native_mapset, target_mapset,
                    version, starting_dates=None, proc_lists=None):
    """Build the ruffus pipeline for LSA-SAF LST derived products.

    Registers and computes, from 15-minute LST ingest files:

    * ``1dmax``    — daily maximum, reprojected to ``target_mapset``.
    * ``10dmax``   — dekadal maximum of the daily maxima (target mapset).
    * ``10d15min`` — dekadal maximum per 15-minute slot (native mapset).
    * ``10dmin``   — dekadal minimum of the 10d15min files, reprojected.

    Args:
        prod: product code.
        starting_sprod: input sub-product ('lst', 15-min ingest files).
        native_mapset: mapset of the ingest files.
        target_mapset: mapset for reprojected outputs.
        version: product version string.
        starting_dates: optional explicit list of input dates.
            NOTE(review): when given, ``starting_files`` becomes a *list*,
            but generate_parameters_10d15min later calls
            glob.glob(starting_files), which expects a pattern string —
            confirm callers only pass starting_dates=None here.
        proc_lists: optional functions.ProcLists.

    Returns:
        The functions.ProcLists with the four sub-products registered.
    """
    # Create Logger
    logger = log.my_logger('log.lst')

    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Set DEFAULTS: all ON
    activate_1dmax_comput = 1
    activate_10dmax_comput = 1
    activate_10d15min_comput = 1
    activate_10dmin_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files ('lst' subproduct)
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, native_mapset, version, ext)

    input_dir = es2_data_dir+ \
        functions.set_path_sub_directory(prod, starting_sprod, 'Ingest',
                                         version, native_mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        # Glob pattern (a string, expanded lazily by ruffus / glob below).
        starting_files = input_dir + "*" + in_prod_ident

    logger.info("starting_files %s" % starting_files)

    # ----------------------------------------------------------------------------------------------------------------
    # 1dmax
    # Daily maximum from 15min lst, re-projected on target mapset
    output_sprod = proc_lists.proc_add_subprod("1dmax",
                                               "lsasaf-lst",
                                               final=False,
                                               descriptive_name='1d Maximum',
                                               description='Daily Maximum',
                                               frequency_id='e1day',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    output_sprod = '1dmax'
    out_prod_ident_1dmax = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset, version, ext)
    output_subdir_1dmax = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, target_mapset)

    # Group all files of one day: date part (8 digits) is captured, the
    # 4-digit HHMM part is collapsed by the formatter.
    formatter_in_1dmax = "(?P<YYYYMMDD>[0-9]{8})[0-9]{4}" + in_prod_ident
    formatter_out_1dmax = "{subpath[0][5]}" + os.path.sep + output_subdir_1dmax + "{YYYYMMDD[0]}" + out_prod_ident_1dmax

    # @active_if(activate_1dmax_comput)
    @collate(starting_files, formatter(formatter_in_1dmax),
             formatter_out_1dmax)
    def lsasaf_lst_1dmax(input_file, output_file):
        """Max-compose one day of 15-min files, then reproject to target."""
        # output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        tmpdir = tempfile.mkdtemp(prefix=__name__,
                                  suffix='',
                                  dir=es_constants.base_tmp_dir)
        tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)
        args = {
            "input_file": input_file,
            "output_file": tmp_output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "input_nodata": -32768
        }
        raster_image_math.do_max_image(**args)
        reproject_output(tmp_output_file, native_mapset, target_mapset)
        shutil.rmtree(tmpdir)

    # ----------------------------------------------------------------------------------------------------------------
    # 10dmax
    # 10 Day maximum from daily max, on target mapset
    output_sprod = proc_lists.proc_add_subprod("10dmax",
                                               "lsasaf-lst",
                                               final=False,
                                               descriptive_name='10d Maximum',
                                               description='10d Maximum',
                                               frequency_id='e1dekad',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)

    output_sprod_10dmax = '10dmax'
    out_prod_ident_10dmax = functions.set_path_filename_no_date(
        prod, output_sprod_10dmax, target_mapset, version, ext)
    output_subdir_10dmax = functions.set_path_sub_directory(
        prod, output_sprod_10dmax, 'Derived', version, target_mapset)
    #
    # Define input files: the 1dmax product computed above.
    in_prod_10dmax = '1dmax'
    in_prod_ident_10dmax = functions.set_path_filename_no_date(
        prod, in_prod_10dmax, target_mapset, version, ext)
    #
    input_dir_10dmax = es_constants.processing_dir+ \
        functions.set_path_sub_directory(prod, in_prod_10dmax, 'Derived',
                                         version, target_mapset)
    #
    starting_files_10dmax = input_dir_10dmax + "*" + in_prod_ident_10dmax

    #
    def generate_parameters_10dmax():
        """Yield (file_list, output_file) per complete dekad of 1dmax files.

        The current (still running) dekad is excluded.
        """
        # Look for all input files in input_dir, and sort them
        input_files = glob.glob(starting_files_10dmax)
        dekad_list = []

        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)
        dekad_list = sorted(dekad_list)

        # Compute the 'julian' dekad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        for dekad in dekad_list:
            # Exclude the current dekad
            if dekad != dekad_now:
                file_list = []
                my_dekad_str = functions.conv_dekad_2_date(dekad)
                for input_file in input_files:
                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    mydekad_nbr = functions.conv_date_2_dekad(
                        mydate_yyyymmdd[0:8])
                    if mydekad_nbr == dekad:
                        file_list.append(input_file)

                output_file = es_constants.processing_dir + output_subdir_10dmax + os.path.sep + my_dekad_str + out_prod_ident_10dmax
                yield (file_list, output_file)

    # @active_if(activate_10dmax_comput)
    @files(generate_parameters_10dmax)
    def lsasaf_lst_10dmax(input_file, output_file):
        """Max-compose the daily maxima of one dekad (already on target mapset)."""
        # output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "input_nodata": -32768
        }
        raster_image_math.do_max_image(**args)

    # ---------------------------------------------------------------------
    # Dekad maximum for every 15min
    # NOTE: this product is computed w/o re-projection, i.e. on the 'native' mapset
    output_sprod = proc_lists.proc_add_subprod(
        "10d15min",
        "lsasaf-lst",
        final=False,
        descriptive_name='10day Maximum over 15 min',
        description='10day Maximum computed for every 15 min',
        frequency_id='e15minute',  # Is it OK ???????
        date_format='YYYYMMDDHHMM',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, native_mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     native_mapset)

    def generate_parameters_10d15min():
        """Yield (file_list, output_file) per (dekad, 15-min slot) pair.

        Only slots with more than 8 files in the dekad are emitted; the
        current dekad is excluded.
        """
        # Look for all input files in input_dir, and sort them
        input_files = glob.glob(starting_files)
        dekad_list = []

        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)
        dekad_list = sorted(dekad_list)

        # Compute the 'julian' dekad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        # Generate the list of 15 min times in a day.
        # NOTE(review): xrange is Python 2 only — this module presumably
        # still targets py2; confirm before porting.
        timelist = [
            datetime.time(h, m).strftime("%H%M")
            for h, m in itertools.product(xrange(0, 24), xrange(0, 60, 15))
        ]

        for time in timelist:
            files_for_time = glob.glob(input_dir + os.path.sep + '*' + time +
                                       in_prod_ident)
            for dekad in dekad_list:
                # Exclude the current dekad
                if dekad != dekad_now:
                    file_list = []
                    my_dekad_str = functions.conv_dekad_2_date(dekad)
                    output_file = es_constants.processing_dir + output_subdir + os.path.sep + my_dekad_str + time + out_prod_ident
                    for myfile in files_for_time:
                        basename = os.path.basename(myfile)
                        mydate_yyyymmdd = functions.get_date_from_path_filename(
                            basename)
                        mydekad_nbr = functions.conv_date_2_dekad(
                            mydate_yyyymmdd[0:8])
                        if mydekad_nbr == dekad:
                            file_list.append(myfile)
                    if len(file_list) > 8:
                        yield (file_list, output_file)

    @active_if(activate_10d15min_comput)
    @files(generate_parameters_10d15min)
    def lsasaf_lst_10d15min(input_file, output_file):
        """Max-compose one 15-min slot over a dekad; then purge old ingest files."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file,
                "output_format": 'GTIFF', \
                "options": "compress=lzw", "input_nodata":-32768}
        raster_image_math.do_max_image(**args)

        # Do also the house-keeping, by deleting the files older than 6 months
        number_months_keep = 6
        remove_old_files(prod, starting_sprod, version, native_mapset,
                         'Ingest', number_months_keep)

    # ----------------------------------------------------------------------------------------------------------------
    # 10 day minimum (mm)
    # NOTE: this product is computed with re-projection, i.e. on the 'target' mapset
    output_sprod = proc_lists.proc_add_subprod(
        "10dmin",
        "lsasaf-et",
        final=False,
        descriptive_name='10day Minimum',
        description='10day minimum',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident_10dmin = functions.set_path_filename_no_date(
        prod, "10dmin", target_mapset, version, ext)
    output_subdir_10dmin = functions.set_path_sub_directory(
        prod, "10dmin", 'Derived', version, target_mapset)

    # Define input files: the 10d15min product computed above (native mapset).
    in_prod_10dmin = '10d15min'
    in_prod_ident_10dmin = functions.set_path_filename_no_date(
        prod, in_prod_10dmin, native_mapset, version, ext)

    input_dir_10dmin = es_constants.processing_dir+ \
        functions.set_path_sub_directory(prod, in_prod_10dmin, 'Derived',
                                         version, native_mapset)

    starting_files_10dmin = input_dir_10dmin + "*" + in_prod_ident_10dmin

    formatter_in = "(?P<YYYYMMDD>[0-9]{8})[0-9]{4}" + in_prod_ident_10dmin
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_10dmin +
        "{YYYYMMDD[0]}" + out_prod_ident_10dmin
    ]

    @follows(lsasaf_lst_10d15min)
    @active_if(activate_10dmin_comput)
    @collate(starting_files_10dmin, formatter(formatter_in), formatter_out)
    def lsasaf_lst_10dmin(input_file, output_file):
        """Min-compose one dekad of 10d15min files and reproject to target."""
        output_file = functions.list_to_element(output_file)

        # Get the number of days of that dekad
        basename = os.path.basename(output_file)
        mydate = functions.get_date_from_path_filename(basename)
        functions.check_output_dir(os.path.dirname(output_file))
        tmpdir = tempfile.mkdtemp(prefix=__name__,
                                  suffix='',
                                  dir=es_constants.base_tmp_dir)
        tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)
        args = {
            "input_file": input_file,
            "output_file": tmp_output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "input_nodata": -32768
        }
        raster_image_math.do_min_image(**args)

        reproject_output(tmp_output_file, native_mapset, target_mapset)

        shutil.rmtree(tmpdir)

        # Do also the house-keeping, by deleting the files older than 6 months
        number_months_keep = 6
        remove_old_files(prod, '10d15min', version, native_mapset, 'Ingest',
                         number_months_keep)

    return proc_lists
def processing_merge(pipeline_run_level=0, pipeline_printout_level=0,
                     input_products='', output_product='', mapset='',
                     logfile=None):
    """Merge several input datasets into a single output dataset via symlinks.

    For every date of every input product, a symbolic link is created in the
    output product's Ingest directory pointing at the input file, so that
    the merged dataset appears as one continuous time series.

    Args:
        pipeline_run_level: unused here; kept for signature uniformity with
            the other processing entry points.
        pipeline_printout_level: unused here; see above.
        input_products: iterable of DB rows; each carries productcode,
            subproductcode, version, start_date, end_date.
        output_product: sequence whose first element carries the output
            productcode, subproductcode, version, mapsetcode.
        mapset: unused here; the output row's mapsetcode is used instead.
        logfile: optional logfile name; when set, progress is logged.

    Returns:
        Tuple (list_subprods, list_subprod_groups) from the ProcLists.
    """
    if logfile:
        spec_logger = log.my_logger(logfile)
        spec_logger.info("Entering routine %s" % 'processing_merge')

    # Dummy return arguments
    proc_lists = functions.ProcLists()
    list_subprods = proc_lists.list_subprods
    list_subprod_groups = proc_lists.list_subprod_groups

    es2_data_dir = es_constants.processing_dir + os.path.sep

    # Manage output_product data
    out_product_code = output_product[0].productcode
    out_sub_product_code = output_product[0].subproductcode
    out_version = output_product[0].version
    out_mapset = output_product[0].mapsetcode

    out_subdir = functions.set_path_sub_directory(out_product_code,
                                                  out_sub_product_code,
                                                  'Ingest', out_version,
                                                  out_mapset)
    out_prod_ident = functions.set_path_filename_no_date(
        out_product_code, out_sub_product_code, out_mapset, out_version, ext)

    out_dir = es2_data_dir + out_subdir

    # Check the output product directory
    functions.check_output_dir(out_dir)

    # Fill the processing list -> some fields to be taken by input products
    output_sprod_group = proc_lists.proc_add_subprod_group("merged")
    output_sprod = proc_lists.proc_add_subprod(out_sub_product_code,
                                               "merged",
                                               final=False,
                                               descriptive_name='undefined',
                                               description='undefined',
                                               frequency_id='e1dekad',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)

    # Loop over the input products:
    for input_product in input_products:

        # Extract info from input product
        product_code = input_product.productcode
        sub_product_code = input_product.subproductcode
        version = input_product.version
        start_date = input_product.start_date
        end_date = input_product.end_date

        product_info = querydb.get_product_out_info_connect(
            productcode=product_code,
            subproductcode=sub_product_code,
            version=version)
        prod_type = product_info[0].product_type

        in_subdir = functions.set_path_sub_directory(product_code,
                                                     sub_product_code,
                                                     prod_type, version,
                                                     out_mapset)
        # FIX: the input filename identifier must be built from the INPUT
        # product/subproduct codes (it previously used out_product_code /
        # out_sub_product_code, producing source paths that do not exist
        # for the input product, while in_subdir correctly used the input codes).
        in_prod_ident = functions.set_path_filename_no_date(
            product_code, sub_product_code, out_mapset, version, ext)

        # Create the list of dates -> returns empty if start==end==None
        list_dates = proc_functions.get_list_dates_for_dataset(
            product_code,
            sub_product_code,
            version,
            start_date=start_date,
            end_date=end_date)

        # If list_dates == None, look at all existing files
        if list_dates is None:
            print('To be Done !!!')
        # Otherwise, build list of files from list of dates
        else:
            for my_date in list_dates:
                in_file_path = es2_data_dir + in_subdir + my_date + in_prod_ident
                out_file_path = out_dir + my_date + out_prod_ident

                # Create the link
                status = functions.create_sym_link(in_file_path,
                                                   out_file_path,
                                                   force=False)
                if status == 0 and logfile:
                    spec_logger.info("Merged file %s created" % out_file_path)

    return list_subprods, list_subprod_groups
def create_pipeline(prod, starting_sprod, mapset, version,
                    starting_dates=None, proc_lists=None):
    """Build the ruffus pipeline for monthly-precipitation cumulations (SPI prep).

    Currently only the 3-month cumulation ('3mon' of group 'cumul') is
    active; the 6-month variant exists but is disabled (see note at the
    end of this function).

    Args:
        prod: product code.
        starting_sprod: monthly input sub-product code.
        mapset: mapset of inputs and outputs.
        version: product version string.
        starting_dates: optional explicit list of input dates (only
            existing files are kept); when None, all files on disk are used.
        proc_lists: optional functions.ProcLists to append to.

    Returns:
        The functions.ProcLists with the '3mon' sub-product registered.
    """
    # ---------------------------------------------------------------------
    # Create lists to store definition of the derived products, and their
    # groups
    # ---------------------------------------------------------------------
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # ---------------------------------------------------------------------
    # Define and assign the flags to control the individual derived products
    # and the groups. NOT to be changed by the User
    # ---------------------------------------------------------------------
    # Activation flags (all currently enabled, despite the historical
    # "all off" wording).
    activate_cumul_comput = 1  # cumulated products
    activate_spi_comput = 1  # spi indicators

    activate_cumul_3mon_comput = 1  # cumulated product 3mon
    activate_cumul_6mon_comput = 1  # cumulated product 6mon
    activate_cumul_1year_comput = 1  # cumulated product 1year
    activate_spi_1mon_comput = 1  # spi indicator 1mon
    activate_spi_3mon_comput = 1  # spi indicator 3mon
    activate_spi_6mon_comput = 1  # spi indicator 6mon
    activate_spi_1year_comput = 1  # spi indicator 1year

    # switch wrt groups - according to options

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files from the starting_sprod and starting_dates arguments
    # ---------------------------------------------------------------------
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    #logger.debug('Base data directory is: %s' % es2_data_dir)
    input_dir = es2_data_dir+ \
        functions.set_path_sub_directory(prod, starting_sprod, 'Derived',
                                         version, mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            # Keep only dates for which the file actually exists.
            if os.path.isfile(input_dir + my_date + in_prod_ident):
                starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = input_dir + "*" + in_prod_ident

    # Look for all input files in input_dir, and sort them
    # (explicit list when starting_dates given, else expand the glob pattern).
    if starting_dates is not None:
        input_files = starting_files
    else:
        input_files = glob.glob(starting_files)

    # ---------------------------------------------------------------------
    # Cumulated products - 3mon
    # ---------------------------------------------------------------------
    output_sprod_group = proc_lists.proc_add_subprod_group("cumul")
    output_sprod = proc_lists.proc_add_subprod(
        "3mon",
        "cumul",
        final=False,
        descriptive_name='3-monthly Precipitation',
        description='Precipitation for 3 months',
        frequency_id='e3month',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident_3moncum = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_3moncum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_3moncum():
        """Yield (file_list, output_file) for each 3-month window.

        Windows with a missing month (detected via the calendar gap between
        the first and last month) are skipped with a console message.

        NOTE(review): the range below stops at len(dates_list) - 1, so the
        window ending at the most recent date is never emitted — confirm
        whether this exclusion of the last month is intentional.
        """
        # Number of months to consider
        n_mon = 3
        dates_list = []

        # Extract and sort all dates
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            dates_list.append(mydate_yyyymmdd)

        dates_list = sorted(dates_list)
        # loop from the 'n_mon'-1 date to the last date - this is the period end-limit
        for date_index in range(n_mon - 1, len(dates_list) - 1):

            mydate = dates_list[date_index]
            prev_date = dates_list[date_index - n_mon + 1]
            file_list = []
            # First-of-month dates for the window's end and start months.
            m_1 = datetime.date(int(mydate[0:4]), int(mydate[4:6]), 1)
            m_2 = datetime.date(int(prev_date[0:4]), int(prev_date[4:6]), 1)
            delta = m_1 - m_2
            # Check there are no missing months: a contiguous n_mon window
            # spans at most 31*(n_mon-1) days between first-of-months.
            if delta.days <= (31 * (n_mon - 1)):
                for curr_index in range(0, n_mon):
                    curr_date = dates_list[date_index - curr_index]
                    if os.path.isfile(input_dir + curr_date + in_prod_ident):
                        file_list.append(input_dir + curr_date +
                                         in_prod_ident)

                output_file = es_constants.processing_dir + output_subdir_3moncum + os.path.sep + mydate + out_prod_ident_3moncum
                yield (file_list, output_file)
            else:
                print(
                    'At least 1 month is missing for period ending {0}'.format(
                        mydate))

    @active_if(activate_cumul_3mon_comput)
    @files(generate_parameters_3moncum)
    def std_precip_3moncum(input_file, output_file):
        """Cumulate the 3 monthly files of one window into one GTIFF."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_cumulate(**args)

    # NOTE: a 6-month cumulation section ("6mon" of group "cumul",
    # generate_parameters_6moncum / std_precip_6moncum — structurally
    # identical to the 3mon code above with n_mon = 6) was present here as
    # commented-out code and remains disabled. Re-introduce it from version
    # control history if/when activate_cumul_6mon_comput is to take effect.

    # End of pipeline definition
    return proc_lists
def create_pipeline(prod, starting_sprod, native_mapset, target_mapset, version, starting_dates=None, proc_lists=None):
    """Build the LSASAF-ET processing chain (ruffus tasks).

    Registers three derived sub-products and defines one task each:
      1. ``10d30min`` - dekad average of every 30-min slot (native mapset).
      2. ``10dcum``   - 10-day cumulate in mm, re-projected to target mapset.
      3. ``1moncum``  - monthly cumulate in mm (target mapset).

    :param prod:            product code.
    :param starting_sprod:  input (ingested) sub-product code.
    :param native_mapset:   mapset of the ingested files.
    :param target_mapset:   mapset of the re-projected outputs.
    :param version:         product version string.
    :param starting_dates:  optional list of date strings restricting inputs;
                            when ``None`` all ingested files are considered.
    :param proc_lists:      optional ``functions.ProcLists`` collector; created
                            here when not passed.
    :return: the ``proc_lists`` object with the registered sub-products.
    """
    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Set DEFAULTS: all ON
    activate_10d30min_comput = 1
    activate_10dcum_comput = 1
    activate_1moncum_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, native_mapset, version, ext)

    input_dir = es2_data_dir + \
        functions.set_path_sub_directory(prod, starting_sprod, 'Ingest',
                                         version, native_mapset)

    if starting_dates is not None:
        # Explicit list of files, one per requested date.
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        # Wildcard pattern expanded later by glob.glob().
        starting_files = input_dir + "*" + in_prod_ident

    # ---------------------------------------------------------------------
    # Dekad average for every 30min (mm/h)
    # NOTE: this product is computed w/o re-projection, i.e. on the 'native' mapset
    output_sprod_group = proc_lists.proc_add_subprod_group("lsasaf-et")
    output_sprod = proc_lists.proc_add_subprod(
        "10d30min",
        "lsasaf-et",
        final=False,
        descriptive_name='10day Average over 30 min',
        description='10day Average computed for every 30 min',
        frequency_id='e30minute',  # Is it OK ???????
        date_format='YYYYMMDDHHMM',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, native_mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     native_mapset)

    def generate_parameters_10d30min():
        # Yield, for each (dekad, 30-min slot) pair, the list of matching
        # input files and the corresponding output file.

        # Look for all input files in input_dir, and sort them
        input_files = glob.glob(starting_files)
        dekad_list = []

        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)

        dekad_list = sorted(dekad_list)

        # Compute the 'julian' dekad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        # Generate the list of 30 min times in a day.
        # FIX: range() instead of the Python-2-only xrange() (the rest of
        # the module already uses Python-3 print() calls).
        timelist = [
            datetime.time(h, m).strftime("%H%M")
            for h, m in itertools.product(range(0, 24), range(0, 60, 30))
        ]

        for time in timelist:
            files_for_time = glob.glob(input_dir + os.path.sep + '*' + time +
                                       in_prod_ident)
            for dekad in dekad_list:
                # Exclude the current (still incomplete) dekad
                if dekad != dekad_now:
                    file_list = []
                    my_dekad_str = functions.conv_dekad_2_date(dekad)
                    output_file = es_constants.processing_dir + output_subdir + \
                        os.path.sep + my_dekad_str + time + out_prod_ident
                    for myfile in files_for_time:
                        basename = os.path.basename(myfile)
                        mydate_yyyymmdd = functions.get_date_from_path_filename(
                            basename)
                        mydekad_nbr = functions.conv_date_2_dekad(
                            mydate_yyyymmdd[0:8])
                        if mydekad_nbr == dekad:
                            file_list.append(myfile)
                    yield (file_list, output_file)

    @active_if(activate_10d30min_comput)
    @files(generate_parameters_10d30min)
    def lsasaf_etp_10d30min(input_file, output_file):
        # Average all files of the same 30-min slot over the dekad.
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw",
                "input_nodata": -32768}
        raster_image_math.do_avg_image(**args)

        # Do also the house-keeping, by deleting the files older than 6 months
        number_months_keep = 6
        remove_old_files(prod, "lsasaf-et", version, native_mapset, 'Ingest',
                         number_months_keep)

    # ----------------------------------------------------------------------------------------------------------------
    # 10 day Cumulate (mm)
    # NOTE: this product is computed with re-projection, i.e. on the 'target' mapset
    output_sprod = proc_lists.proc_add_subprod(
        "10dcum",
        "lsasaf-et",
        final=False,
        descriptive_name='10day Cumulate',
        description='10day Cumulate in mm',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident_10dcum = functions.set_path_filename_no_date(
        prod, "10dcum", target_mapset, version, ext)
    output_subdir_10dcum = functions.set_path_sub_directory(
        prod, "10dcum", 'Derived', version, target_mapset)

    # Define input files (output of the previous step, still on native mapset)
    in_prod_10dcum = '10d30min'
    in_prod_ident_10dcum = functions.set_path_filename_no_date(
        prod, in_prod_10dcum, native_mapset, version, ext)

    input_dir_10dcum = es_constants.processing_dir + \
        functions.set_path_sub_directory(prod, in_prod_10dcum, 'Derived',
                                         version, native_mapset)

    starting_files_10dcum = input_dir_10dcum + "*" + in_prod_ident_10dcum

    # Group all 30-min slots of the same day/dekad (drop the HHMM part).
    formatter_in = "(?P<YYYYMMDD>[0-9]{8})[0-9]{4}" + in_prod_ident_10dcum
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_10dcum +
        "{YYYYMMDD[0]}" + out_prod_ident_10dcum
    ]

    @follows(lsasaf_etp_10d30min)
    @active_if(activate_10dcum_comput)
    @collate(starting_files_10dcum, formatter(formatter_in), formatter_out)
    def lsasaf_etp_10dcum(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        # Get the number of days of that dekad
        basename = os.path.basename(output_file)
        mydate = functions.get_date_from_path_filename(basename)
        nbr_days_dekad = functions.day_per_dekad(mydate)
        # Compute the correcting factor: we sum-up all 48 30min cycles and:
        #   Divide by 2 (mm/h -> mm)
        #   Multiply by number of days
        #   Divide by 100, so that the scale factor changes from 0.0001 (30min) to 0.01
        factor = float(nbr_days_dekad) * 0.005
        functions.check_output_dir(os.path.dirname(output_file))
        # Cumulate into a temp dir, then re-project onto the target mapset.
        tmpdir = tempfile.mkdtemp(prefix=__name__,
                                  suffix='',
                                  dir=es_constants.base_tmp_dir)
        tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)
        args = {
            "input_file": input_file,
            "output_file": tmp_output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "scale_factor": factor,
            "input_nodata": -32768
        }
        raster_image_math.do_cumulate(**args)
        reproject_output(tmp_output_file, native_mapset, target_mapset)
        shutil.rmtree(tmpdir)

        # Do also the house-keeping, by deleting the files older than 6 months
        # NOTE(review): the sub-product was registered as '10d30min', not
        # '10d30min-et' -- confirm this string matches the stored files.
        number_months_keep = 6
        remove_old_files(prod, "10d30min-et", version, native_mapset,
                         'Derived', number_months_keep)

    # ----------------------------------------------------------------------------------------------------------------
    # 1moncum
    output_sprod = proc_lists.proc_add_subprod(
        "1moncum",
        "lsasaf-et",
        final=False,
        descriptive_name='1mon Cumulate',
        description='Monthly Cumulate in mm',
        frequency_id='e1month',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    output_sprod = '1moncum'
    out_prod_ident_1moncum = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset, version, ext)
    output_subdir_1moncum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, target_mapset)

    # Input files (output of the 10dcum step, already on target mapset)
    in_prod_1moncum = '10dcum'
    in_prod_ident_1moncum = functions.set_path_filename_no_date(
        prod, in_prod_1moncum, target_mapset, version, ext)

    input_dir_1moncum = es_constants.processing_dir + \
        functions.set_path_sub_directory(prod, in_prod_1moncum, 'Derived',
                                         version, target_mapset)

    starting_files_1moncum = input_dir_1moncum + "*" + in_prod_ident_1moncum

    # Group all dekads of the same month; output is dated the 1st of the month.
    formatter_in_1moncum = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident_1moncum
    formatter_out_1moncum = "{subpath[0][5]}" + os.path.sep + \
        output_subdir_1moncum + "{YYYYMM[0]}" + '01' + out_prod_ident_1moncum

    # @follows(lsasaf_etp_10dcum)
    @active_if(activate_1moncum_comput)
    @collate(starting_files_1moncum, formatter(formatter_in_1moncum),
             formatter_out_1moncum)
    def lsasaf_etp_1moncum(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "input_nodata": -32768
        }
        raster_image_math.do_cumulate(**args)

    return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None):
    """Build the MODIS chl-a monthly-statistics chain (ruffus tasks).

    Registers three derived sub-products and defines one task each:
      1. ``monavg``  - monthly average of the daily ingested product.
      2. ``monclim`` - monthly climatology across all years.
      3. ``monanom`` - monthly anomaly (monavg minus monclim).

    :param prod:            product code.
    :param starting_sprod:  input (ingested) sub-product code.
    :param mapset:          mapset of both input and derived files.
    :param version:         product version string.
    :param starting_dates:  optional list of date strings restricting inputs;
                            when ``None`` all ingested files are considered.
    :param proc_lists:      optional ``functions.ProcLists`` collector; created
                            here when not passed.
    :return: the ``proc_lists`` object with the registered sub-products.
    """
    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # 1. 10d prod stats
    activate_monavg_comput = 1
    activate_monclim_comput = 1
    activate_monanom_comput = 0

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, starting_sprod, 'Ingest', version, mapset)

    # FIX: honor the starting_dates argument, like the sibling pipelines do.
    # Previously a hard-coded local (my_date = None) was tested instead, so
    # starting_dates was silently ignored and all files were always selected.
    if starting_dates is not None:
        starting_files = [
            input_dir + my_date + in_prod_ident for my_date in starting_dates
        ]
    else:
        starting_files = input_dir + "*" + in_prod_ident

    # ---------------------------------------------------------------------
    # Monthly Average for a given month
    output_sprod_group = proc_lists.proc_add_subprod_group("monstats")
    output_sprod = proc_lists.proc_add_subprod(
        "monavg",
        "monstats",
        final=False,
        descriptive_name='Monthly average',
        description='Chla Monthly average',
        frequency_id='',
        # NOTE(review): 'YYYMMMMDD' looks like a typo for 'YYYYMMDD' --
        # confirm against the date_format values accepted elsewhere.
        date_format='YYYMMMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    # Group all days of the same year/month.
    formatter_in = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" +
        out_prod_ident
    ]

    @active_if(activate_monavg_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def modis_chla_monavg(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        out_filename = os.path.basename(output_file)
        str_date = out_filename[0:6]
        # Completeness check currently disabled (see commented block below);
        # the two counters are kept so it can be re-enabled as-is.
        expected_ndays = functions.get_number_days_month(str_date)
        functions.check_output_dir(os.path.dirname(output_file))
        current_ndays = len(input_file)
        # if expected_ndays != current_ndays:
        #     logger.info('Missing days for period: %s. Skip' % str_date)
        # else:
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Monthly Climatology for all years
    output_sprod = proc_lists.proc_add_subprod(
        "monclim",
        "monstats",
        final=False,
        descriptive_name='Monthly climatology',
        description='Chla Monthly climatology',
        frequency_id='',
        date_format='YYYMMMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    # Inputs are the monavg outputs of the previous task.
    new_input_subprod = 'monavg'
    new_in_prod_ident = functions.set_path_filename_no_date(
        prod, new_input_subprod, mapset, version, ext)
    new_input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, new_input_subprod, 'Derived', version, mapset)

    new_starting_files = new_input_dir + "*" + new_in_prod_ident

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    # Group the same calendar month across all years.
    formatter_in = "[0-9]{4}(?P<MM>[0-9]{2})" + new_in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MM[0]}" +
        out_prod_ident
    ]

    @active_if(activate_monclim_comput)
    @collate(new_starting_files, formatter(formatter_in), formatter_out)
    def modis_chla_monclim(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Monthly Anomaly for a given month
    output_sprod = proc_lists.proc_add_subprod(
        "monanom",
        "monstats",
        final=False,
        descriptive_name='Monthly anomaly',
        description='Chla Monthly anomaly',
        frequency_id='',
        date_format='YYYMMMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    # Starting files (monavg) + climatology as ancillary input
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MM>[0-9]{2})" + new_in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + \
        "{YYYY[0]}{MM[0]}" + out_prod_ident

    ancillary_sprod = "monclim"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + \
        "{MM[0]}" + ancillary_sprod_ident

    @active_if(activate_monanom_comput)
    @transform(new_starting_files, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    def modis_chla_mondiff(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_oper_subtraction(**args)

    return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None, update_stats=False, nrt_products=True):
    """Build the VGT-NDVI 10-day / monthly stats-and-anomalies chain.

    The tasks delegate to the SubProcessStatsES2 / SubProcessProdsES2 helper
    objects.  IMPORTANT: the two helper objects are MUTATED half-way through
    (change_subProds_params switches them from '10d' to 'month' frequency),
    and the ruffus decorators capture their attributes (starting_files,
    formatter_in, ...) at definition time -- so the statement order below is
    load-bearing and must not be rearranged.

    :param prod:            product code.
    :param starting_sprod:  input sub-product code.
    :param mapset:          mapset of input and derived files.
    :param version:         product version string.
    :param starting_dates:  optional list of date strings restricting inputs.
    :param proc_lists:      optional functions.ProcLists collector.
    :param update_stats:    when True, activate the 10d/monthly stats groups.
    :param nrt_products:    when True, activate the NRT anomaly/cumulation groups.
    :return: the proc_lists object with the registered sub-products.
    """
    # switch wrt groups - according to options
    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # DEFAULT: ALL off
    activate_10dstats_comput = 0  # 10d stats
    activate_10danomalies_comput = 0  # 10d anomalies
    activate_monthly_comput = 0  # monthly cumulation
    activate_monstats_comput = 0  # monthly stats
    activate_monanomalies_comput = 0  # monthly anomalies

    if nrt_products:
        activate_monthly_comput = 1  # monthly cumulation
        activate_monanomalies_comput = 1  # monthly anomalies
        activate_10danomalies_comput = 1  # 2.d

    if update_stats:
        activate_10dstats_comput = 1  # 10d stats
        activate_monstats_comput = 1  # monthly stats

    # switch wrt single products: not to be changed !!
    activate_10davg_comput = 1
    activate_10dmin_comput = 1
    activate_10dmax_comput = 1
    activate_10ddiff_comput = 1
    activate_10dperc_comput = 1
    activate_10dnp_comput = 1
    activate_10dratio_comput = 1
    activate_10dstd_comput = 1
    activate_10dstandardized_comput = 1
    activate_1moncum_comput = 1
    activate_1monavg_comput = 1
    activate_1monmin_comput = 1
    activate_1monmax_comput = 1
    activate_1mondiff_comput = 1
    activate_1monstd_comput = 1
    activate_1monperc_comput = 1
    activate_1monnp_comput = 1
    activate_1monratio_comput = 1
    activate_1monstandardized_comput = 1

    # subprocess_stats = None
    # Intialize the stats subprocess (10-day frequency first; switched to
    # 'month' further down via change_subProds_params).
    subprocess_stats = subprocess_std_stats.SubProcessStatsES2(
        prod=prod,
        starting_sprod=starting_sprod,
        mapset=mapset,
        version=version,
        starting_dates=starting_dates,
        proc_lists=proc_lists,
        frequency='10d')

    # ---------------------------------------------------------------------
    # Average
    # subprocess_stats.create_basic_stats_proclist('Average')
    subprocess_stats.do_average()

    @active_if(activate_10dstats_comput, activate_10davg_comput)
    @collate(subprocess_stats.starting_files,
             formatter(subprocess_stats.formatter_in),
             subprocess_stats.formatter_out)
    def std_vgt_10davg(input_file, output_file):
        subprocess_std_stats.compute_average(input_file, output_file)

    # ---------------------------------------------------------------------
    # Minimum
    # subprocess_stats.create_basic_stats_proclist('Minimum')
    subprocess_stats.do_minimum()

    @active_if(activate_10dstats_comput, activate_10dmin_comput)
    @collate(subprocess_stats.starting_files,
             formatter(subprocess_stats.formatter_in),
             subprocess_stats.formatter_out)
    def std_vgt_10dmin(input_file, output_file):
        subprocess_std_stats.compute_minimum(input_file, output_file)

    # ---------------------------------------------------------------------
    # Maximum
    # subprocess_stats.create_basic_stats_proclist('Maximum')
    subprocess_stats.do_maximum()

    @active_if(activate_10dstats_comput, activate_10dmax_comput)
    @collate(subprocess_stats.starting_files,
             formatter(subprocess_stats.formatter_in),
             subprocess_stats.formatter_out)
    def std_vgt_10dmax(input_file, output_file):
        subprocess_std_stats.compute_maximum(input_file, output_file)

    # ---------------------------------------------------------------------
    # standard deviation (needs the 10davg as ancillary input)
    # subprocess_stats.create_basic_stats_proclist('standard_deviation')
    subprocess_stats.do_standard_deviation()

    @active_if(activate_10dstats_comput, activate_10dstd_comput)
    # @follows(std_vgt_10davg)
    @collate(subprocess_stats.starting_files,
             formatter(subprocess_stats.formatter_in),
             add_inputs(subprocess_stats.ancillary_input),
             subprocess_stats.formatter_out)
    def vgt_ndvi_10dstddev(input_file, output_file):
        subprocess_std_stats.compute_product_std_deviation(
            input_file, output_file)

    # Intialize the 10d Prods subprocess
    subprocess_prods = subprocess_std_prods.SubProcessProdsES2(
        prod=prod,
        starting_sprod=starting_sprod,
        mapset=mapset,
        version=version,
        starting_dates=starting_dates,
        proc_lists=proc_lists,
        frequency='10d')

    # ---------------------------------------------------------------------
    # 10dDiff
    # subprocess_prods.create_basic_anomaly_proclist('10dDiff')
    subprocess_prods.do_absolute_difference()

    @active_if(activate_10danomalies_comput, activate_10ddiff_comput)
    @transform(subprocess_prods.starting_files,
               formatter(subprocess_prods.formatter_in),
               add_inputs(subprocess_prods.ancillary_input),
               subprocess_prods.formatter_out)
    def std_vgt_10ddiff(input_file, output_file):
        subprocess_std_prods.compute_absolute_diff(input_file, output_file)

    # ---------------------------------------------------------------------
    # 10dperc
    # subprocess_prods.create_basic_anomaly_proclist('10dperc')
    subprocess_prods.do_percent_difference()

    # @follows(std_vgt_10davg)
    @active_if(activate_10danomalies_comput, activate_10dperc_comput)
    @transform(subprocess_prods.starting_files,
               formatter(subprocess_prods.formatter_in),
               add_inputs(subprocess_prods.ancillary_input),
               subprocess_prods.formatter_out)
    def std_vgt_10dperc(input_file, output_file):
        subprocess_std_prods.compute_percentage_diff_vs_avg(
            input_file, output_file)

    # ---------------------------------------------------------------------
    # 10dnp (normalized anomaly; needs min and max as ancillary inputs)
    # subprocess_prods.create_basic_anomaly_proclist('10dnp')
    subprocess_prods.do_normalized_anomaly()

    @active_if(activate_10danomalies_comput, activate_10dnp_comput)
    @transform(subprocess_prods.starting_files,
               formatter(subprocess_prods.formatter_in),
               add_inputs(subprocess_prods.ancillary_input_1,
                          subprocess_prods.ancillary_input_2),
               subprocess_prods.formatter_out)
    def std_vgt_10dnp(input_file, output_file):
        subprocess_std_prods.compute_normalized_anomaly(
            input_file, output_file)

    # ---------------------------------------------------------------------
    # 10dratio
    # subprocess_prods.create_basic_anomaly_proclist('10dratio')
    subprocess_prods.do_ratio()

    @active_if(activate_10danomalies_comput, activate_10dratio_comput)
    @transform(subprocess_prods.starting_files,
               formatter(subprocess_prods.formatter_in),
               add_inputs(subprocess_prods.ancillary_input_1),
               subprocess_prods.formatter_out)
    def std_vgt_10dratio(input_file, output_file):
        subprocess_std_prods.compute_product_ratio(input_file, output_file)

    # ---------------------------------------------------------------------
    # Standardized 10d product (built from the 10ddiff outputs)
    # subprocess_prods.create_basic_anomaly_proclist('10standardized')
    subprocess_prods.do_standardized_prod()

    @active_if(activate_10danomalies_comput, activate_10dstandardized_comput)
    @transform(subprocess_prods.starting_files_10ddiff,
               formatter(subprocess_prods.formatter_in),
               add_inputs(subprocess_prods.ancillary_input),
               subprocess_prods.formatter_out)
    def vgt_ndvi_10dsndvi(input_file, output_file):
        subprocess_std_prods.compute_standardized_products(
            input_file, output_file)

    # ---------------------------------------------------------------------
    # 3.a monthly product (avg)
    # ---------------------------------------------------------------------
    subprocess_prods.do_monthly_prod()

    @active_if(activate_monthly_comput, activate_1moncum_comput)
    @collate(subprocess_prods.starting_files,
             formatter(subprocess_prods.formatter_in),
             subprocess_prods.formatter_out)
    def vgt_mon_prod(input_file, output_file):
        subprocess_std_prods.compute_monthly_prod_from_10d(
            input_file, output_file)

    # ---------------------------------------------------------------------
    # 3.b monthly masks
    # ---------------------------------------------------------------------

    # input_subprod_monthly = "mon"+starting_sprod
    # #output_sprod = proc_lists.proc_add_subprod("monndvi", "monthly_prod", False, True)
    #
    # in_prod_ident_monthly = functions.set_path_filename_no_date(prod, input_subprod_monthly,mapset, version, ext)
    #
    # input_dir_monthly =es2_data_dir+ \
    #     functions.set_path_sub_directory(prod, input_subprod_monthly, 'Derived', version, mapset)
    #
    # starting_files_mon_prod = input_dir_monthly+"*"+in_prod_ident_monthly
    #
    # # ---------------------------------------------------------------------
    # # 3.b monthly stats
    # # ---------------------------------------------------------------------
    #
    # # ---------------------------------------------------------------------
    # # avg x month

    # Intialize the stats subprocess by changing the frequency to monthly.
    # NOTE: from here on subprocess_stats refers to the MONTHLY file set.
    subprocess_stats.change_subProds_params(starting_sprod=starting_sprod,
                                            frequency='month')
    # subprocess_stats = subprocess_std_stats.SubProcessStatsES2(prod=prod, starting_sprod=starting_sprod, mapset=mapset,
    #                                                            version=version, starting_dates=starting_dates,
    #                                                            proc_lists=proc_lists, frequency='month')
    subprocess_stats.do_average()

    @active_if(activate_monstats_comput, activate_1monavg_comput)
    @collate(subprocess_stats.starting_files_mon_prod,
             formatter(subprocess_stats.formatter_in),
             subprocess_stats.formatter_out)
    @follows(vgt_mon_prod)
    def vgt_1monavg(input_file, output_file):
        subprocess_std_stats.compute_average(input_file, output_file)

    # # ---------------------------------------------------------------------
    # # min x month
    subprocess_stats.do_minimum()

    @active_if(activate_monstats_comput, activate_1monmin_comput)
    @collate(subprocess_stats.starting_files_mon_prod,
             formatter(subprocess_stats.formatter_in),
             subprocess_stats.formatter_out)
    @follows(vgt_1monavg)
    def vgt_1monmin(input_file, output_file):
        subprocess_std_stats.compute_minimum(input_file, output_file)

    # #
    # # ---------------------------------------------------------------------
    # # NDV max x month
    subprocess_stats.do_maximum()

    @active_if(activate_monstats_comput, activate_1monmax_comput)
    @collate(subprocess_stats.starting_files_mon_prod,
             formatter(subprocess_stats.formatter_in),
             subprocess_stats.formatter_out)
    @follows(vgt_1monmin)
    def vgt_1monmax(input_file, output_file):
        subprocess_std_stats.compute_maximum(input_file, output_file)

    # # ---------------------------------------------------------------------
    # # Monthly standard deviation () ->
    subprocess_stats.do_standard_deviation()

    @active_if(activate_monstats_comput, activate_1monstd_comput)
    @follows(vgt_1monavg)
    @collate(subprocess_stats.starting_files_mon_prod,
             formatter(subprocess_stats.formatter_in),
             add_inputs(subprocess_stats.ancillary_input),
             subprocess_stats.formatter_out)
    def vgt_1monstddev(input_file, output_file):
        subprocess_std_stats.compute_product_std_deviation(
            input_file, output_file)

    # ---------------------------------------------------------------------
    # 3.d Product monthly anomalies
    # ---------------------------------------------------------------------

    # Intialize the Prods subprocess with monthly frequency.
    # NOTE: from here on subprocess_prods refers to the MONTHLY file set.
    subprocess_prods.change_subProds_params(starting_sprod=starting_sprod,
                                            frequency='month')
    # subprocess_prods = subprocess_std_prods.SubProcessProdsES2(prod=prod, starting_sprod=starting_sprod, mapset=mapset, version=version,
    #                                                            starting_dates=starting_dates, proc_lists=proc_lists, frequency='month')

    # ---------------------------------------------------------------------
    # Absolute Difference x month
    subprocess_prods.do_absolute_difference()

    @active_if(activate_monanomalies_comput, activate_1mondiff_comput)
    # @follows(vgt_1monavg)
    @transform(subprocess_prods.starting_files_mon_prod,
               formatter(subprocess_prods.formatter_in),
               add_inputs(subprocess_prods.ancillary_input),
               subprocess_prods.formatter_out)
    def vgt_1mondiff(input_file, output_file):
        subprocess_std_prods.compute_absolute_diff(input_file, output_file)

    # ---------------------------------------------------------------------
    # 1monperc
    subprocess_prods.do_percent_difference()

    # @follows(vgt_1monavg)
    @active_if(activate_monanomalies_comput, activate_1monperc_comput)
    @transform(subprocess_prods.starting_files_mon_prod,
               formatter(subprocess_prods.formatter_in),
               add_inputs(subprocess_prods.ancillary_input),
               subprocess_prods.formatter_out)
    def vgt_1monperc(input_file, output_file):
        subprocess_std_prods.compute_percentage_diff_vs_avg(
            input_file, output_file)

    # ---------------------------------------------------------------------
    # Monthly ratio (linearx2/avg) -> 0-100 % value
    subprocess_prods.do_ratio()

    @active_if(activate_monanomalies_comput, activate_1monratio_comput)
    @transform(subprocess_prods.starting_files_mon_prod,
               formatter(subprocess_prods.formatter_in),
               add_inputs(subprocess_prods.ancillary_input),
               subprocess_prods.formatter_out)
    # @follows(vgt_1monavg)
    def vgt_monthly_ratio(input_file, output_file):
        subprocess_std_prods.compute_product_ratio(input_file, output_file)

    # ---------------------------------------------------------------------
    # 1monnp (normalized anomaly; needs monthly min and max)
    subprocess_prods.do_normalized_anomaly()

    # @follows(vgt_1monmax, vgt_1monmin)
    @active_if(activate_monanomalies_comput, activate_1monnp_comput)
    @transform(subprocess_prods.starting_files_mon_prod,
               formatter(subprocess_prods.formatter_in),
               add_inputs(subprocess_prods.ancillary_input_1,
                          subprocess_prods.ancillary_input_2),
               subprocess_prods.formatter_out)
    def vgt_1monnp(input_file, output_file):
        subprocess_std_prods.compute_normalized_anomaly(
            input_file, output_file)

    # ---------------------------------------------------------------------
    # Standardized Product (built from the 1mondiff outputs)
    subprocess_prods.do_standardized_prod()

    @active_if(activate_monanomalies_comput,
               activate_1monstandardized_comput)
    # @follows(vgt_1mondiff, vgt_1monstddev)
    @transform(subprocess_prods.starting_files_mondiff,
               formatter(subprocess_prods.formatter_in),
               add_inputs(subprocess_prods.ancillary_input),
               subprocess_prods.formatter_out)
    def vgt_prod_1monstdprod(input_file, output_file):
        subprocess_std_prods.compute_standardized_products(
            input_file, output_file)

    return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, list_subprods=None, update_stats=False, nrt_products=True): # --------------------------------------------------------------------- # Create lists proc_lists = functions.ProcLists() if list_subprods is not None: list_subprods = proc_lists.list_subprods list_subprod_groups = proc_lists.list_subprod_groups # Set DEFAULTS: all off activate_10danomalies_comput = 0 # 10d anomalies activate_monthly_comput = 0 # monthly cumulation activate_monanomalies_comput = 0 # monthly anomalies activate_10dstats_comput = 0 # 10d stats activate_monstats_comput = 0 # 1mon stats # switch wrt groups - according to options if nrt_products: activate_10danomalies_comput = 1 # 10d anomalies activate_monthly_comput = 1 # monthly cumulation activate_monanomalies_comput = 1 # monthly anomalies if update_stats: activate_10dstats_comput = 1 # 10d stats activate_monstats_comput = 1 # 1mon stats # switch wrt single products: not to be changed !! 
activate_10davg_comput = 1 activate_10dmin_comput = 1 activate_10dmax_comput = 1 activate_10ddiff_comput = 1 activate_10dperc_comput = 1 activate_10dnp_comput = 1 activate_1moncum_comput = 1 activate_1monavg_comput = 1 activate_1monmin_comput = 1 activate_1monmax_comput = 1 activate_1mondiff_comput = 1 activate_1monperc_comput = 1 activate_1monnp_comput = 1 es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep # --------------------------------------------------------------------- # Define input files in_prod_ident = functions.set_path_filename_no_date( prod, starting_sprod, mapset, version, ext) logger.debug('Base data directory is: %s' % es2_data_dir) input_dir = es2_data_dir+ \ functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset) logger.debug('Input data directory is: %s' % input_dir) if starting_dates is not None: starting_files = [] for my_date in starting_dates: starting_files.append(input_dir + my_date + in_prod_ident) else: starting_files = input_dir + "*" + in_prod_ident logger.debug('Starting files wild card is: %s' % starting_files) # --------------------------------------------------------------------- # Average output_sprod_group = proc_lists.proc_add_subprod_group("10dstats") output_sprod = proc_lists.proc_add_subprod("10davg", "10dstats", False, True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident formatter_out = [ "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident ] @active_if(activate_10dstats_comput, activate_10davg_comput) @collate(starting_files, formatter(formatter_in), formatter_out) def std_precip_10davg(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": 
            input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Minimum: long-term minimum per dekad (MMDD), across all years
    output_sprod = proc_lists.proc_add_subprod("10dmin", "10dstats", False, True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version, mapset)

    # Group inputs by dekad (MMDD), discarding the year part of the date
    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dmin_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def std_precip_10dmin(input_file, output_file):
        """Pixel-wise minimum of all same-dekad rasters -> one MMDD output."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_min_image(**args)

    # ---------------------------------------------------------------------
    # Maximum: long-term maximum per dekad (MMDD), across all years
    output_sprod = proc_lists.proc_add_subprod("10dmax", "10dstats", False, True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dmax_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def std_precip_10dmax(input_file, output_file):
        """Pixel-wise maximum of all same-dekad rasters -> one MMDD output."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_max_image(**args)

    # ---------------------------------------------------------------------
    # 10dDiff: absolute difference of each dekad vs. the long-term average
    output_sprod_group = proc_lists.proc_add_subprod_group("10anomalies")
    output_sprod = proc_lists.proc_add_subprod("10ddiff", "10anomalies", False, True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version, mapset)

    # Starting files + avg: each dated input is paired with the matching
    # MMDD 10davg ancillary file through add_inputs() below
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "10davg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    @follows(std_precip_10davg)
    @active_if(activate_10danomalies_comput, activate_10ddiff_comput)
    @transform(starting_files, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    def std_precip_10ddiff(input_file, output_file):
        """Subtract the 10davg ancillary from the dated dekad raster.

        NOTE(review): input_file here is the [dated, 10davg] pair produced by
        add_inputs and is passed whole to do_oper_subtraction — presumably the
        helper unpacks the pair itself (same pattern as 1mondiff); verify.
        """
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_oper_subtraction(**args)

    # ---------------------------------------------------------------------
    # 10dperc: percent difference of each dekad vs. the long-term average
    output_sprod = proc_lists.proc_add_subprod("10dperc", "10anomalies", False, True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version, mapset)

    # Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "10davg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    @follows(std_precip_10davg)
    @active_if(activate_10danomalies_comput, activate_10dperc_comput)
    @transform(starting_files, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    def std_precip_10dperc(input_file, output_file):
        """Percent difference vs. average: input_file is [dated, 10davg]."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file[0],
            "avg_file": input_file[1],
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_compute_perc_diff_vs_avg(**args)

    # ---------------------------------------------------------------------
    # 10dnp: normalized product of each dekad vs. long-term min/max
    output_sprod = proc_lists.proc_add_subprod("10dnp", "10anomalies", False, True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version, mapset)

    # Starting files + min + max
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod_1 = "10dmin"
    ancillary_sprod_ident_1 = functions.set_path_filename_no_date(
        prod, ancillary_sprod_1, mapset, version, ext)
    ancillary_subdir_1 = functions.set_path_sub_directory(
        prod, ancillary_sprod_1, 'Derived', version, mapset)
    ancillary_input_1 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_1 + "{MMDD[0]}" + ancillary_sprod_ident_1

    ancillary_sprod_2 = "10dmax"
    ancillary_sprod_ident_2 = functions.set_path_filename_no_date(
        prod, ancillary_sprod_2, mapset, version, ext)
    ancillary_subdir_2 = functions.set_path_sub_directory(
        prod, ancillary_sprod_2, 'Derived', version, mapset)
    ancillary_input_2 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_2 + "{MMDD[0]}" + ancillary_sprod_ident_2

    @follows(std_precip_10dmin, std_precip_10dmax)
    @active_if(activate_10danomalies_comput, activate_10dnp_comput)
    @transform(starting_files, formatter(formatter_in),
               add_inputs(ancillary_input_1, ancillary_input_2), formatter_out)
    def std_precip_10dnp(input_file, output_file):
        """Normalize dekad vs. min/max via the VCI-style helper.

        input_file is [dated, 10dmin, 10dmax]; do_make_vci presumably computes
        (x - min) / (max - min) — TODO confirm against raster_image_math.
        """
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file[0],
            "min_file": input_file[1],
            "max_file": input_file[2],
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_make_vci(**args)

    # ---------------------------------------------------------------------
    # 1moncum: cumulate all dekads of a month into a single monthly raster
    output_sprod_group = proc_lists.proc_add_subprod_group("monthly")
    output_sprod = proc_lists.proc_add_subprod("1moncum", "monthly", False, True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version, mapset)

    # inputs: files from same months (group on YYYYMM, ignore the day)
    formatter_in = "(?P<YYYYMM>[0-9]{6})(?P<DD>[0-9]{2})" + in_prod_ident
    # output is dated the 1st of the month
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" + '01' + out_prod_ident

    # @follows(std_precip_10davg)
    @active_if(activate_monthly_comput, activate_1moncum_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def std_precip_1moncum(input_file, output_file):
        """Sum all dekad rasters of one month -> YYYYMM01 monthly cumulate."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_cumulate(**args)

    #
    # ---------------------------------------------------------------------
    # Monthly Average: the 1moncum output becomes the new input sub-product
    new_input_subprod = '1moncum'
    # NOTE: in_prod_ident is deliberately re-bound here — tasks below match
    # on the 1moncum filenames, not on the original starting sub-product.
    in_prod_ident = functions.set_path_filename_no_date(
        prod, new_input_subprod, mapset, version, ext)

    output_sprod_group = proc_lists.proc_add_subprod_group("monstat")
    output_sprod = proc_lists.proc_add_subprod("1monavg", "monstat", False, True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version, mapset)

    # Group monthly cumulates by MMDD (i.e. by month, day is always 01)
    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident
    ]

    @follows(std_precip_1moncum)
    @active_if(activate_monstats_comput, activate_1monavg_comput)
    @collate(std_precip_1moncum, formatter(formatter_in), formatter_out)
    def std_precip_1monavg(input_file, output_file):
        """Long-term average of the monthly cumulates, per month."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Monthly Minimum
    output_sprod = proc_lists.proc_add_subprod("1monmin", "monstat", False, True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident
    ]

    @follows(std_precip_1moncum)
    @active_if(activate_monstats_comput, activate_1monmin_comput)
    @collate(std_precip_1moncum, formatter(formatter_in), formatter_out)
    def std_precip_1monmin(input_file, output_file):
        """Long-term minimum of the monthly cumulates, per month."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_min_image(**args)

    # ---------------------------------------------------------------------
    # Monthly Maximum
    output_sprod = proc_lists.proc_add_subprod("1monmax", "monstat", False, True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version, mapset)

    # NOTE(review): reg_ex_in is never used (formatter_in below is what the
    # task matches on) — candidate for removal.
    reg_ex_in = "[0-9]{4}([0-9]{4})" + in_prod_ident

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident
    ]

    @follows(std_precip_1moncum)
    @active_if(activate_monstats_comput, activate_1monmax_comput)
    @collate(std_precip_1moncum, formatter(formatter_in), formatter_out)
    def std_precip_1monmax(input_file, output_file):
        """Long-term maximum of the monthly cumulates, per month."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_max_image(**args)

    # ---------------------------------------------------------------------
    # 1monDiff: absolute difference of each month vs. the long-term average
    output_sprod_group = proc_lists.proc_add_subprod_group("monanomalies")
    output_sprod = proc_lists.proc_add_subprod("1mondiff", "monanomalies", False, True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version, mapset)

    # inputs
    # Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "1monavg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    @follows(std_precip_1monavg)
    @active_if(activate_monanomalies_comput, activate_1mondiff_comput)
    @transform(std_precip_1moncum, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    def std_precip_1mondiff(input_file, output_file):
        """Subtract the 1monavg ancillary from the monthly cumulate.

        NOTE(review): the [dated, 1monavg] pair is passed whole as input_file
        to do_oper_subtraction (same pattern as 10ddiff); verify the helper
        unpacks it.
        """
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_oper_subtraction(**args)

    # ---------------------------------------------------------------------
    # 1monperc: percent difference of each month vs. the long-term average
    output_sprod = proc_lists.proc_add_subprod("1monperc", "monanomalies", False, True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version, mapset)

    # inputs
    # Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "1monavg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    @follows(std_precip_1monavg)
    @active_if(activate_monanomalies_comput, activate_1monperc_comput)
    @transform(std_precip_1moncum, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    def std_precip_1monperc(input_file, output_file):
        """Percent difference vs. average: input_file is [dated, 1monavg]."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file[0],
            "avg_file": input_file[1],
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_compute_perc_diff_vs_avg(**args)

    # ---------------------------------------------------------------------
    # 1monnp: normalized product of each month vs. long-term min/max
    output_sprod = proc_lists.proc_add_subprod("1monnp", "monanomalies", False, True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version, mapset)

    # Starting files + min + max
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod_1 = "1monmin"
    ancillary_sprod_ident_1 = functions.set_path_filename_no_date(
        prod, ancillary_sprod_1, mapset, version, ext)
    ancillary_subdir_1 = functions.set_path_sub_directory(
        prod, ancillary_sprod_1, 'Derived', version, mapset)
    ancillary_input_1 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_1 + "{MMDD[0]}" + ancillary_sprod_ident_1

    ancillary_sprod_2 = "1monmax"
    ancillary_sprod_ident_2 = functions.set_path_filename_no_date(
        prod, ancillary_sprod_2, mapset, version, ext)
    ancillary_subdir_2 = functions.set_path_sub_directory(
        prod, ancillary_sprod_2, 'Derived', version, mapset)
    ancillary_input_2 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_2 + "{MMDD[0]}" + ancillary_sprod_ident_2

    @follows(std_precip_1monmin, std_precip_1monmax)
    @active_if(activate_monanomalies_comput, activate_1monnp_comput)
    @transform(std_precip_1moncum, formatter(formatter_in),
               add_inputs(ancillary_input_1, ancillary_input_2), formatter_out)
    def std_precip_1monnp(input_file, output_file):
        """Normalize month vs. min/max via the VCI-style helper.

        input_file is [dated, 1monmin, 1monmax] — see std_precip_10dnp.
        """
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file[0],
            "min_file": input_file[1],
            "max_file": input_file[2],
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_make_vci(**args)
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None,
                    proc_lists=None, starting_dates_stats=None,
                    update_stats=False, nrt_products=True):
    """Build the fire-count derived-product pipeline (1day -> 10dcount -> stats/anomalies/10km).

    Parameters:
        prod, starting_sprod, mapset, version: identify the ingested product.
        starting_dates: 1-day frequency range driving 1day -> 10dcount.
        proc_lists: existing ProcLists to append to (a new one if None).
        starting_dates_stats: 10-day frequency range driving the statistics.
        update_stats: enable the statistics task groups.
        nrt_products: enable the near-real-time task groups.
    """
    # ---------------------------------------------------------------------
    # Create lists to store definition of the derived products, and their
    # groups.
    # Two starting dates ranges are passed:
    #
    # starting_dates: range - 1d frequency - for 1day -> 10dcount
    # Normally not used: only for tests (the number of 1day files i large!)
    #
    # starting_dates_stats: range - 10d frequency - for 10dcount -> 10dcountmin/max/avg
    # Used to define a specific range for stats, normally 20030101 -> <prev-year>1221
    #
    # For the 10d products anomalies (both 1km and 10km) ALL available files are used for anomaly computation
    #
    # ---------------------------------------------------------------------
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # ---------------------------------------------------------------------
    # Define and assign the flags to control the individual derived products
    # and the groups. NOT to be changed by the User
    # ---------------------------------------------------------------------
    # Set DEFAULTS: all off
    activate_10dcount_comput = 0        # 2.a - 10d count
    activate_10dstats_comput = 0        # 2.b - 10d stats
    activate_10danomalies_comput = 0    # 2.c - 10d anomalies
    activate_10d_10k_comput = 0         # 3.a - 10d on 10km cells
    activate_10d_10k_stats_comput = 0   # 3.b - 10d on 10km statistics
    activate_10d_10k_anom_comput = 0    # 3.c - 10d on 10km anomalies

    # switch wrt groups - according to options
    if nrt_products:
        activate_10dcount_comput = 1        # 10d count
        activate_10danomalies_comput = 1    # 10d anomalies
        activate_10d_10k_comput = 1         # 10d on 10k
        activate_10d_10k_anom_comput = 1    # 10d on 10km anomalies

    if update_stats:
        activate_10dstats_comput = 1        # 10d stats
        activate_10d_10k_stats_comput = 1   # 10d on 10km statistics

    # Switch wrt single products: not to be changed !!
    # 2.b -> 10d stats
    activate_10dcountavg_comput = 1
    activate_10dcountmin_comput = 1
    activate_10dcountmax_comput = 1
    # 2.c -> 10d anomalies
    activate_10ddiff_comput = 1
    # 3.a -> 10d on 10 km
    activate_10dcount10k_comput = 1
    # 3.b -> 10d on 10 km stats
    activate_10dcount10kavg_comput = 1
    activate_10dcount10kmin_comput = 1
    activate_10dcount10kmax_comput = 1
    # 3.c -> 10d on 10 km anomalies
    activate_10dcount10kdiff_comput = 1
    activate_10dcount10kperc_comput = 1
    activate_10dcount10kratio_comput = 1

    # ---------------------------------------------------------------------
    # Define the 'grid' file for the 10k count conversion
    # If it does not exists, disable computation
    # ---------------------------------------------------------------------
    grid_mapset_name = 'SPOTV-Africa-1km'
    # grid_file='/eStation2/layers/Mask_Africa_SPOTV_10km.tif'
    grid_file = es_constants.es2globals[
        'estation2_layers_dir'] + os.path.sep + 'Mask_Africa_SPOTV_10km.tif'

    # Without the grid mask every 10km-related group is forced off
    if not os.path.isfile(grid_file):
        activate_10d_10k_comput = 0         # 10d on 10km
        activate_10d_10k_anom_comput = 0    # 10d on 10km anomalies
        activate_10d_10k_stats_comput = 0   # 10d on 10km statistics

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files from the starting_sprod and starting_dates arguments
    # ---------------------------------------------------------------------
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    # logger.debug('Base data directory is: %s' % es2_data_dir)
    input_dir = es2_data_dir + \
        functions.set_path_sub_directory(prod, starting_sprod, 'Ingest',
                                         version, mapset)

    # starting_dates -> 1 day: explicit list when dates are given,
    # otherwise everything currently on disk
    if starting_dates is not None:
        starting_files_1day = []
        for my_date in starting_dates:
            starting_files_1day.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files_1day = glob.glob(input_dir + "*" + in_prod_ident)

    #
    # ---------------------------------------------------------------------
    # Derived product: 10dcount
    # ---------------------------------------------------------------------
    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcount",
        "10dcount",
        final=False,
        descriptive_name='10d Count',
        description='Fire Count for dekad',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident_10dcount = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_10dcount = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_10dcount():
        """Yield ([daily files of one dekad], dekad output path) pairs.

        The current (incomplete) dekad is excluded so it is not written
        until all its daily files can be present.
        """
        # Look for all input files in input_dir, and sort them
        input_files = starting_files_1day
        dekad_list = []

        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)
        dekad_list = sorted(dekad_list)

        # Compute the 'julian' dakad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        for dekad in dekad_list:
            # Exclude the current dekad
            if dekad != dekad_now:
                file_list = []
                my_dekad_str = functions.conv_dekad_2_date(dekad)
                for input_file in input_files:
                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    mydekad_nbr = functions.conv_date_2_dekad(
                        mydate_yyyymmdd[0:8])
                    if mydekad_nbr == dekad:
                        file_list.append(input_file)

                output_file = es_constants.processing_dir + output_subdir_10dcount + os.path.sep + my_dekad_str + out_prod_ident_10dcount
                yield (file_list, output_file)

    @active_if(activate_10dcount_comput)
    @files(generate_parameters_10dcount)
    def std_fire_10dcount(input_file, output_file):
        """Cumulate the daily fire rasters of one dekad into a dekad count."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_cumulate(**args)

    # ---------------------------------------------------------------------
    # Derived product: 10dcountavg
    # ---------------------------------------------------------------------
    # NOTE(review): the else-branch is a glob *pattern string* (no glob.glob),
    # unlike starting_files_1day above — presumably relying on ruffus
    # expanding glob patterns in task inputs; verify this is intentional.
    if starting_dates_stats is not None:
        files_10dcount_4stats = []
        for my_date in starting_dates_stats:
            files_10dcount_4stats.append(es2_data_dir +
                                         output_subdir_10dcount + my_date +
                                         out_prod_ident_10dcount)
    else:
        files_10dcount_4stats = es2_data_dir + output_subdir_10dcount + "*" + out_prod_ident_10dcount

    output_sprod_group = proc_lists.proc_add_subprod_group("10dstats")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcountavg",
        "10dstats",
        final=False,
        descriptive_name='10d Fire Average',
        description='Average fire for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version, mapset)

    # Group dekad counts by MMDD across years
    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dcountavg_comput)
    @collate(files_10dcount_4stats, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcount)
    def std_fire_10dcountavg(input_file, output_file):
        """Long-term per-dekad average of the fire counts (Float32 output)."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            'output_type': 'Float32',
            'input_nodata': -32768
        }
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", 'output_type':'Float32', 'input_nodata':0}
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Derived product: 10dcountmin
    # ---------------------------------------------------------------------
    output_sprod = proc_lists.proc_add_subprod(
        "10dcountmin",
        "10dstats",
        final=False,
        descriptive_name='10d Fire Minimum',
        description='Minimum Fire for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dcountmin_comput)
    @collate(files_10dcount_4stats, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcountavg)
    def std_fire_10dcountmin(input_file, output_file):
        """Long-term per-dekad minimum of the fire counts."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # The coded value (nodata=0) leads to the wrong result
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "input_nodata": -32768
        }
        raster_image_math.do_min_image(**args)

    # ---------------------------------------------------------------------
    # Derived product: 10dcountmax
    # ---------------------------------------------------------------------
    output_sprod = proc_lists.proc_add_subprod(
        "10dcountmax",
        "10dstats",
        final=False,
        descriptive_name='10d Maximum',
        description='Maximum rainfall for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dcountmax_comput)
    @collate(files_10dcount_4stats, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcountmin)
    def std_fire_10dcountmax(input_file, output_file):
        """Long-term per-dekad maximum of the fire counts."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_max_image(**args)

    # ---------------------------------------------------------------------
    # Derived product: 10dDiff
    # ---------------------------------------------------------------------
    # Define the input files for conversion to 10k on the basis of the 'starting_dates' (not 'starting_dates_stats')
    if starting_dates is not None:
        files_10dcount_4anom = []
        use_dates_10dcount = proc_functions.get_list_dates_for_dataset(
            prod,
            '10dcount',
            version,
            start_date=starting_dates[0],
            end_date=starting_dates[-1])
        for my_date in use_dates_10dcount:
            files_10dcount_4anom.append(es2_data_dir +
                                        output_subdir_10dcount + my_date +
                                        out_prod_ident_10dcount)
    else:
        files_10dcount_4anom = glob.glob(es2_data_dir +
                                         output_subdir_10dcount + "*" +
                                         out_prod_ident_10dcount)

    output_sprod_group = proc_lists.proc_add_subprod_group("10danomalies")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcountdiff",
        "10danomalies",
        final=False,
        descriptive_name='10d Absolute Difference',
        description='10d Absolute Difference vs. LTA',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version, mapset)

    # Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "10dcountavg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    # @follows(std_fire_10dcountavg)
    @active_if(activate_10danomalies_comput, activate_10ddiff_comput)
    @transform(files_10dcount_4anom, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    @follows(std_fire_10dcountmax)
    def std_fire_10dcountdiff(input_file, output_file):
        """Absolute difference of a dekad count vs. its long-term average."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", 'output_type':'Float32', 'input_nodata':-32768}
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            'output_type': 'Float32',
            'input_nodata': -32768,
            'output_nodata': -32768
        }
        raster_image_math.do_oper_subtraction(**args)

    # ---------------------------------------------------------------------
    # Derived product: 10dcount10km
    # ---------------------------------------------------------------------
    # NOTE(review): the target_mapset_name assignment is commented out here,
    # yet target_mapset_name is used below — it must be defined elsewhere
    # (module level?); verify.
    # target_mapset_name = 'SPOTV-Africa-10km'
    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount10k")
    output_sprod_10dcount10k = proc_lists.proc_add_subprod(
        "10dcount10k",
        "10dcount10k",
        final=False,
        descriptive_name='10d Gridded at 10 km',
        description='10d Count Gridded at 10 km',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident_10dcount10k = functions.set_path_filename_no_date(
        prod, output_sprod_10dcount10k, target_mapset_name, version, ext)
    output_subdir_10dcount10k = functions.set_path_sub_directory(
        prod, output_sprod_10dcount10k, 'Derived', version, target_mapset_name)

    # Starting files + avg
    formatter_in = "(?P<YYYYMMDD>[0-9]{8})" + out_prod_ident_10dcount
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir_10dcount10k + "{YYYYMMDD[0]}" + out_prod_ident_10dcount10k

    @active_if(activate_10d_10k_comput, activate_10dcount10k_comput)
    @transform(files_10dcount_4anom, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcountdiff)
    def std_fire_10dcount10k(input_file, output_file):
        """Aggregate the 1km dekad count onto the 10km grid, then reproject.

        Sums 1km cells per 10km grid cell into a temporary file, reprojects it
        to the target mapset, and removes the temporary directory.
        """
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        tmpdir = tempfile.mkdtemp(prefix=__name__,
                                  suffix='_' + os.path.basename(output_file),
                                  dir=es_constants.base_tmp_dir)

        # Temporary (not masked) file
        output_file_temp = tmpdir + os.path.sep + os.path.basename(output_file)
        input_mapset_name = mapset
        operation = 'sum'
        args = {
            "input_file": input_file,
            "grid_file": grid_file,
            "output_file": output_file_temp,
            "operation": operation,
            "input_mapset_name": input_mapset_name,
            "grid_mapset_name": grid_mapset_name,
            "output_format": None,
            'nodata': -32768,
            "options": "compress=lzw",
            "output_type": 'Int16'
        }
        raster_image_math.do_stats_4_raster(**args)

        # NOTE(review): key is 'inputfile' (not 'input_file') — presumably
        # matching do_reproject's signature; verify.
        args = {
            "inputfile": output_file_temp,
            "output_file": output_file,
            "native_mapset_name": grid_mapset_name,
            "target_mapset_name": target_mapset_name
        }
        raster_image_math.do_reproject(**args)

        shutil.rmtree(tmpdir)

    # ---------------------------------------------------------------------
    # Derived product: 10dcount10kavg
    # ---------------------------------------------------------------------
    # Inputs for the 10km statistics: dated list when starting_dates_stats is
    # given, otherwise a glob *pattern string* (ruffus presumably expands it;
    # verify — see files_10dcount_4stats above).
    if starting_dates_stats is not None:
        files_10dcount10k_4stats = []
        for my_date in starting_dates_stats:
            files_10dcount10k_4stats.append(es2_data_dir +
                                            output_subdir_10dcount10k +
                                            my_date +
                                            out_prod_ident_10dcount10k)
    else:
        files_10dcount10k_4stats = es2_data_dir + output_subdir_10dcount10k + "*" + out_prod_ident_10dcount10k

    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount10kstats")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcount10kavg",
        "10dcount10kstats",
        final=False,
        descriptive_name='10d Fire count 10km Average',
        description='10d Fire count 10km Average',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset_name, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     target_mapset_name)

    # Group 10km dekad counts by MMDD across years
    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount10k
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident
    ]

    @active_if(activate_10d_10k_stats_comput, activate_10dcount10kavg_comput)
    @collate(files_10dcount10k_4stats, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcount10k)
    def std_fire_10dcount10kavg(input_file, output_file):
        """Long-term per-dekad average of the 10km counts (Float32 output)."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            'output_type': 'Float32',
            'input_nodata': -32768
        }
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", 'output_type': 'Float32', 'input_nodata': 0}
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Derived product: 10dcount10kmin
    # ---------------------------------------------------------------------
    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount10kstats")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcount10kmin",
        "10dcount10kstats",
        final=False,
        descriptive_name='10d Fire count 10km minimum',
        description='10d Fire count 10km minimum',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset_name, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     target_mapset_name)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount10k
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident
    ]

    @active_if(activate_10d_10k_stats_comput, activate_10dcount10kmin_comput)
    @collate(files_10dcount10k_4stats, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcount10kavg)
    def std_fire_10dcount10kmin(input_file, output_file):
        """Long-term per-dekad minimum of the 10km counts (Int16 output)."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            'output_type': 'Int16',
            'input_nodata': -32768
        }
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", 'output_type': 'Int16', 'input_nodata': 0}
        raster_image_math.do_min_image(**args)

    # ---------------------------------------------------------------------
    # Derived product: 10dcount10kmax
    # ---------------------------------------------------------------------
    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount10kstats")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcount10kmax",
        "10dcount10kstats",
        final=False,
        descriptive_name='10d Fire count 10km maximum',
        description='10d Fire count 10km maximum',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset_name, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     target_mapset_name)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount10k
    # NOTE(review): plain string here (avg/min use a one-element list) —
    # ruffus accepts both; flagged only for consistency.
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident

    @active_if(activate_10d_10k_stats_comput, activate_10dcount10kmax_comput)
    @collate(files_10dcount10k_4stats, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcount10kmin)
    def std_fire_10dcount10kmax(input_file, output_file):
        """Long-term per-dekad maximum of the 10km counts (Int16 output)."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            'output_type': 'Int16',
            'input_nodata': -32768
        }
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", 'output_type': 'Int16', 'input_nodata': 0}
        raster_image_math.do_max_image(**args)

    # ---------------------------------------------------------------------
    # Derived product: 10dcount10kdiff
    # ---------------------------------------------------------------------
    # Define the input files for conversion to 10k on the basis of the 'starting_dates' (not 'starting_dates_stats')
    if starting_dates is not None:
        files_10dcount10k_4anom = []
        use_dates_10dcount10k = proc_functions.get_list_dates_for_dataset(
            prod,
            '10dcount10k',
            version,
            start_date=starting_dates[0],
            end_date=starting_dates[-1])
        for my_date in use_dates_10dcount10k:
            files_10dcount10k_4anom.append(es2_data_dir +
                                           output_subdir_10dcount10k +
                                           my_date +
                                           out_prod_ident_10dcount10k)
    else:
        files_10dcount10k_4anom = glob.glob(es2_data_dir +
                                            output_subdir_10dcount10k + "*" +
                                            out_prod_ident_10dcount10k)

    output_sprod_group = proc_lists.proc_add_subprod_group(
        "10dcount10kanomalies")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcount10kdiff",
        "10dcount10kanomalies",
        final=False,
        descriptive_name='10d 10 km Absolute Difference',
        description='10d 10 km Absolute Difference vs. LTA',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset_name, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     target_mapset_name)

    # Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount10k
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "10dcount10kavg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, target_mapset_name, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, target_mapset_name)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    @active_if(activate_10d_10k_anom_comput, activate_10dcount10kdiff_comput)
    @transform(files_10dcount10k_4anom, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    @follows(std_fire_10dcount10kmax)
    def std_fire_10dcount10kdiff(input_file, output_file):
        """Absolute difference of a 10km dekad count vs. its long-term average."""
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            'output_type': 'Float32',
            'input_nodata': -32768,
            'output_nodata': -32768
        }
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", 'output_type':'Float32', 'input_nodata':-32768}
        raster_image_math.do_oper_subtraction(**args)

    #
--------------------------------------------------------------------- # Derived product: 10dcount10kperc # --------------------------------------------------------------------- output_sprod_group = proc_lists.proc_add_subprod_group( "10dcount10kanomalies") output_sprod = proc_lists.proc_add_subprod( "10dcount10kperc", "10dcount10kanomalies", final=False, descriptive_name='10d 10 km Percent Difference', description='10d 10 km Percent Difference vs. LTA', frequency_id='e1dekad', date_format='YYYYMMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, target_mapset_name, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, target_mapset_name) # Starting files + avg formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount10k formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident ancillary_sprod = "10dcount10kavg" ancillary_sprod_ident = functions.set_path_filename_no_date( prod, ancillary_sprod, target_mapset_name, version, ext) ancillary_subdir = functions.set_path_sub_directory( prod, ancillary_sprod, 'Derived', version, target_mapset_name) ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident @active_if(activate_10d_10k_anom_comput, activate_10dcount10kperc_comput) @transform(files_10dcount10k_4anom, formatter(formatter_in), add_inputs(ancillary_input), formatter_out) @follows(std_fire_10dcount10kdiff) def std_fire_10dcount10kperc(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) # args = {"input_file": input_file[0], "avg_file": input_file[1], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", 'output_type':'Float32', 'input_nodata':-32768} args = { "input_file": input_file[0], "avg_file": 
input_file[1], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", 'output_type': 'Float32', 'input_nodata': -32768, 'output_nodata': -32768 } raster_image_math.do_compute_perc_diff_vs_avg(**args) # --------------------------------------------------------------------- # Derived product: 10dcount10kratio # --------------------------------------------------------------------- output_sprod_group = proc_lists.proc_add_subprod_group( "10dcount10kanomalies") output_sprod = proc_lists.proc_add_subprod( "10dcount10kratio", "10dcount10kanomalies", final=False, descriptive_name='10d 10 km Ratio with AVG', description='10d 10 km Ratio with LTA AVG', frequency_id='e1dekad', date_format='YYYYMMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, target_mapset_name, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, target_mapset_name) # Starting files + avg formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount10k formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident ancillary_sprod = "10dcount10kavg" ancillary_sprod_ident = functions.set_path_filename_no_date( prod, ancillary_sprod, target_mapset_name, version, ext) ancillary_subdir = functions.set_path_sub_directory( prod, ancillary_sprod, 'Derived', version, target_mapset_name) ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident @active_if(activate_10d_10k_anom_comput, activate_10dcount10kratio_comput) @transform(files_10dcount10k_4anom, formatter(formatter_in), add_inputs(ancillary_input), formatter_out) @follows(std_fire_10dcount10kperc) def std_fire_10dcount10kratio(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) # args = {"input_file": 
input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", 'output_type':'Float32', 'input_nodata':-32768} args = { "input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", 'output_type': 'Float32', 'input_nodata': -32768, 'output_nodata': -32768 } raster_image_math.do_oper_division_perc(**args) # # End of pipeline definition return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None):
    """Build the ruffus pipeline computing the Ocean Productivity index for
    Fish (opfish) from an ingested chlorophyll-a sub-product.

    Args:
        prod:            product code (e.g. 'modis-chla').
        starting_sprod:  input sub-product code.
        mapset:          mapset the input files are ingested in.
        version:         product version string.
        starting_dates:  optional list of dates (YYYYMMDD) restricting the
                         input files; when None all ingested files are used.
        proc_lists:      optional functions.ProcLists to register the derived
                         sub-products in; a new one is created when None.

    Returns:
        The ProcLists with the 'opfish' sub-product registered.
    """
    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Activation switch for the single task of this pipeline
    activate_opfish_computation = 1
    # activate_shapefile_conversion = 1

    sds_meta = metadata.SdsMetadata()
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files (chla)
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)
    input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, starting_sprod, 'Ingest', version, mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            # ES2-450: check the file exists before appending
            if functions.is_file_exists_in_path(input_dir + my_date + in_prod_ident):
                starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        # Glob pattern string: ruffus expands it when the task is run
        starting_files = input_dir + "*" + in_prod_ident

    # ---------------------------------------------------------------------
    # 1. Define and customize parameters
    # ---------------------------------------------------------------------
    # Defaults; 'dc' is the temperature threshold 0.45 deg (multiplied by
    # 100); a smaller 'chl_grad_min' window detects more fronts.
    parameters = {
        'chl_grad_min': 0.00032131,
        'chl_grad_int': 0.021107,
        'chl_feed_min': 0.08,
        'chl_feed_max': 11.0,
        'dc': 0.91
    }
    if prod == 'modis-chla':
        # NOTE(review): currently identical to the defaults above — kept as
        # a placeholder for product-specific tuning.
        parameters = {
            'chl_grad_min': 0.00032131,
            'chl_grad_int': 0.021107,
            'chl_feed_min': 0.08,
            'chl_feed_max': 11.0,
            'dc': 0.91
        }

    # ---------------------------------------------------------------------
    # Chla Gradient (raster)
    output_sprod_group = proc_lists.proc_add_subprod_group("gradient")
    output_sprod = proc_lists.proc_add_subprod(
        "opfish",
        "gradient",
        final=False,
        descriptive_name='Ocean Productive index for Fish',
        description='Ocean Productive index for Fish',
        frequency_id='',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    prod_ident_gradient = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    subdir_gradient = functions.set_path_sub_directory(prod, output_sprod,
                                                       'Derived', version,
                                                       mapset)

    formatter_in = "(?P<YYYYMMDD>[0-9]{8})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + subdir_gradient + "{YYYYMMDD[0]}" + prod_ident_gradient
    ]

    @active_if(activate_opfish_computation)
    @transform(starting_files, formatter(formatter_in), formatter_out)
    def opfish_computation(input_file, output_file):
        # Propagate the input nodata value to the indicator computation
        no_data = int(sds_meta.get_nodata_value(input_file))
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "nodata": no_data,
            "output_format": 'GTIFF',
            # Fixed: was "compress = lzw" — GDAL creation options take no
            # spaces; matches every other options string in this module.
            "options": "compress=lzw",
            "parameters": parameters
        }
        raster_image_math.compute_opFish_indicator(**args)
        print('Done with raster')

    return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None):
    """Build the ruffus pipeline deriving rainfall rasters from daily
    rain-gauge shapefiles.

    Two tasks are defined:
      * 1d-grf : daily point measurements (.shp) interpolated to a raster
                 with gdal_grid (inverse distance) and warped onto the
                 mapset grid with gdalwarp.
      * 10d-grf: daily rasters cumulated per dekad and masked with the
                 SADC land mask.

    Args:
        prod:            product code.
        starting_sprod:  input sub-product code (ingested shapefiles).
        mapset:          mapset code; its grid drives the interpolation.
        version:         product version string.
        starting_dates:  optional list of dates (YYYYMMDD) restricting input.
        proc_lists:      optional functions.ProcLists; created when None.

    Returns:
        The ProcLists with the '1d-grf' and '10d-grf' sub-products
        registered.
    """
    land_mask = '/eStation2/static/sadc_mask_byte_1km.tif'
    tmpdir = tempfile.mkdtemp(prefix=__name__,
                              suffix='_' + os.path.basename(prod),
                              dir=es_constants.base_tmp_dir)

    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Activation switches.
    # Fixed: these flags were referenced by the @active_if decorators below
    # but never defined, raising NameError at pipeline-build time.
    activate_1d_gRf_comput = 1
    activate_10d_gRf_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files (daily products): ingested shapefiles
    in_prod_ident_noext = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, '')
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, '.shp')
    input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, starting_sprod, 'Ingest', version, mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            # Only keep dates for which the shapefile actually exists
            if os.path.isfile(input_dir + my_date + in_prod_ident):
                starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = glob.glob(input_dir + "*" + in_prod_ident)

    # ---------------------------------------------------------------------
    # Define output files (1dmeas-interp)
    output_sprod = proc_lists.proc_add_subprod(
        "1d-grf",
        "none",
        final=False,
        descriptive_name='1 Day interpolated',
        description='1 Day interpolated',
        frequency_id='e1day',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    prod_ident_1d_gRf = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    subdir_1d_gRf = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "(?P<YYYYMMDD>[0-9]{8})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + subdir_1d_gRf + "{YYYYMMDD[0]}" + prod_ident_1d_gRf

    @active_if(activate_1d_gRf_comput)
    @transform(starting_files, formatter(formatter_in), formatter_out)
    def std_precip_1d_gRf(input_file, output_file):
        functions.check_output_dir(os.path.dirname(output_file))
        my_date = functions.get_date_from_path_full(input_file)
        # gdal_grid layer name: the shapefile basename without extension
        layer = my_date + in_prod_ident_noext
        mapset_obj = MapSet()
        mapset_obj.assigndb(mapset)
        # Prepare tmpfile (fixed: the path separator between the temporary
        # directory and the basename was missing)
        output_file_tmp = tmpdir + os.path.sep + os.path.basename(output_file)
        # Extract grid info from the mapset
        size_x = mapset_obj.size_x
        size_y = mapset_obj.size_y
        geo_transform = mapset_obj.geo_transform
        pixel_shift_x = geo_transform[1]
        pixel_shift_y = geo_transform[5]
        ulx = geo_transform[0]
        uly = geo_transform[3]
        lrx = ulx + pixel_shift_x * size_x
        lry = uly + pixel_shift_y * size_y
        txe = str(ulx) + ' ' + str(lrx)
        tye = str(uly) + ' ' + str(lry)
        # gdalwarp -te expects xmin ymin xmax ymax; assumes a north-up
        # mapset (pixel_shift_y < 0) so that lry < uly — TODO confirm.
        te = str(ulx) + ' ' + str(lry) + ' ' + str(lrx) + ' ' + str(uly)
        # NOTE(review): the x resolution is used for both axes — assumes
        # square pixels; use abs(pixel_shift_y) for the y axis otherwise.
        tr = str(pixel_shift_x) + ' ' + str(pixel_shift_x)
        outsize = str(size_x) + ' ' + str(size_y)  # kept for optional -ts below

        # Interpolate at the original resolution (no outsize).
        # Fixed: 'smooting' -> 'smoothing' and 'nodata:0.0' -> 'nodata=0.0'
        # per the gdal_grid 'invdist' algorithm parameter syntax.
        command = 'gdal_grid '\
                  + ' -ot Float32 -of GTiff -co "compress=LZW" ' \
                  + ' -txe ' + txe\
                  + ' -tye ' + tye\
                  + ' -zfield precipitat '\
                  + ' -l ' + layer \
                  + ' -a invdist:power=2.0:smoothing=1.0:radius1=0.0:radius2=0.0:angle=0.0:max_points=0:min_points=0:nodata=0.0 '\
                  + input_file + ' ' + output_file_tmp
        try:
            os.system(command)
        except Exception:
            # Best-effort: log instead of silently swallowing
            print('Error in executing gdal_grid. Continue')

        # Warp the interpolated raster onto the mapset grid
        command = 'gdalwarp '\
                  + '-t_srs "EPSG:4326" '\
                  + ' -of GTiff -co "compress=LZW" ' \
                  + ' -te ' + te\
                  + ' -tr ' + tr + ' '\
                  + output_file_tmp + ' ' + output_file
        # + ' -ts ' + outsize \
        try:
            print(command)
            os.system(command)
        except Exception:
            print('Error in executing gdalwarp. Continue')

        # NOTE(review): tmpdir is created once per pipeline and shared by
        # all invocations of this task, yet removed after the first run —
        # verify behaviour when more than one daily file is processed.
        try:
            shutil.rmtree(tmpdir)
        except Exception:
            print('Error in removing temporary directory. Continue')
            raise NameError('Error in removing tmpdir')

    # ---------------------------------------------------------------------
    # Define output files (10dmeas)
    output_sprod = proc_lists.proc_add_subprod(
        "10d-grf",
        "none",
        final=False,
        descriptive_name='10 Day interpolated cum',
        description='10 Day interpolated cumulate',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    prod_ident_10dmeas = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    subdir_10dmeas = functions.set_path_sub_directory(prod, output_sprod,
                                                      'Derived', version,
                                                      mapset)

    def generate_parameters_10dmeas():
        """Yield ([daily files], dekad output file) for each usable dekad."""
        dekad_list = []
        # Create unique list of all dekads (as 'Julian' number)
        for input_file in starting_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)
        dekad_list = sorted(dekad_list)

        # Compute the 'julian' dekad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        for dekad in dekad_list:
            # Exclude the current (still incomplete) dekad
            if dekad != dekad_now:
                file_list = []
                my_dekad_str = functions.conv_dekad_2_date(dekad)
                expected_days = functions.day_per_dekad(my_dekad_str)
                for input_file in starting_files:
                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    mydekad_nbr = functions.conv_date_2_dekad(
                        mydate_yyyymmdd[0:8])
                    if mydekad_nbr == dekad:
                        file_list.append(input_file)
                output_file = es_constants.processing_dir + subdir_10dmeas + os.path.sep + my_dekad_str + prod_ident_10dmeas
                # Tolerate at most one missing daily file per dekad
                if len(file_list) >= expected_days - 1:
                    yield (file_list, output_file)
                else:
                    # Fixed: parenthesized print (rest of the module uses
                    # the function-call form)
                    print('Too many missing files for dekad {0}'.format(
                        my_dekad_str))

    @active_if(activate_10d_gRf_comput)
    @files(generate_parameters_10dmeas)
    def std_precip_10dmeas(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # Prepare temporary working directory for intermediate results
        tmpdirpath = tempfile.mkdtemp()
        # Cumulated but not masked output
        tmp_output_file = tmpdirpath + os.path.sep + os.path.basename(
            output_file)
        # Call the function for cumulating
        args = {
            "input_file": input_file,
            "output_file": tmp_output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_cumulate(**args)
        # Call the function for masking (masked pixels set to 0)
        args = {
            "input_file": tmp_output_file,
            "mask_file": land_mask,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "mask_value": 0,
            "out_value": 0
        }
        raster_image_math.do_mask_image(**args)
        # Remove temp directory
        shutil.rmtree(tmpdirpath)

    # Fixed: return the ProcLists like every other create_pipeline variant
    # in this module (the function previously returned None).
    return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None, update_stats=False, nrt_products=True): # --------------------------------------------------------------------- # Create lists if proc_lists is None: proc_lists = functions.ProcLists() # Set DEFAULTS: all off activate_10dstats_comput = 0 # 10d stats activate_10danomalies_comput = 0 # 10d anomalies activate_monthly_comput = 0 # monthly cumulation activate_monstats_comput = 0 # monthly stats activate_monanomalies_comput = 0 # monthly anomalies # switch wrt groups - according to options if nrt_products: activate_10danomalies_comput = 1 # 10d anomalies activate_monthly_comput = 1 # monthly cumulation activate_monanomalies_comput = 1 # monthly anomalies if update_stats: activate_10dstats_comput = 1 # 10d stats activate_monstats_comput = 1 # monthly stats # switch wrt single products: not to be changed !! activate_10davg_comput = 1 activate_10dmin_comput = 1 activate_10dmax_comput = 1 activate_10ddiff_comput = 1 activate_10dperc_comput = 1 activate_10dnp_comput = 1 activate_10dratio_comput = 1 activate_1moncum_comput = 1 activate_1monavg_comput = 1 activate_1monmin_comput = 1 activate_1monmax_comput = 1 activate_1mondiff_comput = 1 activate_1monperc_comput = 1 activate_1monnp_comput = 1 es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep # --------------------------------------------------------------------- # Define input files in_prod_ident = functions.set_path_filename_no_date( prod, starting_sprod, mapset, version, ext) #logger.debug('Base data directory is: %s' % es2_data_dir) input_dir = es2_data_dir+ \ functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset) if starting_dates is not None: starting_files = [] for my_date in starting_dates: starting_files.append(input_dir + my_date + in_prod_ident) else: starting_files = input_dir + "*" + in_prod_ident # --------------------------------------------------------------------- # 
Average output_sprod_group = proc_lists.proc_add_subprod_group("10dstats") output_sprod = proc_lists.proc_add_subprod( "10davg", "10dstats", final=False, descriptive_name='10d Average', description='Average rainfall for dekad', frequency_id='e1dekad', date_format='MMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident formatter_out = [ "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident ] @active_if(activate_10dstats_comput, activate_10davg_comput) @collate(starting_files, formatter(formatter_in), formatter_out) def std_precip_10davg(input_file, output_file): reduced_list = exclude_current_year(input_file) output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": reduced_list, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw" } raster_image_math.do_avg_image(**args) # --------------------------------------------------------------------- # Minimum output_sprod = proc_lists.proc_add_subprod( "10dmin", "10dstats", final=False, descriptive_name='10d Minimum', description='Minimum rainfall for dekad', frequency_id='e1dekad', date_format='MMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident formatter_out = [ "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident ] @active_if(activate_10dstats_comput, activate_10dmin_comput) @collate(starting_files, formatter(formatter_in), formatter_out) def 
std_precip_10dmin(input_file, output_file): output_file = functions.list_to_element(output_file) reduced_list = exclude_current_year(input_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": reduced_list, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw" } raster_image_math.do_min_image(**args) # --------------------------------------------------------------------- # Maximum output_sprod = proc_lists.proc_add_subprod( "10dmax", "10dstats", final=False, descriptive_name='10d Maximum', description='Maximum rainfall for dekad', frequency_id='e1dekad', date_format='MMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident formatter_out = [ "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident ] @active_if(activate_10dstats_comput, activate_10dmax_comput) @collate(starting_files, formatter(formatter_in), formatter_out) def std_precip_10dmax(input_file, output_file): output_file = functions.list_to_element(output_file) reduced_list = exclude_current_year(input_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": reduced_list, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw" } raster_image_math.do_max_image(**args) # --------------------------------------------------------------------- # 10dDiff output_sprod_group = proc_lists.proc_add_subprod_group("10anomalies") output_sprod = proc_lists.proc_add_subprod( "10ddiff", "10anomalies", final=False, descriptive_name='10d Absolute Difference', description='10d Absolute Difference vs. 
LTA', frequency_id='e1dekad', date_format='YYYYMMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) # Starting files + avg formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident ancillary_sprod = "10davg" ancillary_sprod_ident = functions.set_path_filename_no_date( prod, ancillary_sprod, mapset, version, ext) ancillary_subdir = functions.set_path_sub_directory( prod, ancillary_sprod, 'Derived', version, mapset) ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident @follows(std_precip_10davg) @active_if(activate_10danomalies_comput, activate_10ddiff_comput) @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out) def std_precip_10ddiff(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw" } raster_image_math.do_oper_subtraction(**args) # --------------------------------------------------------------------- # 10dperc output_sprod = proc_lists.proc_add_subprod( "10dperc", "10anomalies", final=False, descriptive_name='10d Percent Difference', description='10d Percent Difference vs. 
LTA', frequency_id='e1dekad', date_format='YYYYMMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) # Starting files + avg formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident ancillary_sprod = "10davg" ancillary_sprod_ident = functions.set_path_filename_no_date( prod, ancillary_sprod, mapset, version, ext) ancillary_subdir = functions.set_path_sub_directory( prod, ancillary_sprod, 'Derived', version, mapset) ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident @follows(std_precip_10davg) @active_if(activate_10danomalies_comput, activate_10dperc_comput) @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out) def std_precip_10dperc(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": input_file[0], "avg_file": input_file[1], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw" } raster_image_math.do_compute_perc_diff_vs_avg(**args) # --------------------------------------------------------------------- # 10dnp output_sprod = proc_lists.proc_add_subprod( "10dnp", "10anomalies", final=False, descriptive_name='10d Normalized Anomaly', description='10d Normalized Anomaly', frequency_id='e1dekad', date_format='YYYYMMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) # Starting files + min + max formatter_in = 
"(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident ancillary_sprod_1 = "10dmin" ancillary_sprod_ident_1 = functions.set_path_filename_no_date( prod, ancillary_sprod_1, mapset, version, ext) ancillary_subdir_1 = functions.set_path_sub_directory( prod, ancillary_sprod_1, 'Derived', version, mapset) ancillary_input_1 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_1 + "{MMDD[0]}" + ancillary_sprod_ident_1 ancillary_sprod_2 = "10dmax" ancillary_sprod_ident_2 = functions.set_path_filename_no_date( prod, ancillary_sprod_2, mapset, version, ext) ancillary_subdir_2 = functions.set_path_sub_directory( prod, ancillary_sprod_2, 'Derived', version, mapset) ancillary_input_2 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_2 + "{MMDD[0]}" + ancillary_sprod_ident_2 @follows(std_precip_10dmin, std_precip_10dmax) @active_if(activate_10danomalies_comput, activate_10dnp_comput) @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input_1, ancillary_input_2), formatter_out) def std_precip_10dnp(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": input_file[0], "min_file": input_file[1], "max_file": input_file[2], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw" } raster_image_math.do_make_vci(**args) # --------------------------------------------------------------------- # 10dratio output_sprod = proc_lists.proc_add_subprod( "10dratio", "10anomalies", final=False, descriptive_name='10d Ratio', description='10d Ratio (curr/avg)', frequency_id='e1dekad', date_format='YYYYMMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, 
mapset) # Starting files + min + max formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident ancillary_sprod_1 = "10davg" ancillary_sprod_ident_1 = functions.set_path_filename_no_date( prod, ancillary_sprod_1, mapset, version, ext) ancillary_subdir_1 = functions.set_path_sub_directory( prod, ancillary_sprod_1, 'Derived', version, mapset) ancillary_input_1 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_1 + "{MMDD[0]}" + ancillary_sprod_ident_1 @follows(std_precip_10dmin, std_precip_10dmax) @active_if(activate_10danomalies_comput, activate_10dratio_comput) @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input_1), formatter_out) def std_precip_10dratio(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress = lzw" } raster_image_math.do_oper_division_perc(**args) # --------------------------------------------------------------------- # 1moncum output_sprod_group = proc_lists.proc_add_subprod_group("monthly") output_sprod = proc_lists.proc_add_subprod( "1moncum", "monthly", final=False, descriptive_name='Monthly Cumulate', description='Monthly Cumulate Precipitation', frequency_id='e1month', date_format='YYYYMMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) # inputs: files from same months formatter_in = "(?P<YYYYMM>[0-9]{6})(?P<DD>[0-9]{2})" + in_prod_ident formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" + '01' + out_prod_ident # @follows(std_precip_10davg) @active_if(activate_monthly_comput, 
activate_1moncum_comput) @collate(starting_files, formatter(formatter_in), formatter_out) def std_precip_1moncum(input_file, output_file): #ES2- 235 Do not show temporary products like composite not complete (ex monthly composite available mid month...) # ex: monthly RFE in the middle of the month should not be available because incomplete and lead to wrong analysis... # Check current month ---> yes ---> skip # ----> NO ---> Check No of days (10% tolerance) # acceptable ----> # Yes ---> proceed # No ----> Skip input_file_date = functions.get_date_from_path_full(input_file[0]) if len(input_file) == 3: if not functions.is_date_current_month(input_file_date): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw" } raster_image_math.do_cumulate(**args) # --------------------------------------------------------------------- # Monthly Average new_input_subprod = '1moncum' in_prod_ident = functions.set_path_filename_no_date( prod, new_input_subprod, mapset, version, ext) in_prod_subdir = functions.set_path_sub_directory(prod, new_input_subprod, 'Derived', version, mapset) starting_files = es2_data_dir + in_prod_subdir + "*" + in_prod_ident output_sprod_group = proc_lists.proc_add_subprod_group("monstat") output_sprod = proc_lists.proc_add_subprod( "1monavg", "monstat", final=False, descriptive_name='Monthly Average', description='Monthly Average Precipitation', frequency_id='e1month', date_format='MMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident formatter_out = [ "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident 
] @follows(std_precip_1moncum) @active_if(activate_monstats_comput, activate_1monavg_comput) @collate(starting_files, formatter(formatter_in), formatter_out) def std_precip_1monavg(input_file, output_file): output_file = functions.list_to_element(output_file) reduced_list = exclude_current_year(input_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": reduced_list, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw" } raster_image_math.do_avg_image(**args) # --------------------------------------------------------------------- # Monthly Minimum output_sprod = proc_lists.proc_add_subprod( "1monmin", "monstat", final=False, descriptive_name='Monthly Minimum', description='Monthly Minimum Precipitation', frequency_id='e1month', date_format='MMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident formatter_out = [ "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident ] @follows(std_precip_1moncum) @active_if(activate_monstats_comput, activate_1monmin_comput) @collate(starting_files, formatter(formatter_in), formatter_out) def std_precip_1monmin(input_file, output_file): output_file = functions.list_to_element(output_file) reduced_list = exclude_current_year(input_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": reduced_list, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw" } raster_image_math.do_min_image(**args) # --------------------------------------------------------------------- # Monthly Maximum output_sprod = proc_lists.proc_add_subprod( "1monmax", "monstat", final=False, descriptive_name='Monthly Maximum', description='Monthly Maximum Precipitation', 
frequency_id='e1month', date_format='MMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) reg_ex_in = "[0-9]{4}([0-9]{4})" + in_prod_ident formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident formatter_out = [ "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident ] @follows(std_precip_1moncum) @active_if(activate_monstats_comput, activate_1monmax_comput) @collate(starting_files, formatter(formatter_in), formatter_out) def std_precip_1monmax(input_file, output_file): output_file = functions.list_to_element(output_file) reduced_list = exclude_current_year(input_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": reduced_list, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw" } raster_image_math.do_max_image(**args) # --------------------------------------------------------------------- # 1monDiff output_sprod_group = proc_lists.proc_add_subprod_group("monanomalies") output_sprod = proc_lists.proc_add_subprod( "1mondiff", "monanomalies", final=False, descriptive_name='Monthly Absolute Difference', description='Monthly Absolute Difference Precipitation', frequency_id='e1month', date_format='YYYYMMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) # inputs # Starting files + avg formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident ancillary_sprod = "1monavg" ancillary_sprod_ident = functions.set_path_filename_no_date( prod, ancillary_sprod, mapset, 
version, ext) ancillary_subdir = functions.set_path_sub_directory( prod, ancillary_sprod, 'Derived', version, mapset) ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident @follows(std_precip_1monavg) @active_if(activate_monanomalies_comput, activate_1mondiff_comput) @transform(std_precip_1moncum, formatter(formatter_in), add_inputs(ancillary_input), formatter_out) def std_precip_1mondiff(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw" } raster_image_math.do_oper_subtraction(**args) # --------------------------------------------------------------------- # 1monperc output_sprod = proc_lists.proc_add_subprod( "1monperc", "monanomalies", final=False, descriptive_name='Monthly Percent Difference', description='Monthly Percent Difference Precipitation', frequency_id='e1month', date_format='YYYYMMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) # inputs # Starting files + avg formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident ancillary_sprod = "1monavg" ancillary_sprod_ident = functions.set_path_filename_no_date( prod, ancillary_sprod, mapset, version, ext) ancillary_subdir = functions.set_path_sub_directory( prod, ancillary_sprod, 'Derived', version, mapset) ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident @follows(std_precip_1monavg) @active_if(activate_monanomalies_comput, activate_1monperc_comput) @transform(std_precip_1moncum, 
formatter(formatter_in), add_inputs(ancillary_input), formatter_out) def std_precip_1monperc(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": input_file[0], "avg_file": input_file[1], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw" } raster_image_math.do_compute_perc_diff_vs_avg(**args) # --------------------------------------------------------------------- # 1monnp output_sprod = proc_lists.proc_add_subprod( "1monnp", "monanomalies", final=False, descriptive_name='Monthly Normalized Anomaly', description='Monthly Normalized Anomaly Precipitation', frequency_id='e1month', date_format='YYYYMMDD', masked=False, timeseries_role='10d', active_default=True) out_prod_ident = functions.set_path_filename_no_date( prod, output_sprod, mapset, version, ext) output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) # Starting files + min + max formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident ancillary_sprod_1 = "1monmin" ancillary_sprod_ident_1 = functions.set_path_filename_no_date( prod, ancillary_sprod_1, mapset, version, ext) ancillary_subdir_1 = functions.set_path_sub_directory( prod, ancillary_sprod_1, 'Derived', version, mapset) ancillary_input_1 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_1 + "{MMDD[0]}" + ancillary_sprod_ident_1 ancillary_sprod_2 = "1monmax" ancillary_sprod_ident_2 = functions.set_path_filename_no_date( prod, ancillary_sprod_2, mapset, version, ext) ancillary_subdir_2 = functions.set_path_sub_directory( prod, ancillary_sprod_2, 'Derived', version, mapset) ancillary_input_2 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_2 + "{MMDD[0]}" + ancillary_sprod_ident_2 @follows(std_precip_1monmin, std_precip_1monmax) 
@active_if(activate_monanomalies_comput, activate_1monnp_comput) @transform(std_precip_1moncum, formatter(formatter_in), add_inputs(ancillary_input_1, ancillary_input_2), formatter_out) def std_precip_1monnp(input_file, output_file): output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": input_file[0], "min_file": input_file[1], "max_file": input_file[2], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw" } raster_image_math.do_make_vci(**args) return proc_lists
def test_ProcLists(self):
    """ProcLists must be constructible stand-alone, as the pipelines do."""
    # Build the container 'manually' (normally done by the pipeline code)
    empty_lists = functions.ProcLists()
    self.assertEqual(type(empty_lists).__name__, 'ProcLists')
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None, update_stats=False, nrt_products=True):
    """
    Build the ruffus processing chain for the DMP product: 10d statistics
    (avg/min/max) and 10d anomalies (diff/perc/np/ratio) computed from the
    ingested dekadal files.

    :param prod: product code (e.g. 'vgt-dmp')
    :param starting_sprod: sub-product code of the ingested input
    :param mapset: mapset code of input and outputs
    :param version: product version
    :param starting_dates: optional explicit list of input dates (YYYYMMDD);
                           when None a glob pattern over the Ingest dir is used
    :param proc_lists: optional ProcLists container; created when None
    :param update_stats: activate the 10d statistics group (avg/min/max)
    :param nrt_products: activate the 10d anomalies group
    :return: the ProcLists with all derived sub-products registered
    """
    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Switch wrt groups - according to options. DEFAULT: ALL off.
    # NOTE: the unused monthly/1mon* flags copied from the precipitation
    # module have been removed - no task in this pipeline reads them.
    activate_10dstats_comput = 0        # 10d stats
    activate_10danomalies_comput = 0    # 10d anomalies

    if nrt_products:
        activate_10danomalies_comput = 1

    if update_stats:
        activate_10dstats_comput = 1

    # switch wrt single products: not to be changed !!
    activate_10davg_comput = 1
    activate_10dmin_comput = 1
    activate_10dmax_comput = 1
    activate_10ddiff_comput = 1
    activate_10dperc_comput = 1
    activate_10dnp_comput = 0
    activate_10dratio_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    input_dir = es2_data_dir + \
        functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        # Glob pattern: expanded by ruffus when the tasks are built
        starting_files = input_dir + "*" + in_prod_ident

    # ---------------------------------------------------------------------
    # 10d Average
    output_sprod_group = proc_lists.proc_add_subprod_group("10dstats")
    output_sprod = proc_lists.proc_add_subprod(
        "10davg", "10dstats", final=False,
        descriptive_name='10d Average',
        description='Average dry matter productivity for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Collate all years of the same MMDD dekad into one statistic file
    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10davg_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def std_dmp_10davg(input_file, output_file):
        # Exclude the current (incomplete) year from the statistics
        reduced_list = exclude_current_year(input_file)
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": reduced_list,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # 10d Minimum
    output_sprod = proc_lists.proc_add_subprod(
        "10dmin", "10dstats", final=False,
        descriptive_name='10d Minimum',
        description='Minimum DMP for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dmin_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def std_dmp_10dmin(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        reduced_list = exclude_current_year(input_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": reduced_list,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_min_image(**args)

    # ---------------------------------------------------------------------
    # 10d Maximum
    output_sprod = proc_lists.proc_add_subprod(
        "10dmax", "10dstats", final=False,
        descriptive_name='10d Maximum',
        description='Maximum DMP for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dmax_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def std_dmp_10dmax(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        reduced_list = exclude_current_year(input_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": reduced_list,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_max_image(**args)

    # ---------------------------------------------------------------------
    # 10dDiff: current dekad minus the long-term average
    output_sprod_group = proc_lists.proc_add_subprod_group("10anomalies")
    output_sprod = proc_lists.proc_add_subprod(
        "10ddiff", "10anomalies", final=False,
        descriptive_name='10d Absolute Difference',
        description='10d Absolute Difference vs. LTA',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Inputs: starting files + avg (matched on the same MMDD)
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "10davg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    # @follows added for consistency with the precipitation module: the avg
    # must be (re)computed before the anomaly when both groups are active
    @follows(std_dmp_10davg)
    @active_if(activate_10danomalies_comput, activate_10ddiff_comput)
    @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    def std_dmp_10ddiff(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_oper_subtraction(**args)

    # ---------------------------------------------------------------------
    # 10dperc: percent difference vs. the long-term average
    output_sprod = proc_lists.proc_add_subprod(
        "10dperc", "10anomalies", final=False,
        descriptive_name='10d Percent Difference',
        description='10d Percent Difference vs. LTA',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Inputs: starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "10davg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    @follows(std_dmp_10davg)
    @active_if(activate_10danomalies_comput, activate_10dperc_comput)
    @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    def std_dmp_10dperc(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file[0],
            "avg_file": input_file[1],
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_compute_perc_diff_vs_avg(**args)

    # ---------------------------------------------------------------------
    # 10dnp: normalized anomaly (current vs. min/max range)
    output_sprod = proc_lists.proc_add_subprod(
        "10dnp", "10anomalies", final=False,
        descriptive_name='10d Normalized Anomaly',
        description='10d Normalized Anomaly',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Inputs: starting files + min + max
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod_1 = "10dmin"
    ancillary_sprod_ident_1 = functions.set_path_filename_no_date(
        prod, ancillary_sprod_1, mapset, version, ext)
    ancillary_subdir_1 = functions.set_path_sub_directory(
        prod, ancillary_sprod_1, 'Derived', version, mapset)
    ancillary_input_1 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_1 + "{MMDD[0]}" + ancillary_sprod_ident_1

    ancillary_sprod_2 = "10dmax"
    ancillary_sprod_ident_2 = functions.set_path_filename_no_date(
        prod, ancillary_sprod_2, mapset, version, ext)
    ancillary_subdir_2 = functions.set_path_sub_directory(
        prod, ancillary_sprod_2, 'Derived', version, mapset)
    ancillary_input_2 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_2 + "{MMDD[0]}" + ancillary_sprod_ident_2

    # @follows added for consistency with the precipitation module (1monnp)
    @follows(std_dmp_10dmin, std_dmp_10dmax)
    @active_if(activate_10danomalies_comput, activate_10dnp_comput)
    @transform(starting_files, formatter(formatter_in),
               add_inputs(ancillary_input_1, ancillary_input_2), formatter_out)
    def std_dmp_10dnp(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file[0],
            "min_file": input_file[1],
            "max_file": input_file[2],
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_make_vci(**args)

    # ---------------------------------------------------------------------
    # 10dratio: ratio current/avg
    output_sprod = proc_lists.proc_add_subprod(
        "10dratio", "10anomalies", final=False,
        descriptive_name='10d Ratio',
        description='10d Ratio (curr/avg)',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Inputs: starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod_1 = "10davg"
    ancillary_sprod_ident_1 = functions.set_path_filename_no_date(
        prod, ancillary_sprod_1, mapset, version, ext)
    ancillary_subdir_1 = functions.set_path_sub_directory(
        prod, ancillary_sprod_1, 'Derived', version, mapset)
    ancillary_input_1 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_1 + "{MMDD[0]}" + ancillary_sprod_ident_1

    @follows(std_dmp_10davg)
    @active_if(activate_10danomalies_comput, activate_10dratio_comput)
    @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input_1), formatter_out)
    def std_dmp_10dratio(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # BUGFIX: was "compress = lzw" - malformed GDAL creation option,
        # inconsistent with every other task in this module
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_oper_division_perc(**args)

    return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None):
    """
    Build the ruffus processing chain cumulating daily precipitation into
    10d, 1mon, 3mon, 6mon and 1year products.

    :param prod: product code
    :param starting_sprod: sub-product code of the ingested daily input
    :param mapset: mapset code of input and outputs
    :param version: product version
    :param starting_dates: optional explicit list of input dates (YYYYMMDD);
                           when None all ingested files are globbed
    :param proc_lists: optional ProcLists container; created when None
    :return: the ProcLists with all derived sub-products registered
    """
    # ---------------------------------------------------------------------
    # Create lists to store definition of the derived products, and their
    # groups
    # ---------------------------------------------------------------------
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # ---------------------------------------------------------------------
    # Flags controlling the individual derived products.
    # NOT to be changed by the User
    # ---------------------------------------------------------------------
    activate_10dcumul_comput = 1    # 10d cumul
    activate_1moncum_comput = 1     # 1mon cumul
    activate_3moncum_comput = 1     # 3mon cumul
    activate_6moncum_comput = 1     # 6mon cumul
    activate_1yearcum_comput = 1    # 1year cumul

    # Conversion scale factor (from 0.01 of daily to 1.0 of all other products)
    scale_factor_conv = 0.01

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files from the starting_sprod and starting_dates arguments
    # ---------------------------------------------------------------------
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    input_dir = es2_data_dir + \
        functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

    # Resolve the concrete list of input files: explicit dates (keeping only
    # those that exist on disk) or a glob over the whole Ingest directory
    if starting_dates is not None:
        input_files = [input_dir + my_date + in_prod_ident
                       for my_date in starting_dates
                       if os.path.isfile(input_dir + my_date + in_prod_ident)]
    else:
        input_files = glob.glob(input_dir + "*" + in_prod_ident)

    def _cumulate(input_file, output_file):
        # Shared body of all cumulation tasks: sum the daily inputs applying
        # the daily -> product scale-factor conversion
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw",
                "scale_factor": scale_factor_conv}
        raster_image_math.do_cumulate(**args)

    def _unique_sorted_months():
        # Unique, sorted list of the YYYYMM months covered by the inputs
        month_list = []
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mymonth_yyyymm = str(mydate)[0:6]
            if mymonth_yyyymm not in month_list:
                month_list.append(mymonth_yyyymm)
        return sorted(month_list)

    def _multi_month_parameters(n_mon, max_missing, output_subdir, out_prod_ident, label):
        # Yield (daily_file_list, output_file) for every rolling n_mon-month
        # window ending at a complete month. Windows with more than
        # max_missing daily files absent are skipped with a warning.
        today_mon = datetime.date.today().strftime('%Y%m')

        for month in _unique_sorted_months():
            # Exclude the current (incomplete) month
            if month != today_mon:
                file_list = []
                # First/last day of the n_mon-month interval ending this month
                first_day_this_month = datetime.date(int(month[0:4]), int(month[4:6]), 1)
                first_day_next_month = first_day_this_month + relativedelta(months=+1)
                first_day_window = first_day_this_month + relativedelta(months=-n_mon + 1)
                expected_days = (first_day_next_month - first_day_window).days
                for input_file in input_files:
                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
                    mydate = datetime.date(int(mydate_yyyymmdd[0:4]),
                                           int(mydate_yyyymmdd[4:6]),
                                           int(mydate_yyyymmdd[6:8]))
                    if first_day_window <= mydate < first_day_next_month:
                        file_list.append(input_file)
                if len(file_list) >= expected_days - max_missing:
                    output_file = es_constants.processing_dir + output_subdir + os.path.sep + month + '01' + out_prod_ident
                    yield (file_list, output_file)
                else:
                    # BUGFIX: message said 'missing filed'
                    print('Too many missing files for {0}, period until: {1}'.format(label, month))

    # ---------------------------------------------------------------------
    # Derived product: 10dcumul
    # ---------------------------------------------------------------------
    output_sprod_group = proc_lists.proc_add_subprod_group("cumul")
    output_sprod = proc_lists.proc_add_subprod(
        "10d", "cumul", final=False,
        descriptive_name='10d Precipitation',
        description='Precipitation for dekad',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='Initial',
        active_default=True)
    out_prod_ident_10dcount = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_10dcount = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_10dcumul():
        # Yield (daily_file_list, output_file) for every complete dekad
        dekad_list = []
        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydekad_nbr = functions.conv_date_2_dekad(str(mydate)[0:8])
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)
        dekad_list = sorted(dekad_list)

        # 'Julian' dekad of the current day (always excluded: incomplete)
        today = datetime.date.today()
        dekad_now = functions.conv_date_2_dekad(today.strftime('%Y%m%d'))

        for dekad in dekad_list:
            if dekad != dekad_now:
                file_list = []
                my_dekad_str = functions.conv_dekad_2_date(dekad)
                # int() added for safety: the 1mon generator converts the
                # return of day_per_dekad() explicitly before arithmetic
                expected_days = int(functions.day_per_dekad(my_dekad_str))
                for input_file in input_files:
                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
                    mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd[0:8])
                    if mydekad_nbr == dekad:
                        file_list.append(input_file)
                output_file = es_constants.processing_dir + output_subdir_10dcount + os.path.sep + my_dekad_str + out_prod_ident_10dcount
                # Tolerate at most 1 missing daily file per dekad
                if len(file_list) >= expected_days - 1:
                    yield (file_list, output_file)
                else:
                    # BUGFIX: message said 'missing filed'
                    print('Too many missing files for dekad {0}'.format(my_dekad_str))

    @active_if(activate_10dcumul_comput)
    @files(generate_parameters_10dcumul)
    def std_precip_10dcumul(input_file, output_file):
        _cumulate(input_file, output_file)

    # ---------------------------------------------------------------------
    # Derived product: 1moncum
    # ---------------------------------------------------------------------
    output_sprod = proc_lists.proc_add_subprod(
        "1mon", "cumul", final=False,
        descriptive_name='Monthly Precipitation',
        description='Precipitation for a month',
        frequency_id='e1month',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='Initial',
        active_default=True)
    out_prod_ident_1moncum = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_1moncum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_1moncum():
        # Yield (daily_file_list, output_file) for every complete month
        today_mon = datetime.date.today().strftime('%Y%m')

        for month in _unique_sorted_months():
            # Exclude the current (incomplete) month
            if month != today_mon:
                file_list = []
                # Days in month = 20 (first two dekads) + days of the third one
                exp_days_last_dk = functions.day_per_dekad(month + '21')
                expected_days = int(exp_days_last_dk) + 20
                for input_file in input_files:
                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
                    if mydate_yyyymmdd[0:6] == month:
                        file_list.append(input_file)
                output_file = es_constants.processing_dir + output_subdir_1moncum + os.path.sep + month + '01' + out_prod_ident_1moncum
                # Tolerate at most 3 missing daily files per month
                if len(file_list) >= expected_days - 3:
                    yield (file_list, output_file)
                else:
                    # BUGFIX: message said 'missing filed'
                    print('Too many missing files for month {0}'.format(month))

    @active_if(activate_1moncum_comput)
    @files(generate_parameters_1moncum)
    def std_precip_1moncum(input_file, output_file):
        _cumulate(input_file, output_file)

    # ---------------------------------------------------------------------
    # Derived product: 3moncum
    # ---------------------------------------------------------------------
    output_sprod = proc_lists.proc_add_subprod(
        "3mon", "cumul", final=False,
        descriptive_name='3 Months Precipitation',
        description='Precipitation for 3 months',
        frequency_id='e1month',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='Initial',
        active_default=True)
    out_prod_ident_3moncum = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_3moncum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_3moncum():
        # 3-month windows, up to 9 missing daily files tolerated
        return _multi_month_parameters(3, 9, output_subdir_3moncum,
                                       out_prod_ident_3moncum, '3moncum')

    @active_if(activate_3moncum_comput)
    @files(generate_parameters_3moncum)
    def std_precip_3moncum(input_file, output_file):
        _cumulate(input_file, output_file)

    # ---------------------------------------------------------------------
    # Derived product: 6moncum
    # ---------------------------------------------------------------------
    output_sprod = proc_lists.proc_add_subprod(
        "6mon", "cumul", final=False,
        descriptive_name='6 Months Precipitation',
        description='Precipitation for 6 months',
        frequency_id='e1month',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='Initial',
        active_default=True)
    out_prod_ident_6moncum = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_6moncum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_6moncum():
        # 6-month windows, up to 18 missing daily files tolerated
        return _multi_month_parameters(6, 18, output_subdir_6moncum,
                                       out_prod_ident_6moncum, '6moncum')

    @active_if(activate_6moncum_comput)
    @files(generate_parameters_6moncum)
    def std_precip_6moncum(input_file, output_file):
        _cumulate(input_file, output_file)

    # ---------------------------------------------------------------------
    # Derived product: 1yearcum
    # ---------------------------------------------------------------------
    output_sprod = proc_lists.proc_add_subprod(
        "1year", "cumul", final=False,
        descriptive_name='Yearly Precipitation',
        description='Precipitation for 1 year',
        frequency_id='e1month',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='Initial',
        active_default=True)
    out_prod_ident_1yearcum = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_1yearcum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_1yearcum():
        # 12-month windows, up to 35 missing daily files tolerated
        return _multi_month_parameters(12, 35, output_subdir_1yearcum,
                                       out_prod_ident_1yearcum, '1yearcum')

    @active_if(activate_1yearcum_comput)
    @files(generate_parameters_1yearcum)
    def std_precip_1yearcum(input_file, output_file):
        _cumulate(input_file, output_file)

    # End of pipeline definition
    return proc_lists
def create_pipeline(input_products, output_product, logfile=None, nrt_products=True, update_stats=False):
    """Build the MODIS Primary Production (PP) processing pipeline.

    PP is computed from exactly 4 input sub-products (chla, sst, kd490, par);
    inter-annual statistics (climatology, minimum, maximum) are then derived
    from the generated PP product.

    :param input_products: list of exactly 4 input product objects, matched by
           productcode against chla/sst/kd490/par.
    :param output_product: list; element [0] supplies output prod/version/mapset.
    :param logfile: optional logfile name passed to the spec logger.
    :param nrt_products: if True, activate the near-real-time PP computation.
    :param update_stats: if True, activate the statistics computation.
    :return: the ProcLists object describing the generated sub-products, or 1
             on an input-validation error.
    """
    proc_lists = functions.ProcLists()

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_modis_pp')

    # Set DEFAULTS: all off
    activate_pp_comput = 0        # PP from Chla, SST, Kd490 and PAR
    activate_stats_comput = 0     # Stats computation (inter-annual clim, min, max)
    activate_anomalies_comput = 0  # Anomalies computation (not yet done!!)

    # Switch wrt groups - according to options
    if nrt_products:
        activate_pp_comput = 1    # PP from Chla, SST, Kd490 and PAR
    if update_stats:
        activate_stats_comput = 1
        activate_anomalies_comput = 1

    activate_pp_prod_comput = 1
    activate_pp_stats_clim_comput = 1
    activate_pp_stats_min_comput = 1
    activate_pp_stats_max_comput = 1

    # my_date='20160601'
    my_date = ''
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # -----------------------------------------------------------------------
    # Parse the arguments and extract the 4 input variables
    if len(input_products) != 4:
        spec_logger.error('Modis PP computation requires 4 inputs. Exit')
        return 1

    found_chla = False
    found_sst = False
    found_par = False
    found_kd490 = False

    for input_product in input_products:
        if re.search('.*chla.*', input_product.productcode):
            found_chla = True
            chla_prod = input_product.productcode
            chla_version = input_product.version
            chla_sprod = input_product.subproductcode
            chla_mapset = input_product.mapsetcode
            chla_prod_ident = functions.set_path_filename_no_date(
                chla_prod, chla_sprod, chla_mapset, chla_version, ext)
            chla_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(chla_prod, chla_sprod, 'Derived', chla_version, chla_mapset)

        if re.search('.*sst.*', input_product.productcode):
            found_sst = True
            sst_prod = input_product.productcode
            sst_version = input_product.version
            sst_sprod = input_product.subproductcode
            sst_mapset = input_product.mapsetcode
            sst_prod_ident = functions.set_path_filename_no_date(
                sst_prod, sst_sprod, sst_mapset, sst_version, ext)
            sst_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(sst_prod, sst_sprod, 'Derived', sst_version, sst_mapset)

        if re.search('.*kd490.*', input_product.productcode):
            found_kd490 = True
            kd490_prod = input_product.productcode
            kd490_version = input_product.version
            kd490_sprod = input_product.subproductcode
            kd490_mapset = input_product.mapsetcode
            kd490_prod_ident = functions.set_path_filename_no_date(
                kd490_prod, kd490_sprod, kd490_mapset, kd490_version, ext)
            kd490_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(kd490_prod, kd490_sprod, 'Derived', kd490_version, kd490_mapset)

        if re.search('.*par.*', input_product.productcode):
            found_par = True
            par_prod = input_product.productcode
            par_version = input_product.version
            par_sprod = input_product.subproductcode
            par_mapset = input_product.mapsetcode
            par_prod_ident = functions.set_path_filename_no_date(
                par_prod, par_sprod, par_mapset, par_version, ext)
            par_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(par_prod, par_sprod, 'Derived', par_version, par_mapset)

    # Check consistency of inputs
    if not (found_chla) or not (found_kd490) or not (found_par) or not (found_sst):
        spec_logger.error('At least one of 4 expected inputs missing. Exit')
        return 1

    if chla_mapset != sst_mapset or chla_mapset != kd490_mapset or chla_mapset != par_mapset:
        spec_logger.error('All 4 input mapset must be equals. Exit')
        return 1

    # Read input product nodata (and frequency, taken from chla)
    chla_prod_info = querydb.get_product_out_info(productcode=chla_prod,
                                                  subproductcode=chla_sprod,
                                                  version=chla_version)
    chla_product_info = functions.list_to_element(chla_prod_info)
    chla_nodata = chla_product_info.nodata
    chla_frequency = chla_product_info.frequency_id

    sst_prod_info = querydb.get_product_out_info(productcode=sst_prod,
                                                 subproductcode=sst_sprod,
                                                 version=sst_version)
    sst_product_info = functions.list_to_element(sst_prod_info)
    sst_nodata = sst_product_info.nodata

    kd_prod_info = querydb.get_product_out_info(productcode=kd490_prod,
                                                subproductcode=kd490_sprod,
                                                version=kd490_version)
    kd_product_info = functions.list_to_element(kd_prod_info)
    kd_nodata = kd_product_info.nodata

    par_prod_info = querydb.get_product_out_info(productcode=par_prod,
                                                 subproductcode=par_sprod,
                                                 version=par_version)
    par_product_info = functions.list_to_element(par_prod_info)
    par_nodata = par_product_info.nodata

    # Define outputs
    output_nodata = -32767
    old = False

    # NOTE: the prod/mapset/version are taken from the FIRST OUTPUT passed
    #       subprod defined according to the frequency
    output_prod = output_product[0].productcode
    output_version = output_product[0].version
    output_mapset = output_product[0].mapsetcode

    if old:
        # Get the first output -> PP subproduct generated (8daysavg or monavg)
        output_sprod = output_product[0].subproductcode
    else:
        # Define the outputs according to the frequency
        if chla_frequency == 'e1month':
            frequency_string = 'monthly'
            output_sprod = 'monavg'
            output_sprod_clim = '1monclim'
            output_sprod_min = '1monmin'
            output_sprod_max = '1monmax'
            sub_product_group = '1monstat'
        elif chla_frequency == 'e1modis8day':
            frequency_string = '8 days'
            output_sprod = '8daysavg'
            activate_pp_stats_clim_comput = 1
            activate_pp_stats_min_comput = 1
            activate_pp_stats_max_comput = 1
            sub_product_group = '8daysstat'
            output_sprod_clim = '8daysclim'
            output_sprod_min = '8daysmin'
            output_sprod_max = '8daysmax'
        else:
            spec_logger.error('Frequency not recognized: %s. Exit!', chla_frequency)
            # Return 1 for consistency with the other error paths above
            # (previously returned None).
            return 1

    out_prod_ident = functions.set_path_filename_no_date(
        output_prod, output_sprod, output_mapset, output_version, ext)
    output_subdir = functions.set_path_sub_directory(output_prod, output_sprod,
                                                     'Derived', output_version,
                                                     output_mapset)

    # Fixes ES2-36
    def generate_input_files_pp():
        # Take kd490 as starting point; a PP date is processed only when all
        # three ancillary inputs (chla, par, sst) exist for that date.
        kd_files = kd490_input_dir + my_date + "*" + kd490_prod_ident
        input_files = sorted(glob.glob(kd_files))
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            ancillary_chla = chla_input_dir + mydate + chla_prod_ident
            ancillary_par = par_input_dir + mydate + par_prod_ident
            ancillary_sst = sst_input_dir + mydate + sst_prod_ident
            if os.path.isfile(ancillary_chla) and \
               os.path.isfile(ancillary_par) and \
               os.path.isfile(ancillary_sst):
                output_file = es_constants.processing_dir + output_subdir + os.path.sep + mydate + out_prod_ident
                my_inputs = (input_file, ancillary_chla, ancillary_par, ancillary_sst)
                yield (my_inputs, output_file)

    @active_if(activate_pp_comput)
    @files(generate_input_files_pp)
    def modis_pp_comp(input_file, output_file):
        # input_file tuple order: (kd490, chla, par, sst) - see generate_input_files_pp
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"chla_file": input_file[1], "sst_file": input_file[3],
                "kd_file": input_file[0], "par_file": input_file[2],
                "sst_nodata": sst_nodata, "kd_nodata": kd_nodata,
                "chla_nodata": chla_nodata, "par_nodata": par_nodata,
                "output_file": output_file, "output_nodata": output_nodata,
                "output_format": 'GTIFF', "output_type": None,
                "options": "compress=lzw"}
        raster_image_math.do_compute_primary_production(**args)

    # ---------------------------------------------------------------------
    # Inter-annual statistics (climatology / minimum / maximum)
    # The PP product just generated becomes the input of the statistics.
    prod = output_prod
    mapset = output_mapset
    new_input_subprod = output_sprod
    version = output_version

    in_prod_ident = functions.set_path_filename_no_date(
        prod, new_input_subprod, mapset, version, ext)
    in_prod_subdir = functions.set_path_sub_directory(prod, new_input_subprod,
                                                      'Derived', version, mapset)
    starting_files = es2_data_dir + in_prod_subdir + "*" + in_prod_ident

    # Fixes ES2-304
    # Map each 8-day period start date (MMDD) in NON-leap years to the
    # corresponding start date in LEAP years, so the two calendars are merged
    # into a single inter-annual series. Defined once and shared by the
    # clim/min/max generators (previously duplicated three times).
    mmdd_nonleap_to_leap = {
        '0101': '0101', '0109': '0109', '0117': '0117', '0125': '0125',
        '0202': '0202', '0210': '0210', '0218': '0218', '0226': '0226',
        '0306': '0305', '0314': '0313', '0322': '0321', '0330': '0329',
        '0407': '0406', '0415': '0414', '0423': '0422', '0501': '0430',
        '0509': '0508', '0517': '0516', '0525': '0524', '0602': '0601',
        '0610': '0609', '0618': '0617', '0626': '0625', '0704': '0703',
        '0712': '0711', '0720': '0719', '0728': '0727', '0805': '0804',
        '0813': '0812', '0821': '0820', '0829': '0828', '0906': '0905',
        '0914': '0913', '0922': '0921', '0930': '0929', '1008': '1007',
        '1016': '1015', '1024': '1023', '1101': '1031', '1109': '1108',
        '1117': '1116', '1125': '1124', '1203': '1202', '1211': '1210',
        '1219': '1218', '1227': '1226'
    }

    def _stats_input_pairs(output_subdir_stat, out_prod_ident_stat):
        # Shared generator for the 8-day statistics: for each period, collect
        # the matching files from both leap and non-leap years (de-duplicated)
        # and pair them with the statistic's output file (named after the
        # non-leap MMDD).
        for mmdd_nonleap, mmdd_leap in mmdd_nonleap_to_leap.items():
            nonleap_files = sorted(glob.glob(
                es2_data_dir + in_prod_subdir + "*" + mmdd_nonleap + in_prod_ident))
            leap_files = sorted(glob.glob(
                es2_data_dir + in_prod_subdir + "*" + mmdd_leap + in_prod_ident))
            input_files_unique = list(set(leap_files + nonleap_files))
            output_file = es_constants.processing_dir + output_subdir_stat + \
                os.path.sep + mmdd_nonleap + out_prod_ident_stat
            yield (input_files_unique, output_file)

    def _run_stat(input_file, output_file, stat_func):
        # Common body of all six statistic tasks: drop the current (incomplete)
        # year, make sure the target dir exists, and run the raster statistic.
        output_file = functions.list_to_element(output_file)
        reduced_list = exclude_current_year(input_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": reduced_list,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        stat_func(**args)

    # ---------------------------------------------------------------------
    # Climatology (inter-annual average)
    output_sprod_group = proc_lists.proc_add_subprod_group(sub_product_group)
    output_sprod = proc_lists.proc_add_subprod(
        output_sprod_clim,
        sub_product_group,
        final=False,
        descriptive_name='Inter-annual Climatology at ' + frequency_string + ' frequency',
        description='Inter-annual Climatology at ' + frequency_string + ' frequency',
        frequency_id=chla_frequency,
        date_format='MMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    out_prod_ident_clim = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_clim = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_clim + "{MMDD[0]}" + out_prod_ident_clim
    ]

    def generate_input_files_pp_stats():
        for pair in _stats_input_pairs(output_subdir_clim, out_prod_ident_clim):
            yield pair

    # 8-day products use the explicit leap/non-leap pairing; monthly products
    # can rely on a plain MMDD collation.
    if frequency_string != 'monthly':
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_clim_comput)
        @files(generate_input_files_pp_stats)
        def std_yearly_clim(input_file, output_file):
            _run_stat(input_file, output_file, raster_image_math.do_avg_image)
    else:
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_clim_comput)
        @collate(starting_files, formatter(formatter_in), formatter_out)
        def std_yearly_clim(input_file, output_file):
            _run_stat(input_file, output_file, raster_image_math.do_avg_image)

    # ---------------------------------------------------------------------
    # Minimum
    output_sprod = proc_lists.proc_add_subprod(
        output_sprod_min,
        sub_product_group,
        final=False,
        descriptive_name='Inter-annual Minimum at ' + frequency_string + ' frequency',
        description='Inter-annual Minimum at ' + frequency_string + ' frequency',
        frequency_id=chla_frequency,
        date_format='MMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    out_prod_ident_min = functions.set_path_filename_no_date(
        prod, output_sprod_min, mapset, version, ext)
    output_subdir_min = functions.set_path_sub_directory(
        prod, output_sprod_min, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_min + "{MMDD[0]}" + out_prod_ident_min
    ]

    def generate_input_files_pp_stats_min():
        for pair in _stats_input_pairs(output_subdir_min, out_prod_ident_min):
            yield pair

    if frequency_string != 'monthly':
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_min_comput)
        @files(generate_input_files_pp_stats_min)
        def std_yearly_min(input_file, output_file):
            _run_stat(input_file, output_file, raster_image_math.do_min_image)
    else:
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_min_comput)
        @collate(starting_files, formatter(formatter_in), formatter_out)
        def std_yearly_min(input_file, output_file):
            _run_stat(input_file, output_file, raster_image_math.do_min_image)

    # ---------------------------------------------------------------------
    # Maximum
    output_sprod = proc_lists.proc_add_subprod(
        output_sprod_max,
        sub_product_group,
        final=False,
        descriptive_name='Inter-annual Maximum at ' + frequency_string + ' frequency',
        description='Inter-annual Maximum at ' + frequency_string + ' frequency',
        frequency_id=chla_frequency,
        date_format='MMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    out_prod_ident_max = functions.set_path_filename_no_date(
        prod, output_sprod_max, mapset, version, ext)
    output_subdir_max = functions.set_path_sub_directory(
        prod, output_sprod_max, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_max + "{MMDD[0]}" + out_prod_ident_max
    ]

    def generate_input_files_pp_stats_max():
        for pair in _stats_input_pairs(output_subdir_max, out_prod_ident_max):
            yield pair

    if frequency_string != 'monthly':
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_max_comput)
        @files(generate_input_files_pp_stats_max)
        def std_yearly_max(input_file, output_file):
            _run_stat(input_file, output_file, raster_image_math.do_max_image)
    else:
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_max_comput)
        @collate(starting_files, formatter(formatter_in), formatter_out)
        def std_yearly_max(input_file, output_file):
            _run_stat(input_file, output_file, raster_image_math.do_max_image)

    # End of pipeline definition (added for consistency with the other
    # create_pipeline variants in this module; previously fell through
    # returning None).
    return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None):
    """Build the SST fronts pipeline: front detection (raster) followed by
    conversion of the detected fronts to an ESRI shapefile.

    :param prod: input product code (e.g. 'modis-sst', 'pml-modis-sst', 'slstr-sst').
    :param starting_sprod: input sub-product code.
    :param mapset: mapset code (input and output).
    :param version: product version.
    :param starting_dates: unused here (kept for signature compatibility with
           the other create_pipeline variants).
    :param proc_lists: optional ProcLists to append to; a new one is created
           when None.
    :return: the ProcLists object describing the generated sub-products.
    """
    my_date = None

    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    activate_front_detection = 1
    activate_shapefile_conversion = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files (SST)
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, starting_sprod, 'Ingest', version, mapset)

    if my_date:
        starting_files = input_dir + my_date + "*" + in_prod_ident
    else:
        starting_files = input_dir + "*" + in_prod_ident

    # ---------------------------------------------------------------------
    # 1. Define and customize parameters
    # ---------------------------------------------------------------------
    # Default values (routine defaults are used if None is passed); the
    # product-specific sets below override this one. A duplicate dict that was
    # immediately overwritten here (dead code) has been removed - it was
    # identical to the 'modis-sst' set below.
    parameters = {
        'histogramWindowStride': 16,
        'histogramWindowSize': 32,
        'minTheta': 0.76,
        'minPopProp': 0.25,
        'minPopMeanDifference': 20,  # Temperature: 0.45 deg (multiply by 100 !!)
        'minSinglePopCohesion': 0.60,
        'minImageValue': 1,
        'minThreshold': 1
    }

    # Parameters concluded during the Marine Workshop Oct 2018
    if prod == 'modis-sst':
        parameters = {
            'histogramWindowStride': 8,  # smaller window detects more fronts
            'histogramWindowSize': 32,
            'minTheta': 0.76,
            # 'minPopProp': 0.25,
            'minPopMeanDifference': 25,  # Temperature: 0.45 deg (multiply by 100 !!)
            'minSinglePopCohesion': 0.60,
            'minImageValue': 1,
            'minThreshold': 1
        }

    if prod == 'pml-modis-sst':
        parameters = {
            'histogramWindowSize': 32,
            'histogramWindowStride': 16,
            'minTheta': 0.76,
            'minPopProp': 0.25,
            'minPopMeanDifference': 20,
            'minSinglePopCohesion': 0.60,
            'minImageValue': 1,
            'minThreshold': 1
        }

    if prod == 'slstr-sst':
        parameters = {
            'histogramWindowStride': 8,  # smaller window detects more fronts
            'histogramWindowSize': 32,
            'minTheta': 0.76,
            # 'minPopProp': 0.25,
            'minPopMeanDifference': 25,  # Temperature: 0.45 deg (multiply by 100 !!)
            'minSinglePopCohesion': 0.60,
            'minImageValue': 1,
            'minThreshold': 1
        }

    # ---------------------------------------------------------------------
    # SST Fronts (raster)
    output_sprod_group = proc_lists.proc_add_subprod_group("fronts")
    output_sprod = proc_lists.proc_add_subprod(
        "sst-fronts",
        "fronts",
        final=False,
        descriptive_name='SST Fronts',
        description='Sea Surface Temperature Fronts',
        frequency_id='',
        # NOTE(review): 'YYYMMMMDD' looks like a typo for 'YYYYMMDD' — kept
        # as-is because the value may be persisted downstream; confirm.
        date_format='YYYMMMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    prod_ident_fronts = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    subdir_fronts = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version, mapset)

    formatter_in = "(?P<YYYYMMDD>[0-9]{8})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + subdir_fronts + "{YYYYMMDD[0]}" + prod_ident_fronts
    ]

    @active_if(activate_front_detection)
    @transform(starting_files, formatter(formatter_in), formatter_out)
    def sst_fronts_detection(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            # NOTE(review): other tasks pass "compress=lzw" (no spaces) —
            # confirm GDAL honours this spaced form.
            "options": "compress = lzw",
            "parameters": parameters
        }
        raster_image_math.do_detect_sst_fronts(**args)
        print('Done with raster')

    # ---------------------------------------------------------------------
    # SST Fronts (shapefile)
    input_subprod_fronts = "sst-fronts"
    in_prod_ident_fronts = functions.set_path_filename_no_date(
        prod, input_subprod_fronts, mapset, version, ext)
    input_dir_fronts = es2_data_dir + functions.set_path_sub_directory(
        prod, input_subprod_fronts, 'Derived', version, mapset)
    starting_files_fronts = input_dir_fronts + "*" + in_prod_ident_fronts

    output_sprod = proc_lists.proc_add_subprod(
        "sst-fronts-shp",
        "fronts",
        final=False,
        descriptive_name='SST Fronts',
        description='Sea Surface Temperature Fronts (shape)',
        frequency_id='',
        date_format='YYYMMMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    prod_ident_fronts_shp = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, '.shp')
    subdir_fronts_shp = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "(?P<YYYYMMDD>[0-9]{8})" + in_prod_ident_fronts
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + subdir_fronts_shp + "{YYYYMMDD[0]}" + prod_ident_fronts_shp
    ]

    @active_if(activate_shapefile_conversion)
    @transform(starting_files_fronts, formatter(formatter_in), formatter_out)
    def sst_shapefile_conversion(input_file, output_file):
        output_file = functions.list_to_element(output_file)

        # Check if the (final) output file already exists - and delete it,
        # together with all the shapefile sidecar files (.dbf, .shx, ...).
        if os.path.isfile(output_file):
            for my_file in glob.glob(output_file.replace('.shp', '.*')):
                os.remove(my_file)

        # Create a temporary output dir (see also ES2-33)
        try:
            tmpdir = tempfile.mkdtemp(prefix=__name__,
                                      suffix='_' + os.path.basename(output_file),
                                      dir=es_constants.base_tmp_dir)
        except Exception:
            raise NameError('Error in creating tmpdir')

        # Ensure tmpdir is removed even when the conversion fails
        # (previously leaked on any error below).
        try:
            # Convert to shapefile
            tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)
            functions.check_output_dir(os.path.dirname(tmp_output_file))
            # NOTE(review): shell command built by string concatenation;
            # paths are internal but spaces in them would break this.
            command = es_constants.es2globals[
                'gdal_polygonize'] + ' ' + input_file + ' ' + tmp_output_file + ' -nomask -f "ESRI Shapefile"'
            os.system(command)

            # Check output dir exists (see ES2-285)
            functions.check_output_dir(os.path.dirname(output_file))

            # Move the generated files next to the final output
            for my_file in glob.glob(tmp_output_file.replace('.shp', '.*')):
                os.rename(
                    my_file,
                    os.path.dirname(output_file) + os.path.sep + os.path.basename(my_file))
        finally:
            shutil.rmtree(tmpdir)

    return proc_lists
def create_pipeline(prod, starting_sprod, native_mapset, target_mapset, version,
                    starting_dates=None, proc_lists=None, day_time=None, logger=None):
    """Build the MSG-MPE pipeline: daily cumulate (1dcum, re-projected to the
    target mapset) and 10-day cumulate (10dcum) of the 15-minute MPE product.

    :param prod: product code.
    :param starting_sprod: input sub-product code ('mpe').
    :param native_mapset: mapset of the ingested 15-min files.
    :param target_mapset: mapset the daily cumulate is re-projected to.
    :param version: product version.
    :param starting_dates: optional explicit list of input dates (YYYYMMDDhhmm).
    :param proc_lists: optional ProcLists to append to; created when None.
    :param day_time: optional 'hhmm' string; when given, a "day" spans
           myday+hhmm to (myday+1)+hhmm. When None, calendar days are used
           (and the output name uses 0000 as time).
    :param logger: optional logger; logging is skipped when None (previously
           a None logger crashed the generator).
    :return: the ProcLists object describing the generated sub-products.
    """
    # Test flag (to save non-projected cumulated products)
    test_mode = False

    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Set DEFAULTS: all ON
    activate_1dcum_comput = 1
    activate_10dcum_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files ('mpe' subproduct)
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, native_mapset, version, ext)

    input_dir = es2_data_dir + \
        functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, native_mapset)

    # ------------------------------------------------------------------------
    # 1dcum: daily cumulate of the 15 min MPE, re-projected on target mapset
    # NOTE(review): the subprod is registered as '1dmax' but files are written
    # as '1dcum' — looks like a leftover; confirm against the DB before fixing.
    output_sprod = proc_lists.proc_add_subprod("1dmax", "msg-mpe",
                                               final=False,
                                               descriptive_name='1d Cumulate',
                                               description='Daily Cumulate',
                                               frequency_id='e1day',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    output_sprod = '1dcum'
    out_prod_ident_1dcum = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset, version, ext)
    output_subdir_1dcum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, target_mapset)

    # Use a specific function, to skip the current day
    def generate_parameters_1dcum():
        # Look for all input files in input_dir, and sort them
        if starting_dates is not None:
            input_files = [input_dir + my_date + in_prod_ident
                           for my_date in starting_dates]
        else:
            input_files = glob.glob(input_dir + "*" + in_prod_ident)
        if logger is not None:
            logger.debug("starting_files %s" % input_files)

        # Create unique, sorted list of all days (YYYYMMDD)
        day_list = []
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            if mydate_yyyymmdd not in day_list:
                day_list.append(mydate_yyyymmdd)
        day_list = sorted(day_list)

        today_str = datetime.date.today().strftime('%Y%m%d')

        # BUGFIX: the output filename always did int(day_time), which raised
        # TypeError when day_time is None (the documented default). Fall back
        # to midnight (0000) in that case, for both naming and range checks.
        hhmm = int(day_time) if day_time is not None else 0

        for myday in day_list:
            # Exclude the current day
            if myday == today_str:
                continue
            file_list = []
            for input_file in input_files:
                basename = os.path.basename(input_file)
                # Date is in format YYYYMMDDhhmm
                mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
                if day_time is None:
                    # Append files for the calendar day
                    if mydate_yyyymmdd[0:8] == myday:
                        file_list.append(input_file)
                else:
                    # Append files in time range myday+hhmm |-| (myday+1)+hhmm
                    if int(myday) * 10000 + hhmm <= int(mydate_yyyymmdd) \
                            < (int(myday) + 1) * 10000 + hhmm:
                        file_list.append(input_file)
            output_file = es_constants.processing_dir + output_subdir_1dcum + \
                os.path.sep + str(int(myday) * 10000 + hhmm) + out_prod_ident_1dcum
            file_list = sorted(file_list)
            # Check here the number of missing files (for optimization):
            # require at least 87 of the 96 15-min slots
            if len(file_list) > 86:
                yield (file_list, output_file)

    @active_if(activate_1dcum_comput)
    @files(generate_parameters_1dcum)
    def msg_mpe_1dcum(input_file, output_file):
        functions.check_output_dir(os.path.dirname(output_file))
        tmpdir = tempfile.mkdtemp(prefix=__name__, suffix='',
                                  dir=es_constants.base_tmp_dir)
        # Remove tmpdir even if cumulation/reprojection fails
        # (previously leaked on error).
        try:
            tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)
            # Divide by 10 to pass from 0.01 to 0.1 as scale factor for 1d cum
            factor = 0.1
            args = {
                "input_file": input_file,
                "output_file": tmp_output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw",
                "scale_factor": factor,
                "input_nodata": -32768
            }
            raster_image_math.do_cumulate(**args)
            reproject_output(tmp_output_file, native_mapset, target_mapset)

            # Copy the non-reprojected file for validation, only in test_mode
            if test_mode:
                msg_proj_dir = es_constants.processing_dir + functions.set_path_sub_directory(
                    prod, '1dcum', 'Derived', version, native_mapset)
                functions.check_output_dir(msg_proj_dir)
                shutil.copy(tmp_output_file, msg_proj_dir + os.path.sep)
        finally:
            shutil.rmtree(tmpdir)

    # ------------------------------------------------------------------------
    # 10 day Cumulate (mm)
    output_sprod = proc_lists.proc_add_subprod(
        "10dcum",
        "msg-mpe",
        final=False,
        descriptive_name='10day Cumulate',
        description='10day Cumulate in mm',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident_10dcum = functions.set_path_filename_no_date(
        prod, "10dcum", target_mapset, version, ext)
    output_subdir_10dcum = functions.set_path_sub_directory(
        prod, "10dcum", 'Derived', version, target_mapset)

    # The 10-day cumulate reads the 1-day cumulates just produced
    in_prod_10dcum = '1dcum'
    in_prod_ident_10dcum = functions.set_path_filename_no_date(
        prod, in_prod_10dcum, target_mapset, version, ext)
    input_dir_10dcum = es_constants.processing_dir + \
        functions.set_path_sub_directory(prod, in_prod_10dcum, 'Derived', version, target_mapset)
    starting_files_10dcum = input_dir_10dcum + "*" + in_prod_ident_10dcum

    # Define input files
    def generate_parameters_10dcum():
        # Look for all input files in input_dir
        input_files = glob.glob(starting_files_10dcum)

        # Create unique list of all dekads (as 'Julian' number)
        dekad_list = []
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydekad_nbr = functions.conv_date_2_dekad(str(mydate)[0:8])
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)
        dekad_list = sorted(dekad_list)

        # Compute the 'julian' dekad for the current day
        today_str = datetime.date.today().strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        for dekad in dekad_list:
            # Exclude the current dekad
            if dekad == dekad_now:
                continue
            my_dekad_str = functions.conv_dekad_2_date(dekad)
            file_list = []
            for input_file in input_files:
                basename = os.path.basename(input_file)
                mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
                if functions.conv_date_2_dekad(mydate_yyyymmdd[0:8]) == dekad:
                    file_list.append(input_file)
            output_file = es_constants.processing_dir + output_subdir_10dcum + \
                os.path.sep + my_dekad_str + out_prod_ident_10dcum
            yield (file_list, output_file)

    @follows(msg_mpe_1dcum)
    @active_if(activate_10dcum_comput)
    @files(generate_parameters_10dcum)
    def msg_mpe_10dcum(input_file, output_file):
        # Require at least 9 daily files (dekads have 8 to 11 days)
        if len(input_file) > 8:
            output_file = functions.list_to_element(output_file)
            # Daily cumulates are already in the target scale -> no rescaling.
            # (A previously computed-but-unused day-per-dekad value was removed.)
            factor = 1.0
            functions.check_output_dir(os.path.dirname(output_file))
            args = {
                "input_file": input_file,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw",
                "scale_factor": factor,
                "input_nodata": -32768
            }
            raster_image_math.do_cumulate(**args)
        else:
            if logger is not None:
                logger.warning(
                    'More than 2 files missing for output {0}: Skip'.format(
                        os.path.basename(output_file)))

    return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None):
    """Build the 'rain-onset' processing chain for a dekadal rainfall product.

    Computes the rainfall start-of-season (onset) dekad by dekad inside the
    seasonal window [start_season .. end_season]; the window wraps over the
    year end (start '0901' > end '0421'). The second dekad of the season is
    initialised from two inputs (t0, t0-1); every later dekad additionally
    needs the previous onset output.

    Args:
        prod: product code.
        starting_sprod: input sub-product code (10-day rainfall, 'Ingest').
        mapset: mapset code of input and output.
        version: product version.
        starting_dates: optional explicit list of input dates (YYYYMMDD);
            when None all ingested files are considered.
        proc_lists: optional functions.ProcLists() accumulator; created here
            when not passed.

    Returns:
        proc_lists, with the 'rain-onset' sub-product registered.
    """
    # Definitions: season window and the dekad that (re-)initialises the chain.
    start_season = '0901'
    second_dekad = '0911'
    end_season = '0421'

    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # FIX: this activation flag was referenced by @active_if below but never
    # defined, raising NameError at pipeline-definition time. Default ON,
    # consistent with the sibling pipelines in this package.
    activate_onset_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files (10d)
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, starting_sprod, 'Ingest', version, mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = glob.glob(input_dir + "*" + in_prod_ident)

    # ---------------------------------------------------------------------
    # Define output files (onset)
    output_sprod = proc_lists.proc_add_subprod(
        "rain-onset",
        "none",
        final=False,
        descriptive_name='Rain Onset',
        description='Rainfall Start of the season',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    prod_ident_onset = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    subdir_onset = functions.set_path_sub_directory(prod, output_sprod,
                                                    'Derived', version, mapset)

    def generate_parameters_onset():
        # Yield one (input_files, output_file) tuple per in-season dekad.
        starting_files.sort()
        for file_t0 in starting_files:
            # Get current date
            date_t0 = functions.get_date_from_path_full(file_t0)

            # Seasonal range check uses only the MMDD part of the date.
            dekad_t0 = int(date_t0[4:])
            dekad_start = int(start_season)
            dekad_second = int(second_dekad)
            dekad_end = int(end_season)

            # do_proc: 0 -> skip, 1 -> initialise (2 inputs),
            #          2 -> update (3 inputs + previous output)
            do_proc = 0
            in_season = False

            # Check we are within the season -> do_proc. The window may wrap
            # the year end (start > end): either side of the wrap qualifies.
            if dekad_start < dekad_end:
                if dekad_t0 > dekad_start and dekad_t0 <= dekad_end:
                    in_season = True
            else:
                if dekad_t0 > dekad_start or dekad_t0 <= dekad_end:
                    in_season = True

            if in_season and (dekad_t0 == dekad_second):
                do_proc = 1
            if in_season and (dekad_t0 != dekad_second):
                do_proc = 2

            if do_proc:
                output_file = es2_data_dir + subdir_onset + str(
                    date_t0) + prod_ident_onset

                # Get files at t-1 and t-2 (if they exist)
                previous_files = functions.previous_files(file_t0)

                # Check if at least one previous file has been identified
                if do_proc == 1:
                    # Check at least 1 previous file exist
                    if len(previous_files) < 1:
                        print('Error Case 1: no any previous file')
                    else:
                        # Pass two arguments (t0 and t0-1)
                        input_files = [file_t0, previous_files[0]]
                        yield (input_files, output_file)
                elif do_proc == 2:
                    error = False
                    # Check 2 previous files exist
                    if len(previous_files) < 2:
                        print('Error Case 2: a previous file is missing')
                        error = True
                    # Look for previous output
                    previous_outputs = functions.previous_files(output_file)
                    if len(previous_outputs) < 1:
                        print('Error Case 2: the previous output is missing')
                        error = True
                    # Pass four arguments (t0, t0-1, t0-2 and output-1)
                    if not error:
                        previous_output = previous_outputs[0]
                        if os.path.isfile(previous_output):
                            input_files = [
                                file_t0, previous_files[0], previous_files[1],
                                previous_output
                            ]
                            yield (input_files, output_file)

    @active_if(activate_onset_comput)
    @files(generate_parameters_onset)
    def rain_onset(input_files, output_file):
        # Compute the rain-onset raster for one dekad.
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # Need to define the current_dekad number, wrt begin of season
        current_date = functions.get_date_from_path_full(output_file)
        current_dekad = current_date[4:]
        dekad_number = functions.dekad_nbr_in_season(current_dekad,
                                                     start_season)
        # Call the function
        args = {
            "input_file": input_files,
            "output_file": output_file,
            'input_nodata': None,
            'output_nodata': None,
            'output_type': 'Int16',
            "output_format": 'GTIFF',
            "options": "compress = lzw",
            'current_dekad': dekad_number
        }
        raster_image_math.do_rain_onset(**args)

    # FIX: return the accumulator like every sibling create_pipeline does
    # (the original fell off the end and returned None).
    return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None, update_stats=False, nrt_products=True):
    """Build the dekadal fire-count statistics chain.

    Registers and defines the ruffus tasks for:
      * 10dcount    - fire count per dekad (cumulate of daily counts)
      * 10dcountavg - inter-annual average per dekad
      * 10dcountmin - inter-annual minimum per dekad
      * 10dcountmax - inter-annual maximum per dekad
    (a 10dcountdiff anomaly task exists but is commented out).

    Args:
        prod: product code.
        starting_sprod: input sub-product code (daily counts, 'Ingest').
        mapset: mapset code.
        version: product version.
        starting_dates: optional list of input dates (YYYYMMDD).
        proc_lists: optional functions.ProcLists() accumulator.
        update_stats: when True, activate the statistics tasks.
        nrt_products: when True, activate the near-real-time tasks.

    Returns:
        proc_lists with all sub-products registered.
    """
    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Set DEFAULTS: all off
    activate_10dstats_comput = 0  # 10d stats
    activate_10danomalies_comput = 0  # 10d anomalies

    # switch wrt groups - according to options
    if nrt_products:
        activate_10dcount_comput = 1  # 10d anomalies
        activate_10danomalies_comput = 1  # monthly anomalies

    if update_stats:
        activate_10dstats_comput = 1  # 10d stats

    # switch wrt single products: not to be changed !!
    activate_10dcount_comput = 1  # 10d count
    activate_10dcountavg_comput = 1
    activate_10dcountmin_comput = 1
    activate_10dcountmax_comput = 1
    activate_10ddiff_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    #logger.debug('Base data directory is: %s' % es2_data_dir)
    input_dir = es2_data_dir + \
        functions.set_path_sub_directory(prod, starting_sprod, 'Ingest',
                                         version, mapset)

    if starting_dates is not None:
        # NOTE(review): in this branch starting_files is a *list*, but
        # generate_parameters_10dcount() below passes it to glob.glob(),
        # which expects a pattern string — confirm callers never pass
        # starting_dates to this pipeline.
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        # Glob pattern (string), expanded lazily by the parameter generator.
        starting_files = input_dir + "*" + in_prod_ident

    # ---------------------------------------------------------------------
    # 10dcount
    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcount",
        "10dcount",
        final=False,
        descriptive_name='10d Count',
        description='Fire Count for dekad',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident_10dcount = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_10dcount = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_10dcount():
        # Group daily input files by dekad; yield one task per complete dekad.
        # Look for all input files in input_dir, and sort them
        input_files = glob.glob(starting_files)
        dekad_list = []

        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)
        dekad_list = sorted(dekad_list)

        # Compute the 'julian' dakad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        for dekad in dekad_list:
            # Exclude the current dekad (still incomplete)
            if dekad != dekad_now:
                file_list = []
                my_dekad_str = functions.conv_dekad_2_date(dekad)
                for input_file in input_files:
                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    mydekad_nbr = functions.conv_date_2_dekad(
                        mydate_yyyymmdd[0:8])
                    if mydekad_nbr == dekad:
                        file_list.append(input_file)

                output_file = es_constants.processing_dir + output_subdir_10dcount + os.path.sep + my_dekad_str + out_prod_ident_10dcount
                yield (file_list, output_file)

    @active_if(activate_10dcount_comput)
    @files(generate_parameters_10dcount)
    def std_precip_10dcount(input_file, output_file):
        # Cumulate the daily counts of one dekad into a single raster.
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_cumulate(**args)

    # ---------------------------------------------------------------------
    # 10dcountavg
    starting_files_10dcount = es_constants.processing_dir + output_subdir_10dcount + "*" + out_prod_ident_10dcount

    output_sprod_group = proc_lists.proc_add_subprod_group("10dstats")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcountavg",
        "10dstats",
        final=False,
        descriptive_name='10d Fire Average',
        description='Average fire for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    # Collate all years of the same MMDD dekad into one inter-annual average.
    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" +
        out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dcountavg_comput)
    @collate(starting_files_10dcount, formatter(formatter_in), formatter_out)
    def std_precip_10davg(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            'output_type': 'Float32',
            'input_nodata': -32767
        }
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # 10dcountmin
    output_sprod = proc_lists.proc_add_subprod(
        "10dcountmin",
        "10dstats",
        final=False,
        descriptive_name='10d Fire Minimum',
        description='Minimum Fire for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" +
        out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dcountmin_comput)
    @collate(starting_files_10dcount, formatter(formatter_in), formatter_out)
    def std_precip_10dmin(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_min_image(**args)

    # ---------------------------------------------------------------------
    # 10dcountmax
    # NOTE(review): descriptive_name/description below say 'rainfall' —
    # looks copy-pasted from the precipitation chain; confirm intended text.
    output_sprod = proc_lists.proc_add_subprod(
        "10dcountmax",
        "10dstats",
        final=False,
        descriptive_name='10d Maximum',
        description='Maximum rainfall for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" +
        out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dcountmax_comput)
    @collate(starting_files_10dcount, formatter(formatter_in), formatter_out)
    def std_precip_10dmax(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_max_image(**args)

    # # ---------------------------------------------------------------------
    # # 10dDiff
    # output_sprod_group=proc_lists.proc_add_subprod_group("10danomalies")
    # output_sprod=proc_lists.proc_add_subprod("10dcountdiff", "10danomalies", final=False,
    #                                          descriptive_name='10d Absolute Difference',
    #                                          description='10d Absolute Difference vs. LTA',
    #                                          frequency_id='e1dekad',
    #                                          date_format='YYYYMMDD',
    #                                          masked=False,
    #                                          timeseries_role='10d',
    #                                          active_default=True)
    # out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    # output_subdir = functions.set_path_sub_directory (prod, output_sprod, 'Derived', version, mapset)
    #
    # # Starting files + avg
    # formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident
    # formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident
    #
    # ancillary_sprod = "10davg"
    # ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
    # ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived',version, mapset)
    # ancillary_input="{subpath[0][5]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident
    #
    # @follows(std_precip_10davg)
    # @active_if(activate_10danomalies_comput, activate_10ddiff_comput)
    # @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    # def std_precip_10ddiff(input_file, output_file):
    #
    #     output_file = functions.list_to_element(output_file)
    #     functions.check_output_dir(os.path.dirname(output_file))
    #     args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
    #     raster_image_math.do_oper_subtraction(**args)

    return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None): my_date=None # --------------------------------------------------------------------- # Create lists if proc_lists is None: proc_lists = functions.ProcLists() activate_gradient_computation = 1 #activate_shapefile_conversion = 1 sds_meta = metadata.SdsMetadata() es2_data_dir = es_constants.es2globals['processing_dir']+os.path.sep # --------------------------------------------------------------------- # Define input files (chla) in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext) input_dir = es2_data_dir+ functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset) if my_date: starting_files = input_dir+my_date+"*"+in_prod_ident else: starting_files = input_dir+"*"+in_prod_ident # --------------------------------------------------------------------- # 1. Define and customize parameters # --------------------------------------------------------------------- # # # Default values are from the routine are used if None is passed # parameters = {'histogramWindowStride': 16, # 'histogramWindowSize': 32, # 'minTheta': 0.76, # 'minPopProp': 0.25, # 'minPopMeanDifference': 20, # Temperature: 0.45 deg (multiply by 100 !!) 
# 'minSinglePopCohesion': 0.60, # 'minImageValue': 1, # 'minThreshold': 1} # if prod == 'modis-sst': # parameters = { 'histogramWindowStride': None, # 'minTheta' : None, # 'minPopProp' : None, # 'minPopMeanDifference' : None, # 'minSinglePopCohesion' : None, # 'histogramWindowSize' : None, # 'minImageValue' : None, # 'minThreshold' : None } # # if prod == 'pml-modis-sst': # parameters = { 'histogramWindowSize' : 32, # 'histogramWindowStride': 16, # 'minTheta' : 0.76, # 'minPopProp' : 0.25, # 'minPopMeanDifference' : 20, # 'minSinglePopCohesion' : 0.60, # 'minImageValue' : 1, # 'minThreshold' : 1 } # --------------------------------------------------------------------- # Chal Gradient (raster) output_sprod_group=proc_lists.proc_add_subprod_group("gradient") output_sprod=proc_lists.proc_add_subprod("gradient", "gradient", final=False, descriptive_name='Gradient', description='Gradient', frequency_id='', date_format='YYYYMMDD', masked=False, timeseries_role='', active_default=True) prod_ident_gradient = functions.set_path_filename_no_date(prod, output_sprod,mapset, version, ext) subdir_gradient = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset) formatter_in = "(?P<YYYYMMDD>[0-9]{8})"+in_prod_ident formatter_out = ["{subpath[0][5]}"+os.path.sep+subdir_gradient+"{YYYYMMDD[0]}"+prod_ident_gradient] @active_if(activate_gradient_computation) @transform(starting_files, formatter(formatter_in),formatter_out) def gradient_computation(input_file, output_file): no_data = int(sds_meta.get_nodata_value(input_file)) output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = {"input_file": input_file, "output_file": output_file, "nodata": no_data, "output_format": 'GTIFF', "options": "compress = lzw"} raster_image_math.do_compute_chla_gradient(**args) print ('Done with raster') return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None, nrt_products=True, logger=None):
    """Build the 3-day average chain for a daily chl product.

    Registers the '3dayavg' sub-product and defines one ruffus task that
    averages each run of three consecutive daily files (day, day+1, day+2),
    dated at the first day of the triplet.

    Args:
        prod: product code.
        starting_sprod: input sub-product code ('Ingest').
        mapset: mapset code.
        version: product version.
        starting_dates: optional list of input dates (YYYYMMDD).
        proc_lists: optional functions.ProcLists() accumulator.
        nrt_products: accepted for signature parity; not read in this body.
        logger: logger instance.
            NOTE(review): defaults to None but logger.debug() is called
            unguarded inside generate_parameters_3davg() — would raise
            AttributeError; confirm callers always pass a logger.

    Returns:
        proc_lists with the '3dayavg' sub-product registered.
    """
    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Set DEFAULTS: all ON
    activate_3davg_comput = 1
    activate_1monavg_comput = 1  # NOTE(review): set but never read below

    sds_meta = metadata.SdsMetadata()
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files (chl)
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)
    input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, starting_sprod, 'Ingest', version, mapset)

    starting_files = input_dir + "*" + in_prod_ident

    # ----------------------------------------------------------------------------------------------------------------
    # 1 . 3davg
    # 3 Day average of the 1 day Chl, re-projected on target mapset
    output_sprod = proc_lists.proc_add_subprod("3dayavg", prod, final=False,
                                               descriptive_name='3day Avg',
                                               description='3 day Average',
                                               frequency_id='e1day',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)

    prod_ident_3davg = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    subdir_3davg = functions.set_path_sub_directory(prod, output_sprod,
                                                    'Derived', version, mapset)

    # Use a specific function, to skip the current day
    def generate_parameters_3davg():
        # Look for all input files in input_dir, and sort them
        if starting_dates is not None:
            input_files = []
            for my_date in starting_dates:
                input_files.append(input_dir + my_date + in_prod_ident)
        else:
            # Local name deliberately shadows the enclosing starting_files.
            starting_files = input_dir + "*" + in_prod_ident
            input_files = glob.glob(starting_files)

        logger.debug("starting_files %s" % input_files)

        day_list = []
        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            if mydate_yyyymmdd not in day_list:
                day_list.append(mydate_yyyymmdd)

        day_list = sorted(day_list)

        # Compute the 'julian' dakad for the current day
        # NOTE(review): datetime.today()/datetime(yyyy, mm, dd) implies this
        # module imports the datetime *class* directly (unlike siblings that
        # use datetime.date.today()) — confirm against the module imports.
        today = datetime.today()
        yesterday = today - timedelta(1)
        today_str = today.strftime('%Y%m%d')
        yesterday_str = yesterday.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)  # NOTE(review): unused

        for myday in day_list:
            # Exclude the current day and yesterday
            #if myday != today_str or myday != yesterday_str:
            #some_list = ['abc-123', 'def-456', 'ghi-789', 'abc-456']
            input_file = [s for s in input_files if myday in s]
            file_list = []
            #for input_file in input_files:
            #for i, input_file in enumerate(input_files, 1):
            basename = os.path.basename(input_file[0])
            # Date is in format YYYYMMDD
            mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
            #if mydate_yyyymmdd != day_list[i]:
            yyyy = int(mydate_yyyymmdd[0:4])
            mm = int(mydate_yyyymmdd[4:6])
            dd = int(mydate_yyyymmdd[6:8])
            # The two following calendar days must both exist, otherwise the
            # triplet is skipped.
            day2 = datetime(yyyy, mm, dd) + timedelta(1)
            day2_filepath = input_dir + day2.strftime('%Y%m%d') + in_prod_ident
            if not functions.is_file_exists_in_path(day2_filepath):
                continue

            day3 = datetime(yyyy, mm, dd) + timedelta(2)
            day3_filepath = input_dir + day3.strftime('%Y%m%d') + in_prod_ident
            if not functions.is_file_exists_in_path(day3_filepath):
                continue

            file_list.append(input_file[0])
            file_list.append(day2_filepath)
            file_list.append(day3_filepath)

            # Output is dated at the first day of the 3-day window.
            output_file = es_constants.processing_dir + subdir_3davg + os.path.sep + mydate_yyyymmdd + prod_ident_3davg
            file_list = sorted(file_list)

            # Check here the number of missing files (for optimization)
            if len(file_list) == 3:
                yield (file_list, output_file)

    @active_if(activate_3davg_comput)
    @files(generate_parameters_3davg)
    def compute_3dayavg(input_file, output_file):
        # Average the 3 daily rasters; nodata is taken from the first input.
        no_data = int(sds_meta.get_nodata_value(input_file[0]))
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "",
            "input_nodata": no_data,
            "output_nodata": no_data
        }
        raster_image_math.do_avg_image(**args)

    return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None, logger=None):
    """Build the monthly-average chain for a daily product.

    Registers the 'monavg' sub-product and defines one ruffus @collate task
    that averages all daily files of a month into a single raster dated the
    1st of that month. The current (incomplete) month is skipped.

    Args:
        prod: product code.
        starting_sprod: input sub-product code ('Ingest').
        mapset: mapset code.
        version: product version.
        starting_dates: optional list of input dates (unused; the local
            my_date filter below is always None).
        proc_lists: optional functions.ProcLists() accumulator.
        logger: optional logger; skip messages are dropped when None.

    Returns:
        proc_lists with the 'monavg' sub-product registered.
    """
    my_date = None  # always None -> all ingested files are taken

    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Metadata reader, used to extract the nodata value from the inputs.
    sds_meta = metadata.SdsMetadata()
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)
    input_dir = es2_data_dir + functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

    if my_date is not None:
        starting_files = input_dir + my_date + "*" + in_prod_ident
    else:
        starting_files = input_dir + "*" + in_prod_ident

    # ---------------------------------------------------------------------
    # Monthly Average for a given month
    output_sprod_group = proc_lists.proc_add_subprod_group("monstats")
    # NOTE(review): date_format 'YYYMMMMDD' looks like a typo for 'YYYYMMDD';
    # left unchanged because it is a DB-facing value.
    output_sprod = proc_lists.proc_add_subprod("monavg", "monstats", final=False,
                                               descriptive_name='Monthly average',
                                               description='Monthly average',
                                               frequency_id='',
                                               date_format='YYYMMMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Collate all days of one month; the output is dated the 1st of the month.
    formatter_in = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" + '01' + out_prod_ident

    @collate(starting_files, formatter(formatter_in), formatter_out)
    def compute_monavg(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        out_filename = os.path.basename(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        no_data = int(sds_meta.get_nodata_value(input_file[0]))

        # Skip the current month: it is still incomplete.
        str_date = out_filename[0:6]
        today = datetime.date.today()
        today_yyyymm = today.strftime('%Y%m')
        # expected_ndays=functions.get_number_days_month(str_date)
        # current_ndays=len(input_file)
        if str_date == today_yyyymm:
            # FIX: 'logger' defaults to None; the unguarded .info() call
            # crashed with AttributeError whenever the current month came up.
            if logger is not None:
                logger.info('Do not perform computation for current month {0}. Skip'.format(str_date))
        else:
            args = {"input_file": input_file,
                    "output_file": output_file,
                    "output_format": 'GTIFF',
                    "input_nodata": no_data,
                    "options": "compress=lzw"}
            raster_image_math.do_avg_image(**args)

    return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None, logger=None):
    """Build the MODIS 8-day and monthly statistics chain.

    Registers and defines the ruffus tasks for:
      * 8daysavg - average over each standard MODIS 8-day period
      * monavg   - monthly average (current month skipped)
      * monclim  - inter-annual monthly climatology (default OFF)
      * monanom  - monthly anomaly = monavg - monclim

    Args:
        prod: product code.
        starting_sprod: input sub-product code ('Ingest').
        mapset: mapset code.
        version: product version.
        starting_dates: optional list of input dates (unused; the local
            my_date filter below is always None).
        proc_lists: optional functions.ProcLists() accumulator.
        logger: logger instance.
            NOTE(review): defaults to None but modis_monavg() calls
            logger.info() unguarded — would raise AttributeError when the
            current month is skipped; confirm callers always pass a logger.

    Returns:
        proc_lists with all sub-products registered.
    """
    my_date = None  # always None -> all ingested files are taken

    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # 8d cumul
    activate_8dayavg_comput = 1
    # monthly
    activate_monavg_comput = 1
    activate_monclim_comput = 0
    activate_monanom_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Define input files
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)
    input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, starting_sprod, 'Ingest', version, mapset)

    if my_date is not None:
        starting_files = input_dir + my_date + "*" + in_prod_ident
    else:
        starting_files = input_dir + "*" + in_prod_ident

    # ---------------------------------------------------------------------
    # 8-days Average
    output_sprod_group_8day = proc_lists.proc_add_subprod_group("8days")
    # NOTE(review): date_format 'YYYMMMMDD' looks like a typo for 'YYYYMMDD';
    # left unchanged because it is a DB-facing value.
    output_sprod_8day = proc_lists.proc_add_subprod(
        "8daysavg",
        "8days",
        final=False,
        descriptive_name='8Day average',
        description='8Day average',
        frequency_id='',
        date_format='YYYMMMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident_8day = functions.set_path_filename_no_date(
        prod, output_sprod_8day, mapset, version, ext)
    output_subdir_8day = functions.set_path_sub_directory(
        prod, output_sprod_8day, 'Derived', version, mapset)

    def generate_parameters_8days():
        # Group inputs by (year, 8-day period); yield one task per period,
        # skipping the current (incomplete) period.
        years_periods_list = []

        # Look for all input files in input_dir
        input_files = glob.glob(starting_files)

        # Create unique list of all (year, period) pairs
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydate_year = str(mydate)[0:4]
            period_nbr = functions.conv_date_2_8days(mydate_yyyymmdd)
            if (mydate_year, period_nbr) not in years_periods_list:
                years_periods_list.append((mydate_year, period_nbr))

        periods_sorted = sorted(years_periods_list)

        # Compute the current (year, period) for today's date
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        year_now = today.strftime('%Y')
        period_now = functions.conv_date_2_8days(today_str)

        for year, period in periods_sorted:
            # Exclude the current period of the current year
            if period != period_now or year != year_now:
                file_list = []
                # Day-of-year of the period start -> output date (YYYYMMDD)
                jdoy_period = "{0:03d}".format(1 + 8 * (int(period) - 1))
                mmdd_period = functions.conv_date_yyyydoy_2_yyyymmdd(
                    year + jdoy_period)
                output_file = es_constants.processing_dir + output_subdir_8day + os.path.sep + mmdd_period + out_prod_ident_8day
                for myfile in input_files:
                    basename = os.path.basename(myfile)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    mydate_year = mydate_yyyymmdd[0:4]
                    period_nbr = functions.conv_date_2_8days(
                        mydate_yyyymmdd[0:8])
                    if period_nbr == period and mydate_year == year:
                        file_list.append(myfile)

                # Special case of last period of the year: add few days of next year
                if period == 46:
                    next_year = "{0:04d}".format(int(year) + 1)
                    if calendar.isleap(int(year)):
                        add_days = ('0101', '0102', '0103')
                    else:
                        add_days = ('0101', '0102', '0103', '0104')
                    for day in add_days:
                        date = next_year + day
                        matches = [
                            x for x in input_files
                            if fnmatch.fnmatch(x, '*{0}*'.format(date))
                        ]
                        # Fixes ES2-35 (see YouTrack)
                        if len(matches) > 0:
                            file_list.append(matches[0])

                yield (sorted(file_list), output_file)

    @active_if(activate_8dayavg_comput)
    @files(generate_parameters_8days)
    def modis_8dayavg(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        out_filename = os.path.basename(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Monthly Average for a given month
    output_sprod_group = proc_lists.proc_add_subprod_group("monstats")
    output_sprod = proc_lists.proc_add_subprod(
        "monavg",
        "monstats",
        final=False,
        descriptive_name='Monthly average',
        description='Monthly average',
        frequency_id='',
        date_format='YYYMMMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    # Collate all days of one month; output is dated the 1st of that month.
    formatter_in = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" + '01' + out_prod_ident

    @active_if(activate_monavg_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def modis_monavg(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        out_filename = os.path.basename(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # Skip the current month: it is still incomplete.
        str_date = out_filename[0:6]
        today = datetime.date.today()
        today_yyyymm = today.strftime('%Y%m')
        #expected_ndays=functions.get_number_days_month(str_date)
        #current_ndays=len(input_file)
        if str_date == today_yyyymm:
            logger.info(
                'Do not perform computation for current month {0}. Skip'.
                format(str_date))
        else:
            args = {
                "input_file": input_file,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw"
            }
            raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Monthly Climatology for all years
    output_sprod = proc_lists.proc_add_subprod(
        "monclim",
        "monstats",
        final=False,
        descriptive_name='Monthly climatology',
        description='Monthly climatology',
        frequency_id='',
        date_format='YYYMMMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    # The climatology reads the 'monavg' outputs as its own input.
    new_input_subprod = 'monavg'
    new_in_prod_ident = functions.set_path_filename_no_date(
        prod, new_input_subprod, mapset, version, ext)
    new_input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, new_input_subprod, 'Derived', version, mapset)
    new_starting_files = new_input_dir + "*" + new_in_prod_ident

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    # Collate the same MMDD across all years.
    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + new_in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident

    @active_if(activate_monclim_comput)
    @collate(new_starting_files, formatter(formatter_in), formatter_out)
    def modis_chla_monclim(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', \
                "options": "compress=lzw"}
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Monthly Anomaly for a given monthly
    output_sprod = proc_lists.proc_add_subprod(
        "monanom",
        "monstats",
        final=False,
        descriptive_name='Monthly anomaly',
        description='Monthly anomaly',
        frequency_id='',
        date_format='YYYMMMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    # Starting files + avg: each monavg is paired with its MMDD climatology.
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + new_in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "monclim"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    @active_if(activate_monanom_comput)
    @transform(new_starting_files, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    def modis_chla_mondiff(input_file, output_file):
        # Anomaly = monthly average minus monthly climatology.
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_oper_subtraction(**args)

    return proc_lists
def create_pipeline(input_products, logfile=None, nrt_products=True,
                    update_stats=False, output_product=None):
    """Build the ruffus pipeline computing MODIS Primary Production (PP).

    PP is derived from four input sub-products: chlorophyll-a (chla),
    sea-surface temperature (sst), diffuse attenuation (kd490) and
    photosynthetically available radiation (par).

    :param input_products: iterable of exactly 4 input descriptors (objects
        exposing .productcode, .subproductcode, .version, .mapsetcode),
        one each matching 'chla', 'sst', 'kd490' and 'par'.
        NOTE(review): the original signature read ``input_product`` while
        the body used ``input_products`` (a NameError as written) — the
        body's name is kept; confirm against the callers.
    :param logfile: name passed to log.my_logger() for the task logger.
    :param nrt_products: activate the near-real-time PP computation task.
    :param update_stats: activate the statistics/anomaly tasks.
    :param output_product: list whose first element describes the generated
        PP sub-product (8daysavg or monavg). The original body referenced
        an undefined ``output_product`` name; it is now an explicit keyword
        argument, appended with a default so existing call forms still
        parse — in practice it must be supplied.
    :return: 1 on input-validation failure; otherwise execution continues
        into the task definitions (the function continues past this chunk).
    """
    proc_lists = functions.ProcLists()

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_modis_pp')

    # Set DEFAULTS: all off
    activate_pp_comput = 0         # PP from Chla, SST, Kd490 and PAR
    activate_stats_comput = 0      # Stats computation (inter-annual clim, min, max)
    activate_anomalies_comput = 0  # Anomalies computation (not yet done!!)

    # switch wrt groups - according to options
    if nrt_products:
        activate_pp_comput = 1     # PP from Chla, SST, Kd490 and PAR
    if update_stats:
        activate_stats_comput = 1
        activate_anomalies_comput = 1

    activate_pp_prod_comput = 1
    activate_pp_stats_clim_comput = 0
    activate_pp_stats_min_comput = 0
    activate_pp_stats_max_comput = 0

    # ---------------------------------------------------------------------
    # Create lists
    # my_date = '20160601'  (left empty -> no date filter on the kd490 glob)
    my_date = ''
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Parse the arguments and extract the 4 input variables
    # ('<>' replaced by '!=': same semantics, and valid beyond Python 2)
    if len(input_products) != 4:
        spec_logger.error('Modis PP computation requires 4 inputs. Exit')
        return 1

    found_chla = False
    found_sst = False
    found_par = False
    found_kd490 = False

    # For each of the 4 products, record code/version/sub-product/mapset and
    # pre-compute the filename suffix and input directory used by the tasks.
    for input_product in input_products:
        if re.search('.*chla.*', input_product.productcode):
            found_chla = True
            chla_prod = input_product.productcode
            chla_version = input_product.version
            chla_sprod = input_product.subproductcode
            chla_mapset = input_product.mapsetcode
            chla_prod_ident = functions.set_path_filename_no_date(
                chla_prod, chla_sprod, chla_mapset, chla_version, ext)
            chla_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(chla_prod, chla_sprod,
                                                 'Derived', chla_version,
                                                 chla_mapset)

        if re.search('.*sst.*', input_product.productcode):
            found_sst = True
            sst_prod = input_product.productcode
            sst_version = input_product.version
            sst_sprod = input_product.subproductcode
            sst_mapset = input_product.mapsetcode
            sst_prod_ident = functions.set_path_filename_no_date(
                sst_prod, sst_sprod, sst_mapset, sst_version, ext)
            sst_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(sst_prod, sst_sprod,
                                                 'Derived', sst_version,
                                                 sst_mapset)

        if re.search('.*kd490.*', input_product.productcode):
            found_kd490 = True
            kd490_prod = input_product.productcode
            kd490_version = input_product.version
            kd490_sprod = input_product.subproductcode
            kd490_mapset = input_product.mapsetcode
            kd490_prod_ident = functions.set_path_filename_no_date(
                kd490_prod, kd490_sprod, kd490_mapset, kd490_version, ext)
            kd490_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(kd490_prod, kd490_sprod,
                                                 'Derived', kd490_version,
                                                 kd490_mapset)

        if re.search('.*par.*', input_product.productcode):
            found_par = True
            par_prod = input_product.productcode
            par_version = input_product.version
            par_sprod = input_product.subproductcode
            par_mapset = input_product.mapsetcode
            par_prod_ident = functions.set_path_filename_no_date(
                par_prod, par_sprod, par_mapset, par_version, ext)
            par_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(par_prod, par_sprod,
                                                 'Derived', par_version,
                                                 par_mapset)

    # Check consistency of inputs
    if not (found_chla and found_kd490 and found_par and found_sst):
        spec_logger.error('At least one of 4 expected inputs missing. Exit')
        return 1

    if chla_mapset != sst_mapset or chla_mapset != kd490_mapset \
            or chla_mapset != par_mapset:
        spec_logger.error('All 4 input mapset must be equals. Exit')
        return 1

    # Read input product nodata
    chla_prod_info = querydb.get_product_out_info(productcode=chla_prod,
                                                  subproductcode=chla_sprod,
                                                  version=chla_version)
    chla_product_info = functions.list_to_element(chla_prod_info)
    chla_nodata = chla_product_info.nodata

    sst_prod_info = querydb.get_product_out_info(productcode=sst_prod,
                                                 subproductcode=sst_sprod,
                                                 version=sst_version)
    sst_product_info = functions.list_to_element(sst_prod_info)
    sst_nodata = sst_product_info.nodata

    kd_prod_info = querydb.get_product_out_info(productcode=kd490_prod,
                                                subproductcode=kd490_sprod,
                                                version=kd490_version)
    kd_product_info = functions.list_to_element(kd_prod_info)
    kd_nodata = kd_product_info.nodata

    par_prod_info = querydb.get_product_out_info(productcode=par_prod,
                                                 subproductcode=par_sprod,
                                                 version=par_version)
    par_product_info = functions.list_to_element(par_prod_info)
    par_nodata = par_product_info.nodata

    # Define outputs
    output_nodata = -32767

    # Get the first output -> PP subproduct generated (8daysavg or monavg)
    output_prod = output_product[0].productcode
    output_sprod = output_product[0].subproductcode
    output_version = output_product[0].version
    output_mapset = output_product[0].mapsetcode

    out_prod_ident = functions.set_path_filename_no_date(
        output_prod, output_sprod, output_mapset, output_version, ext)
    output_subdir = functions.set_path_sub_directory(output_prod,
                                                     output_sprod,
                                                     'Derived',
                                                     output_version,
                                                     output_mapset)

    # Fixes ES2-36
    def generate_input_files_pp():
        """Yield ((kd490, chla, par, sst), output) pairs for each date.

        The kd490 files drive the search; a date is emitted only when the
        chla, par and sst files for the same date all exist on disk.
        """
        kd_files = kd490_input_dir + my_date + "*" + kd490_prod_ident
        input_files = sorted(glob.glob(kd_files))

        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            ancillary_chla = chla_input_dir + mydate + chla_prod_ident
            ancillary_par = par_input_dir + mydate + par_prod_ident
            ancillary_sst = sst_input_dir + mydate + sst_prod_ident

            # Skip the date unless every companion input is present
            # (replaces the original do_comp flag chain; same behaviour).
            if all(os.path.isfile(f) for f in (ancillary_chla,
                                               ancillary_par,
                                               ancillary_sst)):
                output_file = es_constants.processing_dir + output_subdir + \
                    os.path.sep + mydate + out_prod_ident
                my_inputs = (input_file, ancillary_chla, ancillary_par,
                             ancillary_sst)
                yield (my_inputs, output_file)

    @active_if(activate_pp_comput)
    @files(generate_input_files_pp)
    def modis_pp_comp(input_file, output_file):
        # input_file order is (kd490, chla, par, sst) — see
        # generate_input_files_pp above; indices below must match it.
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"chla_file": input_file[1],
                "sst_file": input_file[3],
                "kd_file": input_file[0],
                "par_file": input_file[2],
                "sst_nodata": sst_nodata,
                "kd_nodata": kd_nodata,
                "chla_nodata": chla_nodata,
                "par_nodata": par_nodata,
                "output_file": output_file,
                "output_nodata": output_nodata,
                "output_format": 'GTIFF',
                "output_type": None,
                "options": "compress=lzw"}
        raster_image_math.do_compute_primary_production(**args)