def test_get_list_dates_for_dataset(self):
    productcode = 'lsasaf-et'
    productversion = 'undefined'
    subproductcode = '10d30min'
    input_file = self.test_procfunc_dir + os.path.sep + productcode + os.path.sep + subproductcode + \
        os.path.sep + '202004010000_lsasaf-et_10d30min_MSG-satellite-3km_undefined.tif'
    start_date = 202004010000
    end_date = 202004010120
    proc_functions.get_list_dates_for_dataset(productcode, subproductcode, productversion,
                                              start_date, end_date)
    # NB: this only checks that the call above does not raise; neither the
    # returned list nor input_file is used by the assertion.
    self.assertEqual(1, 1)
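# Editorial sketch (hypothetical, not part of the original suite): a stricter
# variant of the test above would assert on the returned dates themselves.
# It assumes get_list_dates_for_dataset returns a sorted list of
# 'YYYYMMDDHHMM' strings for this 30-minute product; check the expected
# values against the actual frequency metadata before enabling it.
#
# def test_get_list_dates_for_dataset_values(self):
#     dates = proc_functions.get_list_dates_for_dataset(
#         'lsasaf-et', '10d30min', 'undefined',
#         202004010000, 202004010120)
#     self.assertEqual('202004010000', dates[0])
#     self.assertTrue(all(d <= '202004010120' for d in dates))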
def my_proc_std_modis_firms(start_date=None, end_date=None, pipe_run=0, pipe_print=3,
                            start_date_stats=None, end_date_stats=None, touch_files_only=False):

    # Create the list of dates -> returns empty if start==end==None
    if start_date is not None and end_date is not None:
        starting_dates = proc_functions.get_list_dates_for_dataset(
            'modis-firms', '1day', 'v6.0', start_date=start_date, end_date=end_date)
    else:
        starting_dates = None

    if start_date_stats is not None and end_date_stats is not None:
        starting_dates_stats = proc_functions.get_list_dates_for_dataset(
            'modis-firms', '10dcount', 'v6.0', start_date=start_date_stats, end_date=end_date_stats)
    else:
        starting_dates_stats = None

    target_mapset = 'SPOTV-Africa-1km'
    touch_files_only = False  # NB: forces the option off, overriding the caller's argument

    args = {'pipeline_run_level': pipe_run,
            'pipeline_printout_level': pipe_print,
            'pipeline_printout_graph_level': 0,
            'prod': 'modis-firms',
            'starting_sprod': '1day',
            'starting_dates': starting_dates,
            'starting_dates_stats': starting_dates_stats,
            'mapset': target_mapset,
            'version': 'v6.0',
            'logfile': 'log-modis-firms.log',
            'update_stats': True,
            'nrt_products': True,
            'touch_files_only': touch_files_only}

    res_queue = None
    proc_lists = processing_std_modis_firms(res_queue, **args)
    print(proc_lists)
def my_proc_std_spi_monthly(pipe_run=0, pipe_print=3, start_date=None, end_date=None,
                            touch_files_only=False):

    # NB: the hard-coded dates below override the arguments passed by the caller
    start_date = '19830301'
    end_date = '19830310'
    starting_dates = proc_functions.get_list_dates_for_dataset(
        'arc2-rain', '1day', '2.0', start_date=start_date, end_date=end_date)
    starting_dates = None  # NB: discards the list just computed -> all existing files are used
    mapset = 'ARC2-Africa-11km'

    args = {'pipeline_run_level': pipe_run,
            'pipeline_printout_level': pipe_print,
            'pipeline_printout_graph_level': 0,
            'prod': 'arc2-rain',
            'starting_sprod': '1mon',
            'starting_dates': starting_dates,
            'mapset': mapset,
            'version': '2.0',
            'logfile': 'log-arc2-rain.log'}

    res_queue = None
    proc_lists = processing_std_spi_monthly(res_queue, **args)
    print(proc_lists)
def my_proc_std_rain_onset(pipe_run=0, pipe_print=3, start_date=None, end_date=None,
                           touch_files_only=False):

    # ---------------------------------------------------------------------
    #   Onset computation
    # ---------------------------------------------------------------------
    # NB: the hard-coded dates below override the arguments passed by the caller
    start_date = '20160901'
    end_date = '20161011'
    starting_dates = proc_functions.get_list_dates_for_dataset(
        'fewsnet-rfe', '10d', '2.0', start_date=start_date, end_date=end_date)

    args = {'pipeline_run_level': pipe_run,
            'pipeline_printout_level': pipe_print,
            'pipeline_printout_graph_level': 0,
            'prod': 'fewsnet-rfe',
            'starting_sprod': '10d',
            'mapset': 'FEWSNET-Africa-8km',
            'version': '2.0',
            'logfile': 'rain-onset',
            'starting_dates': starting_dates}

    res_queue = None
    processing_std_rain_onset(res_queue, **args)
def my_proc_std_lsasaf_et(pipe_run=3, pipe_print=0, start_date=None, end_date=None,
                          touch_files_only=False):

    # Create the list of dates -> returns empty if start==end==None
    if start_date or end_date:
        starting_dates = proc_functions.get_list_dates_for_dataset(
            'lsasaf-et', 'et', 'undefined', start_date=start_date, end_date=end_date)
    else:
        starting_dates = None

    native_mapset = 'MSG-satellite-3km'
    target_mapset = 'SPOTV-Africa-1km'

    args = {'pipeline_run_level': pipe_run,
            'pipeline_printout_level': pipe_print,
            'pipeline_printout_graph_level': 0,
            'prod': 'lsasaf-et',
            'starting_sprod': 'et',
            'starting_dates': starting_dates,
            'native_mapset': native_mapset,
            'mapset': target_mapset,
            'version': 'undefined',
            'logfile': 'log-lsasaf-et.log'}

    res_queue = None
    proc_lists = processing_std_lsasaf_et(res_queue, **args)
    print(proc_lists)
def my_proc_fewsnet_rfe(pipe_run=0, pipe_print=3, start_date=None, end_date=None,
                        touch_files_only=False):

    # Create the list of dates -> returns empty if start==end==None
    if start_date is not None and end_date is not None:
        starting_dates = proc_functions.get_list_dates_for_dataset(
            'fewsnet-rfe', '10d', '2.0', start_date=start_date, end_date=end_date)
    else:
        starting_dates = None

    args = {'pipeline_run_level': pipe_run,
            'pipeline_printout_level': pipe_print,
            'pipeline_printout_graph_level': 0,
            'prod': 'fewsnet-rfe',
            'starting_sprod': '10d',
            'starting_dates': starting_dates,
            'mapset': 'FEWSNET-Africa-8km',
            'version': '2.0',
            'logfile': 'log-fewsnet.log'}

    res_queue = None
    proc_lists = processing_std_precip_stats_only(res_queue, **args)
    print(proc_lists)
def my_proc_std_ndvi(pipe_run=0, pipe_print=3, touch_files_only=False):
    # (pipe_run=0, pipe_print=3, start_date=None, end_date=None, touch_files_only=False):

    productcode = 'vgt-ndvi'
    subproductcode = 'ndv'
    version = 'sv2-pv2.2'
    start_date = '20180101'
    end_date = None

    list_dates = proc_functions.get_list_dates_for_dataset(
        productcode, subproductcode, version, start_date=start_date, end_date=end_date)

    args = {'pipeline_run_level': pipe_run,
            'pipeline_printout_level': pipe_print,
            'pipeline_printout_graph_level': 0,
            'prod': productcode,
            'starting_sprod': subproductcode,
            'mapset': 'SPOTV-Africa-1km',
            'version': version,
            'starting_dates': list_dates,
            'logfile': 'test_processing_ndvi',
            'touch_files_only': touch_files_only}

    # res_queue = Queue()
    res_queue = None
    proc_lists = processing_std_ndvi_prods_only(res_queue, **args)
def test_subprocess_vgt_lai(pipe_run=4, pipe_print=0, touch_files_only=False):

    # Create the list of dates -> returns empty if start==end==None
    start_date = '19990101'
    end_date = '20181221'
    if start_date is not None and end_date is not None:
        starting_dates = proc_functions.get_list_dates_for_dataset(
            'vgt-lai', 'lai', 'V2.0', start_date=start_date, end_date=end_date)
    else:
        starting_dates = None

    args = {'pipeline_run_level': pipe_run,
            'pipeline_printout_level': pipe_print,
            'pipeline_printout_graph_level': 0,
            'prod': 'vgt-lai',
            'starting_sprod': 'lai',
            'mapset': 'SPOTV-Africa-1km',
            'version': 'V2.0',
            'logfile': 'vgt-lai',
            'starting_dates': starting_dates,
            'upsert_db': False,
            'touch_only': touch_files_only}

    res_queue = None
    processing_std_vgt_stats_only(res_queue, **args)
def my_proc_msg_mpe(start_date=None, end_date=None, pipe_run=0, pipe_print=3,
                    start_date_stats=None, end_date_stats=None, touch_files_only=False):

    # Create the list of dates -> returns empty if start==end==None
    if start_date is not None and end_date is not None:
        starting_dates = proc_functions.get_list_dates_for_dataset(
            'msg-mpe', '10dcum', 'undefined', start_date=start_date, end_date=end_date)
    else:
        starting_dates = None

    args = {'pipeline_run_level': pipe_run,
            'pipeline_printout_level': pipe_print,
            'pipeline_printout_graph_level': 0,
            'prod': 'msg-mpe',
            'starting_sprod': '10dcum',
            'starting_dates': starting_dates,
            'mapset': 'SPOTV-Africa-1km',
            'version': 'undefined',
            'logfile': 'ruffus-chirps',
            'touch_only': touch_files_only}

    res_queue = None
    proc_lists = processing_std_msg_mpe(res_queue, **args)
    print(proc_lists)
def my_proc_olci_wrr(start_date=None, end_date=None, pipe_run=0, pipe_print=3,
                     start_date_stats=None, end_date_stats=None, touch_files_only=False):

    # Create the list of dates -> returns empty if start==end==None
    if start_date is not None and end_date is not None:
        # starting_dates = proc_functions.get_list_dates_for_dataset(
        #     'olci-wrr', 'chl-nn', 'V02.0', start_date=start_date, end_date=end_date)
        starting_dates = proc_functions.get_list_dates_for_dataset(
            'olci-wrr', 'chl-oc4me', 'V02.0', start_date=start_date, end_date=end_date)
    else:
        starting_dates = None

    args = {'pipeline_run_level': pipe_run,
            'pipeline_printout_level': pipe_print,
            'pipeline_printout_graph_level': 0,
            'prod': 'olci-wrr',
            'starting_sprod': 'chl-oc4me',
            'starting_dates': starting_dates,
            'mapset': 'SPOTV-Africa-1km',
            'version': 'V02.0',
            'logfile': 'ruffus-chirps'}

    res_queue = None
    proc_lists = processing_std_olci_wrr(res_queue, **args)
    print(proc_lists)
def my_proc_vgt_dmp(pipe_run=0, pipe_print=3, start_date=None, end_date=None,
                    touch_files_only=False):

    # Create the list of dates -> returns empty if start==end==None
    if start_date is not None and end_date is not None:
        starting_dates = proc_functions.get_list_dates_for_dataset(
            'vgt-dmp', 'dmp', 'V2.0', start_date=start_date, end_date=end_date)
    else:
        starting_dates = None

    args = {'pipeline_run_level': pipe_run,
            'pipeline_printout_level': pipe_print,
            'pipeline_printout_graph_level': 0,
            'prod': 'vgt-dmp',
            'starting_sprod': 'dmp',
            'starting_dates': starting_dates,
            'mapset': 'SPOTV-Africa-1km',
            'version': 'V2.0',
            'logfile': 'ruffus-chirps',
            'touch_only': touch_files_only}

    request_queue = Queue()
    proc_lists = processing_std_dmp_all(request_queue, **args)
def test_subprocess_vgt_fapar(pipe_run=4, pipe_print=0, touch_files_only=False):

    start_date = None
    end_date = None
    if start_date is not None and end_date is not None:
        starting_dates = proc_functions.get_list_dates_for_dataset(
            'vgt-fapar', 'fapar', 'V2.0', start_date=start_date, end_date=end_date)
    else:
        starting_dates = None

    args = {'pipeline_run_level': pipe_run,
            'pipeline_printout_level': pipe_print,
            'pipeline_printout_graph_level': 0,
            'prod': 'vgt-fapar',
            'starting_sprod': 'fapar',
            'mapset': 'SPOTV-Africa-1km',
            'version': 'V2.0',
            'starting_dates': starting_dates,
            'logfile': 'vgt-fapar',
            'upsert_db': False,
            'touch_only': touch_files_only}

    res_queue = None
    processing_std_vgt_prods_only(res_queue, **args)
def my_proc_arc2rain_dekad(pipe_run=0, pipe_print=3, start_date=None, end_date=None,
                           upsert_db=False, touch_files_only=False):

    # Create the list of dates -> returns empty if start==end==None
    # NB: the dates list is built from 'chirps-dekad', while the pipeline below
    # runs on 'arc2-rain' with a CHIRP mapset - this looks like a copy-paste
    # leftover and is kept as in the original.
    if start_date is not None and end_date is not None:
        starting_dates = proc_functions.get_list_dates_for_dataset(
            'chirps-dekad', '10d', '2.0', start_date=start_date, end_date=end_date)
    else:
        starting_dates = None

    args = {'pipeline_run_level': pipe_run,
            'pipeline_printout_level': pipe_print,
            'pipeline_printout_graph_level': 0,
            'prod': 'arc2-rain',
            'starting_sprod': '10d',
            'starting_dates': starting_dates,
            'mapset': 'CHIRP-Africa-5km',
            'version': '2.0',
            'logfile': 'ruffus-chirps',
            'upsert_db': upsert_db,
            'touch_only': touch_files_only}

    request_queue = Queue()
    proc_lists = processing_std_precip_stats_only(request_queue, **args)
def processing_merge(pipeline_run_level=0, pipeline_printout_level=0, input_products='',
                     output_product='', mapset=''):

    # Dummy return arguments
    proc_lists = functions.ProcLists()
    list_subprods = proc_lists.list_subprods
    list_subprod_groups = proc_lists.list_subprod_groups

    es2_data_dir = es_constants.processing_dir + os.path.sep

    # Do some checks on the integrity of the inputs

    # Manage output_product data
    out_product_code = output_product[0].productcode
    out_sub_product_code = output_product[0].subproductcode
    out_version = output_product[0].version
    out_mapset = output_product[0].mapsetcode

    out_subdir = functions.set_path_sub_directory(out_product_code, out_sub_product_code, 'Ingest',
                                                  out_version, out_mapset)
    out_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code,
                                                         out_mapset, out_version, ext)
    out_dir = es2_data_dir + out_subdir

    # Check the output product directory
    functions.check_output_dir(out_dir)

    # Loop over the input products:
    for input in input_products:

        # Extract info from input product
        product_code = input.productcode
        sub_product_code = input.subproductcode
        version = input.version
        start_date = input.start_date
        end_date = input.end_date
        product_info = querydb.get_product_out_info_connect(productcode=product_code,
                                                            subproductcode=sub_product_code,
                                                            version=version)
        prod_type = product_info[0].product_type

        in_subdir = functions.set_path_sub_directory(product_code, sub_product_code, prod_type,
                                                     version, out_mapset)
        in_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code,
                                                            out_mapset, version, ext)

        # Create the list of dates -> returns empty if start==end==None
        list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code, version,
                                                               start_date=start_date, end_date=end_date)
        # If list_dates == None, look at all existing files
        if list_dates is None:
            print('To be Done !!!')
        # Otherwise, build list of files from list of dates
        else:
            for my_date in list_dates:
                in_file_path = es2_data_dir + in_subdir + my_date + in_prod_ident
                out_file_path = out_dir + my_date + out_prod_ident
                # Create the link
                functions.create_sym_link(in_file_path, out_file_path, force=False)

    return list_subprods, list_subprod_groups
def generate_parameters_cum():

    starting_files.sort()
    # Convert from string to int (for comparison)
    dekad_start = int(start_season)
    dekad_end = int(end_season)

    # Loop over all input files
    for file_t0 in starting_files:
        # Get current date (in format '19980901')
        date_t0 = functions.get_date_from_path_full(file_t0)
        # Extract from date-string the dekad/year as integer
        dekad_t0 = int(date_t0[4:])
        year_t0 = int(date_t0[0:4])
        in_season = False
        # Check if season goes across two years -> define year1/2
        if dekad_start < dekad_end:
            if dekad_t0 >= dekad_start and dekad_t0 <= dekad_end:
                year_sos = year_t0
                in_season = True
        else:
            if dekad_t0 >= dekad_start:
                year_sos = year_t0
                in_season = True
            if dekad_t0 <= dekad_end:
                year_sos = year_t0 - 1
                in_season = True

        # Detect the end of the season and trigger processing
        if in_season:
            # Define output filename
            output_file = es2_data_dir + subdir_ident_cum + date_t0 + prod_ident_cum
            # Get list of dates from start of season to end of season
            list_dates = proc_functions.get_list_dates_for_dataset(
                prod, starting_sprod, version,
                start_date=str(year_sos) + start_season, end_date=date_t0)
            input_files = []
            missing_file = False
            for ldate in list_dates:
                # Append the file to list if it exists ...
                if os.path.isfile(input_dir + ldate + in_prod_ident):
                    input_files.append(input_dir + ldate + in_prod_ident)
                # ... otherwise raise a warning and break
                else:
                    logger.warning('Missing file for date {0}. Season not computed.'.format(ldate))
                    missing_file = True
                    break
            if not missing_file:
                yield (input_files, output_file)
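# Editorial sketch (standalone, hypothetical helper): the cross-year season
# logic above, isolated for clarity. Dekads are compared as MMDD integers,
# exactly as in generate_parameters_cum(); when dekad_start > dekad_end the
# season crosses the year boundary, and dates up to dekad_end belong to the
# season that started the year before.
def season_start_year(dekad_t0, year_t0, dekad_start, dekad_end):
    """Return the start-of-season year for a date, or None if out of season."""
    if dekad_start < dekad_end:
        # Season contained in a single calendar year
        if dekad_start <= dekad_t0 <= dekad_end:
            return year_t0
    else:
        # Season crosses the year boundary (e.g. Sep -> Apr)
        if dekad_t0 >= dekad_start:
            return year_t0
        if dekad_t0 <= dekad_end:
            return year_t0 - 1
    return None

# e.g. a file dated 11 Jan 2017 in a 0901 -> 0411 season:
# season_start_year(111, 2017, 901, 411) -> 2016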
def my_proc_std_gsod(pipe_run=0, pipe_print=3, start_date=None, end_date=None,
                     touch_files_only=False):

    # NB: the hard-coded dates below override the arguments passed by the caller
    start_date = '20160503'
    end_date = '20160505'
    starting_dates = proc_functions.get_list_dates_for_dataset(
        'gsod-rain', '1dmeas', '1.0', start_date=start_date, end_date=end_date)

    args = {'pipeline_run_level': pipe_run,
            'pipeline_printout_level': pipe_print,
            'pipeline_printout_graph_level': 0,
            'prod': 'gsod-rain',
            'starting_sprod': '1dmeas',
            'mapset': 'SPOTV-SADC-1km',
            'version': '1.0',
            'logfile': 'gsod.log',
            'starting_dates': starting_dates}

    res_queue = None
    processing_std_gsod(res_queue, **args)
def build_date_list_from_datasource(datasource_descr, product_in_info, ingest_mapset):

    dates_list = []
    start_datetime = datetime.datetime.strptime(str(datasource_descr.start_date), "%Y%m%d")
    if datasource_descr.end_date is None:
        end_datetime = datetime.date.today()
    else:
        end_datetime = datetime.datetime.strptime(str(datasource_descr.end_date), "%Y%m%d")

    all_starting_dates = proc_functions.get_list_dates_for_dataset(
        product_in_info.productcode,
        product_in_info.subproductcode,
        product_in_info.version,
        start_date=datasource_descr.start_date,
        end_date=datasource_descr.end_date)

    my_dataset = products.Dataset(product_in_info.productcode, product_in_info.subproductcode,
                                  ingest_mapset, version=product_in_info.version,
                                  from_date=start_datetime, to_date=end_datetime)
    my_dates = my_dataset.get_dates()
    my_formatted_dates = []
    for my_date in my_dates:
        my_formatted_dates.append(my_dataset._frequency.format_date(my_date))

    my_missing_dates = []
    for curr_date in all_starting_dates:
        if curr_date not in my_formatted_dates:
            my_missing_dates.append(curr_date)

    dates_list = sorted(my_missing_dates, reverse=False)
    return dates_list
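# Design note (editorial): the membership test above ('curr_date not in
# my_formatted_dates') scans a list, so the whole loop is O(n*m). For long
# archives an equivalent set-based difference is faster; a minimal sketch,
# assuming both sequences hold date strings in the same format:
#
# formatted = set(my_formatted_dates)
# dates_list = sorted(d for d in all_starting_dates if d not in formatted)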
def processing_merge(pipeline_run_level=0, pipeline_printout_level=0, input_products='',
                     output_product='', mapset='', logfile=None):

    if logfile:
        spec_logger = log.my_logger(logfile)
        spec_logger.info("Entering routine %s" % 'processing_merge')

    # Dummy return arguments
    proc_lists = functions.ProcLists()
    list_subprods = proc_lists.list_subprods
    list_subprod_groups = proc_lists.list_subprod_groups

    es2_data_dir = es_constants.processing_dir + os.path.sep

    # Do some checks on the integrity of the inputs

    # Manage output_product data
    out_product_code = output_product[0].productcode
    out_sub_product_code = output_product[0].subproductcode
    out_version = output_product[0].version
    out_mapset = output_product[0].mapsetcode

    out_subdir = functions.set_path_sub_directory(out_product_code, out_sub_product_code, 'Ingest',
                                                  out_version, out_mapset)
    out_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code,
                                                         out_mapset, out_version, ext)
    out_dir = es2_data_dir + out_subdir

    # Check the output product directory
    functions.check_output_dir(out_dir)

    # Fill the processing list -> some fields to be taken by input products
    output_sprod_group = proc_lists.proc_add_subprod_group("merged")
    output_sprod = proc_lists.proc_add_subprod(out_sub_product_code, "merged", final=False,
                                               descriptive_name='undefined',
                                               description='undefined',
                                               frequency_id='e1dekad',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)

    # Loop over the input products:
    for input in input_products:

        # Extract info from input product
        product_code = input.productcode
        sub_product_code = input.subproductcode
        version = input.version
        start_date = input.start_date
        end_date = input.end_date
        product_info = querydb.get_product_out_info_connect(productcode=product_code,
                                                            subproductcode=sub_product_code,
                                                            version=version)
        prod_type = product_info[0].product_type

        in_subdir = functions.set_path_sub_directory(product_code, sub_product_code, prod_type,
                                                     version, out_mapset)
        in_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code,
                                                            out_mapset, version, ext)

        # Create the list of dates -> returns empty if start==end==None
        list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code, version,
                                                               start_date=start_date, end_date=end_date)
        # If list_dates == None, look at all existing files
        if list_dates is None:
            print('To be Done !!!')
        # Otherwise, build list of files from list of dates
        else:
            for my_date in list_dates:
                in_file_path = es2_data_dir + in_subdir + my_date + in_prod_ident
                out_file_path = out_dir + my_date + out_prod_ident
                # Create the link
                status = functions.create_sym_link(in_file_path, out_file_path, force=False)
                if status == 0 and logfile:
                    spec_logger.info("Merged file %s created" % out_file_path)

    return list_subprods, list_subprod_groups
def loop_processing(dry_run=False):

    # Driver of the process service
    # Reads configuration from the database
    # Creates the pipelines for the active processing
    # Calls the active pipelines with the relevant argument
    # Arguments: dry_run -> if > 0, it triggers pipeline_printout() rather than pipeline_run()
    #                    -> if < 0, it triggers pipeline_printout_graph() rather than pipeline_run()

    # Clean dir with locks
    if os.path.isdir(es_constants.processing_tasks_dir):
        shutil.rmtree(es_constants.processing_tasks_dir)

    logger.info("Entering routine %s" % 'loop_processing')
    echo_query = False
    functions.check_output_dir(es_constants.processing_tasks_dir)

    while True:
        logger.debug("Entering infinite loop")
        # Get all active processing chains from the database.
        active_processing_chains = querydb.get_active_processing_chains()

        # Manage dry_run
        if dry_run:
            pipeline_run_level = 0
            pipeline_printout_level = 3
        else:
            pipeline_run_level = 3
            pipeline_printout_level = 0

        for chain in active_processing_chains:
            logger.debug("Processing Chain N.:%s" % str(chain.process_id))
            derivation_method = chain.derivation_method  # name of the method in the module
            algorithm = chain.algorithm                  # name of the .py module
            mapset = chain.output_mapsetcode
            process_id = chain.process_id

            # Get input products
            input_products = querydb.get_processing_chain_products(chain.process_id, type='input')

            # Case of a 'std_' (i.e. ruffus) processing -> get all info from 1st INPUT and manage dates
            if re.search('^std_.*', algorithm):
                product_code = input_products[0].productcode
                sub_product_code = input_products[0].subproductcode
                version = input_products[0].version
                start_date = input_products[0].start_date
                end_date = input_products[0].end_date

                # Manage the dates
                list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code, version,
                                                                       start_date=start_date, end_date=end_date)
                # Prepare arguments
                args = {'pipeline_run_level': pipeline_run_level,
                        'pipeline_printout_level': pipeline_printout_level,
                        'starting_sprod': sub_product_code,
                        'prod': product_code,
                        'mapset': mapset,
                        'starting_dates': list_dates,
                        'version': version}

            # Case of a non-'std' (i.e. non-ruffus, e.g. merge) processing -> get output products
            # and pass everything to the function
            else:
                output_products = querydb.get_processing_chain_products(chain.process_id, type='output')
                # Prepare arguments
                args = {'pipeline_run_level': pipeline_run_level,
                        'pipeline_printout_level': pipeline_printout_level,
                        'input_products': input_products,
                        'output_product': output_products}

            # Define an id from a combination of fields
            processing_unique_id = 'ID=' + str(process_id) + '_METHOD=' + derivation_method + \
                                   '_ALGO=' + algorithm + '.lock'
            processing_unique_lock = es_constants.processing_tasks_dir + processing_unique_id

            if not os.path.isfile(processing_unique_lock):
                logger.debug("Launching processing for ID: %s" % processing_unique_id)
                open(processing_unique_lock, 'a').close()

                # Define the module name and function()
                module_name = 'processing_' + algorithm
                function_name = 'processing_' + derivation_method
                # Enter the module and walk down to the name of the function() to be executed
                proc_dir = __import__("apps.processing")
                proc_pck = getattr(proc_dir, "processing")
                proc_mod = getattr(proc_pck, module_name)
                proc_func = getattr(proc_mod, function_name)

                # Fork and call the std_precip 'generic' processing
                pid = os.fork()
                if pid == 0:
                    # Call to the processing pipeline
                    [list_subprods, list_subprod_groups] = proc_func(**args)
                    # Simulate longer processing (TEMP)
                    logger.info("Going to sleep for a while - to be removed")
                    time.sleep(50)
                    os.remove(processing_unique_lock)
                    sys.exit(0)
                else:
                    # Here I'm the parent process -> just go on
                    pass
                    # os.wait()
            else:
                logger.debug("Processing already running for ID: %s " % processing_unique_id)

        logger.info("End of the loop ... wait a while")
        time.sleep(5)
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None,
                    starting_dates_stats=None, update_stats=False, nrt_products=True):

    # ---------------------------------------------------------------------
    #   Create lists to store definition of the derived products, and their
    #   groups.
    #
    #   Two starting dates ranges are passed:
    #
    #   starting_dates: range - 1d frequency - for 1day -> 10dcount
    #       Normally not used: only for tests (the number of 1day files is large!)
    #
    #   starting_dates_stats: range - 10d frequency - for 10dcount -> 10dcountmin/max/avg
    #       Used to define a specific range for stats, normally 20030101 -> <prev-year>1221
    #
    #   For the 10d products anomalies (both 1km and 10km) ALL available files
    #   are used for anomaly computation
    # ---------------------------------------------------------------------

    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # ---------------------------------------------------------------------
    #   Define and assign the flags to control the individual derived products
    #   and the groups. NOT to be changed by the User
    # ---------------------------------------------------------------------

    # Set DEFAULTS: all off
    activate_10dcount_comput = 0        # 2.a - 10d count
    activate_10dstats_comput = 0        # 2.b - 10d stats
    activate_10danomalies_comput = 0    # 2.c - 10d anomalies

    activate_10d_10k_comput = 0         # 3.a - 10d on 10km cells
    activate_10d_10k_stats_comput = 0   # 3.b - 10d on 10km statistics
    activate_10d_10k_anom_comput = 0    # 3.c - 10d on 10km anomalies

    # Switch wrt groups - according to options
    if nrt_products:
        activate_10dcount_comput = 1        # 10d count
        activate_10danomalies_comput = 1    # 10d anomalies
        activate_10d_10k_comput = 1         # 10d on 10k
        activate_10d_10k_anom_comput = 1    # 10d on 10km anomalies

    if update_stats:
        activate_10dstats_comput = 1        # 10d stats
        activate_10d_10k_stats_comput = 1   # 10d on 10km statistics

    # Switch wrt single products: not to be changed !!
    # 2.b -> 10d stats
    activate_10dcountavg_comput = 1
    activate_10dcountmin_comput = 1
    activate_10dcountmax_comput = 1
    # 2.c -> 10d anomalies
    activate_10ddiff_comput = 1
    # 3.a -> 10d on 10 km
    activate_10dcount10k_comput = 1
    # 3.b -> 10d on 10 km stats
    activate_10dcount10kavg_comput = 1
    activate_10dcount10kmin_comput = 1
    activate_10dcount10kmax_comput = 1
    # 3.c -> 10d on 10 km anomalies
    activate_10dcount10kdiff_comput = 1
    activate_10dcount10kperc_comput = 1
    activate_10dcount10kratio_comput = 1

    # ---------------------------------------------------------------------
    #   Define the 'grid' file for the 10k count conversion
    #   If it does not exist, disable computation
    # ---------------------------------------------------------------------
    grid_mapset_name = 'SPOTV-Africa-1km'
    # grid_file = '/eStation2/layers/Mask_Africa_SPOTV_10km.tif'
    grid_file = es_constants.es2globals['estation2_layers_dir'] + os.path.sep + 'Mask_Africa_SPOTV_10km.tif'

    if not os.path.isfile(grid_file):
        activate_10d_10k_comput = 0         # 10d on 10km
        activate_10d_10k_anom_comput = 0    # 10d on 10km anomalies
        activate_10d_10k_stats_comput = 0   # 10d on 10km statistics

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    #   Define input files from the starting_sprod and starting_dates arguments
    # ---------------------------------------------------------------------
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)
    # logger.debug('Base data directory is: %s' % es2_data_dir)
    input_dir = es2_data_dir + functions.set_path_sub_directory(prod, starting_sprod, 'Ingest',
                                                                version, mapset)

    # starting_dates -> 1 day
    if starting_dates is not None:
        starting_files_1day = []
        for my_date in starting_dates:
            starting_files_1day.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files_1day = glob.glob(input_dir + "*" + in_prod_ident)

    # ---------------------------------------------------------------------
    #   Derived product: 10dcount
    # ---------------------------------------------------------------------
    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount")
    output_sprod = proc_lists.proc_add_subprod("10dcount", "10dcount", final=False,
                                               descriptive_name='10d Count',
                                               description='Fire Count for dekad',
                                               frequency_id='e1dekad',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)
    out_prod_ident_10dcount = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir_10dcount = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_10dcount():
        # Look for all input files in input_dir, and sort them
        input_files = starting_files_1day
        dekad_list = []
        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)
        dekad_list = sorted(dekad_list)

        # Compute the 'julian' dekad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        for dekad in dekad_list:
            # Exclude the current dekad
            if dekad != dekad_now:
                file_list = []
                my_dekad_str = functions.conv_dekad_2_date(dekad)
                for input_file in input_files:
                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
                    mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd[0:8])
                    if mydekad_nbr == dekad:
                        file_list.append(input_file)
                output_file = es_constants.processing_dir + output_subdir_10dcount + os.path.sep + \
                    my_dekad_str + out_prod_ident_10dcount
                yield (file_list, output_file)

    @active_if(activate_10dcount_comput)
    @files(generate_parameters_10dcount)
    def std_fire_10dcount(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file,
                "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_cumulate(**args)

    # ---------------------------------------------------------------------
    #   Derived product: 10dcountavg
    # ---------------------------------------------------------------------
    if starting_dates_stats is not None:
        files_10dcount_4stats = []
        for my_date in starting_dates_stats:
            files_10dcount_4stats.append(es2_data_dir + output_subdir_10dcount + my_date +
                                         out_prod_ident_10dcount)
    else:
        files_10dcount_4stats = es2_data_dir + output_subdir_10dcount + "*" + out_prod_ident_10dcount

    output_sprod_group = proc_lists.proc_add_subprod_group("10dstats")
    output_sprod = proc_lists.proc_add_subprod("10dcountavg", "10dstats", final=False,
                                               descriptive_name='10d Fire Average',
                                               description='Average fire for dekad',
                                               frequency_id='e1dekad',
                                               date_format='MMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident]

    @active_if(activate_10dstats_comput, activate_10dcountavg_comput)
    @collate(files_10dcount_4stats, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcount)
    def std_fire_10dcountavg(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF',
                "options": "compress=lzw", 'output_type': 'Float32', 'input_nodata': -32768}
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF',
        #         "options": "compress=lzw", 'output_type': 'Float32', 'input_nodata': 0}
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    #   Derived product: 10dcountmin
    # ---------------------------------------------------------------------
    output_sprod = proc_lists.proc_add_subprod("10dcountmin", "10dstats", final=False,
                                               descriptive_name='10d Fire Minimum',
                                               description='Minimum Fire for dekad',
                                               frequency_id='e1dekad',
                                               date_format='MMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident]

    @active_if(activate_10dstats_comput, activate_10dcountmin_comput)
    @collate(files_10dcount_4stats, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcountavg)
    def std_fire_10dcountmin(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # The coded value (nodata=0) leads to the wrong result
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF',
                "options": "compress=lzw", "input_nodata": -32768}
        raster_image_math.do_min_image(**args)

    # ---------------------------------------------------------------------
    #   Derived product: 10dcountmax
    # ---------------------------------------------------------------------
    output_sprod = proc_lists.proc_add_subprod("10dcountmax", "10dstats", final=False,
                                               descriptive_name='10d Maximum',
                                               description='Maximum fire count for dekad',
                                               frequency_id='e1dekad',
                                               date_format='MMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident]

    @active_if(activate_10dstats_comput, activate_10dcountmax_comput)
    @collate(files_10dcount_4stats, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcountmin)
    def std_fire_10dcountmax(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file,
                "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_max_image(**args)

    # ---------------------------------------------------------------------
    #   Derived product: 10dcountdiff
    # ---------------------------------------------------------------------
    # Define the input files on the basis of the 'starting_dates' (not 'starting_dates_stats')
    if starting_dates is not None:
        files_10dcount_4anom = []
        use_dates_10dcount = proc_functions.get_list_dates_for_dataset(
            prod, '10dcount', version, start_date=starting_dates[0], end_date=starting_dates[-1])
        for my_date in use_dates_10dcount:
            files_10dcount_4anom.append(es2_data_dir + output_subdir_10dcount + my_date +
                                        out_prod_ident_10dcount)
    else:
        files_10dcount_4anom = glob.glob(es2_data_dir + output_subdir_10dcount + "*" +
                                         out_prod_ident_10dcount)

    output_sprod_group = proc_lists.proc_add_subprod_group("10danomalies")
    output_sprod = proc_lists.proc_add_subprod("10dcountdiff", "10danomalies", final=False,
                                               descriptive_name='10d Absolute Difference',
                                               description='10d Absolute Difference vs. LTA',
                                               frequency_id='e1dekad',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "10dcountavg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    # @follows(std_fire_10dcountavg)
    @active_if(activate_10danomalies_comput, activate_10ddiff_comput)
    @transform(files_10dcount_4anom, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    @follows(std_fire_10dcountmax)
    def std_fire_10dcountdiff(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF',
        #         "options": "compress=lzw", 'output_type': 'Float32', 'input_nodata': -32768}
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF',
                "options": "compress=lzw", 'output_type': 'Float32',
                'input_nodata': -32768, 'output_nodata': -32768}
        raster_image_math.do_oper_subtraction(**args)

    # ---------------------------------------------------------------------
    #   Derived product: 10dcount10k
    # ---------------------------------------------------------------------
    # NB: target_mapset_name was commented out in the original source, but it is
    # used throughout the 10km tasks below; it is restored here.
    target_mapset_name = 'SPOTV-Africa-10km'
    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount10k")
    output_sprod_10dcount10k = proc_lists.proc_add_subprod("10dcount10k", "10dcount10k", final=False,
                                                           descriptive_name='10d Gridded at 10 km',
                                                           description='10d Count Gridded at 10 km',
                                                           frequency_id='e1dekad',
                                                           date_format='YYYYMMDD',
                                                           masked=False,
                                                           timeseries_role='10d',
                                                           active_default=True)
    out_prod_ident_10dcount10k = functions.set_path_filename_no_date(
        prod, output_sprod_10dcount10k, target_mapset_name, version, ext)
    output_subdir_10dcount10k = functions.set_path_sub_directory(
        prod, output_sprod_10dcount10k, 'Derived', version, target_mapset_name)

    # Starting files + avg
    formatter_in = "(?P<YYYYMMDD>[0-9]{8})" + out_prod_ident_10dcount
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir_10dcount10k + "{YYYYMMDD[0]}" + \
                    out_prod_ident_10dcount10k

    @active_if(activate_10d_10k_comput, activate_10dcount10k_comput)
    @transform(files_10dcount_4anom, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcountdiff)
    def std_fire_10dcount10k(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        tmpdir = tempfile.mkdtemp(prefix=__name__, suffix='_' + os.path.basename(output_file),
                                  dir=es_constants.base_tmp_dir)
        # Temporary (not masked) file
        output_file_temp = tmpdir + os.path.sep + os.path.basename(output_file)
        input_mapset_name = mapset
        operation = 'sum'

        args = {"input_file": input_file, "grid_file": grid_file, "output_file": output_file_temp,
                "operation": operation, "input_mapset_name": input_mapset_name,
                "grid_mapset_name": grid_mapset_name, "output_format": None,
                'nodata': -32768, "options": "compress=lzw", "output_type": 'Int16'}
        raster_image_math.do_stats_4_raster(**args)

        args = {"inputfile": output_file_temp, "output_file": output_file,
                "native_mapset_name": grid_mapset_name, "target_mapset_name": target_mapset_name}
        raster_image_math.do_reproject(**args)

        shutil.rmtree(tmpdir)

    # ---------------------------------------------------------------------
    #   Derived product: 10dcount10kavg
    # ---------------------------------------------------------------------
    if starting_dates_stats is not None:
        files_10dcount10k_4stats = []
        for my_date in starting_dates_stats:
            files_10dcount10k_4stats.append(es2_data_dir + output_subdir_10dcount10k + my_date +
                                            out_prod_ident_10dcount10k)
    else:
        files_10dcount10k_4stats = es2_data_dir + output_subdir_10dcount10k + "*" + out_prod_ident_10dcount10k

    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount10kstats")
    output_sprod = proc_lists.proc_add_subprod("10dcount10kavg", "10dcount10kstats", final=False,
                                               descriptive_name='10d Fire count 10km Average',
                                               description='10d Fire count 10km Average',
                                               frequency_id='e1dekad',
                                               date_format='MMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, target_mapset_name, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, target_mapset_name)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount10k
    formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident]

    @active_if(activate_10d_10k_stats_comput, activate_10dcount10kavg_comput)
    @collate(files_10dcount10k_4stats, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcount10k)
    def std_fire_10dcount10kavg(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF',
                "options": "compress=lzw", 'output_type': 'Float32', 'input_nodata': -32768}
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF',
        #         "options": "compress=lzw", 'output_type': 'Float32', 'input_nodata': 0}
        raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    #   Derived product: 10dcount10kmin
    # ---------------------------------------------------------------------
    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount10kstats")
    output_sprod = proc_lists.proc_add_subprod("10dcount10kmin", "10dcount10kstats", final=False,
                                               descriptive_name='10d Fire count 10km minimum',
                                               description='10d Fire count 10km minimum',
                                               frequency_id='e1dekad',
                                               date_format='MMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, target_mapset_name, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, target_mapset_name)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount10k
    formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident]

    @active_if(activate_10d_10k_stats_comput, activate_10dcount10kmin_comput)
    @collate(files_10dcount10k_4stats, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcount10kavg)
    def std_fire_10dcount10kmin(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF',
                "options": "compress=lzw", 'output_type': 'Int16', 'input_nodata': -32768}
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF',
        #         "options": "compress=lzw", 'output_type': 'Int16', 'input_nodata': 0}
        raster_image_math.do_min_image(**args)

    # ---------------------------------------------------------------------
    #   Derived product: 10dcount10kmax
    # ---------------------------------------------------------------------
    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount10kstats")
    output_sprod = proc_lists.proc_add_subprod("10dcount10kmax", "10dcount10kstats", final=False,
                                               descriptive_name='10d Fire count 10km maximum',
                                               description='10d Fire count 10km maximum',
                                               frequency_id='e1dekad',
                                               date_format='MMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, target_mapset_name, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, target_mapset_name)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount10k
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident

    @active_if(activate_10d_10k_stats_comput, activate_10dcount10kmax_comput)
    @collate(files_10dcount10k_4stats, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcount10kmin)
    def std_fire_10dcount10kmax(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF',
                "options": "compress=lzw", 'output_type': 'Int16', 'input_nodata': -32768}
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF',
        #         "options": "compress=lzw", 'output_type': 'Int16', 'input_nodata': 0}
        raster_image_math.do_max_image(**args)

    # ---------------------------------------------------------------------
    #   Derived product: 10dcount10kdiff
    # ---------------------------------------------------------------------
    # Define the input files for conversion to 10k on the basis of the
    # 'starting_dates' (not 'starting_dates_stats')
    if starting_dates is not None:
        files_10dcount10k_4anom = []
        use_dates_10dcount10k = proc_functions.get_list_dates_for_dataset(
            prod, '10dcount10k', version, start_date=starting_dates[0], end_date=starting_dates[-1])
        for my_date in use_dates_10dcount10k:
            files_10dcount10k_4anom.append(es2_data_dir + output_subdir_10dcount10k + my_date +
                                           out_prod_ident_10dcount10k)
    else:
        files_10dcount10k_4anom = glob.glob(es2_data_dir + output_subdir_10dcount10k + "*" +
                                            out_prod_ident_10dcount10k)

    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount10kanomalies")
    output_sprod = proc_lists.proc_add_subprod("10dcount10kdiff", "10dcount10kanomalies", final=False,
                                               descriptive_name='10d 10 km Absolute Difference',
                                               description='10d 10 km Absolute Difference vs. LTA',
                                               frequency_id='e1dekad',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, target_mapset_name, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, target_mapset_name)

    # Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount10k
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "10dcount10kavg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, target_mapset_name,
                                                                version, ext)
    ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version,
                                                        target_mapset_name)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    @active_if(activate_10d_10k_anom_comput, activate_10dcount10kdiff_comput)
    @transform(files_10dcount10k_4anom, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    @follows(std_fire_10dcount10kmax)
    def std_fire_10dcount10kdiff(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF',
                "options": "compress=lzw", 'output_type': 'Float32',
                'input_nodata': -32768, 'output_nodata': -32768}
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF',
        #         "options": "compress=lzw", 'output_type': 'Float32', 'input_nodata': -32768}
        raster_image_math.do_oper_subtraction(**args)

    # ---------------------------------------------------------------------
    #   Derived product: 10dcount10kperc
    # ---------------------------------------------------------------------
    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount10kanomalies")
    output_sprod = proc_lists.proc_add_subprod("10dcount10kperc", "10dcount10kanomalies", final=False,
                                               descriptive_name='10d 10 km Percent Difference',
                                               description='10d 10 km Percent Difference vs. LTA',
                                               frequency_id='e1dekad',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, target_mapset_name, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, target_mapset_name)

    # Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount10k
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "10dcount10kavg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, target_mapset_name,
                                                                version, ext)
    ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version,
                                                        target_mapset_name)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    @active_if(activate_10d_10k_anom_comput, activate_10dcount10kperc_comput)
    @transform(files_10dcount10k_4anom, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    @follows(std_fire_10dcount10kdiff)
    def std_fire_10dcount10kperc(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # args = {"input_file": input_file[0], "avg_file": input_file[1], "output_file": output_file,
        #         "output_format": 'GTIFF', "options": "compress=lzw", 'output_type': 'Float32',
        #         'input_nodata': -32768}
        args = {"input_file": input_file[0], "avg_file": input_file[1], "output_file": output_file,
                "output_format": 'GTIFF', "options": "compress=lzw", 'output_type': 'Float32',
                'input_nodata': -32768, 'output_nodata': -32768}
        raster_image_math.do_compute_perc_diff_vs_avg(**args)

    # ---------------------------------------------------------------------
    #   Derived product: 10dcount10kratio
    # ---------------------------------------------------------------------
    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount10kanomalies")
    output_sprod = proc_lists.proc_add_subprod("10dcount10kratio", "10dcount10kanomalies", final=False,
                                               descriptive_name='10d 10 km Ratio with AVG',
                                               description='10d 10 km Ratio with LTA AVG',
                                               frequency_id='e1dekad',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, target_mapset_name, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, target_mapset_name)

    # Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount10k
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "10dcount10kavg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, target_mapset_name,
                                                                version, ext)
    ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version,
                                                        target_mapset_name)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    @active_if(activate_10d_10k_anom_comput, activate_10dcount10kratio_comput)
    @transform(files_10dcount10k_4anom, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    @follows(std_fire_10dcount10kperc)
    def std_fire_10dcount10kratio(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF',
        #         "options": "compress=lzw", 'output_type': 'Float32', 'input_nodata': -32768}
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF',
                "options": "compress=lzw", 'output_type': 'Float32',
                'input_nodata': -32768, 'output_nodata': -32768}
        raster_image_math.do_oper_division_perc(**args)

    # End of pipeline definition
    return proc_lists
def loop_processing(dry_run=False, serialize=False):

    # Driver of the process service
    # Reads configuration from the database
    # Creates the pipelines for the active processing
    # Calls the active pipelines with the relevant argument
    # Arguments: dry_run -> if > 0, it triggers pipeline_printout() rather than pipeline_run()
    #                    -> if < 0, it triggers pipeline_printout_graph() rather than pipeline_run()
    #            serialize -> False (default): detach the process and work in parallel
    #                      -> True: do NOT detach processes and work in series (mainly for debugging)

    # Clean dir with locks
    if os.path.isdir(es_constants.processing_tasks_dir):
        shutil.rmtree(es_constants.processing_tasks_dir)

    logger.info("Entering routine %s" % 'loop_processing')
    echo_query = False
    functions.check_output_dir(es_constants.processing_tasks_dir)

    while True:
        logger.debug("Entering infinite loop")
        # Get all active processing chains from the database.
        active_processing_chains = querydb.get_active_processing_chains()

        # Manage dry_run
        if dry_run:
            pipeline_run_level = 0
            pipeline_printout_level = 3
        else:
            pipeline_run_level = 3
            pipeline_printout_level = 0

        for chain in active_processing_chains:
            logger.debug("Processing Chain N.:%s" % str(chain.process_id))
            derivation_method = chain.derivation_method  # name of the method in the module
            algorithm = chain.algorithm                  # name of the .py module
            mapset = chain.output_mapsetcode
            process_id = chain.process_id

            # Get input products
            input_products = querydb.get_processing_chain_products(chain.process_id, type='input')
            product_code = input_products[0].productcode
            sub_product_code = input_products[0].subproductcode
            version = input_products[0].version

            # Get product metadata for output products (from first input)
            input_product_info = querydb.get_product_out_info(productcode=product_code,
                                                              subproductcode=sub_product_code,
                                                              version=version)

            # Case of a 'std_' (i.e. ruffus with 1 input) processing -> get all info from 1st INPUT
            # and manage dates
            if re.search('^std_.*', algorithm):
                start_date = input_products[0].start_date
                end_date = input_products[0].end_date

                # Manage the dates
                list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code, version,
                                                                       start_date=start_date, end_date=end_date)
                # Prepare arguments
                args = {'pipeline_run_level': pipeline_run_level,
                        'pipeline_printout_level': pipeline_printout_level,
                        'starting_sprod': sub_product_code,
                        'prod': product_code,
                        'mapset': mapset,
                        'starting_dates': list_dates,
                        'version': version}

            # Case of no 'std' (e.g. merge processing) -> get output products and pass everything to function
            else:
                output_products = querydb.get_processing_chain_products(chain.process_id, type='output')
                # Prepare arguments
                args = {'pipeline_run_level': pipeline_run_level,
                        'pipeline_printout_level': pipeline_printout_level,
                        'input_products': input_products,
                        'output_product': output_products}

            # Define an id from a combination of fields
            processing_unique_id = 'ID=' + str(process_id) + '_METHOD=' + derivation_method + \
                                   '_ALGO=' + algorithm + '.lock'
            processing_unique_lock = es_constants.processing_tasks_dir + processing_unique_id

            if not os.path.isfile(processing_unique_lock):
                logger.debug("Launching processing for ID: %s" % processing_unique_id)
                open(processing_unique_lock, 'a').close()

                # Define the module name and function()
                module_name = 'processing_' + algorithm
                function_name = 'processing_' + derivation_method
                # Enter the module and walk down to the name of the function() to be executed
                proc_dir = __import__("apps.processing")
                proc_pck = getattr(proc_dir, "processing")
                proc_mod = getattr(proc_pck, module_name)
                proc_func = getattr(proc_mod, function_name)

                # Fork and call the std_precip 'generic' processing
                if serialize == False:
                    pid = os.fork()
                    if pid == 0:
                        # Here I'm the child process -> call to the processing pipeline
                        proc_lists = proc_func(**args)
                        # Upsert database
                        upsert_database(process_id, product_code, version, mapset, proc_lists,
                                        input_product_info)
                        # Simulate longer processing (TEMP)
                        logger.info("Going to sleep for a while - to be removed")
                        time.sleep(2)
                        logger.info("Waking-up now, and removing the .lock")
                        os.remove(processing_unique_lock)
                        sys.exit(0)
                    else:
                        # Here I'm the parent process -> just go on ..
                        pass
                # Do NOT detach process (work in series)
                else:
                    proc_lists = proc_func(**args)
                    logger.info("Going to sleep for a while - to be removed")
                    # Upsert database
                    upsert_database(process_id, product_code, version, mapset, proc_lists,
                                    input_product_info)
                    time.sleep(2)
                    logger.info("Waking-up now, and removing the .lock")
                    os.remove(processing_unique_lock)
            else:
                logger.debug("Processing already running for ID: %s " % processing_unique_id)

        # logger.info("End of the loop ... wait a while")
        time.sleep(5)
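# Design note (editorial): the fork + lock-file pattern above allows at most
# one concurrent run per processing chain while the driver keeps looping over
# the other chains. A minimal standalone sketch of the same idea (hypothetical
# helper, Unix-only because of os.fork):
import os
import sys

def run_locked(lock_path, func, **kwargs):
    # Skip if a run for this chain is already in progress
    if os.path.isfile(lock_path):
        return
    open(lock_path, 'a').close()
    pid = os.fork()
    if pid == 0:
        # Child: do the work, always release the lock, then exit
        try:
            func(**kwargs)
        finally:
            os.remove(lock_path)
        sys.exit(0)
    # Parent: return immediately and move on to the next chain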
def processing_merge(pipeline_run_level=0, pipeline_printout_level=0, input_products='',
                     output_product='', mapset=''):

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # Do some checks on the integrity of the inputs

    # Manage output_product data
    out_product_code = output_product[0].productcode
    out_sub_product_code = output_product[0].subproductcode
    out_version = output_product[0].version
    out_mapset = output_product[0].mapsetcode

    out_subdir = functions.set_path_sub_directory(out_product_code, out_sub_product_code, 'Ingest',
                                                  out_version, out_mapset)
    out_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code,
                                                         out_mapset, out_version, ext)
    out_dir = es2_data_dir + out_subdir

    # Check the output product directory
    functions.check_output_dir(out_dir)

    # Loop over the input products:
    for input in input_products:

        # Extract info from input product
        product_code = input.productcode
        sub_product_code = input.subproductcode
        version = input.version
        start_date = input.start_date
        end_date = input.end_date
        product_info = querydb.get_product_out_info(productcode=product_code,
                                                    subproductcode=sub_product_code,
                                                    version=version)
        prod_type = product_info[0].product_type

        in_subdir = functions.set_path_sub_directory(product_code, sub_product_code, prod_type,
                                                     version, out_mapset)
        in_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code,
                                                            out_mapset, version, ext)

        # Create the list of dates -> returns empty if start==end==None
        list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code, version,
                                                               start_date=start_date, end_date=end_date)
        # If list_dates == None, look at all existing files
        if list_dates is None:
            print('To be Done !!!')
        # Otherwise, build list of files from list of dates
        else:
            for my_date in list_dates:
                in_file_path = es2_data_dir + in_subdir + my_date + in_prod_ident
                out_file_path = out_dir + my_date + out_prod_ident
                # Create the link
                print(in_file_path)
                print(out_file_path)
                functions.create_sym_link(in_file_path, out_file_path, force=False)
def loop_processing(dry_run=False, serialize=False, test_one_product=None):

    # Driver of the processing service:
    #  - reads the configuration from the database
    #  - creates the pipelines for the active processing chains
    #  - calls the active pipelines with the relevant arguments
    # Arguments: dry_run   -> True: trigger pipeline_printout() rather than pipeline_run()
    #            serialize -> False (default): detach the processes and work in parallel
    #                         True: do NOT detach the processes and work in series (mainly for debugging)
    #            test_one_product -> if set, process only the chain with that process_id

    # Clean the directory with the locks at restart
    if os.path.isdir(es_constants.processing_tasks_dir):
        shutil.rmtree(es_constants.processing_tasks_dir)

    logger.info("Entering routine %s" % 'loop_processing')
    functions.check_output_dir(es_constants.processing_tasks_dir)

    # Read the sleep time (used by each processing chain)
    sleep_time = es_constants.processing_sleep_time_sec

    while True:
        logger.debug("Entering infinite loop")

        # Get all the active processing chains from the database
        active_processing_chains = querydb.get_active_processing_chains()

        # Manage dry_run
        if dry_run:
            pipeline_run_level = 0
            pipeline_printout_level = 3
        else:
            pipeline_run_level = 3
            pipeline_printout_level = 0

        logger.debug("Pipeline run level: %i" % pipeline_run_level)
        logger.debug("Pipeline printout level: %i" % pipeline_printout_level)

        for chain in active_processing_chains:

            derivation_method = chain.derivation_method    # name of the function in the module
            algorithm = chain.algorithm                    # name of the .py module
            mapset = chain.output_mapsetcode
            process_id = chain.process_id

            # When testing a single chain, skip all the others
            if test_one_product and process_id != test_one_product:
                continue

            # Get the input products
            input_products = querydb.get_processing_chain_products(chain.process_id, type='input')
            product_code = input_products[0].productcode
            sub_product_code = input_products[0].subproductcode
            version = input_products[0].version
            native_mapset = input_products[0].mapsetcode

            logger.info("Algorithm %s applied to [%s]/[%s]" % (str(algorithm), str(product_code), str(sub_product_code)))

            # Get the product metadata for the output products (from the first input)
            input_product_info = querydb.get_product_out_info(productcode=product_code,
                                                              subproductcode=sub_product_code,
                                                              version=version)

            # Define a standard logfile associated to the processing chain
            processing_unique_id = 'ID=' + str(process_id) + '_PROD=' + product_code + '_METHOD=' \
                                   + derivation_method + '_ALGO=' + algorithm
            logfile = 'apps.processing.' + processing_unique_id

            # Case of a 'std_' processing (i.e. ruffus with 1 input)
            # -> get all the info from the 1st INPUT and manage the dates
            if re.search('^std_.*', algorithm):

                logger.debug("Processing chain is of standard type")

                # Define the dates interval from the input product
                start_date = input_products[0].start_date
                end_date = input_products[0].end_date

                # Manage the dates
                list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code, version,
                                                                       start_date=start_date, end_date=end_date)

                # Prepare the arguments
                args = {'pipeline_run_level': pipeline_run_level,
                        'pipeline_printout_level': pipeline_printout_level,
                        'starting_sprod': sub_product_code,
                        'prod': product_code,
                        'mapset': mapset,
                        'starting_dates': list_dates,
                        'version': version,
                        'logfile': logfile}
                        # 'native_mapset': native_mapset}

                logger.debug('RL:{pipeline_run_level}; PL:{pipeline_printout_level}, prod:{prod}, '
                             'sprod:{starting_sprod}, mapset:{mapset}, dates:{starting_dates}, '
                             'version:{version}'.format(**args))

                # Define an id from a combination of fields
                processing_unique_lock = es_constants.processing_tasks_dir + processing_unique_id + '.lock'

                # Check the processing chain is not locked
                if not os.path.isfile(processing_unique_lock):

                    # # Perform a sanity check on the output files
                    # processing_base_directory = es_constants.es2globals['processing_dir'] + \
                    #                             os.path.sep + product_code + \
                    #                             os.path.sep + version + \
                    #                             os.path.sep + mapset + os.path.sep + 'derived'
                    #
                    # proc_functions.clean_corrupted_files(processing_base_directory, dry_run=True)

                    open(processing_unique_lock, 'a').close()
                    logger.debug("Unique lock created: %s" % processing_unique_id)

                    # Define the module name and function()
                    module_name = 'processing_' + algorithm
                    function_name = 'processing_' + derivation_method

                    # Enter the module and walk down to the function() to be executed
                    proc_dir = __import__("apps.processing")
                    try:
                        proc_pck = getattr(proc_dir, "processing")
                    except AttributeError:
                        logger.error("Error in loading module apps.processing.processing")
                        return
                    try:
                        proc_mod = getattr(proc_pck, module_name)
                    except AttributeError:
                        logger.error("Error in loading module [%s]" % module_name)
                        return
                    try:
                        proc_func = getattr(proc_mod, function_name)
                    except AttributeError:
                        logger.error("Error in loading algorithm [%s] for module [%s]" % (function_name, module_name))
                        return

                    # Check the serialize option
                    if not serialize:
                        # Call the processing pipeline in a detached process
                        logger.debug("Launching the pipeline")
                        # proc_lists = proc_func(**args)
                        results_queue = Queue()
                        p = Process(target=proc_func, args=(results_queue,), kwargs=args)
                        # p.daemon = True
                        logger.debug("Before starting the process .. %i", p.is_alive())
                        p.start()
                        logger.debug("After start .. %i", p.is_alive())
                        # proc_lists = results_queue.get()
                        p.join()
                        logger.debug("After join .. %i", p.is_alive())

                        # Sleep time read from the processing configuration
                        time.sleep(float(sleep_time))
                        logger.debug("Execution finished - removing the lock")
                        try:
                            os.remove(processing_unique_lock)
                        except OSError:
                            logger.warning("Lock not removed: %s" % processing_unique_lock)

                    # Do NOT detach the process (work in series)
                    else:
                        logger.info("Work in series - do not detach the process")
                        results_queue = Queue()
                        proc_lists = proc_func(results_queue, **args)
                        os.remove(processing_unique_lock)
                        time.sleep(float(sleep_time))

                else:
                    logger.debug("Lock already exists: %s" % processing_unique_id)

            # Case of a non-'std' processing (e.g. merge processing, or more than 1 input)
            # -> get the output products and pass everything to the function
            else:
                output_products = querydb.get_processing_chain_products(chain.process_id, type='output')

                # Prepare the arguments
                args = {'pipeline_run_level': pipeline_run_level,
                        'pipeline_printout_level': pipeline_printout_level,
                        'input_products': input_products,
                        'output_product': output_products,
                        'logfile': logfile}

                # Define an id from a combination of fields
                processing_unique_id = 'ID=' + str(process_id) + '_METHOD=' + derivation_method \
                                       + '_ALGO=' + algorithm + '.lock'
                processing_unique_lock = es_constants.processing_tasks_dir + processing_unique_id

                if not os.path.isfile(processing_unique_lock):
                    logger.debug("Launching processing for ID: %s" % processing_unique_id)
                    open(processing_unique_lock, 'a').close()

                    # Define the module name and function()
                    module_name = 'processing_' + algorithm
                    function_name = 'processing_' + derivation_method

                    # Enter the module and walk down to the function() to be executed
                    proc_dir = __import__("apps.processing")
                    proc_pck = getattr(proc_dir, "processing")
                    proc_mod = getattr(proc_pck, module_name)
                    proc_func = getattr(proc_mod, function_name)

                    if re.search('.*merge.*', algorithm):
                        logger.debug("Processing chain is of merge type")
                        # Do NOT detach the process (work in series)
                        proc_lists = proc_func(**args)
                        time.sleep(float(sleep_time))
                        logger.info("Waking up now, and removing the .lock")
                        os.remove(processing_unique_lock)
                    else:
                        logger.info("Processing chain is of more-inputs type (e.g. modis-pp)")
                        # 'Detach' the process to avoid the ruffus exception 'error_duplicate_task_name'
                        results_queue = Queue()
                        p = Process(target=proc_func, args=(results_queue,), kwargs=args)
                        p.start()
                        p.join()

                        # Sleep time read from the processing configuration
                        time.sleep(float(sleep_time))
                        logger.debug("Execution finished - removing the lock")
                        try:
                            os.remove(processing_unique_lock)
                        except OSError:
                            logger.warning("Lock not removed: %s" % processing_unique_lock)
                else:
                    logger.debug("Processing already running for ID: %s" % processing_unique_id)

        logger.info("End of the loop ... wait a while")
        time.sleep(1)
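# loop_processing() never returns (infinite while True); a minimal sketch of a
# debugging entry point, assuming the module is run directly (the process_id
# passed to test_one_product is hypothetical):

if __name__ == '__main__':
    # Dry-run, serialized, restricted to a single chain so one pipeline can be
    # inspected without detaching processes
    loop_processing(dry_run=True, serialize=True, test_one_product=100)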