def generate_parameters_1dcum():
    # Look for all input files in input_dir, and sort them
    if starting_dates is not None:
        input_files = []
        for my_date in starting_dates:
            input_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = input_dir + "*" + in_prod_ident
        input_files = glob.glob(starting_files)

    logger.debug("starting_files %s" % input_files)

    day_list = []
    # Create unique list of all days (as 'YYYYMMDD')
    for input_file in input_files:
        basename = os.path.basename(input_file)
        mydate = functions.get_date_from_path_filename(basename)
        mydate_yyyymmdd = str(mydate)[0:8]
        if mydate_yyyymmdd not in day_list:
            day_list.append(mydate_yyyymmdd)

    day_list = sorted(day_list)

    # Compute the 'julian' dekad for the current day
    today = datetime.date.today()
    today_str = today.strftime('%Y%m%d')
    dekad_now = functions.conv_date_2_dekad(today_str)

    for myday in day_list:
        # Exclude the current day
        if myday != today_str:
            file_list = []
            for input_file in input_files:
                basename = os.path.basename(input_file)
                # Date is in format YYYYMMDDhhmm
                mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
                if day_time is None:
                    # Append files for myday
                    if mydate_yyyymmdd[0:8] == myday:
                        file_list.append(input_file)
                else:
                    # Append files in time range myday+hhmm |-| (myday+1)+hhmm
                    if int(mydate_yyyymmdd) >= int(myday) * 10000 + int(day_time) and \
                       int(mydate_yyyymmdd) < (int(myday) + 1) * 10000 + int(day_time):
                        file_list.append(input_file)

            output_file = es_constants.processing_dir + output_subdir_1dcum + os.path.sep + \
                str(int(myday) * 10000 + int(day_time)) + out_prod_ident_1dcum
            file_list = sorted(file_list)
            # Check here the number of missing files (for optimization)
            if len(file_list) > 86:
                yield (file_list, output_file)
def generate_parameters_1yearcum():
    n_mon = 12
    max_missing = 35

    month_list = []
    # Create unique list of all months (as 'YYYYMM')
    for input_file in input_files:
        basename = os.path.basename(input_file)
        mydate = functions.get_date_from_path_filename(basename)
        mymonth_yyyymm = str(mydate)[0:6]
        if mymonth_yyyymm not in month_list:
            month_list.append(mymonth_yyyymm)

    month_list = sorted(month_list)

    # Compute the current month
    today = datetime.date.today()
    today_mon = today.strftime('%Y%m')

    for month in month_list:
        # Exclude the current month
        if month != today_mon:
            file_list = []
            # Compute first/last date for the current interval
            first_day_this_month = datetime.date(int(month[0:4]), int(month[4:6]), 1)
            first_day_next_month = first_day_this_month + relativedelta(months=+1)
            first_day_n_months_before = first_day_this_month + relativedelta(months=-n_mon + 1)

            delta_nmon = first_day_next_month - first_day_n_months_before
            expected_days = delta_nmon.days

            for input_file in input_files:
                basename = os.path.basename(input_file)
                mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
                mydate = datetime.date(int(mydate_yyyymmdd[0:4]), int(mydate_yyyymmdd[4:6]),
                                       int(mydate_yyyymmdd[6:8]))
                if first_day_n_months_before <= mydate < first_day_next_month:
                    file_list.append(input_file)

            if len(file_list) >= expected_days - max_missing:
                output_file = es_constants.processing_dir + output_subdir_1yearcum + os.path.sep + \
                    month + '01' + out_prod_ident_1yearcum
                yield (file_list, output_file)
            else:
                print('Too many missing files for 1yearcum, period until: {0}'.format(month))
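# Illustrative window for the function above (a sketch, not part of the chain):
# for month = '201512' the interval runs from 2015-01-01 (n_mon - 1 = 11 months
# before) up to, but not including, 2016-01-01, i.e. expected_days = 365; with
# max_missing = 35 the yearly cumulate is produced only if at least 330 daily
# files are found.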
def generate_parameters_3moncum():
    # Number of months to consider
    n_mon = 3

    dates_list = []
    # Extract and sort all dates
    for input_file in input_files:
        basename = os.path.basename(input_file)
        mydate = functions.get_date_from_path_filename(basename)
        mydate_yyyymmdd = str(mydate)[0:8]
        dates_list.append(mydate_yyyymmdd)

    dates_list = sorted(dates_list)

    # Loop from the 'n_mon'-1 date to the last date - this is the period end-limit
    for date_index in range(n_mon - 1, len(dates_list) - 1):
        mydate = dates_list[date_index]
        prev_date = dates_list[date_index - n_mon + 1]
        file_list = []

        # Get the first day of the end-month and of the start-month
        m_1 = datetime.date(int(mydate[0:4]), int(mydate[4:6]), 1)
        m_2 = datetime.date(int(prev_date[0:4]), int(prev_date[4:6]), 1)
        delta = m_1 - m_2

        # Check there are no missing months, i.e. delta <= 31 * (n_mon - 1) days
        if delta.days <= (31 * (n_mon - 1)):
            for curr_index in range(0, n_mon):
                curr_date = dates_list[date_index - curr_index]
                if os.path.isfile(input_dir + curr_date + in_prod_ident):
                    file_list.append(input_dir + curr_date + in_prod_ident)

            output_file = es_constants.processing_dir + output_subdir_3moncum + os.path.sep + \
                mydate + out_prod_ident_3moncum
            yield (file_list, output_file)
        else:
            print('At least 1 month is missing for period ending {0}'.format(mydate))
def msg_mpe_10dcum(input_file, output_file):
    if len(input_file) > 8:
        output_file = functions.list_to_element(output_file)
        # Get the number of days of that dekad
        basename = os.path.basename(output_file)
        mydate = functions.get_date_from_path_filename(basename)
        nbr_days_dekad = functions.day_per_dekad(mydate)
        factor = 1.0
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "scale_factor": factor,
            "input_nodata": -32768
        }
        raster_image_math.do_cumulate(**args)
    else:
        logger.warning('More than 2 files missing for output {0}: Skip'.format(
            os.path.basename(output_file)))
def generate_input_files_pp():
    # Take kd490 as starting point
    kd_files = kd490_input_dir + my_date + "*" + kd490_prod_ident
    input_files = sorted(glob.glob(kd_files))

    for input_file in input_files:
        basename = os.path.basename(input_file)
        mydate = functions.get_date_from_path_filename(basename)
        ancillary_chla = chla_input_dir + mydate + chla_prod_ident
        ancillary_par = par_input_dir + mydate + par_prod_ident
        ancillary_sst = sst_input_dir + mydate + sst_prod_ident

        do_comp = True
        if not os.path.isfile(ancillary_chla):
            do_comp = False
        if not os.path.isfile(ancillary_par):
            do_comp = False
        if not os.path.isfile(ancillary_sst):
            do_comp = False

        if do_comp is True:
            output_file = es_constants.processing_dir + output_subdir + os.path.sep + mydate + out_prod_ident
            my_inputs = (input_file, ancillary_chla, ancillary_par, ancillary_sst)
            yield (my_inputs, output_file)
def lsasaf_lst_10dmin(input_file, output_file):
    output_file = functions.list_to_element(output_file)
    # Get the date from the output filename
    basename = os.path.basename(output_file)
    mydate = functions.get_date_from_path_filename(basename)
    functions.check_output_dir(os.path.dirname(output_file))

    tmpdir = tempfile.mkdtemp(prefix=__name__, suffix='', dir=es_constants.base_tmp_dir)
    tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)

    args = {
        "input_file": input_file,
        "output_file": tmp_output_file,
        "output_format": 'GTIFF',
        "options": "compress=lzw",
        "input_nodata": -32768
    }
    raster_image_math.do_min_image(**args)

    reproject_output(tmp_output_file, native_mapset, target_mapset)
    shutil.rmtree(tmpdir)

    # Do also the house-keeping, by deleting the files older than 6 months
    number_months_keep = 6
    remove_old_files(prod, '10d15min', version, native_mapset, 'Ingest', number_months_keep)
def lsasaf_etp_10dcum(input_file, output_file):
    output_file = functions.list_to_element(output_file)
    # Get the number of days of that dekad
    basename = os.path.basename(output_file)
    mydate = functions.get_date_from_path_filename(basename)
    nbr_days_dekad = functions.day_per_dekad(mydate)

    # Compute the correcting factor: we sum up all 48 30-min cycles and:
    # - divide by 2 (mm/h -> mm)
    # - multiply by the number of days
    # - divide by 100, so that the scale factor changes from 0.0001 (30min) to 0.01
    factor = float(nbr_days_dekad) * 0.005

    functions.check_output_dir(os.path.dirname(output_file))
    tmpdir = tempfile.mkdtemp(prefix=__name__, suffix='', dir=es_constants.base_tmp_dir)
    tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)

    args = {
        "input_file": input_file,
        "output_file": tmp_output_file,
        "output_format": 'GTIFF',
        "options": "compress=lzw",
        "scale_factor": factor,
        "input_nodata": -32768
    }

    # See ES2-416: we accept at least 40 files out of the expected 48
    if len(input_file) >= 40:
        raster_image_math.do_cumulate(**args)
        reproject_output(tmp_output_file, native_mapset, target_mapset)

        # Do also the house-keeping, by deleting the files older than 6 months
        number_months_keep = 6
        remove_old_files(prod, "10d30min-et", version, native_mapset, 'Derived', number_months_keep)

    # Remove tmp dir (moved out of if-clause - 21.11.19)
    shutil.rmtree(tmpdir)
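# Illustrative check of the factor above (a sketch following the comment in the
# function): for a standard 10-day dekad,
#     factor = nbr_days_dekad * 0.5 * 0.01 = 10 * 0.005 = 0.05
# where 0.5 converts each 30-min slice from mm/h to mm, and 0.01 turns the input
# scale factor of 0.0001 into the output scale factor of 0.01. An 11-day dekad
# gives 0.055.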
def generate_parameters_10d30min():
    # Look for all input files in input_dir, and sort them
    input_files = glob.glob(starting_files)

    dekad_list = []
    # Create unique list of all dekads (as 'Julian' number)
    for input_file in input_files:
        basename = os.path.basename(input_file)
        mydate = functions.get_date_from_path_filename(basename)
        mydate_yyyymmdd = str(mydate)[0:8]
        mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
        if mydekad_nbr not in dekad_list:
            dekad_list.append(mydekad_nbr)

    dekad_list = sorted(dekad_list)

    # Compute the 'julian' dekad for the current day
    today = datetime.date.today()
    today_str = today.strftime('%Y%m%d')
    dekad_now = functions.conv_date_2_dekad(today_str)

    # Generate the list of 30-min times in a day
    timelist = [
        datetime.time(h, m).strftime("%H%M")
        for h, m in itertools.product(range(0, 24), range(0, 60, 30))
    ]

    for time in timelist:
        files_for_time = glob.glob(input_dir + os.path.sep + '*' + time + in_prod_ident)

        for dekad in dekad_list:
            # Exclude the current dekad
            if dekad != dekad_now:
                file_list = []
                my_dekad_str = functions.conv_dekad_2_date(dekad)
                output_file = es_constants.processing_dir + output_subdir + os.path.sep + \
                    my_dekad_str + time + out_prod_ident

                for myfile in files_for_time:
                    basename = os.path.basename(myfile)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
                    mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd[0:8])
                    if mydekad_nbr == dekad:
                        file_list.append(myfile)

                yield (file_list, output_file)
def generate_parameters_1d_to_10d(self):
    dekad_list = []
    # Create unique list of all dekads (as 'Julian' number)
    for input_file in self.input_files:
        basename = os.path.basename(input_file)
        mydate = functions.get_date_from_path_filename(basename)
        mydate_yyyymmdd = str(mydate)[0:8]
        mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
        if mydekad_nbr not in dekad_list:
            dekad_list.append(mydekad_nbr)

    dekad_list = sorted(dekad_list)

    # Compute the 'julian' dekad for the current day
    today = datetime.date.today()
    today_str = today.strftime('%Y%m%d')
    dekad_now = functions.conv_date_2_dekad(today_str)

    for dekad in dekad_list:
        # Exclude the current dekad
        if dekad != dekad_now:
            file_list = []
            my_dekad_str = functions.conv_dekad_2_date(dekad)
            expected_days = functions.day_per_dekad(my_dekad_str)

            for input_file in self.input_files:
                basename = os.path.basename(input_file)
                mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
                mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd[0:8])
                if mydekad_nbr == dekad:
                    file_list.append(input_file)

            output_file = es_constants.processing_dir + self.output_subdir_10d + os.path.sep + \
                my_dekad_str + self.out_prod_ident_10d

            if len(file_list) >= expected_days - 1:
                yield (file_list, output_file)
            else:
                print('Too many missing files for dekad {0}'.format(my_dekad_str))
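# Illustrative tolerance for the function above (a sketch): a standard 10-day
# dekad gives expected_days = 10, so the dekad is composed only if at least 9 of
# the 10 daily files are present; the first and second dekads always have 10 days,
# the third has 8 to 11 depending on the month.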
def generate_parameters_10dcount():
    # Look for all input files in input_dir, and sort them
    input_files = glob.glob(starting_files)

    dekad_list = []
    # Create unique list of all dekads (as 'Julian' number)
    for input_file in input_files:
        basename = os.path.basename(input_file)
        mydate = functions.get_date_from_path_filename(basename)
        mydate_yyyymmdd = str(mydate)[0:8]
        mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
        if mydekad_nbr not in dekad_list:
            dekad_list.append(mydekad_nbr)

    dekad_list = sorted(dekad_list)

    # Compute the 'julian' dekad for the current day
    today = datetime.date.today()
    today_str = today.strftime('%Y%m%d')
    dekad_now = functions.conv_date_2_dekad(today_str)

    for dekad in dekad_list:
        # Exclude the current dekad
        if dekad != dekad_now:
            file_list = []
            my_dekad_str = functions.conv_dekad_2_date(dekad)

            for input_file in input_files:
                basename = os.path.basename(input_file)
                mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
                mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd[0:8])
                if mydekad_nbr == dekad:
                    file_list.append(input_file)

            output_file = es_constants.processing_dir + output_subdir_10dcount + os.path.sep + \
                my_dekad_str + out_prod_ident_10dcount
            yield (file_list, output_file)
def generate_parameters_1moncum():
    month_list = []
    # Create unique list of all months (as 'YYYYMM')
    for input_file in input_files:
        basename = os.path.basename(input_file)
        mydate = functions.get_date_from_path_filename(basename)
        mymonth_yyyymm = str(mydate)[0:6]
        if mymonth_yyyymm not in month_list:
            month_list.append(mymonth_yyyymm)

    month_list = sorted(month_list)

    # Compute the current month
    today = datetime.date.today()
    today_mon = today.strftime('%Y%m')

    for month in month_list:
        # Exclude the current month
        if month != today_mon:
            file_list = []
            exp_days_last_dk = functions.day_per_dekad(month + '21')
            expected_days = int(exp_days_last_dk) + 20

            for input_file in input_files:
                basename = os.path.basename(input_file)
                mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
                mydate_yyyymm = mydate_yyyymmdd[0:6]
                if mydate_yyyymm == month:
                    file_list.append(input_file)

            output_file = es_constants.processing_dir + output_subdir_1moncum + os.path.sep + \
                month + '01' + out_prod_ident_1moncum

            if len(file_list) >= expected_days - 3:
                yield (file_list, output_file)
            else:
                print('Too many missing files for month {0}'.format(month))
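# Illustrative example for the function above (a sketch, assuming day_per_dekad
# returns the length of the third dekad): for a 31-day month such as '201501',
# day_per_dekad('20150121') is 11, so expected_days = 11 + 20 = 31 and the monthly
# cumulate is produced only if at most 3 daily files are missing.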
def remove_old_files(productcode, subproductcode, version, mapsetcode, product_type, nmonths, logger=None):
    # Check logger
    if logger is None:
        logger = log.my_logger(__name__)

    # Get the existing dates for the dataset
    logger.info("Entering routine %s " % 'remove_old_files')

    # Check the installation type
    sysSettings = functions.getSystemSettings()
    if sysSettings['type_installation'] == 'Server':
        logger.info("File housekeeping not done on Server ")
        return

    prod_subdir = functions.set_path_sub_directory(productcode, subproductcode, product_type,
                                                   version, mapsetcode)
    prod_dir = es_constants.es2globals['processing_dir'] + os.path.sep + prod_subdir
    list_files = sorted(glob.glob(prod_dir + os.path.sep + '*.tif'))

    # Define the earliest date to be kept
    month_now = datetime.date.today().month
    year_now = datetime.date.today().year

    for my_file in list_files:
        # Extract the date
        date = functions.get_date_from_path_filename(os.path.basename(my_file))
        date_yyyy = int(date[0:4])
        date_month = int(date[4:6])
        # Delete the file if it is at least 'nmonths' months old
        months_old = (year_now - date_yyyy) * 12 + (month_now - date_month)
        if months_old >= nmonths:
            logger.debug("Deleting file %s " % my_file)
            os.remove(my_file)
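# Minimal usage sketch for the house-keeping routine above (the product, version
# and mapset codes below are illustrative placeholders, not taken from the
# configuration), mirroring the calls made after the dekadal composites:
#
#   remove_old_files('lsasaf-et', '10d30min-et', 'undefined', 'MSG-satellite-3km',
#                    'Derived', 6)
#
# keeps only the files of the last 6 months and is skipped on 'Server' installations.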
def generate_parameters_8days():
    years_periods_list = []

    # Look for all input files in input_dir
    input_files = glob.glob(starting_files)

    # Create unique list of all (year, 8-day period) couples
    for input_file in input_files:
        basename = os.path.basename(input_file)
        mydate = functions.get_date_from_path_filename(basename)
        mydate_yyyymmdd = str(mydate)[0:8]
        mydate_year = str(mydate)[0:4]
        period_nbr = functions.conv_date_2_8days(mydate_yyyymmdd)
        if (mydate_year, period_nbr) not in years_periods_list:
            years_periods_list.append((mydate_year, period_nbr))

    periods_sorted = sorted(years_periods_list)

    # Compute the current period, to be excluded
    today = datetime.date.today()
    today_str = today.strftime('%Y%m%d')
    year_now = today.strftime('%Y')
    period_now = functions.conv_date_2_8days(today_str)

    for year, period in periods_sorted:
        # Exclude the current period
        if period != period_now or year != year_now:
            file_list = []
            jdoy_period = "{0:03d}".format(1 + 8 * (int(period) - 1))
            mmdd_period = functions.conv_date_yyyydoy_2_yyyymmdd(year + jdoy_period)
            output_file = es_constants.processing_dir + output_subdir_8day + os.path.sep + \
                mmdd_period + out_prod_ident_8day

            for myfile in input_files:
                basename = os.path.basename(myfile)
                mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
                mydate_year = mydate_yyyymmdd[0:4]
                period_nbr = functions.conv_date_2_8days(mydate_yyyymmdd[0:8])
                if period_nbr == period and mydate_year == year:
                    file_list.append(myfile)

            # Special case of the last period of the year: add a few days of the next year
            if period == 46:
                next_year = "{0:04d}".format(int(year) + 1)
                if calendar.isleap(int(year)):
                    add_days = ('0101', '0102', '0103')
                else:
                    add_days = ('0101', '0102', '0103', '0104')
                for day in add_days:
                    date = next_year + day
                    matches = [x for x in input_files if fnmatch.fnmatch(x, '*{0}*'.format(date))]
                    # Fixes ES2-35 (see YouTrack)
                    if len(matches) > 0:
                        file_list.append(matches[0])

            yield (sorted(file_list), output_file)
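# Illustrative mapping used above (a sketch): period p starts at day-of-year
# 1 + 8 * (p - 1), so period 46 starts at DOY 361 (27 December in a non-leap year)
# and its file list is topped up with the first days of the following year, as
# handled in the special case above.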
def convert_driver(output_dir=None):
    # Definitions
    input_dir = es_constants.es2globals['processing_dir']

    # Instance metadata object
    sds_meta = metadata.SdsMetadata()

    # Check base output dir
    if output_dir is None:
        output_dir = es_constants.es2globals['spirits_output_dir']
    functions.check_output_dir(output_dir)

    # Read the spirits table and convert all existing files
    spirits_list = querydb.get_spirits()

    for entry in spirits_list:
        use_range = False
        product_code = entry['productcode']
        sub_product_code = entry['subproductcode']
        version = entry['version']
        out_data_type = entry['out_data_type']
        out_scale_factor = entry['out_scale_factor']
        out_offset = entry['out_offset']
        out_nodata = entry['data_ignore_value']

        # Prepare the naming dict
        naming_spirits = {'sensor_filename_prefix': entry['sensor_filename_prefix'],
                          'frequency_filename_prefix': entry['frequency_filename_prefix'],
                          'pa_filename_prefix': entry['product_anomaly_filename_prefix']}

        metadata_spirits = {'values': entry['prod_values'],
                            'flags': entry['flags'],
                            'data_ignore_value': entry['data_ignore_value'],
                            'days': entry['days'],
                            'sensor_type': entry['sensor_type'],
                            'comment': entry['comment'],
                            'date': ''}

        # Manage mapsets: if defined use it, else read the existing ones from the filesystem
        my_mapsets = []
        if entry['mapsetcode']:
            my_mapsets.append(entry['mapsetcode'])
        else:
            prod = Product(product_code, version=version)
            for mp in prod.mapsets:
                my_mapsets.append(mp)

        # Manage dates
        if entry['start_date']:
            from_date = datetime.datetime.strptime(str(entry['start_date']), '%Y%m%d').date()
            use_range = True
        else:
            from_date = None
        if entry['end_date']:
            to_date = datetime.datetime.strptime(str(entry['end_date']), '%Y%m%d').date()
            use_range = True
        else:
            to_date = None

        for my_mapset in my_mapsets:
            # Manage output dirs
            out_sub_dir = my_mapset + os.path.sep + \
                          product_code + os.path.sep + \
                          entry['product_anomaly_filename_prefix'] + \
                          entry['frequency_filename_prefix'] + \
                          str(entry['days']) + os.path.sep

            logger.info('Working on [%s]/[%s]/[%s]/[%s]' %
                        (product_code, version, my_mapset, sub_product_code))

            ds = Dataset(product_code, sub_product_code, my_mapset, version=version,
                         from_date=from_date, to_date=to_date)
            product_info = ds._db_product
            in_scale_factor = product_info.scale_factor
            in_offset = product_info.scale_offset
            in_nodata = product_info.nodata
            mask_min = product_info.mask_min
            mask_max = product_info.mask_max
            productcode = product_info.productcode
            subproductcode = product_info.subproductcode

            if productcode == 'vgt-ndvi' and subproductcode == 'ndv':
                mask_min = 0

            if use_range:
                available_files = ds.get_filenames_range()
            else:
                available_files = ds.get_filenames()

            # Convert input products
            if len(available_files) > 0:
                for input_file in available_files:
                    # Check it is a .tif file (not .missing)
                    path, ext = os.path.splitext(input_file)
                    if ext == '.tif':
                        functions.check_output_dir(output_dir + out_sub_dir)
                        str_date = functions.get_date_from_path_filename(os.path.basename(input_file))

                        # Check input file exists
                        if os.path.isfile(input_file):
                            if len(naming_spirits['frequency_filename_prefix']) > 1:
                                my_str_date = naming_spirits['frequency_filename_prefix'][1:5] + str_date
                                metadata_spirits['date'] = my_str_date
                            else:
                                metadata_spirits['date'] = str_date

                            # Read metadata from the file and differentiate chirps prelim and final data
                            sds_meta.read_from_file(input_file)
                            input_file_name = sds_meta.get_item('eStation2_input_files')
                            if productcode == 'chirps-dekad' and input_file_name.endswith(".tif;"):
                                metadata_spirits['comment'] = 'Prelim ' + entry['comment']
                            elif productcode == 'chirps-dekad' and input_file_name.endswith(".gz;"):
                                metadata_spirits['comment'] = 'Final ' + entry['comment']

                            # Check output file exists
                            # convert_geotiff_file(input_file, output_dir+out_sub_dir, str_date, naming_spirits, metadata_spirits)
                            convert_geotiff_datatype_rescaled(input_file, output_dir + out_sub_dir,
                                                              str_date, naming_spirits, metadata_spirits,
                                                              in_scale_factor, in_offset, in_nodata,
                                                              out_scale_factor, out_offset, out_nodata,
                                                              out_data_type, mask_min, mask_max)
                        else:
                            logger.debug('Input file does not exist: %s' % input_file)
def generate_parameters_3davg():
    # Look for all input files in input_dir, and sort them
    if starting_dates is not None:
        input_files = []
        for my_date in starting_dates:
            input_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = input_dir + "*" + in_prod_ident
        input_files = glob.glob(starting_files)

    logger.debug("starting_files %s" % input_files)

    day_list = []
    # Create unique list of all days (as 'YYYYMMDD')
    for input_file in input_files:
        basename = os.path.basename(input_file)
        mydate = functions.get_date_from_path_filename(basename)
        mydate_yyyymmdd = str(mydate)[0:8]
        if mydate_yyyymmdd not in day_list:
            day_list.append(mydate_yyyymmdd)

    day_list = sorted(day_list)

    # Compute today/yesterday and the 'julian' dekad for the current day
    today = datetime.today()
    yesterday = today - timedelta(1)
    today_str = today.strftime('%Y%m%d')
    yesterday_str = yesterday.strftime('%Y%m%d')
    dekad_now = functions.conv_date_2_dekad(today_str)

    for myday in day_list:
        # Exclude the current day and yesterday
        # if myday != today_str or myday != yesterday_str:
        input_file = [s for s in input_files if myday in s]
        file_list = []

        basename = os.path.basename(input_file[0])
        # Date is in format YYYYMMDD
        mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
        yyyy = int(mydate_yyyymmdd[0:4])
        mm = int(mydate_yyyymmdd[4:6])
        dd = int(mydate_yyyymmdd[6:8])

        day2 = datetime(yyyy, mm, dd) + timedelta(1)
        day2_filepath = input_dir + day2.strftime('%Y%m%d') + in_prod_ident
        if not functions.is_file_exists_in_path(day2_filepath):
            continue

        day3 = datetime(yyyy, mm, dd) + timedelta(2)
        day3_filepath = input_dir + day3.strftime('%Y%m%d') + in_prod_ident
        if not functions.is_file_exists_in_path(day3_filepath):
            continue

        file_list.append(input_file[0])
        file_list.append(day2_filepath)
        file_list.append(day3_filepath)

        output_file = es_constants.processing_dir + subdir_3davg + os.path.sep + \
            mydate_yyyymmdd + prod_ident_3davg
        file_list = sorted(file_list)
        # Check that all 3 consecutive daily files are present
        if len(file_list) == 3:
            yield (file_list, output_file)
def test_get_date_from_path_filename(self):
    my_date = functions.get_date_from_path_filename(self.filename)
    self.assertEqual(my_date, self.str_date)