def build_list_dates_generic(from_date, to_date, frequency_id):

    # Check that the frequency is known to the system
    try:
        frequency = datasets.Dataset.get_frequency(
            frequency_id, datasets.Frequency.DATEFORMAT.DATETIME)
    except Exception as inst:
        logger.debug("Error in datasets.Dataset.get_frequency: %s" % inst.args[0])
        raise

    # Manage the start_date (mandatory).
    try:
        # If it is a date, convert to datetime
        if functions.is_date_yyyymmdd(str(from_date), silent=True):
            datetime_start = datetime.datetime.strptime(str(from_date), '%Y%m%d')
        # If it is a negative number, subtract from the current date
        elif isinstance(from_date, int):
            if from_date < 0:
                datetime_start = datetime.datetime.today() - datetime.timedelta(days=-from_date)
        else:
            logger.debug("Error in Start Date: must be YYYYMMDD or -Ndays")
            raise Exception("Start Date not valid")
    except:
        raise Exception("Start Date not valid")

    # Manage the end_date (mandatory).
    try:
        if functions.is_date_yyyymmdd(str(to_date), silent=True):
            datetime_end = datetime.datetime.strptime(str(to_date), '%Y%m%d')
        # If it is a number, count days back (negative) or forward (positive) from today
        elif isinstance(to_date, int):
            if to_date < 0:
                datetime_end = datetime.datetime.today() - datetime.timedelta(days=-to_date)
            elif to_date > 0:
                datetime_end = datetime.datetime.today() + datetime.timedelta(days=to_date)
        else:
            datetime_end = datetime.datetime.today()
    except:
        pass

    # Generate the list of dates for the given frequency
    try:
        dates = frequency.get_dates(datetime_start, datetime_end)
    except Exception as inst:
        logger.debug("Error in frequency.get_dates: %s" % inst.args[0])
        raise

    return dates

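# Hedged usage sketch for build_list_dates_generic (not part of the original module):
# it shows the two accepted forms of 'from_date' (a YYYYMMDD value, or a negative
# number of days counted back from today). The frequency code 'e1day' is an
# assumption for illustration only; valid codes come from the DB 'frequency' table.
def _example_build_list_dates_generic():
    # Explicit YYYYMMDD bounds, daily frequency (code 'e1day' assumed)
    dates_fixed = build_list_dates_generic('20200101', '20200131', 'e1day')
    # Rolling window: start 30 days before today, end at today's date
    today = datetime.datetime.today().strftime('%Y%m%d')
    dates_rolling = build_list_dates_generic(-30, today, 'e1day')
    return dates_fixed, dates_rolling
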
def syncGeoserver():
    #
    # Copy some 'relevant' datasets to GeoServer.
    # The selection of datasets is driven by the product.geoserver table.
    #

    # Get the list of all 'relevant' subproducts activated for GeoServer
    list_active_geoserver = esTools.get_activated_geoserver()

    # Loop over existing sub_products
    for geoserver_sprod in list_active_geoserver:

        # Extract local variables
        my_prod = geoserver_sprod.productcode
        my_subprod = geoserver_sprod.subproductcode
        my_version = geoserver_sprod.version
        start_date = geoserver_sprod.startdate
        end_date = geoserver_sprod.enddate

        logger.info("Working on Product/Subproduct/Version: {0}/{1}/{2}".format(my_prod, my_subprod, my_version))

        # Convert dates from bigint to datetime.date
        if functions.is_date_yyyymmdd(str(start_date), silent=True):
            date_start = datetime.datetime.strptime(str(start_date), '%Y%m%d').date()
        else:
            date_start = None

        if functions.is_date_yyyymmdd(str(end_date), silent=True):
            date_end = datetime.datetime.strptime(str(end_date), '%Y%m%d').date()
        else:
            date_end = None

        # Get additional product info
        product_info = querydb.get_product_out_info(productcode=my_prod,
                                                    subproductcode=my_subprod,
                                                    version=my_version)
        my_type = product_info[0].product_type
        my_category = product_info[0].category_id

        # Create a Product object (to get its mapsets)
        my_product = products.Product(my_prod, version=my_version)
        my_mapsets = my_product.mapsets

        if len(my_mapsets) > 1:
            logger.info('More than one mapset exists: take the first one.')
        if len(my_mapsets) == 0:
            logger.warning('No mapset exists. Skip.')
            continue
        my_mapset = my_mapsets[0]

        # Create a Dataset object (to get the file list).
        # If date_start is not set (e.g. for 10davg products) create it without dates.
        if date_start:
            my_dataset = datasets.Dataset(my_prod, my_subprod, my_mapset, version=my_version,
                                          from_date=date_start, to_date=date_end)
            if my_dataset._frequency.dateformat == 'MMDD':
                logger.warning('Product of type MMDD: date specification not supported. Skip.')
                continue
            file_list = my_dataset.get_filenames_range()
        else:
            my_dataset = datasets.Dataset(my_prod, my_subprod, my_mapset, version=my_version)
            file_list = my_dataset.get_filenames()

        # Check that there is at least one file
        if len(file_list) > 0:

            # Check that the workspace exists, otherwise create it
            my_workspace = esTools.setWorkspaceName(my_category, my_prod, my_subprod, my_version, my_mapset,
                                                    nameType=geoserverREST.geoserverWorkspaceName)
            if not geoserverREST.isWorkspace(my_workspace):
                geoserverREST.createWorkspace(my_workspace)

            # Loop over files and upload
            for my_file in file_list:
                my_date = functions.get_date_from_path_full(my_file)
                logger.debug("Working on Product/Subproduct/Version/Mapset/Date: {0}/{1}/{2}/{3}/{4}".format(
                    my_prod, my_subprod, my_version, my_mapset, my_date))

                # Upload the file and register it
                esTools.uploadAndRegisterRaster(my_category, my_prod, my_subprod, my_version, my_mapset,
                                                my_date, my_type, local_data_dir)

def loop_get_internet(dry_run=False):

    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving data from INTERNET.")

    while True:
        output_dir = es_constants.ingest_dir
        logger.debug("Check if the Ingest Server input directory : %s exists.", output_dir)
        if not os.path.exists(output_dir):
            logger.fatal("The Ingest Server input directory : %s doesn't exist.", output_dir)
            exit(1)

        if not os.path.exists(es_constants.processed_list_int_dir):
            os.mkdir(es_constants.processed_list_int_dir)

        while 1:
            try:
                time_sleep = user_def_sleep
                logger.debug("Sleep time set to : %s.", time_sleep)
            except:
                logger.warning("Sleep time not defined. Setting to default=1min. Continue.")
                time_sleep = 60

            logger.debug("Reading active INTERNET data sources from database")
            internet_sources_list = querydb.get_active_internet_sources(echo=echo_query)

            # Loop over active triggers
            try:
                for internet_source in internet_sources_list:
                    logger.debug("Processing internet source %s.", internet_source.descriptive_name)

                    processed_list_filename = es_constants.get_internet_processed_list_prefix + \
                        str(internet_source.internet_id) + '.list'
                    processed_info_filename = es_constants.get_internet_processed_list_prefix + \
                        str(internet_source.internet_id) + '.info'

                    # Create objects for list and info
                    processed_list = []
                    processed_info = {'length_proc_list': 0,
                                      'time_latest_exec': datetime.datetime.now(),
                                      'time_latest_copy': datetime.datetime.now()}

                    # Restore/Create List
                    processed_list = functions.restore_obj_from_pickle(processed_list, processed_list_filename)
                    # Restore/Create Info
                    processed_info = functions.restore_obj_from_pickle(processed_info, processed_info_filename)
                    # Update processing time (in case it is restored)
                    processed_info['time_latest_exec'] = datetime.datetime.now()

                    logger.debug("Create current list of files to process for source %s.",
                                 internet_source.internet_id)

                    if internet_source.user_name is None:
                        user_name = "anonymous"
                    else:
                        user_name = internet_source.user_name

                    if internet_source.password is None:
                        password = "******"
                    else:
                        password = internet_source.password

                    usr_pwd = str(user_name) + ':' + str(password)

                    logger.debug("   Url is %s.", internet_source.url)
                    logger.debug("   usr/pwd is %s.", usr_pwd)
                    logger.debug("   regex is %s.", internet_source.include_files_expression)

                    internet_type = internet_source.type

                    if internet_type == 'ftp':
                        # Note that the following list might contain sub-dirs (it reflects full_regex)
                        current_list = get_list_matching_files_dir_ftp(
                            str(internet_source.url),
                            str(usr_pwd),
                            str(internet_source.include_files_expression))

                    elif internet_type == 'http_tmpl':
                        # Manage the dates: start_date is mandatory,
                        # end_date is replaced by 'today' if missing/wrong
                        try:
                            if functions.is_date_yyyymmdd(str(internet_source.start_date), silent=True):
                                datetime_start = datetime.datetime.strptime(str(internet_source.start_date), '%Y%m%d')
                            else:
                                raise Exception("Start Date not valid")
                        except:
                            raise Exception("Start Date not valid")

                        try:
                            if functions.is_date_yyyymmdd(str(internet_source.end_date), silent=True):
                                datetime_end = datetime.datetime.strptime(str(internet_source.end_date), '%Y%m%d')
                            else:
                                datetime_end = datetime.datetime.today()
                        except:
                            pass

                        # Create the full filenames from a 'template' which contains date placeholders
                        try:
                            current_list = build_list_matching_for_http(
                                str(internet_source.url),
                                str(internet_source.include_files_expression),
                                datetime_start,
                                datetime_end,
                                str(internet_source.frequency_id))
                        except:
                            logger.error("Error in creating date lists. Continue")

                    logger.debug("Number of files currently available for source %s is %i",
                                 internet_source.internet_id, len(current_list))

                    if len(current_list) > 0:
                        logger.debug("Number of files already copied for trigger %s is %i",
                                     internet_source.internet_id, len(processed_list))
                        listtoprocess = []
                        for current_file in current_list:
                            if len(processed_list) == 0:
                                listtoprocess.append(current_file)
                            else:
                                # Compare the full relative path (not the basename),
                                # so that sub-dirs are saved in the .list file as well
                                if current_file not in processed_list:
                                    listtoprocess.append(current_file)

                        logger.debug("Number of files to be copied for trigger %s is %i",
                                     internet_source.internet_id, len(listtoprocess))
                        if len(listtoprocess) > 0:
                            logger.debug("Loop on the found files.")
                            if not dry_run:
                                for filename in list(listtoprocess):
                                    logger.debug("Processing file: " + str(internet_source.url) +
                                                 os.path.sep + filename)
                                    try:
                                        result = get_file_from_url(
                                            str(internet_source.url) + os.path.sep + filename,
                                            target_file=os.path.basename(filename),
                                            target_dir=es_constants.ingest_dir,
                                            userpwd=str(usr_pwd))
                                        if not result:
                                            logger.info("File %s copied.", filename)
                                            processed_list.append(filename)
                                    except:
                                        logger.warning("Problem while copying file: %s.", filename)
                            else:
                                logger.info('Dry_run is set: do not get files')

                    if not dry_run:
                        functions.dump_obj_to_pickle(processed_list, processed_list_filename)
                        functions.dump_obj_to_pickle(processed_info, processed_info_filename)

                sleep(float(user_def_sleep))

            # Error while looping over sources
            except Exception as inst:
                logger.error("Error while processing source %s. Continue" % internet_source.descriptive_name)
                sleep(float(user_def_sleep))

    exit(0)

def define_output_data_format(datasource_descr, in_date, out_date_format):

    # Convert the in_date format into one convenient for DB and file naming
    # (i.e. YYYYMMDD or YYYYMMDDHHMM)

    # Initialize to error value
    output_date_str = -1

    if datasource_descr.date_format == 'YYYYMMDD':
        if functions.is_date_yyyymmdd(in_date):
            output_date_str = in_date
        else:
            output_date_str = -1

    if datasource_descr.date_format == 'YYYYMMDDHHMM':
        if functions.is_date_yyyymmddhhmm(in_date):
            output_date_str = in_date
        else:
            output_date_str = -1

    if datasource_descr.date_format == 'YYYYDOY_YYYYDOY':
        output_date_str = functions.conv_date_yyyydoy_2_yyyymmdd(str(in_date)[0:7])

    if datasource_descr.date_format == 'YYYYMMDD_YYYYMMDD':
        output_date_str = str(in_date)[0:8]
        if not functions.is_date_yyyymmdd(output_date_str):
            output_date_str = -1

    if datasource_descr.date_format == 'YYYYDOY':
        output_date_str = functions.conv_date_yyyydoy_2_yyyymmdd(in_date)

    if datasource_descr.date_format == 'YYYY_MM_DKX':
        output_date_str = functions.conv_yyyy_mm_dkx_2_yyyymmdd(in_date)

    if datasource_descr.date_format == 'YYMMK':
        output_date_str = functions.conv_yymmk_2_yyyymmdd(in_date)

    if datasource_descr.date_format == 'YYYYdMMdK':
        output_date_str = functions.conv_yyyydmmdk_2_yyyymmdd(in_date)

    if datasource_descr.date_format == 'YYYYMMDD_G2':
        # The date (e.g. 20151103) is converted to the dekad it belongs to (e.g. 20151101)
        output_date_str = functions.conv_yyyymmdd_g2_2_yyyymmdd(in_date)

    if datasource_descr.date_format == 'MMDD':
        output_date_str = str(in_date)

    if datasource_descr.date_format == 'YYYYMM':
        # Convert from YYYYMM -> YYYYMMDD
        output_date_str = str(in_date) + '01'

    if datasource_descr.date_format == 'YYYY_DK':
        # The date (e.g. 2020_36) is converted to the dekad it belongs to (e.g. 20201221)
        output_date_str = functions.conv_yyyydk_2_yyyymmdd(in_date)

    if output_date_str == -1:
        out_date_str_final = in_date + '_DATE_ERROR_'
    else:
        if out_date_format == 'YYYYMMDDHHMM':
            if functions.is_date_yyyymmddhhmm(output_date_str):
                out_date_str_final = output_date_str
            elif functions.is_date_yyyymmdd(output_date_str):
                out_date_str_final = output_date_str + '0000'
        elif out_date_format == 'YYYYMMDD':
            if functions.is_date_yyyymmdd(output_date_str, silent=True):
                out_date_str_final = output_date_str
            elif functions.is_date_yyyymmddhhmm(output_date_str):
                out_date_str_final = output_date_str[0:8]
        elif out_date_format == 'MMDD':
            if functions.is_date_mmdd(output_date_str):
                out_date_str_final = output_date_str

    return out_date_str_final

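# Hedged usage sketch for define_output_data_format (not part of the original module):
# the DB record is mocked with a namedtuple, since only its 'date_format' attribute is
# read here. The expected result assumes functions.conv_date_yyyydoy_2_yyyymmdd maps
# day-of-year 306 of 2015 to '20151102'.
def _example_define_output_data_format():
    from collections import namedtuple
    DatasourceDescr = namedtuple('DatasourceDescr', ['date_format'])
    descr = DatasourceDescr(date_format='YYYYDOY')
    # A YYYYDOY input re-expressed as YYYYMMDD for DB/file naming
    return define_output_data_format(descr, '2015306', 'YYYYMMDD')
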
def build_list_matching_files_motu(base_url, template, from_date, to_date, frequency_id,
                                   username, password, files_filter_expression):

    # Check that the frequency is known to the system
    try:
        frequency = datasets.Dataset.get_frequency(
            frequency_id, datasets.Frequency.DATEFORMAT.DATETIME)
    except Exception as inst:
        logger.debug("Error in datasets.Dataset.get_frequency: %s" % inst.args[0])
        raise

    # Manage the start_date (mandatory).
    try:
        # If it is a date, convert to datetime
        if functions.is_date_yyyymmdd(str(from_date), silent=True):
            datetime_start = datetime.datetime.strptime(str(from_date), '%Y%m%d')
        # If it is a negative number, subtract from the current date
        elif isinstance(from_date, int):
            if from_date < 0:
                datetime_start = datetime.datetime.today() - datetime.timedelta(days=-from_date)
        else:
            logger.debug("Error in Start Date: must be YYYYMMDD or -Ndays")
            raise Exception("Start Date not valid")
    except:
        raise Exception("Start Date not valid")

    # Manage the end_date (mandatory).
    try:
        if functions.is_date_yyyymmdd(str(to_date), silent=True):
            datetime_end = datetime.datetime.strptime(str(to_date), '%Y%m%d')
        # If it is a number, count days back (negative) or forward (positive) from today
        elif isinstance(to_date, int):
            if to_date < 0:
                datetime_end = datetime.datetime.today() - datetime.timedelta(days=-to_date)
            elif to_date > 0:
                datetime_end = datetime.datetime.today() + datetime.timedelta(days=to_date)
        else:
            datetime_end = datetime.datetime.today()
    except:
        pass

    try:
        dates = frequency.get_dates(datetime_start, datetime_end)
    except Exception as inst:
        logger.debug("Error in frequency.get_dates: %s" % inst.args[0])
        raise

    try:
        list_filenames = motu_api.motu_4_dates(dates, template, base_url, username, password,
                                               files_filter_expression)
        # list_filenames = frequency.get_internet_dates(dates, template)
    except Exception as inst:
        logger.debug("Error in motu_api.motu_4_dates: %s" % inst.args[0])
        raise

    return list_filenames

def build_list_matching_files_tmpl(base_url, template, from_date, to_date, frequency_id):

    # Check that the frequency is known to the system
    try:
        frequency = datasets.Dataset.get_frequency(
            frequency_id, datasets.Frequency.DATEFORMAT.DATETIME)
    except Exception as inst:
        logger.debug("Error in datasets.Dataset.get_frequency: %s" % inst.args[0])
        raise

    # Manage the start_date (mandatory).
    try:
        # If it is a date, convert to datetime
        if functions.is_date_yyyymmdd(str(from_date), silent=True):
            datetime_start = datetime.datetime.strptime(str(from_date), '%Y%m%d')
        # If it is a negative number, subtract from the current date
        elif isinstance(from_date, int):
            if from_date < 0:
                datetime_start = datetime.datetime.today() - datetime.timedelta(days=-from_date)
        else:
            logger.debug("Error in Start Date: must be YYYYMMDD or -Ndays")
            raise Exception("Start Date not valid")
    except:
        raise Exception("Start Date not valid")

    # Manage the end_date (mandatory).
    try:
        if functions.is_date_yyyymmdd(str(to_date), silent=True):
            datetime_end = datetime.datetime.strptime(str(to_date), '%Y%m%d')
        # If it is a negative number, subtract from the current date
        elif isinstance(to_date, int):
            if to_date < 0:
                datetime_end = datetime.datetime.today() - datetime.timedelta(days=-to_date)
        else:
            datetime_end = datetime.datetime.today()
    except:
        pass

    try:
        dates = frequency.get_dates(datetime_start, datetime_end)
    except Exception as inst:
        logger.debug("Error in frequency.get_dates: %s" % inst.args[0])
        raise

    try:
        if sys.platform == 'win32':
            # str.replace returns a new string: assign the result
            template = template.replace("-", "#")
        list_filenames = frequency.get_internet_dates(dates, template)
    except Exception as inst:
        logger.debug("Error in frequency.get_internet_dates: %s" % inst.args[0])
        raise

    return list_filenames


######################################################################################
#   build_list_matching_files_sentinel_sat
#   Purpose: return the list of file names matching a 'template' with 'date' placeholders
#            It is the entry point for the 'http_templ' source type
#   Author: Vijay Charan Venkatachalam, JRC, European Commission
#   Date: 2015/02/18
#   Inputs: template: regex including subdirs (e.g. 'Collection51/TIFF/Win1[01]/201[1-3]/MCD45monthly.A20.*burndate.tif.gz')
#           from_date: start date for the dataset (datetime.datetime object)
#           to_date: end date for the dataset (datetime.datetime object)
#           frequency: dataset 'frequency' (see DB 'frequency' table)
#
# def build_list_matching_files_sentinel_sat(base_url, template, from_date, to_date, frequency_id, username, password):
#
#     # Add a check on frequency
#     try:
#         frequency = datasets.Dataset.get_frequency(frequency_id, datasets.Frequency.DATEFORMAT.DATETIME)
#     except Exception as inst:
#         logger.debug("Error in datasets.Dataset.get_frequency: %s" % inst.args[0])
#         raise
#
#     # Manage the start_date (mandatory).
#     try:
#         # If it is a date, convert to datetime
#         if functions.is_date_yyyymmdd(str(from_date), silent=True):
#             datetime_start = datetime.datetime.strptime(str(from_date), '%Y%m%d')
#         else:
#             # If it is a negative number, subtract from current date
#             if isinstance(from_date, int) or isinstance(from_date, long):
#                 if from_date < 0:
#                     datetime_start = datetime.datetime.today() - datetime.timedelta(days=-from_date)
#             else:
#                 logger.debug("Error in Start Date: must be YYYYMMDD or -Ndays")
#                 raise Exception("Start Date not valid")
#     except:
#         raise Exception("Start Date not valid")
#
#     # Manage the end_date (mandatory).
#     try:
#         if functions.is_date_yyyymmdd(str(to_date), silent=True):
#             datetime_end = datetime.datetime.strptime(str(to_date), '%Y%m%d')
#         # If it is a negative number, subtract from current date
#         elif isinstance(to_date, int) or isinstance(to_date, long):
#             if to_date < 0:
#                 datetime_end = datetime.datetime.today() - datetime.timedelta(days=-to_date)
#         else:
#             datetime_end = datetime.datetime.today()
#     except:
#         pass
#
#     try:
#         list_filenames = sentinelsat_api.sentinelsat_getlists(base_url, template, datetime_start, datetime_end)
#     except Exception as inst:
#         logger.debug("Error in sentinelsat.get_lists: %s" % inst.args[0])
#         raise
#
#     try:
#         dates = frequency.get_dates(datetime_start, datetime_end)
#     except Exception as inst:
#         logger.debug("Error in frequency.get_dates: %s" % inst.args[0])
#         raise
#
#     try:
#         list_filenames = frequency.get_internet_dates(dates, template)
#     except Exception as inst:
#         logger.debug("Error in frequency.get_internet_dates: %s" % inst.args[0])
#         raise
#
#     return list_filenames

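# Hedged usage sketch for build_list_matching_files_tmpl (not part of the original module):
# the base URL, the template placeholder syntax and the frequency code 'e1day' are
# illustrative assumptions only; the template is whatever pattern
# frequency.get_internet_dates expands over the generated date list.
def _example_build_list_matching_files_tmpl():
    return build_list_matching_files_tmpl(
        'https://example.org/data',     # hypothetical base_url
        'product_%Y%m%d.nc',            # hypothetical date-placeholder template
        '20200101',                     # from_date (YYYYMMDD)
        '20200110',                     # to_date (YYYYMMDD)
        'e1day')                        # assumed daily frequency code
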
def test_is_date_time(self):
    self.assertTrue(f.is_date_yyyymmdd(self.string_yyyymmdd))
    self.assertTrue(f.is_date_mmdd(self.string_mmdd))
    self.assertTrue(f.is_date_yyyymmddhhmm(self.string_yyyymmddhhmm))
    self.assertTrue(f.is_date_yyyydoy(self.string_yyyydoy))
def build_list_matching_files_cds(base_url, template, from_date, to_date, frequency_id, resourcename_uuid):

    # Check that the frequency is known to the system
    try:
        frequency = datasets.Dataset.get_frequency(
            frequency_id, datasets.Frequency.DATEFORMAT.DATETIME)
    except Exception as inst:
        logger.debug("Error in datasets.Dataset.get_frequency: %s" % inst.args[0])
        raise

    # Manage the start_date (mandatory).
    try:
        # If it is a date, convert to datetime
        if functions.is_date_yyyymmdd(str(from_date), silent=True):
            datetime_start = datetime.datetime.strptime(str(from_date), '%Y%m%d')
        # If it is a negative number, subtract from the current date
        elif isinstance(from_date, int):
            if from_date < 0:
                datetime_start = datetime.datetime.today() - datetime.timedelta(days=-from_date)
        else:
            logger.debug("Error in Start Date: must be YYYYMMDD or -Ndays")
            raise Exception("Start Date not valid")
    except:
        raise Exception("Start Date not valid")

    # Manage the end_date (mandatory).
    try:
        if functions.is_date_yyyymmdd(str(to_date), silent=True):
            datetime_end = datetime.datetime.strptime(str(to_date), '%Y%m%d')
        # If it is a negative number, subtract from the current date
        elif isinstance(to_date, int):
            if to_date < 0:
                datetime_end = datetime.datetime.today() - datetime.timedelta(days=-to_date)
        else:
            datetime_end = datetime.datetime.today()
    except:
        pass

    try:
        dates = frequency.get_dates(datetime_start, datetime_end)
    except Exception as inst:
        logger.debug("Error in frequency.get_dates: %s" % inst.args[0])
        raise

    try:
        if sys.platform == 'win32':
            # str.replace returns a new string: assign the result
            template = template.replace("-", "#")
        list_input_files = cds_api.create_list_cds(dates, template, base_url, resourcename_uuid)
    except Exception as inst:
        logger.debug("Error in cds_api.create_list_cds: %s" % inst.args[0])
        raise

    return list_input_files

def test_is_date_yyyymmdd(self):
    self.assertTrue(functions.is_date_yyyymmdd(self.string_yyyymmdd))