def test_save_status(self):
    # Define the .pck filename
    pickle_filename = es2system.system_status_filename()
    # Run the method to save the machine status
    result = es2system.save_status_local_machine()
    # Read back the .pck file (None -> no fallback object)
    machine_status = functions.restore_obj_from_pickle(None, pickle_filename)
    # Checks
    print('Check psql ON')
    self.assertEqual(machine_status['postgresql_status'], True)
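# The test above round-trips the machine status through functions.restore_obj_from_pickle().
# A minimal sketch of what such a helper might look like (hypothetical -- the real
# implementation lives elsewhere in the code base and may differ): return the unpickled
# content when the file is readable, otherwise fall back to the object passed in.
import os
import pickle

def restore_obj_from_pickle_sketch(obj, filename):
    # Keep the caller's default when the pickle file is missing or corrupt.
    if os.path.isfile(filename):
        try:
            with open(filename, 'rb') as pkl_file:
                obj = pickle.load(pkl_file)
        except (pickle.UnpicklingError, EOFError, OSError):
            pass
    return obj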
def loop_get_internet(dry_run=False):

    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving data from INTERNET.")

    while True:
        output_dir = es_constants.ingest_dir
        logger.debug("Check if the Ingest Server input directory : %s exists.", output_dir)
        if not os.path.exists(output_dir):
            logger.fatal("The Ingest Server input directory : %s doesn't exist.", output_dir)
            exit(1)

        if not os.path.exists(es_constants.processed_list_int_dir):
            os.mkdir(es_constants.processed_list_int_dir)

        while 1:
            try:
                time_sleep = user_def_sleep
                logger.debug("Sleep time set to : %s.", time_sleep)
            except NameError:
                logger.warning("Sleep time not defined. Setting to default=1min. Continue.")
                time_sleep = 60

            logger.debug("Reading active INTERNET data sources from database")
            internet_sources_list = querydb.get_active_internet_sources(echo=echo_query)

            # Loop over active triggers
            try:
                for internet_source in internet_sources_list:
                    logger.debug("Processing internet source %s.", internet_source.descriptive_name)

                    processed_list_filename = es_constants.get_internet_processed_list_prefix + \
                        str(internet_source.internet_id) + '.list'
                    processed_info_filename = es_constants.get_internet_processed_list_prefix + \
                        str(internet_source.internet_id) + '.info'

                    # Create objects for list and info
                    processed_list = []
                    processed_info = {'length_proc_list': 0,
                                      'time_latest_exec': datetime.datetime.now(),
                                      'time_latest_copy': datetime.datetime.now()}

                    # Restore/Create List
                    processed_list = functions.restore_obj_from_pickle(processed_list, processed_list_filename)
                    # Restore/Create Info
                    processed_info = functions.restore_obj_from_pickle(processed_info, processed_info_filename)
                    # Update processing time (in case it is restored)
                    processed_info['time_latest_exec'] = datetime.datetime.now()

                    logger.debug("Create current list of files to process for source %s.",
                                 internet_source.internet_id)
                    if internet_source.user_name is None:
                        user_name = "anonymous"
                    else:
                        user_name = internet_source.user_name
                    if internet_source.password is None:
                        password = "******"
                    else:
                        password = internet_source.password
                    usr_pwd = str(user_name) + ':' + str(password)

                    logger.debug("   Url is %s.", internet_source.url)
                    logger.debug("   usr/pwd is %s.", usr_pwd)
                    logger.debug("   regex is %s.", internet_source.include_files_expression)

                    internet_type = internet_source.type

                    if internet_type == 'ftp':
                        # Note that the following list might contain sub-dirs (it reflects full_regex)
                        current_list = get_list_matching_files_dir_ftp(
                            str(internet_source.url),
                            str(usr_pwd),
                            str(internet_source.include_files_expression))

                    elif internet_type == 'http_tmpl':
                        # Manage the dates: start_date is mandatory; end_date is
                        # replaced by 'today' if missing/wrong
                        try:
                            if functions.is_date_yyyymmdd(str(internet_source.start_date), silent=True):
                                datetime_start = datetime.datetime.strptime(str(internet_source.start_date), '%Y%m%d')
                            else:
                                raise Exception("Start Date not valid")
                        except:
                            raise Exception("Start Date not valid")
                        try:
                            if functions.is_date_yyyymmdd(str(internet_source.end_date), silent=True):
                                datetime_end = datetime.datetime.strptime(str(internet_source.end_date), '%Y%m%d')
                            else:
                                datetime_end = datetime.datetime.today()
                        except:
                            datetime_end = datetime.datetime.today()

                        # Create the full filenames from a 'template' which contains date placeholders
                        try:
                            current_list = build_list_matching_for_http(
                                str(internet_source.url),
                                str(internet_source.include_files_expression),
                                datetime_start,
                                datetime_end,
                                str(internet_source.frequency_id))
                        except:
                            logger.error("Error in creating date lists. Continue")
                            current_list = []

                    logger.debug("Number of files currently available for source %s is %i",
                                 internet_source.internet_id, len(current_list))

                    if len(current_list) > 0:
                        logger.debug("Number of files already copied for trigger %s is %i",
                                     internet_source.internet_id, len(processed_list))
                        listtoprocess = []
                        for current_file in current_list:
                            if len(processed_list) == 0:
                                listtoprocess.append(current_file)
                            else:
                                # Compare full paths (not basenames), so that files in
                                # sub-dirs are saved in the .list as well
                                if current_file not in processed_list:
                                    listtoprocess.append(current_file)
                        logger.debug("Number of files to be copied for trigger %s is %i",
                                     internet_source.internet_id, len(listtoprocess))

                        if listtoprocess:
                            logger.debug("Loop on the found files.")
                            if not dry_run:
                                for filename in list(listtoprocess):
                                    logger.debug("Processing file: " + str(internet_source.url) +
                                                 os.path.sep + filename)
                                    try:
                                        result = get_file_from_url(
                                            str(internet_source.url) + os.path.sep + filename,
                                            target_file=os.path.basename(filename),
                                            target_dir=es_constants.ingest_dir,
                                            userpwd=str(usr_pwd))
                                        if not result:
                                            logger.info("File %s copied.", filename)
                                            processed_list.append(filename)
                                    except:
                                        logger.warning("Problem while copying file: %s.", filename)
                            else:
                                logger.info('Dry_run is set: do not get files')

                    if not dry_run:
                        functions.dump_obj_to_pickle(processed_list, processed_list_filename)
                        functions.dump_obj_to_pickle(processed_info, processed_info_filename)

                    sleep(float(user_def_sleep))

            # Loop over sources
            except Exception as inst:
                logger.error("Error while processing source %s. Continue" % internet_source.descriptive_name)

            sleep(float(user_def_sleep))

    exit(0)
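# loop_get_internet() registers signal_handler() for SIGTERM/SIGINT/SIGILL and keeps the
# processed list/info in module globals. A plausible sketch of that handler (hypothetical --
# the globals suggest it persists the in-memory state before exiting, so that already
# downloaded files are not re-fetched after a restart):
import sys

def signal_handler_sketch(signum, frame):
    # Dump the current state to disk, then terminate cleanly.
    functions.dump_obj_to_pickle(processed_list, processed_list_filename)
    functions.dump_obj_to_pickle(processed_info, processed_info_filename)
    logger.info("Signal %s received: exiting.", signum)
    sys.exit(0)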
def loop_get_internet(dry_run=False, test_one_source=False):

    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving data from INTERNET.")

    while True:
        output_dir = es_constants.get_internet_output_dir
        logger.debug("Check if the Ingest Server input directory : %s exists.", output_dir)
        if not os.path.exists(output_dir):
            # ToDo: create output_dir - ingest directory
            logger.fatal("The Ingest Server input directory : %s doesn't exist.", output_dir)
            exit(1)

        if not os.path.exists(es_constants.processed_list_int_dir):
            os.mkdir(es_constants.processed_list_int_dir)

        while 1:
            # Check internet connection (or continue)
            if not functions.internet_on():
                logger.error("The computer is not currently connected to the internet. Wait 1 minute.")
                time.sleep(60)
            else:
                try:
                    time_sleep = user_def_sleep
                    logger.debug("Sleep time set to : %s.", time_sleep)
                except NameError:
                    logger.warning("Sleep time not defined. Setting to default=1min. Continue.")
                    time_sleep = 60

                logger.info("Reading active INTERNET data sources from database")
                internet_sources_list = querydb.get_active_internet_sources()

                # Loop over active triggers
                for internet_source in internet_sources_list:
                    try:
                        if test_one_source and (internet_source.internet_id != test_one_source):
                            logger.info("Running in test mode, and source is not %s. Continue.",
                                        test_one_source)
                            continue

                        execute_trigger = True
                        # Get this from the pads database table (move from internet_source
                        # 'pull_frequency' to the pads table, so that it can be exploited by
                        # eumetcast triggers as well). It is in minutes.
                        pull_frequency = internet_source.pull_frequency

                        # Manage the case of files to be continuously downloaded (delay < 0)
                        if pull_frequency < 0:
                            do_not_consider_processed_list = True
                            delay_time_source_minutes = -pull_frequency
                        else:
                            do_not_consider_processed_list = False
                            delay_time_source_minutes = pull_frequency

                        if sys.platform == 'win32':
                            # ':' is not allowed in Windows filenames
                            internet_id = str(internet_source.internet_id).replace(':', '_')
                        else:
                            internet_id = str(internet_source.internet_id)

                        logger_spec = log.my_logger('apps.get_internet.' + internet_id)
                        logger.info("Processing internet source %s.", internet_source.descriptive_name)

                        # Create objects for list and info
                        processed_info_filename = es_constants.get_internet_processed_list_prefix + \
                            str(internet_id) + '.info'

                        # Restore/Create Info
                        processed_info = None
                        processed_info = functions.restore_obj_from_pickle(processed_info,
                                                                           processed_info_filename)
                        if processed_info is not None:
                            # Check the delay
                            current_delta = datetime.datetime.now() - processed_info['time_latest_exec']
                            current_delta_minutes = int(current_delta.seconds / 60)
                            if current_delta_minutes < delay_time_source_minutes:
                                logger.debug("Still waiting up to %i minutes since the latest execution.",
                                             delay_time_source_minutes)
                                execute_trigger = False
                        else:
                            # Create processed_info object
                            processed_info = {'length_proc_list': 0,
                                              'time_latest_exec': datetime.datetime.now(),
                                              'time_latest_copy': datetime.datetime.now()}
                            execute_trigger = True

                        if execute_trigger:
                            # Restore/Create List
                            processed_list = []
                            if not do_not_consider_processed_list:
                                processed_list_filename = es_constants.get_internet_processed_list_prefix + \
                                    internet_id + '.list'
                                processed_list = functions.restore_obj_from_pickle(processed_list,
                                                                                   processed_list_filename)
                            processed_info['time_latest_exec'] = datetime.datetime.now()

                            logger.debug("Create current list of files to process for source %s.",
                                         internet_source.internet_id)
                            if internet_source.user_name is None:
                                user_name = "anonymous"
                            else:
                                user_name = internet_source.user_name
                            if internet_source.password is None:
                                password = "******"
                            else:
                                password = internet_source.password
                            usr_pwd = str(user_name) + ':' + str(password)

                            logger_spec.debug("   Url is %s.", internet_source.url)
                            logger_spec.debug("   usr/pwd is %s.", usr_pwd)
                            logger_spec.debug("   regex is %s.", internet_source.include_files_expression)

                            internet_type = internet_source.type

                            if internet_type == 'ftp' or internet_type == 'http':
                                # Manage the end_date (added for MODIS_FIRMS)
                                if internet_source.end_date != '':
                                    end_date = internet_source.end_date
                                else:
                                    end_date = None
                                # Note that the following list might contain sub-dirs (it reflects full_regex)
                                try:
                                    current_list = get_list_matching_files(
                                        str(internet_source.url),
                                        str(usr_pwd),
                                        str(internet_source.include_files_expression),
                                        internet_type,
                                        end_date=end_date)
                                except:
                                    logger.error("Error in creating file lists. Continue")
                                    continue

                            elif internet_type == 'http_tmpl':
                                # Create the full filenames from a 'template' which contains date placeholders
                                try:
                                    current_list = build_list_matching_files_tmpl(
                                        str(internet_source.url),
                                        str(internet_source.include_files_expression),
                                        internet_source.start_date,
                                        internet_source.end_date,
                                        str(internet_source.frequency_id))
                                except:
                                    logger.error("Error in creating date lists. Continue")
                                    continue

                            elif internet_type == 'motu_client':
                                # Create the full filenames from a 'template' which contains date placeholders
                                try:
                                    current_list = build_list_matching_files_motu(
                                        str(internet_source.url),
                                        str(internet_source.include_files_expression),
                                        internet_source.start_date,
                                        internet_source.end_date,
                                        str(internet_source.frequency_id),
                                        str(internet_source.user_name),
                                        str(internet_source.password),
                                        str(internet_source.files_filter_expression))
                                except:
                                    logger.error("Error in creating motu_client lists. Continue")
                                    continue

                            # elif internet_type == 'sentinel_sat':
                            #     # Create the full filenames from a 'template' which contains date placeholders
                            #     try:
                            #         current_list = build_list_matching_files_sentinel_sat(
                            #             str(internet_source.url),
                            #             str(internet_source.include_files_expression),
                            #             internet_source.start_date,
                            #             internet_source.end_date,
                            #             str(internet_source.frequency_id),
                            #             str(internet_source.user_name),
                            #             str(internet_source.password))
                            #     except:
                            #         logger.error("Error in creating sentinel_sat lists. Continue")
                            #         continue

                            elif internet_type == 'local':
                                logger.info("This internet source is meant to copy data on local filesystem")
                                try:
                                    current_list = get_list_matching_files_dir_local(
                                        str(internet_source.url),
                                        str(internet_source.include_files_expression))
                                except:
                                    logger.error("Error in creating date lists. Continue")
                                    continue

                            elif internet_type == 'offline':
                                logger.info("This internet source is meant to work offline (GoogleDrive)")
                                current_list = []

                            else:
                                logger.error("No correct type for this internet source type: %s" % internet_type)
                                current_list = []

                            logger_spec.debug("Number of files currently available for source %s is %i",
                                              internet_id, len(current_list))

                            if len(current_list) > 0:
                                logger_spec.debug("Number of files already copied for trigger %s is %i",
                                                  internet_id, len(processed_list))
                                listtoprocess = []
                                for current_file in current_list:
                                    if len(processed_list) == 0:
                                        listtoprocess.append(current_file)
                                    else:
                                        # Compare full paths (not basenames), so that files in
                                        # sub-dirs are saved in the .list as well
                                        if current_file not in processed_list:
                                            listtoprocess.append(current_file)
                                logger_spec.debug("Number of files to be copied for trigger %s is %i",
                                                  internet_id, len(listtoprocess))

                                if listtoprocess:
                                    # # Debug
                                    # toprint = ''
                                    # for elem in listtoprocess:
                                    #     toprint += elem + ','
                                    # logger_spec.info('List in get_list_matching_files: %s' % toprint)
                                    logger_spec.debug("Loop on the found files.")
                                    if not dry_run:
                                        for filename in list(listtoprocess):
                                            logger_spec.debug("Processing file: " + str(internet_source.url) +
                                                              os.path.sep + filename)
                                            try:
                                                if internet_type == 'local':
                                                    shutil.copyfile(
                                                        str(internet_source.url) + os.path.sep + filename,
                                                        es_constants.ingest_dir + os.path.basename(filename))
                                                    result = 0
                                                elif internet_type == 'motu_client':
                                                    result = get_file_from_motu_command(
                                                        str(filename),
                                                        # target_file=internet_source.files_filter_expression,
                                                        target_dir=es_constants.ingest_dir,
                                                        userpwd=str(usr_pwd))
                                                # elif internet_type == 'sentinel_sat':
                                                #     result = get_file_from_sentinelsat_url(
                                                #         str(filename),
                                                #         target_dir=es_constants.ingest_dir)
                                                else:
                                                    result = get_file_from_url(
                                                        str(internet_source.url) + os.path.sep + filename,
                                                        target_file=os.path.basename(filename),
                                                        target_dir=es_constants.ingest_dir,
                                                        userpwd=str(usr_pwd))
                                                if not result:
                                                    logger_spec.info("File %s copied.", filename)
                                                    processed_list.append(filename)
                                                else:
                                                    logger_spec.warning("File %s not copied.", filename)
                                            except:
                                                logger_spec.warning("Problem while copying file: %s.", filename)
                                    else:
                                        logger_spec.info('Dry_run is set: do not get files')

                            if not dry_run:
                                functions.dump_obj_to_pickle(processed_list, processed_list_filename)
                                functions.dump_obj_to_pickle(processed_info, processed_info_filename)

                        sleep(float(user_def_sleep))

                    # Loop over sources
                    except Exception as inst:
                        logger.error("Error while processing source %s. Continue" %
                                     internet_source.descriptive_name)

                sleep(float(user_def_sleep))

    exit(0)
def get_archives_eumetcast_ftp():

    # Ad-hoc definitions (to be copied to settings file)
    source_id = 'MESA:JRC:Archives'
    filter_expression_mesa_jrc = 'MESA_JRC_.*.tif'

    # Get Access credentials
    ftp_eumetcast_url = es_constants.es2globals['ftp_eumetcast_url']
    ftp_eumetcast_userpwd = es_constants.es2globals['ftp_eumetcast_userpwd']

    # Define a file_handler logger 'source-specific' (for GUI)
    logger_spec = log.my_logger('apps.get_archives_eumetcast')
    logger.info("Retrieving MESA_JRC files from PC1.")

    if sys.platform == 'win32':
        # Pierluigi: ':' is not allowed in Windows filenames
        source_id = source_id.replace(':', '_')

    processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + str(source_id) + '.list'
    processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + str(source_id) + '.info'

    # Create objects for list and info
    processed_list = []
    processed_info = {'length_proc_list': 0,
                      'time_latest_exec': datetime.datetime.now(),
                      'time_latest_copy': datetime.datetime.now()}

    logger.debug("Loading the processed file list for source %s ", source_id)

    # Restore/Create List
    processed_list = functions.restore_obj_from_pickle(processed_list, processed_list_filename)
    # Restore/Create Info
    processed_info = functions.restore_obj_from_pickle(processed_info, processed_info_filename)
    # Update processing time (in case it is restored)
    processed_info['time_latest_exec'] = datetime.datetime.now()

    logger.debug("Create current list of files to process for trigger %s.", source_id)
    try:
        current_list = get_list_matching_files(ftp_eumetcast_url,
                                               ftp_eumetcast_userpwd,
                                               filter_expression_mesa_jrc,
                                               'ftp',
                                               my_logger=logger_spec)
    except:
        logger.error("Cannot connect to the PC1 via ftp. Wait 1 minute")
        current_list = []
        time.sleep(60)

    logger_spec.info("Number of files currently on PC1 for trigger %s is %i", source_id, len(current_list))

    if len(current_list) > 0:
        logger_spec.debug("Number of files already copied for trigger %s is %i",
                          source_id, len(processed_list))
        listtoprocess = set(current_list) - set(processed_list)
        logger_spec.info("Number of files to be copied for trigger %s is %i", source_id, len(listtoprocess))
        if listtoprocess:
            logger_spec.debug("Loop on the found files.")
            for filename in list(listtoprocess):
                try:
                    result = get_file_from_url(str(ftp_eumetcast_url) + os.path.sep + filename,
                                               target_file=os.path.basename(filename),
                                               target_dir=es_constants.ingest_dir,
                                               userpwd=str(ftp_eumetcast_userpwd))
                    if not result:
                        logger_spec.info("File %s copied.", filename)
                        processed_list.append(filename)
                    else:
                        logger_spec.warning("File %s not copied.", filename)
                except:
                    logger_spec.warning("Problem while copying file: %s.", filename)
    else:
        logger.debug("Nothing to process - go to next trigger.")

    # Drop entries that are no longer on the server (iterate over a copy, since
    # removing items while iterating over the same list skips elements)
    for infile in list(processed_list):
        if infile not in current_list:
            processed_list.remove(infile)

    functions.dump_obj_to_pickle(processed_list, processed_list_filename)
    functions.dump_obj_to_pickle(processed_info, processed_info_filename)
def loop_eumetcast(dry_run=False):

    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving EUMETCast data.")

    while True:
        logger.debug("Check if the EUMETCast input directory : %s exists.", input_dir)
        if not os.path.exists(input_dir):
            logger.error("The EUMETCast input directory : %s is not yet mounted.", input_dir)

        logger.debug("Check if the Ingest Server input directory : %s exists.", output_dir)
        if not os.path.exists(output_dir):
            logger.fatal("The Ingest Server input directory : %s doesn't exist.", output_dir)
            exit(1)

        if not os.path.exists(es_constants.base_tmp_dir):
            os.mkdir(es_constants.base_tmp_dir)
        if not os.path.exists(es_constants.processed_list_base_dir):
            os.mkdir(es_constants.processed_list_base_dir)
        if not os.path.exists(es_constants.processed_list_eum_dir):
            os.mkdir(es_constants.processed_list_eum_dir)

        while 1:
            try:
                time_sleep = user_def_sleep
                logger.debug("Sleep time set to : %s.", time_sleep)
            except NameError:
                logger.warning("Sleep time not defined. Setting to default=1min. Continue.")
                time_sleep = 60

            logger.debug("Reading active EUMETCAST data sources from database")
            eumetcast_sources_list = querydb.get_eumetcast_sources()
            logger.debug("N. %i active EUMETCAST data sources found", len(eumetcast_sources_list))

            # Get the EUMETCast MESA_JRC files
            try:
                get_archives_eumetcast()
            except:
                logger.error("Error in executing get_archives_eumetcast. Continue")

            # Loop over active triggers
            for eumetcast_source in eumetcast_sources_list:

                # Define a file_handler logger 'source-specific' (for GUI)
                logger_spec = log.my_logger('apps.get_eumetcast.' + eumetcast_source.eumetcast_id)
                logger.info("Processing eumetcast source %s.", eumetcast_source.eumetcast_id)

                if sys.platform == 'win32':
                    # Pierluigi: ':' is not allowed in Windows filenames
                    processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + \
                        str(eumetcast_source.eumetcast_id).replace(':', '_') + '.list'
                    processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + \
                        str(eumetcast_source.eumetcast_id).replace(':', '_') + '.info'
                else:
                    processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + \
                        str(eumetcast_source.eumetcast_id) + '.list'
                    processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + \
                        str(eumetcast_source.eumetcast_id) + '.info'

                # Create objects for list and info
                processed_list = []
                processed_info = {'length_proc_list': 0,
                                  'time_latest_exec': datetime.datetime.now(),
                                  'time_latest_copy': datetime.datetime.now()}

                logger.debug("Loading the processed file list for source %s ",
                             eumetcast_source.eumetcast_id)

                # Restore/Create List
                processed_list = functions.restore_obj_from_pickle(processed_list, processed_list_filename)
                # Restore/Create Info
                processed_info = functions.restore_obj_from_pickle(processed_info, processed_info_filename)
                # Update processing time (in case it is restored)
                processed_info['time_latest_exec'] = datetime.datetime.now()

                logger.debug("Create current list of files to process for trigger %s.",
                             eumetcast_source.eumetcast_id)
                current_list = find_files(input_dir, eumetcast_source.filter_expression_jrc)
                logger_spec.info("Number of files currently on PC1 for trigger %s is %i",
                                 eumetcast_source.eumetcast_id, len(current_list))

                if len(current_list) > 0:
                    logger_spec.debug("Number of files already copied for trigger %s is %i",
                                      eumetcast_source.eumetcast_id, len(processed_list))
                    listtoprocess = set(current_list) - set(processed_list)
                    logger_spec.debug("Number of files to be copied for trigger %s is %i",
                                      eumetcast_source.eumetcast_id, len(listtoprocess))
                    if listtoprocess:
                        logger_spec.debug("Loop on the found files.")
                        for filename in list(listtoprocess):
                            if os.path.isfile(os.path.join(input_dir, filename)):
                                if os.stat(os.path.join(input_dir, filename)).st_mtime < int(time.time()):
                                    logger_spec.debug("Processing file: " + os.path.basename(filename))
                                    if not dry_run:
                                        status = subprocess.getstatusoutput(
                                            "cp " + filename + " " + output_dir + os.sep +
                                            os.path.basename(filename))[0]
                                        if status == 0:
                                            logger_spec.info("File %s copied.", filename)
                                            processed_list.append(filename)
                                            # Update processing info
                                            processed_info['time_latest_copy'] = datetime.datetime.now()
                                            processed_info['length_proc_list'] = len(processed_list)
                                        else:
                                            logger_spec.warning("Problem while copying file: %s.", filename)
                                    else:
                                        logger_spec.info('Dry_run is set: do not get files')
                            else:
                                logger_spec.error("File %s removed by the system before being processed.",
                                                  filename)
                else:
                    logger.debug("Nothing to process - go to next trigger.")

                # Drop entries whose files no longer exist (iterate over a copy,
                # since removing items while iterating skips elements)
                for infile in list(processed_list):
                    if not os.path.exists(infile):
                        processed_list.remove(infile)

                if not dry_run:
                    functions.dump_obj_to_pickle(processed_list, processed_list_filename)
                    functions.dump_obj_to_pickle(processed_info, processed_info_filename)

            logger.info("End of Get EUMETCast loop. Sleep")
            time.sleep(float(time_sleep))

    exit(0)
def loop_eumetcast_ftp(dry_run=False):

    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving EUMETCast data.")

    ftp_eumetcast_url = es_constants.es2globals['ftp_eumetcast_url']
    ftp_eumetcast_userpwd = es_constants.es2globals['ftp_eumetcast_userpwd']

    while True:
        logger.debug("Check if the Ingest Server input directory : %s exists.", output_dir)
        if not os.path.exists(output_dir):
            logger.fatal("The Ingest Server input directory : %s doesn't exist.", output_dir)
            exit(1)

        if not os.path.exists(es_constants.base_tmp_dir):
            os.mkdir(es_constants.base_tmp_dir)
        if not os.path.exists(es_constants.processed_list_base_dir):
            os.mkdir(es_constants.processed_list_base_dir)
        if not os.path.exists(es_constants.processed_list_eum_dir):
            os.mkdir(es_constants.processed_list_eum_dir)

        while 1:
            try:
                time_sleep = user_def_sleep
                logger.debug("Sleep time set to : %s.", time_sleep)
            except NameError:
                logger.warning("Sleep time not defined. Setting to default=1min. Continue.")
                time_sleep = 60

            logger.debug("Reading active EUMETCAST data sources from database")
            eumetcast_sources_list = querydb.get_eumetcast_sources()
            logger.debug("N. %i active EUMETCAST data sources found", len(eumetcast_sources_list))

            # Get the EUMETCast MESA_JRC files
            try:
                get_archives_eumetcast_ftp()
            except:
                logger.error("Error in executing get_archives_eumetcast_ftp. Continue")

            # Loop over active triggers
            for eumetcast_source in eumetcast_sources_list:

                # Define a file_handler logger 'source-specific' (for GUI)
                logger_spec = log.my_logger('apps.get_eumetcast.' + eumetcast_source.eumetcast_id)
                logger.info("Processing eumetcast source %s.", eumetcast_source.eumetcast_id)

                if sys.platform == 'win32':
                    # Pierluigi: ':' is not allowed in Windows filenames
                    processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + \
                        str(eumetcast_source.eumetcast_id).replace(':', '_') + '.list'
                    processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + \
                        str(eumetcast_source.eumetcast_id).replace(':', '_') + '.info'
                else:
                    processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + \
                        str(eumetcast_source.eumetcast_id) + '.list'
                    processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + \
                        str(eumetcast_source.eumetcast_id) + '.info'

                # Create objects for list and info
                processed_list = []
                processed_info = {'length_proc_list': 0,
                                  'time_latest_exec': datetime.datetime.now(),
                                  'time_latest_copy': datetime.datetime.now()}

                logger.debug("Loading the processed file list for source %s ",
                             eumetcast_source.eumetcast_id)

                # Restore/Create List
                processed_list = functions.restore_obj_from_pickle(processed_list, processed_list_filename)
                # Restore/Create Info
                processed_info = functions.restore_obj_from_pickle(processed_info, processed_info_filename)
                # Update processing time (in case it is restored)
                processed_info['time_latest_exec'] = datetime.datetime.now()

                logger.debug("Create current list of files to process for trigger %s.",
                             eumetcast_source.eumetcast_id)
                try:
                    current_list = get_list_matching_files(ftp_eumetcast_url,
                                                           ftp_eumetcast_userpwd,
                                                           eumetcast_source.filter_expression_jrc,
                                                           'ftp',
                                                           my_logger=logger_spec)
                except:
                    logger.error("Cannot connect to the PC1 via ftp. Wait 1 minute")
                    current_list = []
                    time.sleep(60)

                if len(current_list) > 0:
                    # See ES2-204
                    logger_spec.debug("Number of files currently on PC1 for trigger %s is %i",
                                      eumetcast_source.eumetcast_id, len(current_list))
                    logger_spec.debug("Number of files already copied for trigger %s is %i",
                                      eumetcast_source.eumetcast_id, len(processed_list))
                    listtoprocess = set(current_list) - set(processed_list)
                    logger_spec.debug("Number of files to be copied for trigger %s is %i",
                                      eumetcast_source.eumetcast_id, len(listtoprocess))
                    if listtoprocess:
                        logger_spec.debug("Loop on the found files.")
                        for filename in list(listtoprocess):
                            try:
                                result = get_file_from_url(str(ftp_eumetcast_url) + os.path.sep + filename,
                                                           target_file=os.path.basename(filename),
                                                           target_dir=es_constants.ingest_dir,
                                                           userpwd=str(ftp_eumetcast_userpwd))
                                if not result:
                                    logger_spec.info("File %s copied.", filename)
                                    processed_list.append(filename)
                                else:
                                    logger_spec.warning("File %s not copied.", filename)
                            except:
                                logger_spec.warning("Problem while copying file: %s.", filename)
                else:
                    logger.debug("Nothing to process - go to next trigger.")

                # Drop entries that are no longer on the server (iterate over a copy,
                # since removing items while iterating skips elements)
                for infile in list(processed_list):
                    if infile not in current_list:
                        processed_list.remove(infile)

                if not dry_run:
                    functions.dump_obj_to_pickle(processed_list, processed_list_filename)
                    functions.dump_obj_to_pickle(processed_info, processed_info_filename)

            logger.info("End of Get EUMETCast loop. Sleep")
            time.sleep(float(time_sleep))

    exit(0)
def get_archives_eumetcast():

    input_dir = '/eumetcast_test/'
    # Ad-hoc definitions (to be copied to settings file)
    source_id = 'MESA:JRC:Archives'
    filter_expression_mesa_jrc = 'MESA_JRC_.*.tif'

    # Define a file_handler logger 'source-specific' (for GUI)
    logger_spec = log.my_logger('apps.get_archives_eumetcast')
    logger.info("Retrieving MESA_JRC files from PC1.")

    processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + str(source_id) + '.list'
    processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + str(source_id) + '.info'

    # Create objects for list and info
    processed_list = []
    processed_info = {'length_proc_list': 0,
                      'time_latest_exec': datetime.datetime.now(),
                      'time_latest_copy': datetime.datetime.now()}

    logger.warning("Input DIR for get_archives_eumetcast is defined as: *** %s ***", input_dir)
    logger.debug("Loading the processed file list for source %s ", source_id)

    # Restore/Create List
    processed_list = functions.restore_obj_from_pickle(processed_list, processed_list_filename)
    # Restore/Create Info
    processed_info = functions.restore_obj_from_pickle(processed_info, processed_info_filename)
    # Update processing time (in case it is restored)
    processed_info['time_latest_exec'] = datetime.datetime.now()

    logger.debug("Create current list of files to process for trigger %s.", source_id)
    try:
        current_list = find_files(input_dir, filter_expression_mesa_jrc)
    except:
        logger.error("Cannot read the input directory %s. Wait 1 minute", input_dir)
        current_list = []
        time.sleep(60)

    logger_spec.info("Number of files currently on PC1 for trigger %s is %i", source_id, len(current_list))

    if len(current_list) > 0:
        logger_spec.debug("Number of files already copied for trigger %s is %i",
                          source_id, len(processed_list))
        listtoprocess = set(current_list) - set(processed_list)
        logger_spec.info("Number of files to be copied for trigger %s is %i", source_id, len(listtoprocess))
        if listtoprocess:
            logger_spec.debug("Loop on the found files.")
            for filename in list(listtoprocess):
                try:
                    status = subprocess.getstatusoutput(
                        "cp " + filename + " " + output_dir + os.sep + os.path.basename(filename))[0]
                    if status == 0:
                        logger_spec.info("File %s copied.", filename)
                        processed_list.append(filename)
                        # Update processing info
                        processed_info['time_latest_copy'] = datetime.datetime.now()
                        processed_info['length_proc_list'] = len(processed_list)
                    else:
                        logger_spec.warning("Problem while copying file: %s.", filename)
                except:
                    logger_spec.warning("Problem while copying file: %s.", filename)
    else:
        logger.debug("Nothing to process - go to next trigger.")

    # Drop entries that are no longer in the input directory (iterate over a copy,
    # since removing items while iterating skips elements)
    for infile in list(processed_list):
        if infile not in current_list:
            processed_list.remove(infile)

    functions.dump_obj_to_pickle(processed_list, processed_list_filename)
    functions.dump_obj_to_pickle(processed_info, processed_info_filename)
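# Both EUMETCast loops and get_archives_eumetcast() build their candidate lists with
# find_files(input_dir, regex). A minimal sketch of such a helper (hypothetical -- the
# real one may walk the tree differently): match basenames against the filter expression.
import os
import re

def find_files_sketch(directory, filter_expression):
    matches = []
    pattern = re.compile(filter_expression)
    for root, _dirs, files in os.walk(directory):
        for name in files:
            if pattern.match(name):
                matches.append(os.path.join(root, name))
    return matches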
def loop_get_datastore(dry_run=False, test_one_source=False, my_source=None):

    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving data from INTERNET.")

    b_loop = True    # to exit loops in testing mode
    b_error = False  # checking files download - for testing mode

    while b_loop:
        output_dir = es_constants.get_internet_output_dir
        logger.debug("Check if the Datastore Server input directory : %s exists.", output_dir)
        if not os.path.exists(output_dir):
            # ToDo: create output_dir - ingest directory
            logger.fatal("The Ingest Server input directory : %s doesn't exist.", output_dir)
            if test_one_source:
                return 1
            else:
                exit(1)

        if not os.path.exists(es_constants.processed_list_datastore_dir):
            os.mkdir(es_constants.processed_list_datastore_dir)

        while b_loop:
            # # Check internet connection (or continue)
            # if not functions.internet_on():  # JEodesk doesn't detect the connection properly, so skip the check
            #     logger.error("The computer is not currently connected to the internet. Wait 1 minute.")
            #     b_error = True
            #     time.sleep(60)
            # else:
            logger.info("Reading active Datastore data sources from database")
            datastore_sources_list = querydb.get_active_datastore_sources()

            # Loop over active triggers
            for datastore_source in datastore_sources_list:
                try:
                    # In case of test_one_source, skip all other sources
                    if test_one_source:
                        if datastore_source.internet_id != test_one_source:
                            logger.debug("Running in test mode, and source is not %s. Continue.",
                                         test_one_source)
                            continue
                        else:
                            # Overwrite DB definitions with the passed object (if defined - for testing purposes)
                            if my_source:
                                datastore_source = my_source

                    execute_trigger = True
                    # Get this from the pads database table (move from internet_source
                    # 'pull_frequency' to the pads table, so that it can be exploited by
                    # eumetcast triggers as well). It is in minutes.
                    pull_frequency = datastore_source.pull_frequency

                    # Manage the case of files to be continuously downloaded (delay < 0)
                    if pull_frequency < 0:
                        do_not_consider_processed_list = True
                        delay_time_source_minutes = -pull_frequency
                    else:
                        do_not_consider_processed_list = False
                        delay_time_source_minutes = pull_frequency

                    # ':' is not allowed in Windows filenames; replace it on all platforms
                    internet_id = str(datastore_source.internet_id).replace(':', '_')

                    logger_spec = log.my_logger('apps.get_internet.' + internet_id)
                    logger.info("Processing internet source %s.", datastore_source.descriptive_name)

                    # Create objects for list and info
                    processed_info_filename = es_constants.get_datastore_processed_list_prefix + \
                        str(internet_id) + '.info'

                    # Restore/Create Info
                    processed_info = None
                    processed_info = functions.restore_obj_from_pickle(processed_info,
                                                                       processed_info_filename)
                    if processed_info is not None:
                        # Check the delay
                        current_delta = datetime.datetime.now() - processed_info['time_latest_exec']
                        current_delta_minutes = int(old_div(current_delta.seconds, 60))
                        if current_delta_minutes < delay_time_source_minutes:
                            logger.debug("Still waiting up to %i minutes since the latest execution.",
                                         delay_time_source_minutes)
                            execute_trigger = False
                    else:
                        # Create processed_info object
                        processed_info = {'length_proc_list': 0,
                                          'time_latest_exec': datetime.datetime.now(),
                                          'time_latest_copy': datetime.datetime.now()}
                        execute_trigger = True

                    if execute_trigger:
                        # Restore/Create List
                        processed_list = []
                        if not do_not_consider_processed_list:
                            processed_list_filename = es_constants.get_datastore_processed_list_prefix + \
                                internet_id + '.list'
                            # processed_list = functions.restore_obj_from_json(processed_list,
                            #                                                  processed_list_filename)
                        processed_info['time_latest_exec'] = datetime.datetime.now()

                        logger.debug("Create current list of files to process for source %s.",
                                     datastore_source.internet_id)
                        if datastore_source.user_name is None:
                            user_name = "anonymous"
                        else:
                            user_name = datastore_source.user_name
                        if datastore_source.password is None:
                            password = "******"
                        else:
                            password = datastore_source.password
                        usr_pwd = str(user_name) + ':' + str(password)

                        logger_spec.debug("   Url is %s.", datastore_source.url)
                        logger_spec.debug("   usr/pwd is %s.", usr_pwd)
                        logger_spec.debug("   regex is %s.", datastore_source.include_files_expression)

                        internet_type = datastore_source.type

                        if internet_type == 'cds_api':
                            current_list = cds_api_loop_internet(datastore_source)
                        elif internet_type == 'iri_api':
                            current_list = iri_api_loop_internet(datastore_source)
                        else:
                            logger.debug("No correct type for this datastore source type: %s" % internet_type)
                            current_list = []

                        logger_spec.debug("Number of files currently available for source %s is %i",
                                          internet_id, len(current_list))

                        if not dry_run:
                            # functions.dump_obj_to_json(processed_list, processed_list_filename)
                            functions.dump_obj_to_pickle(processed_info, processed_info_filename)

                    # if test_one_source:
                    #     b_loop = False
                    # else:
                    #     sleep(float(user_def_sleep))

                # Loop over sources
                except Exception as inst:
                    logger.error("Error while processing source %s. Continue" %
                                 datastore_source.descriptive_name)
                    b_error = True

            sleep(float(user_def_sleep))

    if not test_one_source:
        exit(0)
    else:
        return b_error
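# loop_get_datastore() delegates CDS downloads to cds_api_loop_internet(), whose internals
# are not shown here. For orientation, a bare-bones retrieval with the public cdsapi
# client looks roughly like this (dataset name and request keys are illustrative, not
# the values the datastore_source table actually configures):
import os
import cdsapi

def cds_retrieve_sketch(target_dir):
    client = cdsapi.Client()  # reads url/key from ~/.cdsapirc
    client.retrieve(
        'reanalysis-era5-single-levels',             # illustrative dataset
        {'product_type': 'reanalysis',
         'variable': '2m_temperature',
         'year': '2020', 'month': '01', 'day': '01',
         'time': '00:00', 'format': 'netcdf'},
        os.path.join(target_dir, 'era5_sample.nc'))  # download target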
def loop_eumetcast(dry_run=False):

    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving EUMETCast data.")

    while True:
        logger.debug("Check if the EUMETCast input directory : %s exists.", input_dir)
        if not os.path.exists(input_dir):
            logger.error("The EUMETCast input directory : %s is not yet mounted.", input_dir)

        logger.debug("Check if the Ingest Server input directory : %s exists.", output_dir)
        if not os.path.exists(output_dir):
            # TODO Jurvtk: Create the Ingest Server output directory if it doesn't exist!
            logger.fatal("The Ingest Server input directory : %s doesn't exist.", output_dir)
            exit(1)

        if not os.path.exists(es_constants.base_tmp_dir):
            os.mkdir(es_constants.base_tmp_dir)
        if not os.path.exists(es_constants.processed_list_base_dir):
            os.mkdir(es_constants.processed_list_base_dir)
        if not os.path.exists(es_constants.processed_list_eum_dir):
            os.mkdir(es_constants.processed_list_eum_dir)

        while 1:
            try:
                time_sleep = user_def_sleep
                logger.debug("Sleep time set to : %s.", time_sleep)
            except NameError:
                logger.warning("Sleep time not defined. Setting to default=1min. Continue.")
                time_sleep = 60

            logger.debug("Reading active EUMETCAST data sources from database")
            eumetcast_sources_list = querydb.get_eumetcast_sources(echo=echo_query)
            logger.debug("N. %i active EUMETCAST data sources found", len(eumetcast_sources_list))

            # Loop over active triggers
            for eumetcast_source in eumetcast_sources_list:

                # Define a file_handler logger 'source-specific' (for GUI)
                logger_spec = log.my_logger('apps.get_eumetcast.' + eumetcast_source.eumetcast_id)
                logger.debug("Processing eumetcast source %s.", eumetcast_source.eumetcast_id)

                processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + \
                    str(eumetcast_source.eumetcast_id) + '.list'
                processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + \
                    str(eumetcast_source.eumetcast_id) + '.info'

                # Create objects for list and info
                processed_list = []
                processed_info = {'length_proc_list': 0,
                                  'time_latest_exec': datetime.datetime.now(),
                                  'time_latest_copy': datetime.datetime.now()}

                logger.debug("Loading the processed file list for source %s ",
                             eumetcast_source.eumetcast_id)

                # Restore/Create List
                processed_list = functions.restore_obj_from_pickle(processed_list, processed_list_filename)
                # Restore/Create Info
                processed_info = functions.restore_obj_from_pickle(processed_info, processed_info_filename)
                # Update processing time (in case it is restored)
                processed_info['time_latest_exec'] = datetime.datetime.now()

                logger.debug("Create current list of files to process for trigger %s.",
                             eumetcast_source.eumetcast_id)
                current_list = find_files(input_dir, eumetcast_source.filter_expression_jrc)
                logger_spec.debug("Number of files currently on PC1 for trigger %s is %i",
                                  eumetcast_source.eumetcast_id, len(current_list))

                if len(current_list) > 0:
                    logger_spec.debug("Number of files already copied for trigger %s is %i",
                                      eumetcast_source.eumetcast_id, len(processed_list))
                    listtoprocess = set(current_list) - set(processed_list)
                    logger_spec.debug("Number of files to be copied for trigger %s is %i",
                                      eumetcast_source.eumetcast_id, len(listtoprocess))
                    if listtoprocess:
                        logger_spec.debug("Loop on the found files.")
                        for filename in list(listtoprocess):
                            if os.path.isfile(os.path.join(input_dir, filename)):
                                if os.stat(os.path.join(input_dir, filename)).st_mtime < int(time.time()):
                                    logger_spec.debug("Processing file: " + os.path.basename(filename))
                                    if not dry_run:
                                        # commands.getstatusoutput() is Python 2 only; use its
                                        # Python 3 equivalent subprocess.getstatusoutput()
                                        status = subprocess.getstatusoutput(
                                            "cp " + filename + " " + output_dir + os.sep +
                                            os.path.basename(filename))[0]
                                        if status == 0:
                                            logger.info("File %s copied.", filename)
                                            processed_list.append(filename)
                                            # Update processing info
                                            processed_info['time_latest_copy'] = datetime.datetime.now()
                                            processed_info['length_proc_list'] = len(processed_list)
                                        else:
                                            logger_spec.warning("Problem while copying file: %s.", filename)
                                    else:
                                        logger_spec.info('Dry_run is set: do not get files')
                            else:
                                logger_spec.error("File %s removed by the system before being processed.",
                                                  filename)
                else:
                    logger.debug("Nothing to process - go to next trigger.")

                # Drop entries whose files no longer exist (iterate over a copy,
                # since removing items while iterating skips elements)
                for infile in list(processed_list):
                    if not os.path.exists(infile):
                        processed_list.remove(infile)

                if not dry_run:
                    functions.dump_obj_to_pickle(processed_list, processed_list_filename)
                    functions.dump_obj_to_pickle(processed_info, processed_info_filename)

            time.sleep(float(10))

    # except Exception as e:
    #     logger.fatal(str(e))
    #     exit(1)
    exit(0)
def test_restore_obj_from_pickle(self):
    functions.dump_obj_to_pickle(self.processed_info, self.processed_info_filename)
    result = functions.restore_obj_from_pickle(self.processed_info, self.processed_info_filename)
    self.assertEqual(result, self.processed_info)
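# Counterpart sketch of the dump side used by the test above (hypothetical -- the real
# functions.dump_obj_to_pickle() may differ, e.g. by writing atomically via a temp file):
import pickle

def dump_obj_to_pickle_sketch(obj, filename):
    # Overwrite the target file with the serialized object.
    with open(filename, 'wb') as pkl_file:
        pickle.dump(obj, pkl_file)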