def test_get_active_internet_sources(self):
    internet_sources = querydb.get_active_internet_sources()
    logger.info("Internet sources are: %s", internet_sources)
    for internet_source in internet_sources:
        print(internet_source.url)
    # Smoke test: passes if no exception was raised above
    self.assertEqual(1, 1)
def testLocal_EOS_JEODESK_OLCI(self):
    list_internet_id = ['EOS:S3A:OLCI:WRR', 'EOS:S3B:OLCI:WRR']
    # internet_id = 'EOS:S3A:OLCI:WRR'
    start_date_dyn = -5
    end_date_dyn = -1
    for internet_id in list_internet_id:
        # Reset for each source, so a previous match does not leak through
        source_active = False
        internet_sources = querydb.get_active_internet_sources()
        for s in internet_sources:
            if s.internet_id == internet_id:
                internet_source = s
                source_active = True
        if source_active:
            my_source = SourceEOS(
                internet_id=internet_id,
                url=internet_source.url,
                descriptive_name="OLCI WRR",
                include_files_expression=internet_source.include_files_expression,
                pull_frequency=internet_source.pull_frequency,
                user_name=internet_source.user_name,
                password=internet_source.password,
                start_date=start_date_dyn,
                end_date=end_date_dyn,
                frequency_id=internet_source.frequency_id,
                type=internet_source.type,
                files_filter_expression=internet_source.files_filter_expression,
                https_params=internet_source.https_params)
            productcode = 'olci-wrr'
            productversion = 'V02.0'
            product = {"productcode": productcode, "version": productversion}
            result = get_internet.loop_get_internet(test_one_source=internet_id)
            self.assertEqual(result, 0)
def testLocal_EOS_JEODESK_SLSTR(self):
    list_internet_id = ['EOS:S3A:SLSTR:WST', 'EOS:S3B:SLSTR:WST']
    start_date_dyn = -5
    end_date_dyn = -1
    internet_sources = querydb.get_active_internet_sources()
    for internet_id in list_internet_id:
        # Reset for each source, so a previous match does not leak through
        source_active = False
        for s in internet_sources:
            if s.internet_id == internet_id:
                internet_source = s
                source_active = True
        if source_active:
            my_source = SourceEOS(
                internet_id=internet_id,
                url=internet_source.url,
                descriptive_name='sentinel',
                include_files_expression=internet_source.include_files_expression,
                pull_frequency=internet_source.pull_frequency,
                user_name=internet_source.user_name,
                password=internet_source.password,
                start_date=start_date_dyn,
                end_date=end_date_dyn,
                frequency_id=internet_source.frequency_id,
                type=internet_source.type,
                files_filter_expression=internet_source.files_filter_expression,
                https_params=internet_source.https_params)
            productcode = 'slstr-sst'
            productversion = '1.0'
            product = {"productcode": productcode, "version": productversion}
            # Test download (dynamic dates)
            result = get_internet.loop_get_internet(test_one_source=internet_id)
            self.assertEqual(result, 0)
def testLocal_EOS_JEODESK_SLSTR(self):
    source_active = False
    internet_id = 'EOS:S3A:SLSTR:WST'
    start_date_fixed = 20200301
    end_date_fixed = 20200310
    start_date_dyn = -5
    end_date_dyn = -3
    file_to_check = '32e61b08-0bcb-4d0a-a06e-f3d499dfb5fc/S3A_SL_2_WST____20200310T073813_20200310T091913_20200311T185257_6059_056_006______MAR_O_NT_003'
    internet_sources = querydb.get_active_internet_sources()
    for s in internet_sources:
        if s.internet_id == internet_id:
            internet_source = s
            source_active = True
    if source_active:
        my_source = Source(
            internet_id=internet_id,
            url=internet_source.url,
            descriptive_name='sentinel',
            include_files_expression=internet_source.include_files_expression,
            pull_frequency=internet_source.pull_frequency,
            user_name=internet_source.user_name,
            password=internet_source.password,
            start_date=start_date_dyn,
            end_date=end_date_dyn,
            frequency_id=internet_source.frequency_id,
            type=internet_source.type,
            files_filter_expression=internet_source.files_filter_expression,
            https_params=internet_source.https_params)
        productcode = 'slstr-sst'
        productversion = '1.0'
        product = {"productcode": productcode, "version": productversion}
        # Test download (dynamic dates)
        result = loop_get_internet(test_one_source=internet_id, my_source=my_source)
        self.assertEqual(result, 0)
def testLocal_EOS_JEODESK_OLCI(self):
    source_active = False
    internet_id = 'EOS:S3A:OLCI:WRR'
    start_date_fixed = 20200301
    end_date_fixed = 20200310
    start_date_dyn = -2
    end_date_dyn = -1
    file_to_check = '44c285d7-3809-4810-836e-510ee52f326a/S3A_OL_2_WRR____20200310T065044_20200310T073438_20200311T133228_2634_056_006______MAR_O_NT_002'
    internet_sources = querydb.get_active_internet_sources()
    for s in internet_sources:
        if s.internet_id == internet_id:
            internet_source = s
            source_active = True
    if source_active:
        my_source = Source(
            internet_id=internet_id,
            url=internet_source.url,
            descriptive_name="OLCI WRR",
            include_files_expression=internet_source.include_files_expression,
            pull_frequency=internet_source.pull_frequency,
            user_name=internet_source.user_name,
            password=internet_source.password,
            start_date=start_date_dyn,
            end_date=end_date_dyn,
            frequency_id=internet_source.frequency_id,
            type=internet_source.type,
            files_filter_expression=internet_source.files_filter_expression,
            https_params=internet_source.https_params)
        productcode = 'olci-wrr'
        productversion = 'V02.0'
        product = {"productcode": productcode, "version": productversion}
        result = loop_get_internet(test_one_source=internet_id, my_source=my_source)
        self.assertEqual(result, 0)
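# A hypothetical usage sketch, not part of the original file: assuming the
# test methods above live in a unittest.TestCase subclass (the module and
# class names below are placeholders), a single download test could be run as:
#
#     python -m unittest test_get_internet.TestGetInternet.testLocal_EOS_JEODESK_OLCI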
def loop_get_internet(dry_run=False):
    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving data from INTERNET.")

    while True:
        output_dir = es_constants.ingest_dir
        logger.debug("Check if the Ingest Server input directory %s exists.", output_dir)
        if not os.path.exists(output_dir):
            logger.fatal("The Ingest Server input directory %s doesn't exist.", output_dir)
            exit(1)

        if not os.path.exists(es_constants.processed_list_int_dir):
            os.mkdir(es_constants.processed_list_int_dir)

        while 1:
            try:
                time_sleep = user_def_sleep
                logger.debug("Sleep time set to: %s.", time_sleep)
            except:
                logger.warning("Sleep time not defined. Setting to default=1min. Continue.")
                time_sleep = 60

            logger.debug("Reading active INTERNET data sources from database")
            internet_sources_list = querydb.get_active_internet_sources(echo=echo_query)

            # Loop over active triggers
            try:
                for internet_source in internet_sources_list:
                    logger.debug("Processing internet source %s.", internet_source.descriptive_name)

                    processed_list_filename = es_constants.get_internet_processed_list_prefix + str(internet_source.internet_id) + '.list'
                    processed_info_filename = es_constants.get_internet_processed_list_prefix + str(internet_source.internet_id) + '.info'

                    # Create objects for list and info
                    processed_list = []
                    processed_info = {'length_proc_list': 0,
                                      'time_latest_exec': datetime.datetime.now(),
                                      'time_latest_copy': datetime.datetime.now()}

                    # Restore/Create List
                    processed_list = functions.restore_obj_from_pickle(processed_list, processed_list_filename)
                    # Restore/Create Info
                    processed_info = functions.restore_obj_from_pickle(processed_info, processed_info_filename)
                    # Update processing time (in case it is restored)
                    processed_info['time_latest_exec'] = datetime.datetime.now()

                    logger.debug("Create current list of files to process for source %s.", internet_source.internet_id)
                    if internet_source.user_name is None:
                        user_name = "anonymous"
                    else:
                        user_name = internet_source.user_name
                    if internet_source.password is None:
                        password = "******"
                    else:
                        password = internet_source.password

                    usr_pwd = str(user_name) + ':' + str(password)

                    logger.debug("   Url is %s.", internet_source.url)
                    logger.debug("   usr/pwd is %s.", usr_pwd)
                    logger.debug("   regex is %s.", internet_source.include_files_expression)

                    internet_type = internet_source.type

                    if internet_type == 'ftp':
                        # Note that the following list might contain sub-dirs (it reflects full_regex)
                        current_list = get_list_matching_files_dir_ftp(str(internet_source.url),
                                                                       str(usr_pwd),
                                                                       str(internet_source.include_files_expression))
                    elif internet_type == 'http_tmpl':
                        # Manage the dates: start_date is mandatory; end_date is replaced by 'today' if missing/wrong
                        try:
                            if functions.is_date_yyyymmdd(str(internet_source.start_date), silent=True):
                                datetime_start = datetime.datetime.strptime(str(internet_source.start_date), '%Y%m%d')
                            else:
                                raise Exception("Start Date not valid")
                        except:
                            raise Exception("Start Date not valid")
                        try:
                            if functions.is_date_yyyymmdd(str(internet_source.end_date), silent=True):
                                datetime_end = datetime.datetime.strptime(str(internet_source.end_date), '%Y%m%d')
                            else:
                                datetime_end = datetime.datetime.today()
                        except:
                            # Fall back to 'today', so that datetime_end is always defined
                            datetime_end = datetime.datetime.today()

                        # Create the full list of filenames from a 'template' URL which contains date placeholders
                        try:
                            current_list = build_list_matching_for_http(str(internet_source.url),
                                                                        str(internet_source.include_files_expression),
                                                                        datetime_start,
                                                                        datetime_end,
                                                                        str(internet_source.frequency_id))
                        except:
                            logger.error("Error in creating date lists. Continue")
                            continue
                    else:
                        # Skip unknown types, so that current_list is never left undefined
                        logger.error("No correct type for this internet source type: %s. Continue", internet_type)
                        continue

                    logger.debug("Number of files currently available for source %s is %i",
                                 internet_source.internet_id, len(current_list))
                    if len(current_list) > 0:
                        logger.debug("Number of files already copied for trigger %s is %i",
                                     internet_source.internet_id, len(processed_list))
                        listtoprocess = []
                        for current_file in current_list:
                            if len(processed_list) == 0:
                                listtoprocess.append(current_file)
                            else:
                                # if os.path.basename(current_file) not in processed_list: -> save in .list subdirs as well !!
                                if current_file not in processed_list:
                                    listtoprocess.append(current_file)
                        logger.debug("Number of files to be copied for trigger %s is %i",
                                     internet_source.internet_id, len(listtoprocess))
                        if listtoprocess:
                            logger.debug("Loop on the found files.")
                            if not dry_run:
                                for filename in list(listtoprocess):
                                    logger.debug("Processing file: " + str(internet_source.url) + os.path.sep + filename)
                                    try:
                                        result = get_file_from_url(str(internet_source.url) + os.path.sep + filename,
                                                                   target_file=os.path.basename(filename),
                                                                   target_dir=es_constants.ingest_dir,
                                                                   userpwd=str(usr_pwd))
                                        if not result:
                                            logger.info("File %s copied.", filename)
                                            processed_list.append(filename)
                                    except:
                                        logger.warning("Problem while copying file: %s.", filename)
                            else:
                                logger.info('Dry_run is set: do not get files')

                    if not dry_run:
                        functions.dump_obj_to_pickle(processed_list, processed_list_filename)
                        functions.dump_obj_to_pickle(processed_info, processed_info_filename)

                sleep(float(time_sleep))
            # Loop over sources
            except Exception as inst:
                logger.error("Error while processing source %s. Continue" % internet_source.descriptive_name)

            sleep(float(time_sleep))

    exit(0)
def loop_get_internet(dry_run=False, test_one_source=False):
    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving data from INTERNET.")

    while True:
        output_dir = es_constants.get_internet_output_dir
        logger.debug("Check if the Ingest Server input directory %s exists.", output_dir)
        if not os.path.exists(output_dir):
            # ToDo: create output_dir - ingest directory
            logger.fatal("The Ingest Server input directory %s doesn't exist.", output_dir)
            exit(1)

        if not os.path.exists(es_constants.processed_list_int_dir):
            os.mkdir(es_constants.processed_list_int_dir)

        while 1:
            # Check internet connection (or continue)
            if not functions.internet_on():
                logger.error("The computer is not currently connected to the internet. Wait 1 minute.")
                time.sleep(60)
            else:
                try:
                    time_sleep = user_def_sleep
                    logger.debug("Sleep time set to: %s.", time_sleep)
                except:
                    logger.warning("Sleep time not defined. Setting to default=1min. Continue.")
                    time_sleep = 60

                logger.info("Reading active INTERNET data sources from database")
                internet_sources_list = querydb.get_active_internet_sources()

                # Loop over active triggers
                for internet_source in internet_sources_list:
                    try:
                        if test_one_source and (internet_source.internet_id != test_one_source):
                            logger.info("Running in test mode, and source is not %s. Continue.", test_one_source)
                            continue

                        execute_trigger = True
                        # Get this from the pads database table (move from internet_source 'pull_frequency' to the pads
                        # table, so that it can be exploited by eumetcast triggers as well). It is in minutes.
                        pull_frequency = internet_source.pull_frequency

                        # Manage the case of files to be continuously downloaded (delay < 0)
                        if pull_frequency < 0:
                            do_not_consider_processed_list = True
                            delay_time_source_minutes = -pull_frequency
                        else:
                            do_not_consider_processed_list = False
                            delay_time_source_minutes = pull_frequency

                        if sys.platform == 'win32':
                            internet_id = str(internet_source.internet_id).replace(':', '_')
                        else:
                            internet_id = str(internet_source.internet_id)

                        logger_spec = log.my_logger('apps.get_internet.' + internet_id)
                        logger.info("Processing internet source %s.", internet_source.descriptive_name)

                        # Create objects for list and info
                        processed_info_filename = es_constants.get_internet_processed_list_prefix + str(internet_id) + '.info'

                        # Restore/Create Info
                        processed_info = None
                        processed_info = functions.restore_obj_from_pickle(processed_info, processed_info_filename)

                        if processed_info is not None:
                            # Check the delay (total_seconds also covers deltas longer than one day)
                            current_delta = datetime.datetime.now() - processed_info['time_latest_exec']
                            current_delta_minutes = int(current_delta.total_seconds() / 60)
                            if current_delta_minutes < delay_time_source_minutes:
                                logger.debug("Still waiting up to %i minutes since the latest execution.",
                                             delay_time_source_minutes)
                                execute_trigger = False
                        else:
                            # Create processed_info object
                            processed_info = {'length_proc_list': 0,
                                              'time_latest_exec': datetime.datetime.now(),
                                              'time_latest_copy': datetime.datetime.now()}
                            execute_trigger = True

                        if execute_trigger:
                            # Restore/Create List
                            processed_list = []
                            if not do_not_consider_processed_list:
                                processed_list_filename = es_constants.get_internet_processed_list_prefix + internet_id + '.list'
                                processed_list = functions.restore_obj_from_pickle(processed_list, processed_list_filename)

                            processed_info['time_latest_exec'] = datetime.datetime.now()

                            logger.debug("Create current list of files to process for source %s.",
                                         internet_source.internet_id)
                            if internet_source.user_name is None:
                                user_name = "anonymous"
                            else:
                                user_name = internet_source.user_name
                            if internet_source.password is None:
                                password = "******"
                            else:
                                password = internet_source.password
                            usr_pwd = str(user_name) + ':' + str(password)

                            logger_spec.debug("   Url is %s.", internet_source.url)
                            logger_spec.debug("   usr/pwd is %s.", usr_pwd)
                            logger_spec.debug("   regex is %s.", internet_source.include_files_expression)
                            internet_type = internet_source.type

                            if internet_type == 'ftp' or internet_type == 'http':
                                # Manage the end_date (added for MODIS_FIRMS)
                                if internet_source.end_date != '':
                                    end_date = internet_source.end_date
                                else:
                                    end_date = None
                                # Note that the following list might contain sub-dirs (it reflects full_regex)
                                try:
                                    current_list = get_list_matching_files(str(internet_source.url),
                                                                           str(usr_pwd),
                                                                           str(internet_source.include_files_expression),
                                                                           internet_type,
                                                                           end_date=end_date)
                                except:
                                    logger.error("Error in creating file lists. Continue")
                                    continue

                            elif internet_type == 'http_tmpl':
                                # Create the full list of filenames from a 'template' URL which contains date placeholders
                                try:
                                    current_list = build_list_matching_files_tmpl(str(internet_source.url),
                                                                                  str(internet_source.include_files_expression),
                                                                                  internet_source.start_date,
                                                                                  internet_source.end_date,
                                                                                  str(internet_source.frequency_id))
                                except:
                                    logger.error("Error in creating date lists. Continue")
                                    continue

                            elif internet_type == 'motu_client':
                                # Create the full list of filenames from a 'template' which contains date placeholders
                                try:
                                    current_list = build_list_matching_files_motu(str(internet_source.url),
                                                                                  str(internet_source.include_files_expression),
                                                                                  internet_source.start_date,
                                                                                  internet_source.end_date,
                                                                                  str(internet_source.frequency_id),
                                                                                  str(internet_source.user_name),
                                                                                  str(internet_source.password),
                                                                                  str(internet_source.files_filter_expression))
                                except:
                                    logger.error("Error in creating motu_client lists. Continue")
                                    continue

                            # elif internet_type == 'sentinel_sat':
                            #     # Create the full filename from a 'template' which contains
                            #     try:
                            #         current_list = build_list_matching_files_sentinel_sat(str(internet_source.url),
                            #                                                               str(internet_source.include_files_expression),
                            #                                                               internet_source.start_date,
                            #                                                               internet_source.end_date,
                            #                                                               str(internet_source.frequency_id),
                            #                                                               str(internet_source.user_name),
                            #                                                               str(internet_source.password),
                            #                                                               # str(internet_source.files_filter_expression),
                            #                                                               )
                            #     except:
                            #         logger.error("Error in creating sentinel_sat lists. Continue")
                            #         continue

                            elif internet_type == 'local':
                                logger.info("This internet source is meant to copy data on local filesystem")
                                try:
                                    current_list = get_list_matching_files_dir_local(str(internet_source.url),
                                                                                     str(internet_source.include_files_expression))
                                except:
                                    logger.error("Error in creating date lists. Continue")
                                    continue

                            elif internet_type == 'offline':
                                logger.info("This internet source is meant to work offline (GoogleDrive)")
                                current_list = []

                            else:
                                logger.error("No correct type for this internet source type: %s" % internet_type)
                                current_list = []

                            logger_spec.debug("Number of files currently available for source %s is %i",
                                              internet_id, len(current_list))
                            if len(current_list) > 0:
                                logger_spec.debug("Number of files already copied for trigger %s is %i",
                                                  internet_id, len(processed_list))
                                listtoprocess = []
                                for current_file in current_list:
                                    if len(processed_list) == 0:
                                        listtoprocess.append(current_file)
                                    else:
                                        # if os.path.basename(current_file) not in processed_list: -> save in .list subdirs as well !!
                                        if current_file not in processed_list:
                                            listtoprocess.append(current_file)
                                logger_spec.debug("Number of files to be copied for trigger %s is %i",
                                                  internet_id, len(listtoprocess))
                                if listtoprocess:
                                    # # Debug
                                    # toprint = ''
                                    # for elem in listtoprocess:
                                    #     toprint += elem + ','
                                    # logger_spec.info('List in get_list_matching_files: %s' % toprint)
                                    logger_spec.debug("Loop on the found files.")
                                    if not dry_run:
                                        for filename in list(listtoprocess):
                                            logger_spec.debug("Processing file: " + str(internet_source.url) +
                                                              os.path.sep + filename)
                                            try:
                                                if internet_type == 'local':
                                                    shutil.copyfile(str(internet_source.url) + os.path.sep + filename,
                                                                    es_constants.ingest_dir + os.path.basename(filename))
                                                    result = 0
                                                elif internet_type == 'motu_client':
                                                    result = get_file_from_motu_command(str(filename),
                                                                                        # target_file=internet_source.files_filter_expression,
                                                                                        target_dir=es_constants.ingest_dir,
                                                                                        userpwd=str(usr_pwd))
                                                # elif internet_type == 'sentinel_sat':
                                                #     result = get_file_from_sentinelsat_url(str(filename),
                                                #                                            target_dir=es_constants.ingest_dir)
                                                else:
                                                    result = get_file_from_url(str(internet_source.url) + os.path.sep + filename,
                                                                               target_file=os.path.basename(filename),
                                                                               target_dir=es_constants.ingest_dir,
                                                                               userpwd=str(usr_pwd))
                                                if not result:
                                                    logger_spec.info("File %s copied.", filename)
                                                    processed_list.append(filename)
                                                else:
                                                    logger_spec.warning("File %s not copied.", filename)
                                            except:
                                                logger_spec.warning("Problem while copying file: %s.", filename)
                                    else:
                                        logger_spec.info('Dry_run is set: do not get files')

                            if not dry_run:
                                functions.dump_obj_to_pickle(processed_list, processed_list_filename)
                                functions.dump_obj_to_pickle(processed_info, processed_info_filename)

                        sleep(float(time_sleep))
                    # Loop over sources
                    except Exception as inst:
                        logger.error("Error while processing source %s. Continue" % internet_source.descriptive_name)

                sleep(float(time_sleep))

    exit(0)