def process_channel(channel_id, aims_xml_info, level_qc):
    """ Downloads all the data available for one channel_id, validates the
    NetCDF file and hands it over to move_to_incoming

    The channel is processed when it has never been downloaded, or when the
    'thru_date' found in aims_xml_info is later than the last date already
    downloaded. The first failing check aborts the channel.

    channel_id(str)     : key of the channel in aims_xml_info
    aims_xml_info(dict) : aims_xml_info[channel_id] must provide 'from_date'
                          and 'thru_date'; 'thru_date' is parsed with the
                          "%Y-%m-%dT%H:%M:%SZ" format
    level_qc(int)       : QC level, forwarded to the download helpers

    Returns True when the file was passed to move_to_incoming, False when the
    channel was already processed or when any check failed.
    """
    # local import so the block does not rely on a file-level "import errno"
    import errno

    date_format = "%Y-%m-%dT%H:%M:%SZ"
    channel_id_info = aims_xml_info[channel_id]
    from_date = channel_id_info['from_date']
    thru_date = channel_id_info['thru_date']
    thru_date_already_downloaded = get_last_downloaded_date_channel(channel_id, level_qc, from_date)

    already_downloaded = has_channel_already_been_downloaded(channel_id, level_qc)
    # parsed once; the original evaluated this comparison twice
    new_data_available = (datetime.strptime(thru_date, date_format) >
                          datetime.strptime(thru_date_already_downloaded, date_format))

    if already_downloaded and not new_data_available:
        logger.info('>> QC%s - Channel %s already processed' % (str(level_qc), str(channel_id)))
        return False

    logger.info('>> QC%s - Processing channel %s' % (str(level_qc), str(channel_id)))
    if new_data_available:
        logger.info('>> QC%s - New data available for channel %s.\nLatest date downloaded: %s'
                    ' \nNew date available: %s' % (str(level_qc), str(channel_id),
                                                   thru_date_already_downloaded, thru_date))

    netcdf_tmp_file_path = download_channel(channel_id, from_date, thru_date, level_qc)
    contact_aims_msg = "Process of channel aborted - CONTACT AIMS"

    def abort(template):
        # log why the channel is rejected; template takes (channel_id, contact message)
        logger.error(template % (str(channel_id), contact_aims_msg))
        return False

    # nothing was downloaded, so there is no temporary directory to clean up
    if netcdf_tmp_file_path is None:
        return abort(' Channel %s - not valid zip file - %s')

    try:
        if is_no_data_found(netcdf_tmp_file_path):
            return abort(' Channel %s - NO_DATA_FOUND file in Zip file - %s')

        if not modify_soop_trv_netcdf(netcdf_tmp_file_path, channel_id_info):
            return abort(' Channel %s - Could not modify the NetCDF file - %s')

        main_var = get_main_soop_trv_var(netcdf_tmp_file_path)
        if has_var_only_fill_value(netcdf_tmp_file_path, main_var):
            return abort(' Channel %s - _Fillvalues only in main variable - %s')

        if _is_lat_lon_values_outside_boundaries(netcdf_tmp_file_path):
            return abort(' Channel %s - Lat/Lon values outside of boundaries - %s')

        if not is_time_monotonic(netcdf_tmp_file_path):
            return abort(' Channel %s - TIME value is not strickly monotonic - %s')

        if not pass_netcdf_checker(netcdf_tmp_file_path, tests=['cf:latest', 'imos:1.3']):
            abort(' Channel %s - File does not pass CF/IMOS compliance checker - %s')
            # keep a copy of the rejected file so it can be inspected later
            wip_path = os.environ.get('data_wip_path')
            shutil.copy(netcdf_tmp_file_path, os.path.join(wip_path, 'errors'))
            logger.error(' File copied to %s for debugging' %
                         (os.path.join(wip_path, 'errors', os.path.basename(netcdf_tmp_file_path))))
            return False

        move_to_incoming(netcdf_tmp_file_path)
        return True
    finally:
        # Remove the temporary download directory on every exit path, including
        # when one of the checks raises. A directory that is already gone is
        # tolerated (NOTE(review): move_to_incoming may remove it itself - confirm).
        try:
            shutil.rmtree(os.path.dirname(netcdf_tmp_file_path))
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
def process_monthly_channel(channel_id, aims_xml_info, level_qc):
    """ Fetches the data of one channel_id period by period and stages every
    file that passes all the checks with move_to_tmp_incoming

    channel_id      : key used to look the channel up in aims_xml_info
    aims_xml_info   : indexed by channel_id; the entry provides 'from_date'
                      and 'thru_date'
    level_qc(int)   : QC level forwarded to the download/bookkeeping helpers

    aims_service : 1   -> FAIMMS data
                   100 -> SOOP TRV data
                   300 -> NRS DATA
    for monthly data download, only 1 and 300 should be used

    The list of periods comes from create_list_of_dates_to_download. A period
    flagged by is_no_data_found is skipped; the first period failing any other
    check stops the loop. Nothing is returned; the logger is closed on exit.
    """
    logger.info('>> QC%s - Processing channel %s' % (str(level_qc), str(channel_id)))

    channel_id_info = aims_xml_info[channel_id]
    [start_dates, end_dates] = create_list_of_dates_to_download(
        channel_id, level_qc, channel_id_info['from_date'], channel_id_info['thru_date'])

    # nothing to download
    if len(start_dates) == 0:
        logger.info('QC%s - Channel %s already up to date' % (str(level_qc), str(channel_id)))
        close_logger(logger)
        return

    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    contact_aims_msg = "Process of channel aborted - CONTACT AIMS"

    def remove_tmp_dir(netcdf_path):
        # drop the temporary directory holding the downloaded file
        shutil.rmtree(os.path.dirname(netcdf_path))

    def keep_for_debugging(netcdf_path, wip_path):
        # copy the rejected file to <wip_path>/errors, then drop its temporary directory
        errors_dir = os.path.join(wip_path, 'errors')
        shutil.copy(netcdf_path, errors_dir)
        logger.error(' File copied to %s for debugging' %
                     (os.path.join(errors_dir, os.path.basename(netcdf_path))))
        remove_tmp_dir(netcdf_path)

    # download monthly file
    for period_start, period_end in zip(start_dates, end_dates):
        start_date = period_start.strftime(timestamp_format)
        end_date = period_end.strftime(timestamp_format)
        nc_path = download_channel(channel_id, start_date, end_date, level_qc)

        if nc_path is None:
            logger.error(' Channel %s - not valid zip file - %s' % (str(channel_id), contact_aims_msg))
            break

        # NO_DATA_FOUND file only means there is no data for the selected
        # time period. Could be some data afterwards
        if is_no_data_found(nc_path):
            logger.warning(' Channel %s - No data for the time period:%s - %s' %
                           (str(channel_id), start_date, end_date))
            remove_tmp_dir(nc_path)
            save_channel_info(channel_id, aims_xml_info, level_qc, end_date)
            continue

        # content checks, evaluated in order; the first failure aborts the channel
        failure = None
        if is_time_var_empty(nc_path):
            failure = ' Channel %s - No values in TIME variable - %s' % (str(channel_id), contact_aims_msg)
        elif not modify_anmn_nrs_netcdf(nc_path, channel_id_info):
            failure = ' Channel %s - Could not modify the NetCDF file - Process of channel aborted' % str(channel_id)
        elif has_var_only_fill_value(nc_path, get_main_netcdf_var(nc_path)):
            failure = ' Channel %s - _Fillvalues only in main variable - %s' % (str(channel_id), contact_aims_msg)
        elif get_anmn_nrs_site_name(nc_path) == []:
            failure = ' Channel %s - Unknown site_code gatt value - %s' % (str(channel_id), contact_aims_msg)
        elif not is_time_monotonic(nc_path):
            failure = ' Channel %s - TIME value is not strickly monotonic - %s' % (str(channel_id), contact_aims_msg)

        if failure is not None:
            logger.error(failure)
            remove_tmp_dir(nc_path)
            break

        # check every single file of the list. We don't assume that if one
        # passes, all pass ... past proved this
        wip_path = os.environ.get('data_wip_path')
        if not pass_netcdf_checker(nc_path, tests=['cf:latest', 'imos:1.3']):
            logger.error(' Channel %s - File does not pass CF/IMOS compliance checker - Process of channel aborted'
                         % str(channel_id))
            keep_for_debugging(nc_path, wip_path)
            break

        nc_path = fix_data_code_from_filename(nc_path)
        nc_path = fix_provider_code_from_filename(nc_path, 'IMOS_ANMN')

        if re.search('IMOS_ANMN_[A-Z]{1}_', nc_path) is None:
            logger.error(' Channel %s - File name Data code does not pass REGEX - Process of channel aborted'
                         % str(channel_id))
            keep_for_debugging(nc_path, wip_path)
            break

        move_to_tmp_incoming(nc_path)

        if TESTING:
            # testing only: record the first period and stop, so that a single
            # month gets downloaded for every channel
            save_channel_info(channel_id, aims_xml_info, level_qc, end_date)
            break

        save_channel_info(channel_id, aims_xml_info, level_qc, end_date)

    close_logger(logger)
def process_channel(channel_id, aims_xml_info, level_qc):
    """ Downloads all the data available for one channel_id, validates the
    NetCDF file and hands it over to move_to_incoming

    The channel is processed when it has never been downloaded, or when the
    'thru_date' found in aims_xml_info is later than the last date already
    downloaded. The first failing check aborts the channel.

    channel_id(str)     : key of the channel in aims_xml_info
    aims_xml_info(dict) : aims_xml_info[channel_id] must provide 'from_date'
                          and 'thru_date'; 'thru_date' is parsed with the
                          "%Y-%m-%dT%H:%M:%SZ" format
    level_qc(int)       : QC level, forwarded to the download helpers

    Returns True when the file was passed to move_to_incoming, False when the
    channel was already processed or when any check failed.
    """
    date_format = "%Y-%m-%dT%H:%M:%SZ"
    channel_id_info = aims_xml_info[channel_id]
    from_date = channel_id_info['from_date']
    thru_date = channel_id_info['thru_date']
    thru_date_already_downloaded = get_last_downloaded_date_channel(
        channel_id, level_qc, from_date)

    already_downloaded = has_channel_already_been_downloaded(channel_id, level_qc)
    # parsed once; the original evaluated this comparison twice
    new_data_available = (datetime.strptime(thru_date, date_format) >
                          datetime.strptime(thru_date_already_downloaded, date_format))

    if already_downloaded and not new_data_available:
        logger.info(
            'QC{level_qc} - Channel {channel_id}: already processed'.format(
                channel_id=str(channel_id), level_qc=str(level_qc)))
        return False

    logger.info('QC{level_qc} - Processing channel {channel_id}'.format(
        channel_id=str(channel_id), level_qc=str(level_qc)))
    if new_data_available:
        logger.info('QC%s - Channel %s: Latest date downloaded: [%s]'
                    ' - New date available: [%s]' %
                    (str(level_qc), str(channel_id),
                     thru_date_already_downloaded, thru_date))

    netcdf_tmp_file_path = download_channel(channel_id, from_date, thru_date,
                                            level_qc)
    contact_aims_msg = "Process of channel aborted - CONTACT AIMS"

    def abort(template):
        # log why the channel is rejected; template uses {channel_id} and {message}
        logger.error(template.format(channel_id=str(channel_id),
                                     message=contact_aims_msg))
        return False

    # nothing was downloaded, so there is no temporary directory to clean up
    if not netcdf_tmp_file_path:
        return abort('Channel {channel_id}: not valid zip file - {message}')

    try:
        if is_no_data_found(netcdf_tmp_file_path):
            return abort('Channel {channel_id}: NO_DATA_FOUND file in Zip file -{message}')

        if not modify_soop_trv_netcdf(netcdf_tmp_file_path, channel_id_info):
            return abort('Channel {channel_id}:Could not modify the NetCDF file - {message}')

        main_var = get_main_soop_trv_var(netcdf_tmp_file_path)
        if has_var_only_fill_value(netcdf_tmp_file_path, main_var):
            return abort('Channel {channel_id}: _Fillvalues only in main variable - {message}')

        if _is_lat_lon_values_outside_boundaries(netcdf_tmp_file_path):
            return abort('Channel {channel_id}: Lat/Lon values outside of boundaries - {message}')

        if not is_time_monotonic(netcdf_tmp_file_path):
            return abort('Channel {channel_id}: TIME value is not strickly monotonic - {message}')

        checker_retval = pass_netcdf_checker(
            netcdf_tmp_file_path, tests=['cf:latest', 'imos:1.3'])
        if not checker_retval:
            wip_path = os.environ.get('data_wip_path')
            abort('Channel {channel_id}: File does not pass CF/IMOS compliance checker - {message}')
            # keep a copy of the rejected file so it can be inspected later
            shutil.copy(netcdf_tmp_file_path, os.path.join(wip_path, 'errors'))
            logger.error('File copied to %s for debugging' %
                         (os.path.join(wip_path, 'errors',
                                       os.path.basename(netcdf_tmp_file_path))))
            return False

        move_to_incoming(netcdf_tmp_file_path)
        return True
    finally:
        # ensure the temporary file and its parent directory are removed on
        # every exit path; a directory that is already gone is not an error
        try:
            shutil.rmtree(os.path.dirname(netcdf_tmp_file_path))
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
def process_monthly_channel(channel_id, aims_xml_info, level_qc):
    """ Fetches the data of one channel_id period by period and stages every
    file that passes all the checks with move_to_tmp_incoming

    channel_id      : key used to look the channel up in aims_xml_info
    aims_xml_info   : indexed by channel_id; the entry provides 'from_date'
                      and 'thru_date'
    level_qc(int)   : QC level forwarded to the download/bookkeeping helpers

    aims_service : 1   -> FAIMMS data
                   100 -> SOOP TRV data
                   300 -> NRS DATA
    for monthly data download, only 1 and 300 should be used

    The list of periods comes from create_list_of_dates_to_download. A period
    flagged by is_no_data_found is skipped; the first period failing any other
    check stops the loop. Nothing is returned; the logger is closed on exit.
    """
    logger.info('>> QC%s - Processing channel %s' % (str(level_qc), str(channel_id)))

    channel_id_info = aims_xml_info[channel_id]
    [start_dates, end_dates] = create_list_of_dates_to_download(
        channel_id, level_qc, channel_id_info['from_date'], channel_id_info['thru_date'])

    # nothing to download
    if len(start_dates) == 0:
        logger.info('QC%s - Channel %s already up to date' % (str(level_qc), str(channel_id)))
        close_logger(logger)
        return

    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    contact_aims_msg = "Process of channel aborted - CONTACT AIMS"

    def remove_tmp_dir(netcdf_path):
        # drop the temporary directory holding the downloaded file
        shutil.rmtree(os.path.dirname(netcdf_path))

    def keep_for_debugging(netcdf_path, wip_path):
        # copy the rejected file to <wip_path>/errors, then drop its temporary directory
        errors_dir = os.path.join(wip_path, 'errors')
        shutil.copy(netcdf_path, errors_dir)
        logger.error(' File copied to %s for debugging' %
                     (os.path.join(errors_dir, os.path.basename(netcdf_path))))
        remove_tmp_dir(netcdf_path)

    # download monthly file
    for period_start, period_end in zip(start_dates, end_dates):
        start_date = period_start.strftime(timestamp_format)
        end_date = period_end.strftime(timestamp_format)
        nc_path = download_channel(channel_id, start_date, end_date, level_qc)

        if nc_path is None:
            logger.error(' Channel %s - not valid zip file - %s' % (str(channel_id), contact_aims_msg))
            break

        # NO_DATA_FOUND file only means there is no data for the selected
        # time period. Could be some data afterwards
        if is_no_data_found(nc_path):
            logger.warning(' Channel %s - No data for the time period:%s - %s' %
                           (str(channel_id), start_date, end_date))
            remove_tmp_dir(nc_path)
            save_channel_info(channel_id, aims_xml_info, level_qc, end_date)
            continue

        # content checks, evaluated in order; the first failure aborts the channel
        failure = None
        if is_time_var_empty(nc_path):
            failure = ' Channel %s - No values in TIME variable - %s' % (str(channel_id), contact_aims_msg)
        elif not modify_faimms_netcdf(nc_path, channel_id_info):
            failure = ' Channel %s - Could not modify the NetCDF file - Process of channel aborted' % str(channel_id)
        elif has_var_only_fill_value(nc_path, get_main_faimms_var(nc_path)):
            failure = ' Channel %s - _Fillvalues only in main variable - %s' % (str(channel_id), contact_aims_msg)
        elif get_faimms_site_name(nc_path) == [] or get_faimms_platform_type(nc_path) == []:
            failure = ' Channel %s - Unknown site_code gatt value - %s' % (str(channel_id), contact_aims_msg)
        elif not is_time_monotonic(nc_path):
            failure = ' Channel %s - TIME value is not strickly monotonic - %s' % (str(channel_id), contact_aims_msg)

        if failure is not None:
            logger.error(failure)
            remove_tmp_dir(nc_path)
            break

        # check every single file of the list. We don't assume that if one
        # passes, all pass ... past proved this
        wip_path = DATA_WIP_PATH
        if not pass_netcdf_checker(nc_path, tests=['cf:latest', 'imos:1.3']):
            logger.error(' Channel %s - File does not pass CF/IMOS compliance checker - Process of channel aborted'
                         % str(channel_id))
            keep_for_debugging(nc_path, wip_path)
            break

        nc_path = fix_data_code_from_filename(nc_path)
        nc_path = fix_provider_code_from_filename(nc_path, 'IMOS_FAIMMS')

        if re.search('IMOS_FAIMMS_[A-Z]{1}_', nc_path) is None:
            logger.error(' Channel %s - File name Data code does not pass REGEX - Process of channel aborted'
                         % str(channel_id))
            keep_for_debugging(nc_path, wip_path)
            break

        move_to_tmp_incoming(nc_path)

        if TESTING:
            # testing only: record the first period and stop, so that a single
            # month gets downloaded for every channel
            save_channel_info(channel_id, aims_xml_info, level_qc, end_date)
            break

        save_channel_info(channel_id, aims_xml_info, level_qc, end_date)

    close_logger(logger)
def process_channel(channel_id, aims_xml_info, level_qc):
    """ Downloads all the data available for one channel_id, validates the
    NetCDF file and hands it over to move_to_incoming

    The channel is processed when it has never been downloaded, or when the
    'thru_date' found in aims_xml_info is later than the last date already
    downloaded. The first failing check aborts the channel.

    channel_id(str)     : key of the channel in aims_xml_info
    aims_xml_info(dict) : aims_xml_info[channel_id] must provide 'from_date'
                          and 'thru_date'; 'thru_date' is parsed with the
                          "%Y-%m-%dT%H:%M:%SZ" format
    level_qc(int)       : QC level, forwarded to the download helpers

    Returns True when the file was passed to move_to_incoming, False when the
    channel was already processed or when any check failed.
    """
    # local import so the block does not rely on a file-level "import errno"
    import errno

    date_format = "%Y-%m-%dT%H:%M:%SZ"
    channel_id_info = aims_xml_info[channel_id]
    from_date = channel_id_info['from_date']
    thru_date = channel_id_info['thru_date']
    thru_date_already_downloaded = get_last_downloaded_date_channel(
        channel_id, level_qc, from_date)

    already_downloaded = has_channel_already_been_downloaded(channel_id, level_qc)
    # parsed once; the original evaluated this comparison twice
    new_data_available = (datetime.strptime(thru_date, date_format) >
                          datetime.strptime(thru_date_already_downloaded, date_format))

    if already_downloaded and not new_data_available:
        logger.info('>> QC%s - Channel %s already processed' %
                    (str(level_qc), str(channel_id)))
        return False

    logger.info('>> QC%s - Processing channel %s' %
                (str(level_qc), str(channel_id)))
    if new_data_available:
        logger.info(
            '>> QC%s - New data available for channel %s.\nLatest date downloaded: %s'
            ' \nNew date available: %s' %
            (str(level_qc), str(channel_id), thru_date_already_downloaded,
             thru_date))

    netcdf_tmp_file_path = download_channel(channel_id, from_date, thru_date,
                                            level_qc)
    contact_aims_msg = "Process of channel aborted - CONTACT AIMS"

    def abort(template):
        # log why the channel is rejected; template takes (channel_id, contact message)
        logger.error(template % (str(channel_id), contact_aims_msg))
        return False

    # nothing was downloaded, so there is no temporary directory to clean up
    if netcdf_tmp_file_path is None:
        return abort(' Channel %s - not valid zip file - %s')

    try:
        if is_no_data_found(netcdf_tmp_file_path):
            return abort(' Channel %s - NO_DATA_FOUND file in Zip file - %s')

        if not modify_soop_trv_netcdf(netcdf_tmp_file_path, channel_id_info):
            return abort(' Channel %s - Could not modify the NetCDF file - %s')

        main_var = get_main_soop_trv_var(netcdf_tmp_file_path)
        if has_var_only_fill_value(netcdf_tmp_file_path, main_var):
            return abort(' Channel %s - _Fillvalues only in main variable - %s')

        if _is_lat_lon_values_outside_boundaries(netcdf_tmp_file_path):
            return abort(' Channel %s - Lat/Lon values outside of boundaries - %s')

        if not is_time_monotonic(netcdf_tmp_file_path):
            return abort(' Channel %s - TIME value is not strickly monotonic - %s')

        if not pass_netcdf_checker(netcdf_tmp_file_path,
                                   tests=['cf:latest', 'imos:1.3']):
            abort(' Channel %s - File does not pass CF/IMOS compliance checker - %s')
            # keep a copy of the rejected file so it can be inspected later
            wip_path = os.environ.get('data_wip_path')
            shutil.copy(netcdf_tmp_file_path, os.path.join(wip_path, 'errors'))
            logger.error(' File copied to %s for debugging' % (os.path.join(
                wip_path, 'errors', os.path.basename(netcdf_tmp_file_path))))
            return False

        move_to_incoming(netcdf_tmp_file_path)
        return True
    finally:
        # Remove the temporary download directory on every exit path, including
        # when one of the checks raises. A directory that is already gone is
        # tolerated (NOTE(review): move_to_incoming may remove it itself - confirm).
        try:
            shutil.rmtree(os.path.dirname(netcdf_tmp_file_path))
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise