def Test_get_datasource_descr(self):
    datasource_descr = querydb.get_datasource_descr(source_type='EUMETCAST', source_id='EO:EUM:DAT:SPOT:S10NDVI')
    logger.info("Eumetcast source description is: %s", datasource_descr)
    for row in datasource_descr:
        print(row)

    datasource_descr = querydb.get_datasource_descr(source_type='INTERNET', source_id='USGS:EARLWRN:FEWSNET')
    logger.info("Internet source description is: %s", datasource_descr)
    for row in datasource_descr:
        print(row)

    self.assertEqual(1, 1)
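# Illustrative sketch of the access pattern used throughout this file for
# querydb.get_datasource_descr() results: the call returns an indexable collection of
# description records, and the first record exposes the datasource_descr_id attribute
# that the ingestion helpers expect. The source_id below is one already used in the
# debug functions further down; the helper function itself is only an example.
def example_read_datasource_descr():
    records = querydb.get_datasource_descr(source_type='INTERNET',
                                           source_id='CDS:ERA5:REANALYSIS:SST:MONTH')
    descr = records[0]
    logger.info("datasource_descr_id is: %s", descr.datasource_descr_id)
    return descr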
def ingest_netcdf_cds(internet_source, downloaded_file, processed_item):
    ingestion_status = False
    try:
        product = {"productcode": internet_source.productcode,
                   "version": internet_source.version}

        # Datasource description
        datasource_descr = querydb.get_datasource_descr(source_type='INTERNET',
                                                        source_id=internet_source.internet_id)
        datasource_descr = datasource_descr[0]

        # Get list of subproducts
        sub_datasource = ingestion.get_subrproducts_from_ingestion(product, datasource_descr.datasource_descr_id)

        ingestion_status = ingestion_netcdf.ingestion_netcdf(downloaded_file, processed_item.split(':')[0], product,
                                                             sub_datasource, datasource_descr, logger)
    except Exception as inst:
        logger.debug("Error in CDS Ingestion: %s" % internet_source)
        raise

    return ingestion_status
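# Illustrative usage sketch of ingest_netcdf_cds() as it might be driven from a download
# loop. The downloaded_file path and the processed_item value are hypothetical; the only
# assumption taken from the function above is that processed_item carries the date before
# the first ':' separator (processed_item.split(':')[0]).
def example_ingest_downloaded_cds_file(internet_source):
    downloaded_file = '/data/ingest/202101010000_reanalysis-era5-single-levels.nc'  # hypothetical path
    processed_item = '202101010000:reanalysis-era5-single-levels'                   # hypothetical item
    status = ingest_netcdf_cds(internet_source, downloaded_file, processed_item)
    if status:
        logger.info("CDS ingestion completed for: %s", downloaded_file)
    return status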
def ingest_jrc_wbd(input_dir, in_date=None, avg=None):
    if avg:
        date_fileslist = glob.glob(input_dir + '/JRC-WBD_AVG2000-' + in_date + '*')
        subproductcode = 'avg'
        mapsetcode = 'WD-GEE-ECOWAS-AVG'
        datasource_descrID = 'JRC:WBD:GEE:AVG'
    else:
        date_fileslist = glob.glob(input_dir + '/JRC-WBD_' + in_date + '*')
        subproductcode = 'occurr'
        mapsetcode = 'WD-GEE-ECOWAS-1'
        datasource_descrID = 'JRC:WBD:GEE'

    productcode = 'wd-gee'
    productversion = '1.0'

    product = {"productcode": productcode, "version": productversion}
    args = {"productcode": productcode,
            "subproductcode": subproductcode,
            "datasource_descr_id": datasource_descrID,
            "version": productversion}

    product_in_info = querydb.get_product_in_info(**args)
    re_process = product_in_info.re_process
    re_extract = product_in_info.re_extract

    sprod = {'subproduct': subproductcode,
             'mapsetcode': mapsetcode,
             're_extract': re_extract,
             're_process': re_process}
    subproducts = [sprod]

    output_file = es_constants.es2globals['processing_dir'] + \
        functions.set_path_sub_directory(productcode, subproductcode, 'Ingest', productversion, mapsetcode) + \
        functions.set_path_filename(in_date, productcode, subproductcode, mapsetcode, productversion, '.tif')

    for internet_filter, datasource_descr in querydb.get_datasource_descr(source_type='INTERNET',
                                                                          source_id=datasource_descrID):
        ingestion.ingestion(date_fileslist, in_date, product, subproducts, datasource_descr, logger, echo_query=1)

    return output_file
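# Illustrative usage sketch of ingest_jrc_wbd(). The input directory is hypothetical and
# is only expected to hold files matching the JRC-WBD_<date>* / JRC-WBD_AVG2000-<date>*
# patterns; subproduct, mapset and datasource ids are resolved inside the function from
# the avg flag, and the returned value is the ingested GeoTIFF path.
def example_ingest_jrc_wbd_day():
    input_dir = '/data/ingest/jrc-wbd'  # hypothetical directory
    occurrence_tif = ingest_jrc_wbd(input_dir, in_date='20200101', avg=None)  # 'occurr' subproduct
    average_tif = ingest_jrc_wbd(input_dir, in_date='20200101', avg=True)     # 'avg' subproduct
    return occurrence_tif, average_tif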
def test_ingest_g_cls_ndvi_200_1Km(self):
    # Test Copernicus Products version 2.2 (starting with NDVI 2.2.1)
    productcode = 'vgt-ndvi'
    productversion = 'proba-v2.2'
    subproductcode = 'ndv'
    mapsetcode = 'SPOTV-Africa-1km'
    datasource_descrID = 'PDF:GLS:PROBA-V1:NDVI300'

    # input_dir = self.test_ingest_dir + os.path.sep + productcode + os.path.sep + self.native_dir
    # date_fileslist = [os.path.join(input_dir, 'c_gls_NDVI_202003010000_AFRI_PROBAV_V2.2.1.zip')]
    date_fileslist = glob.glob(
        '/eos/jeodpp/home/users/venkavi/data/processing/vgt-ndvi/sv2-pv2.2/archive/c_gls_NDVI300_202007110000_GLOBE_PROBAV_V1.0.1.nc*')
    in_date = '202007110000'
    out_date = '20200711'

    product = {"productcode": productcode, "version": productversion}
    args = {"productcode": productcode,
            "subproductcode": subproductcode,
            "datasource_descr_id": datasource_descrID,
            "version": productversion}

    product_in_info = querydb.get_product_in_info(**args)
    re_process = product_in_info.re_process
    re_extract = product_in_info.re_extract

    sprod = {'subproduct': subproductcode,
             'mapsetcode': mapsetcode,
             're_extract': re_extract,
             're_process': re_process}
    subproducts = [sprod]

    # Remove existing output
    # self.remove_output_file(productcode, subproductcode, productversion, mapsetcode, out_date)

    datasource_descr = querydb.get_datasource_descr(source_type='INTERNET', source_id=datasource_descrID)

    ingestion.ingestion(date_fileslist, in_date, product, subproducts, datasource_descr[0], logger,
                        echo_query=1, test_mode=False)

    # status = self.checkIngestedFile(productcode=productcode, subproductcode=subproductcode,
    #                                 version=productversion, mapsetcode=mapsetcode, date=out_date)
    self.assertEqual(1, 1)
def debug_CDS_MSLP_hour_netcdf(self):
    internet_id = "CDS:ERA5:REANALYSIS:MSLP:HOUR"
    product = {"productcode": "era5-hourly-mslp", "version": "1.0"}
    downloaded_file = '/data/processing/era5-hourly-mslp/1.0/archive/202101010100_reanalysis-era5-single-levels_reanalysis_mean_sea_level_pressure.nc'
    in_date = '202101010000'

    # Datasource description
    datasource_descr = querydb.get_datasource_descr(source_type='INTERNET', source_id=internet_id)
    datasource_descr = datasource_descr[0]

    # Get list of subproducts
    sub_datasource = ingestion.get_subrproducts_from_ingestion(product, datasource_descr.datasource_descr_id)

    ingestion_status = ingestion_netcdf.ingestion_netcdf(downloaded_file, in_date, product, sub_datasource,
                                                         datasource_descr, logger, test_mode=True)
def debug_CDS_SST_MONTH_netcdf(self):
    internet_id = "CDS:ERA5:REANALYSIS:SST:MONTH"
    product = {"productcode": "era5-monthly-sst", "version": "1.0"}
    downloaded_file = '/data/ingest/20210101_sst_monthly_average.nc'
    in_date = '202101010000'

    # Datasource description
    datasource_descr = querydb.get_datasource_descr(source_type='INTERNET', source_id=internet_id)
    datasource_descr = datasource_descr[0]

    # Get list of subproducts
    sub_datasource = ingestion.get_subrproducts_from_ingestion(product, datasource_descr.datasource_descr_id)

    ingestion_status = ingestion_netcdf.ingestion_netcdf(downloaded_file, in_date, product, sub_datasource,
                                                         datasource_descr, logger, test_mode=True)
def debug_IRI_PRCP_1Month_ingest_netcdf(self):
    internet_id = "IRI:NOAA:PRCP:MONTH"  # 'CDS:ERA5:REANALYSIS:SST:MONTH'
    product = {"productcode": "iri_prcp", "version": "1.0"}
    downloaded_file = '/data/processing/iri_prcp/1.0/archive/20210101-iri_prcp.nc'
    in_date = '20210101'

    # Datasource description
    datasource_descr = querydb.get_datasource_descr(source_type='INTERNET', source_id=internet_id)
    datasource_descr = datasource_descr[0]

    # Get list of subproducts
    sub_datasource = ingestion.get_subrproducts_from_ingestion(product, datasource_descr.datasource_descr_id)

    ingestion_status = ingestion_netcdf.ingestion_netcdf(downloaded_file, in_date, product, sub_datasource,
                                                         datasource_descr, logger, test_mode=True)
def debug_CDS_RFE_DAY_netcdf(self):
    internet_id = "CDS:ERA5:REANALYSIS:RFE:DAY"
    product = {"productcode": "era5-rfe", "version": "1.0"}
    downloaded_file = '/data/ingest/202103200000_reanalysis-era5-single-levels_reanalysis_total_precipitation.nc'
    in_date = '202103200000'

    # Datasource description
    datasource_descr = querydb.get_datasource_descr(source_type='INTERNET', source_id=internet_id)
    datasource_descr = datasource_descr[0]

    # Get list of subproducts
    sub_datasource = ingestion.get_subrproducts_from_ingestion(product, datasource_descr.datasource_descr_id)

    ingestion_status = ingestion_netcdf.ingestion_netcdf(downloaded_file, in_date, product, sub_datasource,
                                                         datasource_descr, logger, test_mode=True)
def debug_IRI_surfacetemp_1Month_ingest_netcdf(self):
    internet_id = "IRI:NOAA:SURFACETEMP:MONTH"  # 'CDS:ERA5:REANALYSIS:SST:MONTH'
    product = {"productcode": "iri-surface-temp", "version": "1.0"}
    downloaded_file = '/tmp/climatestation/surface_temp_Jan_2020.nc'
    in_date = '20200101'

    # Datasource description
    datasource_descr = querydb.get_datasource_descr(source_type='INTERNET', source_id=internet_id)
    datasource_descr = datasource_descr[0]

    # Get list of subproducts
    sub_datasource = ingestion.get_subrproducts_from_ingestion(product, datasource_descr.datasource_descr_id)

    ingestion_status = ingestion_netcdf.ingestion_netcdf(downloaded_file, in_date, product, sub_datasource,
                                                         datasource_descr, logger, test_mode=True)
def iri_api_loop_internet(internet_source):
    logger_spec = log.my_logger('apps.get_datastore.' + internet_source.internet_id)

    if internet_source.user_name is None:
        user_name = "anonymous"
    else:
        user_name = internet_source.user_name

    if internet_source.password is None:
        password = "******"
    else:
        password = internet_source.password

    usr_pwd = str(user_name) + ':' + str(password)

    # Create the full filename from a 'template' which contains
    internet_url = str(internet_source.url)

    # processed_list = []
    # processed_list_filename = es_constants.get_datastore_processed_list_prefix + internet_source.internet_id.replace(":", "_") + '.list'
    # processed_list = functions.restore_obj_from_json(processed_list, processed_list_filename)

    try:
        # Check if template is dict or string, then create resources_parameters
        # if type(template_paramater) is dict:
        #     resources_parameters = template_paramater
        # else:
        #     resources_parameters = json.loads(template_paramater)

        if internet_source.productcode is None or internet_source.version is None:
            logger.error("Product is not passed")
            return

        product = {"productcode": internet_source.productcode,
                   "version": internet_source.version}

        # Datasource description
        datasource_descr = querydb.get_datasource_descr(source_type='INTERNET',
                                                        source_id=internet_source.internet_id)
        datasource_descr = datasource_descr[0]

        # Get list of subproducts
        subproducts = ingestion.get_subrproducts_from_ingestion(product, datasource_descr.datasource_descr_id)

        dates = build_list_dates_generic(from_date=internet_source.start_date,
                                         to_date=internet_source.end_date,
                                         frequency_id=str(internet_source.frequency_id))

        # Dates are defined dynamically, not based on the configuration file
        iri_api.process_list_matching_url(datasource_descr, product, subproducts, dates)

        # functions.dump_obj_to_json(processed_list, processed_list_filename)
    except:
        logger.error("Error in IRI datastore service. Continue")
        b_error = True
    finally:
        logger.info("IRI datastore service Ending")

    current_list = []
    return current_list
def loop_ingestion_drive(dry_run=False, test_one_product=None):
    echo_query = False

    # Get all active product ingestion records with a subproduct count.
    active_product_ingestions = querydb.get_ingestion_product(allrecs=True)

    for active_product_ingest in active_product_ingestions:
        productcode = active_product_ingest[0]
        productversion = active_product_ingest[1]

        # Verify the test-one-product case
        do_ingest_product = is_test_one_product(test_one_product, productcode)

        if do_ingest_product:
            logger.info("Ingestion active for product: [%s] subproduct N. %s" % (active_product_ingest[0], active_product_ingest[2]))

            # For the current active product ingestion: get all
            product = {"productcode": productcode, "version": productversion}
            logger.debug("Processing product: %s - version %s" % (productcode, productversion))

            # Get the list of acquisition sources that are defined for this ingestion 'trigger' (i.e. prod/version)
            # NOTE: the following implies there is 1 and only 1 '_native' subproduct associated to a 'product'
            native_product = {"productcode": productcode,
                              "subproductcode": productcode + "_native",
                              "version": productversion}
            sources_list = querydb.get_product_sources(**native_product)
            logger.debug("For product [%s] N. %s source(s) found" % (productcode, len(sources_list)))

            systemsettings = functions.getSystemSettings()

            for source in sources_list:
                logger_spec = log.my_logger('apps.ingestion.' + productcode + '.' + productversion)
                logger.debug("Processing Source type [%s] with id [%s]" % (source.type, source.data_source_id))

                # Re-initialize the datasource_descr
                # datasource_descr = None

                # Get datasource description
                datasource_descr = querydb.get_datasource_descr(source_type=source.type,
                                                                source_id=source.data_source_id)
                datasource_descr = datasource_descr[0]

                # TODO optimize this in order to get direct file filter expression
                my_filter_expr = get_filenaming_info(source, datasource_descr)

                files = get_files_matching_with_file_expression(my_filter_expr)

                # See ES2-204
                logger_spec.debug("Number of files found for product [%s] is: %s" % (active_product_ingest[0], len(files)))

                if len(files) > 0:
                    # Get list of ingestion triggers [prod/subprod/mapset]
                    ingestions = querydb.get_ingestion_subproduct(allrecs=False, **product)

                    # Loop over ingestion triggers
                    subproducts = list()
                    for ingest in ingestions:
                        # TODO if one ingest gives true and another false?
                        dates_not_in_filename = is_date_not_in_filename(ingest.input_to_process_re)
                        logger.debug(" --> processing subproduct: %s" % ingest.subproductcode)

                        args = {"productcode": product['productcode'],
                                "subproductcode": ingest.subproductcode,
                                "datasource_descr_id": datasource_descr.datasource_descr_id,
                                "version": product['version']}
                        product_in_info = querydb.get_product_in_info(**args)

                        # TODO verify the approach: should we get the subproduct from a single query?
                        subproduct = get_subproduct(ingest, product_in_info, datasource_descr.datasource_descr_id)
                        if subproduct is not None:
                            subproducts.append(subproduct)

                    if not subproducts:
                        # TODO what to do?
                        logger.error("For current active ingestion no subproducts for product [%s]" % productcode)

                    # Get the list of unique dates by extracting the date from all files.
                    dates_list = get_list_unique_dates(datasource_descr, files, dates_not_in_filename, product_in_info, ingest.mapsetcode)

                    # Loop over dates and get list of files
                    for in_date in dates_list:
                        date_fileslist = get_dates_file_list(dates_not_in_filename, files, my_filter_expr, in_date, logger_spec)

                        # Pass list of files to ingestion routine
                        if not dry_run:
                            try:
                                result = ingestion(date_fileslist, in_date, product, subproducts, datasource_descr,
                                                   logger_spec, echo_query=echo_query)
                            except:
                                logger.error("Error in ingestion of file [%s]" % functions.conv_list_2_string(date_fileslist))
                            else:
                                # Result is None means we are still waiting for some files to be received. Keep files in /data/ingest
                                # dates_not_in_filename means the input files contain many dates (e.g. GSOD precip)
                                if result is not None and not dates_not_in_filename:
                                    if source.store_original_data or systemsettings['type_installation'] == 'Server':
                                        store_native_files(product, date_fileslist, logger_spec)
                                    else:
                                        delete_files(date_fileslist, logger_spec)
                        else:
                            time.sleep(10)
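# Illustrative invocation sketch of loop_ingestion_drive(). A dry run walks the active
# ingestion triggers and the matching files without calling the ingestion routine, while
# test_one_product restricts the loop to a single product code (the 'vgt-ndvi' code is
# taken from the test above); the wrapper function itself is only an example.
def example_run_ingestion_loop():
    # Inspect what would be ingested, without touching the archive.
    loop_ingestion_drive(dry_run=True)
    # Ingest only the vgt-ndvi product.
    loop_ingestion_drive(dry_run=False, test_one_product='vgt-ndvi')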