Exemple #1
0
def ingest_netcdf_cds(internet_source, downloaded_file, processed_item):
    """Ingest a downloaded CDS NetCDF file for the given internet source.

    The datasource description is looked up from ``internet_source.internet_id``
    (first match is used), the sub-products attached to it are resolved, and
    everything is handed to ``ingestion_netcdf.ingestion_netcdf``.

    Args:
        internet_source: object exposing ``productcode``, ``version`` and
            ``internet_id`` attributes.
        downloaded_file: forwarded unchanged as the first argument of
            ``ingestion_netcdf.ingestion_netcdf``.
        processed_item: string; only the part before the first ``':'`` is
            forwarded to the ingestion routine.

    Returns:
        Whatever ``ingestion_netcdf.ingestion_netcdf`` returns.

    Raises:
        Exception: any error raised during lookup or ingestion is logged at
            debug level and re-raised unchanged.
    """
    try:
        product_ref = dict(productcode=internet_source.productcode,
                           version=internet_source.version)

        # Only the first datasource description returned by the query is used.
        descr = querydb.get_datasource_descr(
            source_type='INTERNET',
            source_id=internet_source.internet_id)[0]

        # Sub-products configured for this product / datasource pair.
        subproduct_list = ingestion.get_subrproducts_from_ingestion(
            product_ref, descr.datasource_descr_id)

        return ingestion_netcdf.ingestion_netcdf(
            downloaded_file,
            processed_item.split(':')[0],
            product_ref,
            subproduct_list,
            descr,
            logger)
    except Exception:
        logger.debug("Error in CDS Ingestion: %s" % internet_source)
        raise
Exemple #2
0
def process_list_matching_url(datasource_descr, product, subproducts, dates):
    """Download and ingest one NetCDF file per date from an HTTP datasource.

    For each entry of ``dates`` a time fragment is built with
    ``manage_IRI_time`` and combined with the parameter URL produced by
    ``build_parameter_http``. Entries for which ``check_processed_list``
    returns a falsy value are skipped. The remaining ones are downloaded into
    a temporary directory, passed to ``ingestion_netcdf.ingestion_netcdf`` and
    recorded in the persisted processed list.

    The temporary directory is removed when the function exits, including
    when an exception interrupts the processing.

    Args:
        datasource_descr: object exposing ``datasource_descr_id``, ``url`` and
            ``frequency_id`` attributes.
        product: mapping containing at least the ``'productcode'`` key.
        subproducts: forwarded unchanged to ``ingestion_netcdf.ingestion_netcdf``.
        dates: iterable of objects supporting ``strftime``.

    Returns:
        None.
    """
    tmpdir = tempfile.mkdtemp(prefix=__name__,
                              suffix='_' +
                              datasource_descr.datasource_descr_id,
                              dir=es_constants.base_tmp_dir)
    try:
        # Read the datasource parameters from the file and build the http url.
        parameter = read_parameter_file(datasource_descr.datasource_descr_id)
        internet_url = datasource_descr.url
        parameter_url = build_parameter_http(parameter)

        # Reload the list of already handled urls (defaults to an empty list).
        processed_list_filename = (
            es_constants.get_datastore_processed_list_prefix +
            datasource_descr.datasource_descr_id.replace(":", "_") + '.list')
        processed_list = functions.restore_obj_from_json(
            [], processed_list_filename)

        for date in dates:
            time_url = manage_IRI_time(date, datasource_descr.frequency_id)
            item_key = parameter_url + time_url

            # Check if the file is already processed.
            # NOTE(review): presumably check_processed_list returns a truthy
            # value when the item still has to be processed - confirm.
            if not check_processed_list(item_key, processed_list):
                continue

            # Manage dates depending on the datasource type TODO
            in_date = date.strftime("%Y%m%d")
            downloaded_file = (tmpdir + '/' + in_date + '_' +
                               product['productcode'] + '.nc')

            file_downloaded = get_file(
                download_url=(internet_url + parameter_url +
                              urllib.parse.quote(time_url) + '/data.nc'),
                target_path=downloaded_file)
            if not file_downloaded:
                logger.error('Error in downloading the file')
                continue

            # NOTE(review): the ingestion result is not inspected; the item is
            # recorded as processed regardless of it - confirm this is
            # intended.
            ingestion_netcdf.ingestion_netcdf(
                downloaded_file, in_date, product, subproducts,
                datasource_descr, logger)

            # Persist after every item so progress survives a later failure.
            processed_list.append(item_key)
            functions.dump_obj_to_json(processed_list, processed_list_filename)
    finally:
        # Always drop the scratch directory, even when an error propagates.
        shutil.rmtree(tmpdir)
    def debug_CDS_MSLP_hour_netcdf(self):
        """Run ``ingestion_netcdf`` with ``test_mode=True`` on a fixed ERA5 hourly MSLP file.

        All inputs are hard-coded; the result of the ingestion call is
        discarded and nothing is returned.
        """
        source_id = "CDS:ERA5:REANALYSIS:MSLP:HOUR"
        product_ref = dict(productcode="era5-hourly-mslp", version="1.0")
        # NOTE(review): the file name carries 202101010100 while the date
        # passed below is 202101010000 - confirm this is intended.
        nc_path = '/data/processing/era5-hourly-mslp/1.0/archive/202101010100_reanalysis-era5-single-levels_reanalysis_mean_sea_level_pressure.nc'
        date_token = '202101010000'

        # Only the first datasource description returned by the query is used.
        descr = querydb.get_datasource_descr(
            source_type='INTERNET', source_id=source_id)[0]

        # Sub-products configured for this product / datasource pair.
        subproduct_list = ingestion.get_subrproducts_from_ingestion(
            product_ref, descr.datasource_descr_id)

        ingestion_netcdf.ingestion_netcdf(
            nc_path, date_token, product_ref, subproduct_list, descr, logger,
            test_mode=True)
    def debug_CDS_SST_MONTH_netcdf(self):
        """Run ``ingestion_netcdf`` with ``test_mode=True`` on a fixed ERA5 monthly SST file.

        All inputs are hard-coded; the result of the ingestion call is
        discarded and nothing is returned.
        """
        source_id = "CDS:ERA5:REANALYSIS:SST:MONTH"
        product_ref = dict(productcode="era5-monthly-sst", version="1.0")
        nc_path = '/data/ingest/20210101_sst_monthly_average.nc'
        date_token = '202101010000'

        # Only the first datasource description returned by the query is used.
        descr = querydb.get_datasource_descr(
            source_type='INTERNET', source_id=source_id)[0]

        # Sub-products configured for this product / datasource pair.
        subproduct_list = ingestion.get_subrproducts_from_ingestion(
            product_ref, descr.datasource_descr_id)

        ingestion_netcdf.ingestion_netcdf(
            nc_path, date_token, product_ref, subproduct_list, descr, logger,
            test_mode=True)
    def debug_IRI_PRCP_1Month_ingest_netcdf(self):
        """Run ``ingestion_netcdf`` with ``test_mode=True`` on a fixed IRI monthly precipitation file.

        All inputs are hard-coded; the result of the ingestion call is
        discarded and nothing is returned.
        """
        source_id = "IRI:NOAA:PRCP:MONTH"
        product_ref = dict(productcode="iri_prcp", version="1.0")
        nc_path = '/data/processing/iri_prcp/1.0/archive/20210101-iri_prcp.nc'
        date_token = '20210101'

        # Only the first datasource description returned by the query is used.
        descr = querydb.get_datasource_descr(
            source_type='INTERNET', source_id=source_id)[0]

        # Sub-products configured for this product / datasource pair.
        subproduct_list = ingestion.get_subrproducts_from_ingestion(
            product_ref, descr.datasource_descr_id)

        ingestion_netcdf.ingestion_netcdf(
            nc_path, date_token, product_ref, subproduct_list, descr, logger,
            test_mode=True)
    def debug_CDS_RFE_DAY_netcdf(self):
        """Run ``ingestion_netcdf`` with ``test_mode=True`` on a fixed ERA5 daily precipitation file.

        All inputs are hard-coded; the result of the ingestion call is
        discarded and nothing is returned.
        """
        source_id = "CDS:ERA5:REANALYSIS:RFE:DAY"
        product_ref = dict(productcode="era5-rfe", version="1.0")
        nc_path = '/data/ingest/202103200000_reanalysis-era5-single-levels_reanalysis_total_precipitation.nc'
        date_token = '202103200000'

        # Only the first datasource description returned by the query is used.
        descr = querydb.get_datasource_descr(
            source_type='INTERNET', source_id=source_id)[0]

        # Sub-products configured for this product / datasource pair.
        subproduct_list = ingestion.get_subrproducts_from_ingestion(
            product_ref, descr.datasource_descr_id)

        ingestion_netcdf.ingestion_netcdf(
            nc_path, date_token, product_ref, subproduct_list, descr, logger,
            test_mode=True)
    def debug_IRI_surfacetemp_1Month_ingest_netcdf(self):
        """Run ``ingestion_netcdf`` with ``test_mode=True`` on a fixed IRI monthly surface-temperature file.

        All inputs are hard-coded; the result of the ingestion call is
        discarded and nothing is returned.
        """
        source_id = "IRI:NOAA:SURFACETEMP:MONTH"
        product_ref = dict(productcode="iri-surface-temp", version="1.0")
        nc_path = '/tmp/climatestation/surface_temp_Jan_2020.nc'
        date_token = '20200101'

        # Only the first datasource description returned by the query is used.
        descr = querydb.get_datasource_descr(
            source_type='INTERNET', source_id=source_id)[0]

        # Sub-products configured for this product / datasource pair.
        subproduct_list = ingestion.get_subrproducts_from_ingestion(
            product_ref, descr.datasource_descr_id)

        ingestion_netcdf.ingestion_netcdf(
            nc_path, date_token, product_ref, subproduct_list, descr, logger,
            test_mode=True)