Example 1
    def Test_get_datasource_descr(self):

        datasource_descr = querydb.get_datasource_descr(source_type='EUMETCAST',
                                                        source_id='EO:EUM:DAT:SPOT:S10NDVI')
        logger.info("Eumetcast source description is: %s", datasource_descr)
        for row in datasource_descr:
            print(row)

        datasource_descr = querydb.get_datasource_descr(source_type='INTERNET',
                                                        source_id='USGS:EARLWRN:FEWSNET')
        logger.info("Internet source description is: %s", datasource_descr)
        for row in datasource_descr:
            print(row)

        self.assertEqual(1, 1)
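
Every example in this listing revolves around the same call: query the
datasource description table and take the first row. A minimal sketch of
that shared pattern, assuming querydb is importable as in the snippets here
(the import path is a guess) and that rows expose a datasource_descr_id
attribute, as Example 3 shows:

# Minimal sketch of the shared lookup pattern; the import path is an
# assumption -- adapt it to the project layout.
from database import querydb  # hypothetical import path

rows = querydb.get_datasource_descr(source_type='INTERNET',
                                    source_id='USGS:EARLWRN:FEWSNET')
if rows:
    descr = rows[0]  # the examples consistently take the first row
    print(descr.datasource_descr_id)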
Example 3
def ingest_netcdf_cds(internet_source, downloaded_file, processed_item):
    ingestion_status = False
    try:
        product = {
            "productcode": internet_source.productcode,
            "version": internet_source.version
        }

        # Datasource description
        datasource_descr = querydb.get_datasource_descr(
            source_type='INTERNET', source_id=internet_source.internet_id)
        datasource_descr = datasource_descr[0]
        # Get list of subproducts

        sub_datasource = ingestion.get_subrproducts_from_ingestion(
            product, datasource_descr.datasource_descr_id)
        ingestion_status = ingestion_netcdf.ingestion_netcdf(
            downloaded_file,
            processed_item.split(':')[0], product, sub_datasource,
            datasource_descr, logger)
    except Exception as inst:
        logger.error("Error in CDS Ingestion of %s: %s" % (internet_source, inst))
        raise

    return ingestion_status
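
A hedged usage sketch for ingest_netcdf_cds: the function only reads
productcode, version and internet_id from its first argument, so a
namedtuple stand-in is enough for a quick test. The id and product code are
taken from the debug examples further down; the file path and the
processed_item value are illustrative.

# Hypothetical call; InternetSource is a stand-in, not the real class.
from collections import namedtuple

InternetSource = namedtuple('InternetSource',
                            ['productcode', 'version', 'internet_id'])
source = InternetSource('era5-hourly-mslp', '1.0',
                        'CDS:ERA5:REANALYSIS:MSLP:HOUR')
# processed_item is split on ':'; the first token is used as the date
status = ingest_netcdf_cds(source, '/data/ingest/sample.nc',
                           '202101010000:item')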
Example 4
def ingest_jrc_wbd(input_dir, in_date=None, avg=None):

    if avg:
        date_fileslist = glob.glob(input_dir + '/JRC-WBD_AVG2000-' + in_date +
                                   '*')
        subproductcode = 'avg'
        mapsetcode = 'WD-GEE-ECOWAS-AVG'
        datasource_descrID = 'JRC:WBD:GEE:AVG'
    else:
        date_fileslist = glob.glob(input_dir + '/JRC-WBD_' + in_date + '*')
        subproductcode = 'occurr'
        mapsetcode = 'WD-GEE-ECOWAS-1'
        datasource_descrID = 'JRC:WBD:GEE'

    productcode = 'wd-gee'
    productversion = '1.0'

    product = {"productcode": productcode, "version": productversion}
    args = {
        "productcode": productcode,
        "subproductcode": subproductcode,
        "datasource_descr_id": datasource_descrID,
        "version": productversion
    }

    product_in_info = querydb.get_product_in_info(**args)

    re_process = product_in_info.re_process
    re_extract = product_in_info.re_extract

    sprod = {
        'subproduct': subproductcode,
        'mapsetcode': mapsetcode,
        're_extract': re_extract,
        're_process': re_process
    }

    subproducts = []
    subproducts.append(sprod)

    output_file = (es_constants.es2globals['processing_dir'] +
                   functions.set_path_sub_directory(productcode, subproductcode,
                                                    'Ingest', productversion,
                                                    mapsetcode) +
                   functions.set_path_filename(in_date, productcode, subproductcode,
                                               mapsetcode, productversion, '.tif'))

    for internet_filter, datasource_descr in querydb.get_datasource_descr(
            source_type='INTERNET', source_id=datasource_descrID):
        ingestion.ingestion(date_fileslist,
                            in_date,
                            product,
                            subproducts,
                            datasource_descr,
                            logger,
                            echo_query=1)

    return output_file
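
ingest_jrc_wbd derives the subproduct, mapset and datasource id from the
avg flag, so a call only needs a directory and a date. An illustrative
sketch (the directory is a placeholder; note that in_date must be given
despite its None default, because it is concatenated into the glob pattern):

# Illustrative calls; the directory is a placeholder.
occ_file = ingest_jrc_wbd('/data/ingest/jrc-wbd', in_date='20200101')
avg_file = ingest_jrc_wbd('/data/ingest/jrc-wbd', in_date='20200101',
                          avg=True)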
Example 5
    def test_ingest_g_cls_ndvi_200_1Km(self):

        # Test Copernicus Products version 2.2 (starting with NDVI 2.2.1)
        productcode = 'vgt-ndvi'
        productversion = 'proba-v2.2'
        subproductcode = 'ndv'
        mapsetcode = 'SPOTV-Africa-1km'
        datasource_descrID = 'PDF:GLS:PROBA-V1:NDVI300'
        # input_dir = self.test_ingest_dir + os.path.sep + productcode + os.path.sep + self.native_dir
        #date_fileslist = [os.path.join(input_dir, 'c_gls_NDVI_202003010000_AFRI_PROBAV_V2.2.1.zip')]
        date_fileslist = glob.glob(
            '/eos/jeodpp/home/users/venkavi/data/processing/vgt-ndvi/sv2-pv2.2/archive/c_gls_NDVI300_202007110000_GLOBE_PROBAV_V1.0.1.nc*'
        )
        in_date = '202007110000'
        out_date = '20200711'
        product = {"productcode": productcode, "version": productversion}
        args = {
            "productcode": productcode,
            "subproductcode": subproductcode,
            "datasource_descr_id": datasource_descrID,
            "version": productversion
        }

        product_in_info = querydb.get_product_in_info(**args)

        re_process = product_in_info.re_process
        re_extract = product_in_info.re_extract

        sprod = {
            'subproduct': subproductcode,
            'mapsetcode': mapsetcode,
            're_extract': re_extract,
            're_process': re_process
        }

        subproducts = [sprod]
        # Remove existing output
        # self.remove_output_file(productcode, subproductcode, productversion, mapsetcode, out_date)
        datasource_descr = querydb.get_datasource_descr(
            source_type='INTERNET', source_id=datasource_descrID)
        ingestion.ingestion(date_fileslist,
                            in_date,
                            product,
                            subproducts,
                            datasource_descr[0],
                            logger,
                            echo_query=1,
                            test_mode=False)

        # status = self.checkIngestedFile(productcode=productcode, subproductcode=subproductcode,
        #                                 version=productversion, mapsetcode=mapsetcode, date=out_date)
        self.assertEqual(1, 1)
Example 6
    def debug_CDS_MSLP_hour_netcdf(self):
        internet_id = "CDS:ERA5:REANALYSIS:MSLP:HOUR"
        product = {"productcode": "era5-hourly-mslp", "version": "1.0"}
        downloaded_file = '/data/processing/era5-hourly-mslp/1.0/archive/202101010100_reanalysis-era5-single-levels_reanalysis_mean_sea_level_pressure.nc'
        in_date = '202101010000'
        # Datasource description
        datasource_descr = querydb.get_datasource_descr(source_type='INTERNET',
                                                        source_id=internet_id)
        datasource_descr = datasource_descr[0]
        # Get list of subproducts

        sub_datasource = ingestion.get_subrproducts_from_ingestion(
            product, datasource_descr.datasource_descr_id)

        ingestion_status = ingestion_netcdf.ingestion_netcdf(downloaded_file,
                                                             in_date,
                                                             product,
                                                             sub_datasource,
                                                             datasource_descr,
                                                             logger,
                                                             test_mode=True)
Example 7
    def debug_CDS_SST_MONTH_netcdf(self):
        internet_id = "CDS:ERA5:REANALYSIS:SST:MONTH"
        product = {"productcode": "era5-monthly-sst", "version": "1.0"}
        downloaded_file = '/data/ingest/20210101_sst_monthly_average.nc'
        in_date = '202101010000'
        # Datasource description
        datasource_descr = querydb.get_datasource_descr(source_type='INTERNET',
                                                        source_id=internet_id)
        datasource_descr = datasource_descr[0]
        # Get list of subproducts

        sub_datasource = ingestion.get_subrproducts_from_ingestion(
            product, datasource_descr.datasource_descr_id)

        ingestion_status = ingestion_netcdf.ingestion_netcdf(downloaded_file,
                                                             in_date,
                                                             product,
                                                             sub_datasource,
                                                             datasource_descr,
                                                             logger,
                                                             test_mode=True)
Example 8
    def debug_IRI_PRCP_1Month_ingest_netcdf(self):
        internet_id = "IRI:NOAA:PRCP:MONTH"  #'CDS:ERA5:REANALYSIS:SST:MONTH'
        product = {"productcode": "iri_prcp", "version": "1.0"}
        downloaded_file = '/data/processing/iri_prcp/1.0/archive/20210101-iri_prcp.nc'
        in_date = '20210101'
        # Datasource description
        datasource_descr = querydb.get_datasource_descr(source_type='INTERNET',
                                                        source_id=internet_id)
        datasource_descr = datasource_descr[0]
        # Get list of subproducts

        sub_datasource = ingestion.get_subrproducts_from_ingestion(
            product, datasource_descr.datasource_descr_id)

        ingestion_status = ingestion_netcdf.ingestion_netcdf(downloaded_file,
                                                             in_date,
                                                             product,
                                                             sub_datasource,
                                                             datasource_descr,
                                                             logger,
                                                             test_mode=True)
Example 9
    def debug_CDS_RFE_DAY_netcdf(self):
        internet_id = "CDS:ERA5:REANALYSIS:RFE:DAY"
        product = {"productcode": "era5-rfe", "version": "1.0"}
        downloaded_file = '/data/ingest/202103200000_reanalysis-era5-single-levels_reanalysis_total_precipitation.nc'
        in_date = '202103200000'
        # Datasource description
        datasource_descr = querydb.get_datasource_descr(source_type='INTERNET',
                                                        source_id=internet_id)
        datasource_descr = datasource_descr[0]
        # Get list of subproducts

        sub_datasource = ingestion.get_subrproducts_from_ingestion(
            product, datasource_descr.datasource_descr_id)

        ingestion_status = ingestion_netcdf.ingestion_netcdf(downloaded_file,
                                                             in_date,
                                                             product,
                                                             sub_datasource,
                                                             datasource_descr,
                                                             logger,
                                                             test_mode=True)
Example 10
    def debug_IRI_surfacetemp_1Month_ingest_netcdf(self):
        internet_id = "IRI:NOAA:SURFACETEMP:MONTH"  #'CDS:ERA5:REANALYSIS:SST:MONTH'
        product = {"productcode": "iri-surface-temp", "version": "1.0"}
        downloaded_file = '/tmp/climatestation/surface_temp_Jan_2020.nc'
        in_date = '20200101'
        # Datasource description
        datasource_descr = querydb.get_datasource_descr(source_type='INTERNET',
                                                        source_id=internet_id)
        datasource_descr = datasource_descr[0]
        # Get list of subproducts

        sub_datasource = ingestion.get_subrproducts_from_ingestion(
            product, datasource_descr.datasource_descr_id)

        ingestion_status = ingestion_netcdf.ingestion_netcdf(downloaded_file,
                                                             in_date,
                                                             product,
                                                             sub_datasource,
                                                             datasource_descr,
                                                             logger,
                                                             test_mode=True)
Example 11
def iri_api_loop_internet(internet_source):

    logger_spec = log.my_logger('apps.get_datastore.' +
                                internet_source.internet_id)

    if internet_source.user_name is None:
        user_name = "anonymous"
    else:
        user_name = internet_source.user_name

    if internet_source.password is None:
        password = "******"
    else:
        password = internet_source.password

    usr_pwd = str(user_name) + ':' + str(password)

    # Create the full filename from a 'template' which contains
    internet_url = str(internet_source.url)

    # processed_list = []
    # processed_list_filename = es_constants.get_datastore_processed_list_prefix + internet_source.internet_id.replace(":", "_") + '.list'
    # processed_list = functions.restore_obj_from_json(processed_list,
    # processed_list_filename)
    try:
        # Check if template is dict or string then create resources_parameters
        # if type(template_paramater) is dict:
        # resources_parameters = template_paramater
        # else:
        # resources_parameters = json.loads(template_paramater)
        if internet_source.productcode is None or internet_source.version is None:
            logger.error("Product is not passed")
            return

        product = {
            "productcode": internet_source.productcode,
            "version": internet_source.version
        }

        # Datasource description
        datasource_descr = querydb.get_datasource_descr(
            source_type='INTERNET', source_id=internet_source.internet_id)
        datasource_descr = datasource_descr[0]
        # Get list of subproducts

        subproducts = ingestion.get_subrproducts_from_ingestion(
            product, datasource_descr.datasource_descr_id)
        dates = build_list_dates_generic(from_date=internet_source.start_date,
                                         to_date=internet_source.end_date,
                                         frequency_id=str(
                                             internet_source.frequency_id))
        # Dates are defined dynamically, not taken from the configuration file
        iri_api.process_list_matching_url(datasource_descr, product,
                                          subproducts, dates)

        # functions.dump_obj_to_json(processed_list, processed_list_filename)

    except Exception:
        logger.error("Error in IRI datastore service. Continuing.")

    finally:
        logger.info("IRI datastore service Ending")
        current_list = []
        return current_list
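
The attributes iri_api_loop_internet actually reads can all be collected
from the body above, so any object exposing them works as input. A sketch
with placeholder values: the id and product come from Example 8, while the
URL and frequency_id are assumptions.

# Stand-in source object; every attribute below is read by the function.
from types import SimpleNamespace

source = SimpleNamespace(
    internet_id='IRI:NOAA:PRCP:MONTH',
    productcode='iri_prcp', version='1.0',
    user_name=None, password=None,     # None falls back to the defaults
    url='https://example.org/iri',     # placeholder URL template
    start_date='20210101', end_date=None,
    frequency_id='e1month',            # assumed frequency code
)
iri_api_loop_internet(source)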
Example 12
def loop_ingestion_drive(dry_run=False, test_one_product=None):
    echo_query = False
    # Get all active product ingestion records with a subproduct count.
    active_product_ingestions = querydb.get_ingestion_product(allrecs=True)

    for active_product_ingest in active_product_ingestions:

        productcode = active_product_ingest[0]
        productversion = active_product_ingest[1]

        # Verify the test-one-product case
        do_ingest_product = is_test_one_product(test_one_product, productcode)

        if do_ingest_product:
            logger.info("Ingestion active for product: [%s] subproduct N. %s" %
                        (active_product_ingest[0], active_product_ingest[2]))
            # For the current active product ingestion: get all
            product = {"productcode": productcode, "version": productversion}
            logger.debug("Processing product: %s - version %s" %
                         (productcode, productversion))

            # Get the list of acquisition sources that are defined for this ingestion 'trigger'
            # (i.e. prod/version)
            # NOTE: the following implies there is 1 and only 1 '_native' subproduct associated to a 'product';
            native_product = {
                "productcode": productcode,
                "subproductcode": productcode + "_native",
                "version": productversion
            }

            sources_list = querydb.get_product_sources(**native_product)

            logger.debug("For product [%s] N. %s  source is/are found" %
                         (productcode, len(sources_list)))

            systemsettings = functions.getSystemSettings()

            for source in sources_list:

                logger_spec = log.my_logger('apps.ingestion.' + productcode +
                                            '.' + productversion)
                logger.debug("Processing Source type [%s] with id [%s]" %
                             (source.type, source.data_source_id))
                # Re-initialize the datasource_descr
                # datasource_descr = None

                # Get datasource description
                datasource_descr = querydb.get_datasource_descr(
                    source_type=source.type, source_id=source.data_source_id)
                datasource_descr = datasource_descr[0]
                # TODO optimize this in order to get direct file filter expression
                my_filter_expr = get_filenaming_info(source, datasource_descr)

                files = get_files_matching_with_file_expression(my_filter_expr)

                # See ES2-204
                logger_spec.debug(
                    "Number of files found for product [%s] is: %s" %
                    (active_product_ingest[0], len(files)))
                if len(files) > 0:
                    # Get list of ingestions triggers [prod/subprod/mapset]
                    ingestions = querydb.get_ingestion_subproduct(
                        allrecs=False, **product)

                    # Loop over ingestion triggers
                    subproducts = list()
                    for ingest in ingestions:
                        # TODO if one ingest gives true and another false?
                        dates_not_in_filename = is_date_not_in_filename(
                            ingest.input_to_process_re)
                        logger.debug(" --> processing subproduct: %s" %
                                     ingest.subproductcode)

                        args = {
                            "productcode": product['productcode'],
                            "subproductcode": ingest.subproductcode,
                            "datasource_descr_id":
                            datasource_descr.datasource_descr_id,
                            "version": product['version']
                        }
                        product_in_info = querydb.get_product_in_info(**args)
                        # TODO verify the approach Should we get subproduct from single query
                        subproduct = get_subproduct(
                            ingest, product_in_info,
                            datasource_descr.datasource_descr_id)
                        if subproduct is not None:
                            subproducts.append(subproduct)

                    if not subproducts:
                        # TODO what to do?
                        logger.error(
                            "No subproducts found for product [%s] in the current active ingestion"
                            % productcode)

                    # Get the list of unique dates by extracting the date from all files.
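                    # NOTE: dates_not_in_filename, product_in_info and
                    # ingest.mapsetcode keep the values from the last
                    # ingestion trigger processed in the loop above.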
                    dates_list = get_list_unique_dates(datasource_descr, files,
                                                       dates_not_in_filename,
                                                       product_in_info,
                                                       ingest.mapsetcode)

                    # Loop over dates and get list of files
                    for in_date in dates_list:
                        date_fileslist = get_dates_file_list(
                            dates_not_in_filename, files, my_filter_expr,
                            in_date, logger_spec)
                        # Pass list of files to ingestion routine
                        if not dry_run:
                            try:
                                result = ingestion(date_fileslist,
                                                   in_date,
                                                   product,
                                                   subproducts,
                                                   datasource_descr,
                                                   logger_spec,
                                                   echo_query=echo_query)
                            except Exception:
                                logger.error(
                                    "Error in ingestion of file [%s] " %
                                    (functions.conv_list_2_string(
                                        date_fileslist)))
                            else:
                                # Result is None means we are still waiting for some files to be received. Keep files in /data/ingest
                                # dates_not_in_filename means the input files contains many dates (e.g. GSOD precip)
                                if result is not None and not dates_not_in_filename:
                                    if source.store_original_data or systemsettings[
                                            'type_installation'] == 'Server':
                                        store_native_files(
                                            product, date_fileslist,
                                            logger_spec)
                                    else:
                                        delete_files(date_fileslist,
                                                     logger_spec)

                        else:
                            time.sleep(10)
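
loop_ingestion_drive can be exercised without touching any data via
dry_run, or restricted to a single product with test_one_product. A short
sketch (the product code comes from Example 5):

# Walk all active ingestions without ingesting anything...
loop_ingestion_drive(dry_run=True)
# ...or run the loop for one product only.
loop_ingestion_drive(test_one_product='vgt-ndvi')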