Ejemplo n.º 1
0
    def Test_get_ingestion_subproduct(self):
        """Smoke-test querydb.get_ingestion_subproduct for vgt_ndvi / 'undefined'.

        Only checks that the query runs and its rows can be iterated; the
        closing assertEqual(1, 1) is a deliberate always-pass marker.
        """
        ingestion_subproduct = querydb.get_ingestion_subproduct(productcode='vgt_ndvi',
                                                                version='undefined')
        logger.info("All ingestions of product are: %s", ingestion_subproduct)
        for row in ingestion_subproduct:
            # print() with a single argument is valid in Python 2 and 3
            # (the original used the Python-2-only `print row` statement).
            print(row)

        self.assertEqual(1, 1)
Ejemplo n.º 2
0
    def Test_get_ingestion_subproduct(self):
        """Smoke-test querydb.get_ingestion_subproduct for vgt_ndvi / 'undefined'.

        Only checks that the query runs and its rows can be iterated; the
        closing assertEqual(1, 1) is a deliberate always-pass marker.
        """
        ingestion_subproduct = querydb.get_ingestion_subproduct(productcode='vgt_ndvi',
                                                                version='undefined')
        logger.info("All ingestions of product are: %s", ingestion_subproduct)
        for row in ingestion_subproduct:
            # print() with a single argument is valid in Python 2 and 3
            # (the original used the Python-2-only `print row` statement).
            print(row)

        self.assertEqual(1, 1)
Ejemplo n.º 3
0
def get_subrproducts_from_ingestion(product, datasource_descr_id):
    """Build the list of sub-product descriptors active for a product ingestion.

    Args:
        product: dict with at least 'productcode' and 'version' keys,
            expanded as keyword arguments for querydb.get_ingestion_subproduct.
        datasource_descr_id: identifier of the acquisition data source.

    Returns:
        list of dicts, one per ingestion trigger, each carrying the
        sub-product code, mapset, extraction/processing regexes, nodata
        value and input scale/offset. Sub-products with no matching
        product_in_info entry are skipped with a warning.
    """
    # Get list of ingestions triggers [prod/subprod/mapset]
    ingestions = querydb.get_ingestion_subproduct(allrecs=False, **product)

    # Loop over ingestion triggers
    subproducts = list()
    for ingest in ingestions:
        # NOTE: the original computed a `dates_not_in_filename` flag from
        # ingest.input_to_process_re here but never used it; removed.

        logger.debug(" --> processing subproduct: %s" % ingest.subproductcode)
        args = {
            "productcode": product['productcode'],
            "subproductcode": ingest.subproductcode,
            "datasource_descr_id": datasource_descr_id,
            "version": product['version']
        }
        product_in_info = querydb.get_product_in_info(**args)
        try:
            # Attribute access fails (AttributeError) when the sub-product
            # is not defined for this data source; was a bare `except:`.
            sprod = {
                'subproduct': ingest.subproductcode,
                'mapsetcode': ingest.mapsetcode,
                're_extract': product_in_info.re_extract,
                're_process': product_in_info.re_process,
                'nodata': product_in_info.no_data,
                'in_scale_factor': product_in_info.scale_factor,
                'in_offset': product_in_info.scale_offset
            }
            subproducts.append(sprod)
        except AttributeError:
            logger.warning("Subproduct %s not defined for source %s" %
                           (ingest.subproductcode, datasource_descr_id))

    return subproducts
def ingest_archives_eumetcast(dry_run=False):
    """Ingest archives in format MESA_JRC_<prod>_<sprod>_<date>_<mapset>_<version>
    disseminated by JRC through EUMETCast.

    Walks (1) all active product-ingestion records and (2) the outputs of all
    active processing chains, calling ingest_archives_eumetcast_product for
    each prod/version/subprod/mapset combination; finally removes already
    treated files, tracked through trace files in <base_tmp_dir>/ingested_files.

    Args:
        dry_run: if True, read tables and report activity ONLY
            (passed through to ingest_archives_eumetcast_product).
    """
    logger.info("Entering routine %s" % 'ingest_archives_eumetcast')

    # Get all active product ingestion records with a subproduct count.
    active_product_ingestions = querydb.get_ingestion_product(allrecs=True)
    for active_product_ingest in active_product_ingestions:

        productcode = active_product_ingest[0]
        productversion = active_product_ingest[1]

        # For the current active product ingestion: get all
        product = {"productcode": productcode, "version": productversion}

        # Get the list of acquisition sources that are defined for this ingestion 'trigger'
        # (i.e. prod/version)
        # NOTE: the following implies there is 1 and only 1 '_native' subproduct associated to a 'subproduct';
        native_product = {
            "productcode": productcode,
            "subproductcode": productcode + "_native",
            "version": productversion
        }
        sources_list = querydb.get_product_sources(**native_product)

        logger.debug("For product [%s] N. %s  source is/are found" %
                     (productcode, len(sources_list)))

        ingestions = querydb.get_ingestion_subproduct(allrecs=False, **product)
        for ingest in ingestions:
            logger.debug(
                "Looking for product [%s]/version [%s]/subproducts [%s]/mapset [%s]"
                % (productcode, productversion, ingest.subproductcode,
                   ingest.mapsetcode))
            ingest_archives_eumetcast_product(productcode,
                                              productversion,
                                              ingest.subproductcode,
                                              ingest.mapsetcode,
                                              dry_run=dry_run)

    # Get all active processing chains [product/version/algo/mapset].
    active_processing_chains = querydb.get_active_processing_chains()
    for chain in active_processing_chains:
        logger.debug("Processing Chain N.:%s" % str(chain.process_id))
        processed_products = querydb.get_processing_chain_products(
            chain.process_id, type='output')
        for processed_product in processed_products:
            productcode = processed_product.productcode
            version = processed_product.version
            subproductcode = processed_product.subproductcode
            mapset = processed_product.mapsetcode
            logger.debug(
                "Looking for product [%s]/version [%s]/subproducts [%s]/mapset [%s]"
                % (productcode, version, subproductcode, mapset))
            ingest_archives_eumetcast_product(productcode,
                                              version,
                                              subproductcode,
                                              mapset,
                                              dry_run=dry_run)

    # Get the list of files that have been treated
    working_dir = es_constants.es2globals[
        'base_tmp_dir'] + os.path.sep + 'ingested_files'
    trace_files = glob.glob(working_dir + os.path.sep + '*tbd')
    for trace in trace_files:
        # Drop the trace-file extension to recover the ingested file's name.
        # BUGFIX: the original used str.strip('.tdb'), which strips any of
        # the characters '.', 't', 'd', 'b' from BOTH ends of the basename
        # (mangling names starting/ending with those letters) and also
        # spelled the extension backwards ('tdb' vs the globbed 'tbd').
        filename = os.path.splitext(os.path.basename(trace))[0]
        file_path = es_constants.es2globals[
            'ingest_dir'] + os.path.sep + filename
        logger.debug("Removing file %s/" % file_path)
        if os.path.isfile(file_path):
            os.remove(file_path)
        # Remove trace file also
        os.remove(trace)
Ejemplo n.º 5
0
    def list_all_ingested_and_derived_subproducts_mapsets(self):
        """Return every prod/version/subproduct/mapset combination for this
        product, both from its active ingestion triggers and from the outputs
        of active processing chains matching self.product_code/self.version.

        Returns:
            list of dicts with keys 'productcode', 'subproductcode',
            'version' and 'mapset'.
        """
        # Initialize list of prod/version/subprods/mapset to be returned
        list_ingested_and_derived_subproducts = []

        # Get all active product ingestion records with a subproduct count.
        active_product_ingestions = querydb.get_ingestion_product(
            productcode=self.product_code, version=self.version)
        # Normalize: the query may return a single tuple instead of a list.
        if isinstance(active_product_ingestions, tuple):
            active_product_ingestions = [active_product_ingestions]

        for active_product_ingest in active_product_ingestions:

            # For the current active product ingestion: get all
            product = {
                "productcode": self.product_code,
                "version": self.version
            }

            # Get the list of acquisition sources that are defined for this ingestion 'trigger'
            # (i.e. prod/version)
            # NOTE: the following implies there is 1 and only 1 '_native' subproduct associated to a 'subproduct';
            native_product = {
                "productcode": self.product_code,
                "subproductcode": self.product_code + "_native",
                "version": self.version
            }
            sources_list = querydb.get_product_sources(**native_product)
            logger.debug("For product [%s] N. %s  source is/are found" %
                         (self.product_code, len(sources_list)))

            ingestions = querydb.get_ingestion_subproduct(allrecs=False,
                                                          **product)
            for ingest in ingestions:
                entry = {
                    "productcode": self.product_code,
                    "subproductcode": ingest.subproductcode,
                    "version": self.version,
                    "mapset": ingest.mapsetcode
                }
                logger.debug(
                    "Looking for product [%s]/version [%s]/subproducts [%s]/mapset [%s]"
                    % (self.product_code, self.version, ingest.subproductcode,
                       ingest.mapsetcode))
                list_ingested_and_derived_subproducts.append(entry)

        # Get all active processing chains [product/version/algo/mapset].
        active_processing_chains = querydb.get_active_processing_chains()
        for chain in active_processing_chains:
            logger.debug("Processing Chain N.:%s" % str(chain.process_id))
            processed_products = querydb.get_processing_chain_products(
                chain.process_id, type='output')
            for processed_product in processed_products:
                # Keep only chain outputs belonging to this product/version.
                if processed_product.productcode == self.product_code and processed_product.version == self.version:
                    entry = {
                        "productcode": self.product_code,
                        "subproductcode": processed_product.subproductcode,
                        "version": self.version,
                        "mapset": processed_product.mapsetcode
                    }

                    logger.debug(
                        "Looking for product [%s]/version [%s]/subproducts [%s]/mapset [%s]"
                        % (self.product_code, self.version,
                           processed_product.subproductcode,
                           processed_product.mapsetcode))
                    list_ingested_and_derived_subproducts.append(entry)

        return list_ingested_and_derived_subproducts
Ejemplo n.º 6
0
def loop_ingestion_drive(dry_run=False, test_one_product=None):
    """Drive one pass of the ingestion loop over all active product ingestions.

    For each active product/version: resolves its acquisition sources, finds
    files matching each source's filename filter, builds the list of
    sub-product descriptors and ingests the files date by date. Original
    native files are then stored or deleted depending on the source/system
    settings.

    Args:
        dry_run: if True, skip the actual ingestion call (sleeps instead).
        test_one_product: if set, restricts processing to that product code
            (checked via is_test_one_product).
    """
    echo_query = False
    # Get all active product ingestion records with a subproduct count.
    active_product_ingestions = querydb.get_ingestion_product(allrecs=True)

    for active_product_ingest in active_product_ingestions:

        productcode = active_product_ingest[0]
        productversion = active_product_ingest[1]

        # Verify the test-one-product case
        do_ingest_product = is_test_one_product(test_one_product, productcode)

        if do_ingest_product:
            logger.info("Ingestion active for product: [%s] subproduct N. %s" %
                        (active_product_ingest[0], active_product_ingest[2]))
            # For the current active product ingestion: get all
            product = {"productcode": productcode, "version": productversion}
            logger.debug("Processing product: %s - version %s" %
                         (productcode, productversion))

            # Get the list of acquisition sources that are defined for this ingestion 'trigger'
            # (i.e. prod/version)
            # NOTE: the following implies there is 1 and only 1 '_native' subproduct associated to a 'product';
            native_product = {
                "productcode": productcode,
                "subproductcode": productcode + "_native",
                "version": productversion
            }

            sources_list = querydb.get_product_sources(**native_product)

            logger.debug("For product [%s] N. %s  source is/are found" %
                         (productcode, len(sources_list)))

            systemsettings = functions.getSystemSettings()

            for source in sources_list:

                logger_spec = log.my_logger('apps.ingestion.' + productcode +
                                            '.' + productversion)
                logger.debug("Processing Source type [%s] with id [%s]" %
                             (source.type, source.data_source_id))

                # Get datasource description (first record)
                datasource_descr = querydb.get_datasource_descr(
                    source_type=source.type, source_id=source.data_source_id)
                datasource_descr = datasource_descr[0]
                # TODO optimize this in order to get direct file filter expression
                my_filter_expr = get_filenaming_info(source, datasource_descr)

                files = get_files_matching_with_file_expression(my_filter_expr)

                # See ES2-204
                logger_spec.debug(
                    "Number of files found for product [%s] is: %s" %
                    (active_product_ingest[0], len(files)))
                if len(files) > 0:
                    # Get list of ingestions triggers [prod/subprod/mapset]
                    ingestions = querydb.get_ingestion_subproduct(
                        allrecs=False, **product)

                    # Loop over ingestion triggers
                    subproducts = list()
                    for ingest in ingestions:
                        # TODO if one ingest gives true and another false?
                        dates_not_in_filename = is_date_not_in_filename(
                            ingest.input_to_process_re)
                        logger.debug(" --> processing subproduct: %s" %
                                     ingest.subproductcode)

                        args = {
                            "productcode": product['productcode'],
                            "subproductcode": ingest.subproductcode,
                            "datasource_descr_id":
                            datasource_descr.datasource_descr_id,
                            "version": product['version']
                        }
                        product_in_info = querydb.get_product_in_info(**args)
                        # TODO verify the approach Should we get subproduct from single query
                        subproduct = get_subproduct(
                            ingest, product_in_info,
                            datasource_descr.datasource_descr_id)
                        if subproduct is not None:
                            subproducts.append(subproduct)

                    # BUGFIX: subproducts is always a list, so the original
                    # `subproducts is None` test could never be true; an
                    # empty list is the real "nothing to ingest" condition.
                    if not subproducts:
                        #TODO what to do?
                        logger.error(
                            "For current active ingestion No subproducts for Product [%s] "
                            % (productcode))

                    # Get the list of unique dates by extracting the date from all files.
                    # NOTE(review): ingest/product_in_info/dates_not_in_filename
                    # here carry the LAST loop iteration's values, and are
                    # undefined if `ingestions` was empty — confirm intended.
                    dates_list = get_list_unique_dates(datasource_descr, files,
                                                       dates_not_in_filename,
                                                       product_in_info,
                                                       ingest.mapsetcode)

                    # Loop over dates and get list of files
                    for in_date in dates_list:
                        date_fileslist = get_dates_file_list(
                            dates_not_in_filename, files, my_filter_expr,
                            in_date, logger_spec)
                        # Pass list of files to ingestion routine
                        if (not dry_run):
                            try:
                                result = ingestion(date_fileslist,
                                                   in_date,
                                                   product,
                                                   subproducts,
                                                   datasource_descr,
                                                   logger_spec,
                                                   echo_query=echo_query)
                            except Exception:
                                # Narrowed from a bare `except:`; keep the
                                # loop going on per-date ingestion failures.
                                logger.error(
                                    "Error in ingestion of file [%s] " %
                                    (functions.conv_list_2_string(
                                        date_fileslist)))
                            else:
                                # Result is None means we are still waiting for some files to be received. Keep files in /data/ingest
                                # dates_not_in_filename means the input files contains many dates (e.g. GSOD precip)
                                if result is not None and not dates_not_in_filename:
                                    if source.store_original_data or systemsettings[
                                            'type_installation'] == 'Server':
                                        store_native_files(
                                            product, date_fileslist,
                                            logger_spec)
                                    else:
                                        delete_files(date_fileslist,
                                                     logger_spec)

                        else:
                            time.sleep(10)