Example #1
def build_list_dates_generic(from_date, to_date, frequency_id):
    # Add a check on frequency
    try:
        frequency = datasets.Dataset.get_frequency(
            frequency_id, datasets.Frequency.DATEFORMAT.DATETIME)
    except Exception as inst:
        logger.debug("Error in datasets.Dataset.get_frequency: %s" %
                     inst.args[0])
        raise

    # Manage the start_date (mandatory).
    try:
        # If it is a date, convert to datetime
        if functions.is_date_yyyymmdd(str(from_date), silent=True):
            datetime_start = datetime.datetime.strptime(
                str(from_date), '%Y%m%d')
        else:
            # If it is a negative number of days, subtract from the current date
            if isinstance(from_date, int) and from_date < 0:
                datetime_start = datetime.datetime.today() - datetime.timedelta(days=-from_date)
            else:
                logger.debug("Error in Start Date: must be YYYYMMDD or -Ndays")
                raise Exception("Start Date not valid")
    except Exception:
        raise Exception("Start Date not valid")

    # Manage the end_date (mandatory).
    try:
        if functions.is_date_yyyymmdd(str(to_date), silent=True):
            datetime_end = datetime.datetime.strptime(str(to_date), '%Y%m%d')
        # If it is a number of days, subtract from (or add to) the current date
        elif isinstance(to_date, int):
            if to_date < 0:
                datetime_end = datetime.datetime.today() - datetime.timedelta(
                    days=-to_date)
            else:
                datetime_end = datetime.datetime.today() + datetime.timedelta(
                    days=to_date)
        else:
            datetime_end = datetime.datetime.today()
    except Exception:
        # Fall back to the current date if the end_date cannot be interpreted
        datetime_end = datetime.datetime.today()

    try:
        dates = frequency.get_dates(datetime_start, datetime_end)
    except Exception as inst:
        logger.debug("Error in frequency.get_dates: %s" % inst.args[0])
        raise

    return dates
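
A minimal usage sketch for the function above, assuming the module-level imports it relies on (datetime, logger, functions, datasets) are in place; the frequency identifier 'e1day' and the concrete dates are hypothetical placeholders:

# Hypothetical invocation of build_list_dates_generic (sketch only)
# Explicit YYYYMMDD range:
dates = build_list_dates_generic(20200101, 20200131, 'e1day')
# Rolling window: from 30 days ago up to today:
dates = build_list_dates_generic(-30, None, 'e1day')
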
Example #2
def syncGeoserver():
    #
    #   Copy some 'relevant' datasets to GeoServer
    #   Selection of datasets is done on the basis of the product.geoserver table
    #

    # Get list of all 'relevant' subproducts (see 2. above)
    list_active_geoserver = esTools.get_activated_geoserver()

    # Loop over existing sub_products
    for geoserver_sprod in list_active_geoserver:

        # Extract local variable:
        my_prod = geoserver_sprod.productcode
        my_subprod = geoserver_sprod.subproductcode
        my_version = geoserver_sprod.version
        start_date = geoserver_sprod.startdate
        end_date = geoserver_sprod.enddate

        logger.info("Working on Product/Subproduct/Version: {0}/{1}/{2}".format(my_prod, my_subprod, my_version))

        # Manage dates from bigint to datetime
        if functions.is_date_yyyymmdd(str(start_date), silent=True):
            date_start = datetime.datetime.strptime(str(start_date), '%Y%m%d').date()
        else:
            date_start = None

        if functions.is_date_yyyymmdd(str(end_date), silent=True):
            date_end = datetime.datetime.strptime(str(end_date), '%Y%m%d').date()
        else:
            date_end = None

        # Get additional products info
        product_info = querydb.get_product_out_info(productcode=my_prod,
                                                    subproductcode=my_subprod,
                                                    version=my_version)

        # my_mapset   = subprod.mapsetcode
        my_type = product_info[0].product_type
        my_category = product_info[0].category_id

        # Create a Product object (to get mapsets)
        my_product = products.Product(my_prod, version=my_version)
        my_mapsets = my_product.mapsets

        if len(my_mapsets) > 1:
            logger.info('More than one mapset exists; taking the first.')

        if len(my_mapsets) == 0:
            logger.warning('No mapset exists. Skip.')
            continue

        my_mapset = my_mapsets[0]

        # Create a Dataset object (to get file list)
        # If date_start is not set (e.g. for 10davg products), create the Dataset without dates
        if date_start:
            my_dataset = datasets.Dataset(my_prod, my_subprod, my_mapset, version=my_version, from_date=date_start,
                                          to_date=date_end)
            if my_dataset._frequency.dateformat == 'MMDD':
                logger.warning('Product of type MMDD: date specification not supported. Skip.')
                continue
            file_list = my_dataset.get_filenames_range()
        else:
            my_dataset = datasets.Dataset(my_prod, my_subprod, my_mapset, version=my_version)
            file_list = my_dataset.get_filenames()

        # Check that there is at least 1 file
        if len(file_list) > 0:
            # Check the Workspace exists, or create it
            my_workspace = esTools.setWorkspaceName(my_category, my_prod, my_subprod, my_version, my_mapset,
                                                    nameType=geoserverREST.geoserverWorkspaceName)

            if not geoserverREST.isWorkspace(my_workspace):
                geoserverREST.createWorkspace(my_workspace)

            # Loop over files and upload
            for my_file in file_list:
                my_date = functions.get_date_from_path_full(my_file)

                # if subprod in list_active_subprods:
                logger.debug("Working on Product/Subproduct/Version/Mapset/Date: {0}/{1}/{2}/{3}/{4}".format(
                    my_prod, my_subprod, my_version, my_mapset, my_date))

                # Upload the file and register
                esTools.uploadAndRegisterRaster(my_category, my_prod, my_subprod, my_version, my_mapset, my_date,
                                                my_type, local_data_dir)
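
The start/end dates come from the product.geoserver table as YYYYMMDD bigints; below is a minimal, standard-library-only sketch of the conversion pattern used above (the helper name is illustrative, it is not the project's functions.is_date_yyyymmdd):

import datetime

def yyyymmdd_to_date(value):
    # Convert a YYYYMMDD bigint/string to datetime.date, or None when invalid
    try:
        return datetime.datetime.strptime(str(value), '%Y%m%d').date()
    except (ValueError, TypeError):
        return None

date_start = yyyymmdd_to_date(20200101)   # datetime.date(2020, 1, 1)
date_end = yyyymmdd_to_date(None)         # None -> Dataset is created without dates
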
Example #3
def loop_get_internet(dry_run=False):

    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving data from INTERNET.")

    while True:
        output_dir = es_constants.ingest_dir
        logger.debug("Check if the Ingest Server input directory : %s exists.", output_dir)
        if not os.path.exists(output_dir):
            logger.fatal("The Ingest Server input directory : %s doesn't exists.", output_dir)
            exit(1)

        if not os.path.exists(es_constants.processed_list_int_dir):
            os.mkdir(es_constants.processed_list_int_dir)

        while True:

            try:
                time_sleep = user_def_sleep
                logger.debug("Sleep time set to : %s.", time_sleep)
            except Exception:
                logger.warning("Sleep time not defined. Setting to default=1min. Continue.")
                time_sleep = 60

            logger.debug("Reading active INTERNET data sources from database")
            internet_sources_list = querydb.get_active_internet_sources(echo=echo_query)

            # Loop over active triggers
            try:
              for internet_source in internet_sources_list:
                logger.debug("Processing internet source  %s.", internet_source.descriptive_name)

                processed_list_filename = es_constants.get_internet_processed_list_prefix+str(internet_source.internet_id)+'.list'
                processed_info_filename = es_constants.get_internet_processed_list_prefix+str(internet_source.internet_id)+'.info'

                # Create objects for list and info
                processed_list = []
                processed_info = {'length_proc_list': 0,
                                  'time_latest_exec': datetime.datetime.now(),
                                  'time_latest_copy': datetime.datetime.now()}
                # Restore/Create List
                processed_list=functions.restore_obj_from_pickle(processed_list, processed_list_filename)
                # Restore/Create Info
                processed_info=functions.restore_obj_from_pickle(processed_info, processed_info_filename)
                # Update processing time (in case it is restored)
                processed_info['time_latest_exec']=datetime.datetime.now()

                logger.debug("Create current list of file to process for source %s.", internet_source.internet_id)
                if internet_source.user_name is None:
                    user_name = "anonymous"
                else:
                    user_name = internet_source.user_name
                
                if internet_source.password is None:
                    password = "******"
                else:
                    password = internet_source.password
                    
                usr_pwd = str(user_name)+':'+str(password)
                
                logger.debug("              Url is %s.", internet_source.url)
                logger.debug("              usr/pwd is %s.", usr_pwd)
                logger.debug("              regex   is %s.", internet_source.include_files_expression)

                internet_type = internet_source.type

                # Initialize to an empty list, in case no files can be listed for this source type
                current_list = []

                if internet_type == 'ftp':
                    # Note that the following list might contain sub-dirs (it reflects full_regex)
                    current_list = get_list_matching_files_dir_ftp(str(internet_source.url), str(usr_pwd), str(internet_source.include_files_expression))

                elif internet_type == 'http_tmpl':
                    # Manage the dates: start_date is mandatory; end_date is replaced by 'today' if missing/wrong
                    try:
                        if functions.is_date_yyyymmdd(str(internet_source.start_date), silent=True):
                            datetime_start = datetime.datetime.strptime(str(internet_source.start_date), '%Y%m%d')
                        else:
                            raise Exception("Start Date not valid")
                    except Exception:
                        raise Exception("Start Date not valid")
                    try:
                        if functions.is_date_yyyymmdd(str(internet_source.end_date), silent=True):
                            datetime_end = datetime.datetime.strptime(str(internet_source.end_date), '%Y%m%d')
                        else:
                            datetime_end = datetime.datetime.today()
                    except Exception:
                        # Fall back to 'today' if the end_date cannot be interpreted
                        datetime_end = datetime.datetime.today()
                    # Create the full list of filenames from a 'template' which contains 'date' placeholders
                    try:
                        current_list = build_list_matching_for_http(str(internet_source.url),
                                                                    str(internet_source.include_files_expression),
                                                                    datetime_start,
                                                                    datetime_end,
                                                                    str(internet_source.frequency_id))
                    except Exception:
                        logger.error("Error in creating date lists. Continue")

                logger.debug("Number of files currently available for source %s is %i", internet_source.internet_id, len(current_list))
                if len(current_list) > 0:
                    logger.debug("Number of files already copied for trigger %s is %i", internet_source.internet_id, len(processed_list))
                    listtoprocess = []
                    for current_file in current_list:
                        if len(processed_list) == 0:
                            listtoprocess.append(current_file)
                        else:
                            #if os.path.basename(current_file) not in processed_list: -> save in .list subdirs as well !!
                            if current_file not in processed_list:
                                listtoprocess.append(current_file)

                    logger.debug("Number of files to be copied for trigger %s is %i", internet_source.internet_id, len(listtoprocess))
                    if len(listtoprocess) > 0:
                         logger.debug("Loop on the found files.")
                         if not dry_run:
                             for filename in list(listtoprocess):
                                 logger.debug("Processing file: "+str(internet_source.url)+os.path.sep+filename)
                                 try:
                                    result = get_file_from_url(str(internet_source.url)+os.path.sep+filename, target_file=os.path.basename(filename), target_dir=es_constants.ingest_dir, userpwd=str(usr_pwd))
                                    if not result:
                                        logger.info("File %s copied.", filename)
                                        processed_list.append(filename)
                                 except:
                                   logger.warning("Problem while copying file: %s.", filename)
                         else:
                             logger.info('Dry_run is set: do not get files')

                if not dry_run:
                    functions.dump_obj_to_pickle(processed_list, processed_list_filename)
                    functions.dump_obj_to_pickle(processed_info, processed_info_filename)

              sleep(float(time_sleep))
            # Loop over sources
            except Exception as inst:
              logger.error("Error while processing source %s. Continue" % internet_source.descriptive_name)
              sleep(float(time_sleep))

    exit(0)
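
The processed_list/processed_info bookkeeping above depends on the project helpers functions.restore_obj_from_pickle and functions.dump_obj_to_pickle; below is a minimal sketch of that restore-or-default pattern using only the standard library (an illustration of the idea, not the project's actual implementation):

import os
import pickle

def restore_obj_from_pickle(default_obj, filename):
    # Return the object previously saved in 'filename', or the default if no file exists yet
    if os.path.isfile(filename):
        with open(filename, 'rb') as fid:
            return pickle.load(fid)
    return default_obj

def dump_obj_to_pickle(obj, filename):
    # Persist the object so the next loop iteration can resume from the saved state
    with open(filename, 'wb') as fid:
        pickle.dump(obj, fid)
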
Example #4
def define_output_data_format(datasource_descr, in_date, out_date_format):
    # Convert the in_date format into a convenient one for DB and file naming
    # (i.e YYYYMMDD or YYYYMMDDHHMM)
    # Initialize to error value
    output_date_str = -1

    if datasource_descr.date_format == 'YYYYMMDD':
        if functions.is_date_yyyymmdd(in_date):
            output_date_str = in_date
        else:
            output_date_str = -1

    if datasource_descr.date_format == 'YYYYMMDDHHMM':
        if functions.is_date_yyyymmddhhmm(in_date):
            output_date_str = in_date
        else:
            output_date_str = -1

    if datasource_descr.date_format == 'YYYYDOY_YYYYDOY':
        output_date_str = functions.conv_date_yyyydoy_2_yyyymmdd(
            str(in_date)[0:7])

    if datasource_descr.date_format == 'YYYYMMDD_YYYYMMDD':
        output_date_str = str(in_date)[0:8]
        if not functions.is_date_yyyymmdd(output_date_str):
            output_date_str = -1

    if datasource_descr.date_format == 'YYYYDOY':
        output_date_str = functions.conv_date_yyyydoy_2_yyyymmdd(in_date)

    if datasource_descr.date_format == 'YYYY_MM_DKX':
        output_date_str = functions.conv_yyyy_mm_dkx_2_yyyymmdd(in_date)

    if datasource_descr.date_format == 'YYMMK':
        output_date_str = functions.conv_yymmk_2_yyyymmdd(in_date)

    if datasource_descr.date_format == 'YYYYdMMdK':
        output_date_str = functions.conv_yyyydmmdk_2_yyyymmdd(in_date)

    if datasource_descr.date_format == 'YYYYMMDD_G2':
        # The date (e.g. 20151103) is converted to the dekad it belongs to (e.g. 20151101)
        output_date_str = functions.conv_yyyymmdd_g2_2_yyyymmdd(in_date)

    if datasource_descr.date_format == 'MMDD':
        output_date_str = str(in_date)

    if datasource_descr.date_format == 'YYYYMM':
        # Convert from YYYYMM -> YYYYMMDD
        output_date_str = str(in_date) + '01'

    if datasource_descr.date_format == 'YYYY_DK':
        # The date (e.g. 2020_36) is converted to the dekad it belongs to (e.g. 20201221)
        output_date_str = functions.conv_yyyydk_2_yyyymmdd(in_date)

    if output_date_str == -1:
        out_date_str_final = str(in_date) + '_DATE_ERROR_'
    else:
        if out_date_format == 'YYYYMMDDHHMM':
            if functions.is_date_yyyymmddhhmm(output_date_str):
                out_date_str_final = output_date_str
            elif functions.is_date_yyyymmdd(output_date_str):
                out_date_str_final = output_date_str + '0000'
        elif out_date_format == 'YYYYMMDD':
            if functions.is_date_yyyymmdd(output_date_str, silent=True):
                out_date_str_final = output_date_str
            elif functions.is_date_yyyymmddhhmm(output_date_str):
                out_date_str_final = output_date_str[0:8]
        elif out_date_format == 'MMDD':
            if functions.is_date_mmdd(output_date_str):
                out_date_str_final = output_date_str

    return out_date_str_final
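
Several branches above delegate to project converters such as functions.conv_date_yyyydoy_2_yyyymmdd; purely as an illustration (not the project's implementation), a YYYYDOY to YYYYMMDD conversion can be done with the standard library like this:

import datetime

def yyyydoy_to_yyyymmdd(in_date):
    # '2015306' (day 306 of 2015) -> '20151102'
    year = int(str(in_date)[0:4])
    doy = int(str(in_date)[4:])
    date = datetime.date(year, 1, 1) + datetime.timedelta(days=doy - 1)
    return date.strftime('%Y%m%d')

print(yyyydoy_to_yyyymmdd('2015306'))   # 20151102
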
Example #5
def build_list_matching_files_motu(base_url, template, from_date, to_date,
                                   frequency_id, username, password,
                                   files_filter_expression):

    # Add a check on frequency
    try:
        frequency = datasets.Dataset.get_frequency(
            frequency_id, datasets.Frequency.DATEFORMAT.DATETIME)
    except Exception as inst:
        logger.debug("Error in datasets.Dataset.get_frequency: %s" %
                     inst.args[0])
        raise

    # Manage the start_date (mandatory).
    try:
        # If it is a date, convert to datetime
        if functions.is_date_yyyymmdd(str(from_date), silent=True):
            datetime_start = datetime.datetime.strptime(
                str(from_date), '%Y%m%d')
        else:
            # If it is a negative number of days, subtract from the current date
            if isinstance(from_date, int) and from_date < 0:
                datetime_start = datetime.datetime.today() - datetime.timedelta(days=-from_date)
            else:
                logger.debug("Error in Start Date: must be YYYYMMDD or -Ndays")
                raise Exception("Start Date not valid")
    except Exception:
        raise Exception("Start Date not valid")

    # Manage the end_date (mandatory).
    try:
        if functions.is_date_yyyymmdd(str(to_date), silent=True):
            datetime_end = datetime.datetime.strptime(str(to_date), '%Y%m%d')
        # If it is a number of days, subtract from (or add to) the current date
        elif isinstance(to_date, int):
            if to_date < 0:
                datetime_end = datetime.datetime.today() - datetime.timedelta(
                    days=-to_date)
            else:
                datetime_end = datetime.datetime.today() + datetime.timedelta(
                    days=to_date)
        else:
            datetime_end = datetime.datetime.today()
    except Exception:
        # Fall back to the current date if the end_date cannot be interpreted
        datetime_end = datetime.datetime.today()

    try:
        dates = frequency.get_dates(datetime_start, datetime_end)
    except Exception as inst:
        logger.debug("Error in frequency.get_dates: %s" % inst.args[0])
        raise

    try:
        list_filenames = motu_api.motu_4_dates(dates, template, base_url,
                                               username, password,
                                               files_filter_expression)
        #list_filenames = frequency.get_internet_dates(dates, template)
    except Exception as inst:
        logger.debug("Error in motu_api.motu_getlists: %s" % inst.args[0])
        raise

    return list_filenames
Example #6
def build_list_matching_files_tmpl(base_url, template, from_date, to_date,
                                   frequency_id):

    # Add a check on frequency
    try:
        frequency = datasets.Dataset.get_frequency(
            frequency_id, datasets.Frequency.DATEFORMAT.DATETIME)
    except Exception as inst:
        logger.debug("Error in datasets.Dataset.get_frequency: %s" %
                     inst.args[0])
        raise

    # Manage the start_date (mandatory).
    try:
        # If it is a date, convert to datetime
        if functions.is_date_yyyymmdd(str(from_date), silent=True):
            datetime_start = datetime.datetime.strptime(
                str(from_date), '%Y%m%d')
        else:
            # If it is a negative number of days, subtract from the current date
            if isinstance(from_date, int) and from_date < 0:
                datetime_start = datetime.datetime.today() - datetime.timedelta(days=-from_date)
            else:
                logger.debug("Error in Start Date: must be YYYYMMDD or -Ndays")
                raise Exception("Start Date not valid")
    except Exception:
        raise Exception("Start Date not valid")

    # Manage the end_date (mandatory).
    try:
        if functions.is_date_yyyymmdd(str(to_date), silent=True):
            datetime_end = datetime.datetime.strptime(str(to_date), '%Y%m%d')
        # If it is a negative number of days, subtract from the current date
        elif isinstance(to_date, int):
            if to_date < 0:
                datetime_end = datetime.datetime.today() - datetime.timedelta(
                    days=-to_date)
            else:
                datetime_end = datetime.datetime.today()
        else:
            datetime_end = datetime.datetime.today()
    except Exception:
        # Fall back to the current date if the end_date cannot be interpreted
        datetime_end = datetime.datetime.today()

    try:
        dates = frequency.get_dates(datetime_start, datetime_end)
    except Exception as inst:
        logger.debug("Error in frequency.get_dates: %s" % inst.args[0])
        raise

    try:
        if sys.platform == 'win32':
            template = template.replace("-", "#")
        list_filenames = frequency.get_internet_dates(dates, template)
    except Exception as inst:
        logger.debug("Error in frequency.get_internet_dates: %s" %
                     inst.args[0])
        raise

    return list_filenames

    ######################################################################################
    #   build_list_matching_files_sentinel_sat
    #   Purpose: return the list of file names matching a 'template' with 'date' placeholders
    #            It is the entry point for the 'http_templ' source type
    #   Author: Vijay Charan Venkatachalam, JRC, European Commission
    #   Date: 2015/02/18
    #   Inputs: template: regex including subdirs (e.g. 'Collection51/TIFF/Win1[01]/201[1-3]/MCD45monthly.A20.*burndate.tif.gz'
    #           from_date: start date for the dataset (datetime.datetime object)
    #           to_date: end date for the dataset (datetime.datetime object)
    #           frequency: dataset 'frequency' (see DB 'frequency' table)
    #
    # def build_list_matching_files_sentinel_sat(base_url, template, from_date, to_date, frequency_id,  username, password):
    #
    #     # Add a check on frequency
    #     try:
    #         frequency = datasets.Dataset.get_frequency(frequency_id, datasets.Frequency.DATEFORMAT.DATETIME)
    #     except Exception as inst:
    #         logger.debug("Error in datasets.Dataset.get_frequency: %s" %inst.args[0])
    #         raise
    #
    #     # Manage the start_date (mandatory).
    #     try:
    #         # If it is a date, convert to datetime
    #         if functions.is_date_yyyymmdd(str(from_date), silent=True):
    #             datetime_start=datetime.datetime.strptime(str(from_date),'%Y%m%d')
    #         else:
    #             # If it is a negative number, subtract from current date
    #             if isinstance(from_date,int) or isinstance(from_date,long):
    #                 if from_date < 0:
    #                     datetime_start=datetime.datetime.today() - datetime.timedelta(days=-from_date)
    #             else:
    #                 logger.debug("Error in Start Date: must be YYYYMMDD or -Ndays")
    #                 raise Exception("Start Date not valid")
    #     except:
    #         raise Exception("Start Date not valid")
    #
    #     # Manage the end_date (mandatory).
    #     try:
    #         if functions.is_date_yyyymmdd(str(to_date), silent=True):
    #             datetime_end=datetime.datetime.strptime(str(to_date),'%Y%m%d')
    #         # If it is a negative number, subtract from current date
    #         elif isinstance(to_date,int) or isinstance(to_date,long):
    #             if to_date < 0:
    #                 datetime_end=datetime.datetime.today() - datetime.timedelta(days=-to_date)
    #         else:
    #             datetime_end=datetime.datetime.today()
    #     except:
    #         pass
    #
    #     try:
    #         list_filenames = sentinelsat_api.sentinelsat_getlists(base_url, template, datetime_start, datetime_end)#frequency.get_dates(datetime_start, datetime_end)
    #     except Exception as inst:
    #         logger.debug("Error in sentinelsat.get_lists: %s" %inst.args[0])
    #         raise

Example #7
    def test_is_date_time(self):

        self.assertTrue(f.is_date_yyyymmdd(self.string_yyyymmdd))
        self.assertTrue(f.is_date_mmdd(self.string_mmdd))
        self.assertTrue(f.is_date_yyyymmddhhmm(self.string_yyyymmddhhmm))
        self.assertTrue(f.is_date_yyyydoy(self.string_yyyydoy))
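
The fixtures referenced above (self.string_yyyymmdd and friends) are defined elsewhere in the test class; purely as an illustration, values of the following shape would satisfy the corresponding validators:

# Hypothetical example values for the fixtures used in the test above
string_yyyymmdd = '20200115'          # YYYYMMDD calendar date
string_mmdd = '0115'                  # MMDD, month and day only
string_yyyymmddhhmm = '202001151230'  # YYYYMMDDHHMM, date plus hour and minute
string_yyyydoy = '2020015'            # YYYYDOY, year plus day-of-year
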
Example #9
def build_list_matching_files_cds(base_url, template, from_date, to_date,
                                  frequency_id, resourcename_uuid):
    # Add a check on frequency
    try:
        frequency = datasets.Dataset.get_frequency(
            frequency_id, datasets.Frequency.DATEFORMAT.DATETIME)
    except Exception as inst:
        logger.debug("Error in datasets.Dataset.get_frequency: %s" %
                     inst.args[0])
        raise

    # Manage the start_date (mandatory).
    try:
        # If it is a date, convert to datetime
        if functions.is_date_yyyymmdd(str(from_date), silent=True):
            datetime_start = datetime.datetime.strptime(
                str(from_date), '%Y%m%d')
        else:
            # If it is a negative number of days, subtract from the current date
            if isinstance(from_date, int) and from_date < 0:
                datetime_start = datetime.datetime.today() - datetime.timedelta(days=-from_date)
            else:
                logger.debug("Error in Start Date: must be YYYYMMDD or -Ndays")
                raise Exception("Start Date not valid")
    except Exception:
        raise Exception("Start Date not valid")

    # Manage the end_date (mandatory).
    try:
        if functions.is_date_yyyymmdd(str(to_date), silent=True):
            datetime_end = datetime.datetime.strptime(str(to_date), '%Y%m%d')
        # If it is a negative number of days, subtract from the current date
        elif isinstance(to_date, int):
            if to_date < 0:
                datetime_end = datetime.datetime.today() - datetime.timedelta(
                    days=-to_date)
            else:
                datetime_end = datetime.datetime.today()
        else:
            datetime_end = datetime.datetime.today()
    except Exception:
        # Fall back to the current date if the end_date cannot be interpreted
        datetime_end = datetime.datetime.today()

    try:
        dates = frequency.get_dates(datetime_start, datetime_end)
    except Exception as inst:
        logger.debug("Error in frequency.get_dates: %s" % inst.args[0])
        raise

    try:
        if sys.platform == 'win32':
            template = template.replace("-", "#")

        # return lst
        list_input_files = cds_api.create_list_cds(dates, template, base_url,
                                                   resourcename_uuid)

    except Exception as inst:
        logger.debug("Error in frequency.get_internet_dates: %s" %
                     inst.args[0])
        raise

    return list_input_files
Example #10
    def test_is_date_yyyymmdd(self):
        self.assertTrue(functions.is_date_yyyymmdd(self.string_yyyymmdd))