Example #1
    def test_save_status(self):

        # Define .pck filename
        pickle_filename = es2system.system_status_filename()

        # Run the method to save status
        result = es2system.save_status_local_machine()

        # Read the .pck
        machine_status = functions.restore_obj_from_pickle(None, pickle_filename)

        # Checks
        print('Check psql ON')
        self.assertEqual(machine_status['postgresql_status'], True)
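
Note: this test, and all the loops below, persist state through the functions.restore_obj_from_pickle / functions.dump_obj_to_pickle pair. The helpers are not shown in these examples; a minimal sketch of the assumed behaviour (restore returns the caller's default object when no pickle exists yet) is:

import os
import pickle

def restore_obj_from_pickle(obj, filename):
    # Return the unpickled object if the file exists; otherwise keep the
    # default object passed by the caller (assumed behaviour of the helper).
    if os.path.isfile(filename):
        with open(filename, 'rb') as f:
            obj = pickle.load(f)
    return obj

def dump_obj_to_pickle(obj, filename):
    # Persist the object so the next run can restore it.
    with open(filename, 'wb') as f:
        pickle.dump(obj, f)
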
Example #2
def loop_get_internet(dry_run=False):

    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving data from INTERNET.")

    while True:
        output_dir = es_constants.ingest_dir
        logger.debug("Check if the Ingest Server input directory : %s exists.", output_dir)
        if not os.path.exists(output_dir):
            logger.fatal("The Ingest Server input directory : %s doesn't exists.", output_dir)
            exit(1)

        if not os.path.exists(es_constants.processed_list_int_dir):
            os.mkdir(es_constants.processed_list_int_dir)

        while 1:

            try:
                time_sleep = user_def_sleep
                logger.debug("Sleep time set to : %s.", time_sleep)
            except NameError:
                logger.warning("Sleep time not defined. Setting to default=1min. Continue.")
                time_sleep = 60

            logger.debug("Reading active INTERNET data sources from database")
            internet_sources_list = querydb.get_active_internet_sources(echo=echo_query)

            # Loop over active triggers
            try:
              for internet_source in internet_sources_list:
                logger.debug("Processing internet source  %s.", internet_source.descriptive_name)

                processed_list_filename = es_constants.get_internet_processed_list_prefix+str(internet_source.internet_id)+'.list'
                processed_info_filename = es_constants.get_internet_processed_list_prefix+str(internet_source.internet_id)+'.info'

                # Create objects for list and info
                processed_list = []
                processed_info = {'length_proc_list': 0,
                                  'time_latest_exec': datetime.datetime.now(),
                                  'time_latest_copy': datetime.datetime.now()}
                # Restore/Create List
                processed_list=functions.restore_obj_from_pickle(processed_list, processed_list_filename)
                # Restore/Create Info
                processed_info=functions.restore_obj_from_pickle(processed_info, processed_info_filename)
                # Update processing time (in case it is restored)
                processed_info['time_latest_exec']=datetime.datetime.now()

                logger.debug("Create current list of file to process for source %s.", internet_source.internet_id)
                if internet_source.user_name is None:
                    user_name = "anonymous"
                else:
                    user_name = internet_source.user_name
                
                if internet_source.password is None:
                    password = "******"
                else:
                    password = internet_source.password
                    
                usr_pwd = str(user_name)+':'+str(password)
                
                logger.debug("              Url is %s.", internet_source.url)
                logger.debug("              usr/pwd is %s.", usr_pwd)
                logger.debug("              regex   is %s.", internet_source.include_files_expression)

                internet_type = internet_source.type
                current_list = []

                if internet_type == 'ftp':
                    # Note that the following list might contain sub-dirs (it reflects full_regex)
                    current_list = get_list_matching_files_dir_ftp(str(internet_source.url), str(usr_pwd), str(internet_source.include_files_expression))

                elif internet_type == 'http_tmpl':
                    # Manage the dates: start_date is mandatory; end_date is replaced by 'today' if missing/wrong
                    try:
                        if functions.is_date_yyyymmdd(str(internet_source.start_date), silent=True):
                            datetime_start = datetime.datetime.strptime(str(internet_source.start_date), '%Y%m%d')
                        else:
                            raise Exception("Start Date not valid")
                    except Exception:
                        raise Exception("Start Date not valid")
                    try:
                        if functions.is_date_yyyymmdd(str(internet_source.end_date), silent=True):
                            datetime_end = datetime.datetime.strptime(str(internet_source.end_date), '%Y%m%d')
                        else:
                            datetime_end = datetime.datetime.today()
                    except Exception:
                        datetime_end = datetime.datetime.today()
                    # Create the full list of filenames from a 'template' URL which contains date placeholders
                    try:
                        current_list = build_list_matching_for_http(str(internet_source.url),
                                                                    str(internet_source.include_files_expression),
                                                                    datetime_start,
                                                                    datetime_end,
                                                                    str(internet_source.frequency_id))
                    except Exception:
                        logger.error("Error in creating date lists. Continue")

                logger.debug("Number of files currently available for source %s is %i", internet_source.internet_id, len(current_list))
                if len(current_list) > 0:
                    logger.debug("Number of files already copied for trigger %s is %i", internet_source.internet_id, len(processed_list))
                    listtoprocess = []
                    for current_file in current_list:
                        if len(processed_list) == 0:
                            listtoprocess.append(current_file)
                        else:
                            #if os.path.basename(current_file) not in processed_list: -> save in .list subdirs as well !!
                            if current_file not in processed_list:
                                listtoprocess.append(current_file)

                    logger.debug("Number of files to be copied for trigger %s is %i", internet_source.internet_id, len(listtoprocess))
                    if listtoprocess:
                         logger.debug("Loop on the found files.")
                         if not dry_run:
                             for filename in list(listtoprocess):
                                 logger.debug("Processing file: "+str(internet_source.url)+os.path.sep+filename)
                                 try:
                                     result = get_file_from_url(str(internet_source.url)+os.path.sep+filename, target_file=os.path.basename(filename), target_dir=es_constants.ingest_dir, userpwd=str(usr_pwd))
                                     if not result:
                                         logger.info("File %s copied.", filename)
                                         processed_list.append(filename)
                                 except Exception:
                                     logger.warning("Problem while copying file: %s.", filename)
                         else:
                             logger.info('Dry_run is set: do not get files')

                if not dry_run:
                    functions.dump_obj_to_pickle(processed_list, processed_list_filename)
                    functions.dump_obj_to_pickle(processed_info, processed_info_filename)

              sleep(float(user_def_sleep))
            # Loop over sources
            except Exception as inst:
              logger.error("Error while processing source %s. Continue" % internet_source.descriptive_name)
              sleep(float(user_def_sleep))

    exit(0)
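
The loops register a signal_handler for SIGTERM/SIGINT/SIGILL, but the handler itself is not shown in these examples. A plausible sketch, assuming it flushes the module-level processed list/info globals before exiting, is:

import signal
import sys

def signal_handler(signum, frame):
    # Assumed behaviour: save the current state (the module-level globals set
    # by the loop functions) and exit cleanly when the daemon is stopped.
    logger.info("Signal %s received: saving processed list/info and exiting.", signum)
    try:
        functions.dump_obj_to_pickle(processed_list, processed_list_filename)
        functions.dump_obj_to_pickle(processed_info, processed_info_filename)
    except NameError:
        pass  # no state to save yet
    sys.exit(0)

# Registered exactly as in the loops above:
# signal.signal(signal.SIGTERM, signal_handler)
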
Example #3
def loop_get_internet(dry_run=False, test_one_source=False):

    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving data from INTERNET.")

    while True:
        output_dir = es_constants.get_internet_output_dir
        logger.debug("Check if the Ingest Server input directory : %s exists.",
                     output_dir)
        if not os.path.exists(output_dir):
            # ToDo: create output_dir - ingest directory
            logger.fatal(
                "The Ingest Server input directory : %s doesn't exists.",
                output_dir)
            exit(1)

        if not os.path.exists(es_constants.processed_list_int_dir):
            os.mkdir(es_constants.processed_list_int_dir)

        while 1:

            # Check internet connection (or continue)
            if not functions.internet_on():
                logger.error(
                    "The computer is not currently connected to the internet. Wait 1 minute."
                )
                time.sleep(60)

            else:
                try:
                    time_sleep = user_def_sleep
                    logger.debug("Sleep time set to : %s.", time_sleep)
                except NameError:
                    logger.warning(
                        "Sleep time not defined. Setting to default=1min. Continue."
                    )
                    time_sleep = 60

                logger.info(
                    "Reading active INTERNET data sources from database")
                internet_sources_list = querydb.get_active_internet_sources()

                # Loop over active triggers
                for internet_source in internet_sources_list:
                    try:

                        if test_one_source and (internet_source.internet_id !=
                                                test_one_source):
                            logger.info(
                                "Running in test mode, and source is not %s. Continue.",
                                test_one_source)
                            continue
                        execute_trigger = True
                        # Get this from the pads database table (move from internet_source 'pull_frequency' to the pads table,
                        # so that it can be exploited by eumetcast triggers as well). It is in minute
                        pull_frequency = internet_source.pull_frequency

                        # Manage the case of files to be continuously downloaded (delay < 0)
                        if pull_frequency < 0:
                            do_not_consider_processed_list = True
                            delay_time_source_minutes = -pull_frequency
                        else:
                            do_not_consider_processed_list = False
                            delay_time_source_minutes = pull_frequency

                        if sys.platform == 'win32':
                            internet_id = str(
                                internet_source.internet_id).replace(':', '_')
                        else:
                            internet_id = str(internet_source.internet_id)

                        logger_spec = log.my_logger('apps.get_internet.' +
                                                    internet_id)
                        logger.info("Processing internet source  %s.",
                                    internet_source.descriptive_name)

                        # Create objects for list and info
                        processed_info_filename = es_constants.get_internet_processed_list_prefix + str(
                            internet_id) + '.info'

                        # Restore/Create Info
                        processed_info = None
                        processed_info = functions.restore_obj_from_pickle(
                            processed_info, processed_info_filename)
                        if processed_info is not None:
                            # Check the delay
                            current_delta = datetime.datetime.now() - processed_info['time_latest_exec']
                            current_delta_minutes = int(current_delta.total_seconds() / 60)
                            if current_delta_minutes < delay_time_source_minutes:
                                logger.debug(
                                    "Still waiting up to %i minute - since latest execution.",
                                    delay_time_source_minutes)
                                execute_trigger = False
                        else:
                            # Create processed_info object
                            processed_info = {
                                'length_proc_list': 0,
                                'time_latest_exec': datetime.datetime.now(),
                                'time_latest_copy': datetime.datetime.now()
                            }
                            execute_trigger = True

                        if execute_trigger:
                            # Restore/Create List
                            processed_list = []
                            if not do_not_consider_processed_list:
                                processed_list_filename = es_constants.get_internet_processed_list_prefix + internet_id + '.list'
                                processed_list = functions.restore_obj_from_pickle(
                                    processed_list, processed_list_filename)

                            processed_info[
                                'time_latest_exec'] = datetime.datetime.now()

                            logger.debug(
                                "Create current list of file to process for source %s.",
                                internet_source.internet_id)
                            if internet_source.user_name is None:
                                user_name = "anonymous"
                            else:
                                user_name = internet_source.user_name

                            if internet_source.password is None:
                                password = "******"
                            else:
                                password = internet_source.password

                            usr_pwd = str(user_name) + ':' + str(password)

                            logger_spec.debug("              Url is %s.",
                                              internet_source.url)
                            logger_spec.debug("              usr/pwd is %s.",
                                              usr_pwd)
                            logger_spec.debug(
                                "              regex   is %s.",
                                internet_source.include_files_expression)

                            internet_type = internet_source.type

                            if internet_type == 'ftp' or internet_type == 'http':
                                # Manage the end_date (added for MODIS_FIRMS)
                                if (internet_source.end_date != ''):
                                    end_date = internet_source.end_date
                                else:
                                    end_date = None
                                # Note that the following list might contain sub-dirs (it reflects full_regex)
                                try:
                                    current_list = get_list_matching_files(
                                        str(internet_source.url),
                                        str(usr_pwd),
                                        str(internet_source.
                                            include_files_expression),
                                        internet_type,
                                        end_date=end_date)
                                except Exception:
                                    logger.error(
                                        "Error in creating file lists. Continue"
                                    )
                                    continue

                            elif internet_type == 'http_tmpl':
                                # Create the full filename from a 'template' which contains
                                try:
                                    current_list = build_list_matching_files_tmpl(
                                        str(internet_source.url),
                                        str(internet_source.
                                            include_files_expression),
                                        internet_source.start_date,
                                        internet_source.end_date,
                                        str(internet_source.frequency_id))
                                except Exception:
                                    logger.error(
                                        "Error in creating date lists. Continue"
                                    )
                                    continue

                            elif internet_type == 'motu_client':
                                # Create the full filename from a 'template' which contains
                                try:
                                    current_list = build_list_matching_files_motu(
                                        str(internet_source.url),
                                        str(internet_source.
                                            include_files_expression),
                                        internet_source.start_date,
                                        internet_source.end_date,
                                        str(internet_source.frequency_id),
                                        str(internet_source.user_name),
                                        str(internet_source.password),
                                        str(internet_source.
                                            files_filter_expression),
                                    )

                                except Exception:
                                    logger.error(
                                        "Error in creating motu_client lists. Continue"
                                    )
                                    continue

                            # elif internet_type == 'sentinel_sat':
                            #     # Create the full filename from a 'template' which contains
                            #     try:
                            #         current_list = build_list_matching_files_sentinel_sat(str(internet_source.url),
                            #                                                     str(internet_source.include_files_expression),
                            #                                                     internet_source.start_date,
                            #                                                     internet_source.end_date,
                            #                                                     str(internet_source.frequency_id),
                            #                                                     str(internet_source.user_name),
                            #                                                     str(internet_source.password),
                            #                                                     #str(internet_source.files_filter_expression),
                            #                                                       )
                            #
                            #     except:
                            #         logger.error("Error in creating sentinel_sat lists. Continue")
                            #         continue

                            elif internet_type == 'local':
                                logger.info(
                                    "This internet source is meant to copy data on local filesystem"
                                )
                                try:
                                    current_list = get_list_matching_files_dir_local(
                                        str(internet_source.url),
                                        str(internet_source.
                                            include_files_expression))
                                except Exception:
                                    logger.error(
                                        "Error in creating date lists. Continue"
                                    )
                                    continue

                            elif internet_type == 'offline':
                                logger.info(
                                    "This internet source is meant to work offline (GoogleDrive)"
                                )
                                current_list = []
                            else:
                                logger.error(
                                    "No correct type for this internet source type: %s"
                                    % internet_type)
                                current_list = []
                            logger_spec.debug(
                                "Number of files currently available for source %s is %i",
                                internet_id, len(current_list))

                            if len(current_list) > 0:
                                logger_spec.debug(
                                    "Number of files already copied for trigger %s is %i",
                                    internet_id, len(processed_list))
                                listtoprocess = []
                                for current_file in current_list:
                                    if len(processed_list) == 0:
                                        listtoprocess.append(current_file)
                                    else:
                                        #if os.path.basename(current_file) not in processed_list: -> save in .list subdirs as well !!
                                        if current_file not in processed_list:
                                            listtoprocess.append(current_file)

                                logger_spec.debug(
                                    "Number of files to be copied for trigger %s is %i",
                                    internet_id, len(listtoprocess))
                                if listtoprocess:
                                    # # Debug
                                    # toprint=''
                                    # for elem in listtoprocess:
                                    #    toprint+=elem+','
                                    #    logger_spec.info('List in get_list_matching_files: %s' % toprint)

                                    logger_spec.debug(
                                        "Loop on the found files.")
                                    if not dry_run:
                                        for filename in list(listtoprocess):
                                            logger_spec.debug(
                                                "Processing file: " +
                                                str(internet_source.url) +
                                                os.path.sep + filename)
                                            try:
                                                if internet_type == 'local':
                                                    shutil.copyfile(
                                                        str(internet_source.url) + os.path.sep + filename,
                                                        es_constants.ingest_dir + os.path.basename(filename))
                                                    result = 0
                                                elif internet_type == 'motu_client':
                                                    result = get_file_from_motu_command(
                                                        str(filename),
                                                        #target_file=internet_source.files_filter_expression,
                                                        target_dir=es_constants
                                                        .ingest_dir,
                                                        userpwd=str(usr_pwd))

                                                # elif internet_type == 'sentinel_sat':
                                                #     result = get_file_from_sentinelsat_url(str(filename),
                                                #                                            target_dir=es_constants.ingest_dir)
                                                else:
                                                    result = get_file_from_url(
                                                        str(internet_source.url
                                                            ) + os.path.sep +
                                                        filename,
                                                        target_file=os.path.
                                                        basename(filename),
                                                        target_dir=es_constants
                                                        .ingest_dir,
                                                        userpwd=str(usr_pwd))
                                                if not result:
                                                    logger_spec.info(
                                                        "File %s copied.",
                                                        filename)
                                                    processed_list.append(
                                                        filename)
                                                else:
                                                    logger_spec.warning(
                                                        "File %s not copied.",
                                                        filename)
                                            except Exception:
                                                logger_spec.warning(
                                                    "Problem while copying file: %s.",
                                                    filename)
                                    else:
                                        logger_spec.info(
                                            'Dry_run is set: do not get files')

                            if not dry_run:
                                functions.dump_obj_to_pickle(
                                    processed_list, processed_list_filename)
                                functions.dump_obj_to_pickle(
                                    processed_info, processed_info_filename)

                        sleep(float(user_def_sleep))
                    # Loop over sources
                    except Exception:
                        logger.error(
                            "Error while processing source %s. Continue",
                            internet_source.descriptive_name)
                sleep(float(user_def_sleep))

    exit(0)
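
functions.internet_on(), used above to skip an iteration while the machine is offline, is also not shown. A common implementation of such a connectivity check (the URL and timeout here are arbitrary choices, not the module's actual values) is:

import urllib.request

def internet_on(url='http://www.google.com', timeout=5):
    # True if an HTTP request to a well-known host succeeds within 'timeout'.
    try:
        urllib.request.urlopen(url, timeout=timeout)
        return True
    except Exception:
        return False
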
Example #4
def get_archives_eumetcast_ftp():

    # Ad-hoc definitions (to be copied to settings file)
    source_id = 'MESA:JRC:Archives'
    filter_expression_mesa_jrc = 'MESA_JRC_.*.tif'

    # Get Access credentials
    ftp_eumetcast_url = es_constants.es2globals['ftp_eumetcast_url']
    ftp_eumetcast_userpwd = es_constants.es2globals['ftp_eumetcast_userpwd']

    # Define a file_handler logger 'source-specific' (for GUI)
    logger_spec = log.my_logger('apps.get_archives_eumetcast')
    logger.info("Retrieving MESA_JRC files from PC1.")

    if sys.platform == 'win32':
        source_id = source_id.replace(':', '_')  #Pierluigi
    processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + str(
        source_id) + '.list'
    processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + str(
        source_id) + '.info'

    # Create objects for list and info
    processed_list = []
    processed_info = {
        'length_proc_list': 0,
        'time_latest_exec': datetime.datetime.now(),
        'time_latest_copy': datetime.datetime.now()
    }

    logger.debug("Loading the processed file list for source %s ", source_id)

    # Restore/Create List
    processed_list = functions.restore_obj_from_pickle(
        processed_list, processed_list_filename)
    # Restore/Create Info
    processed_info = functions.restore_obj_from_pickle(
        processed_info, processed_info_filename)
    # Update processing time (in case it is restored)
    processed_info['time_latest_exec'] = datetime.datetime.now()

    logger.debug("Create current list of file to process for trigger %s.",
                 source_id)
    try:
        current_list = get_list_matching_files(ftp_eumetcast_url,
                                               ftp_eumetcast_userpwd,
                                               filter_expression_mesa_jrc,
                                               'ftp',
                                               my_logger=logger_spec)
    except Exception:
        logger.error("Cannot connect to the PC1 via ftp. Wait 1 minute")
        current_list = []
        time.sleep(60)

    logger_spec.info("Number of files currently on PC1 for trigger %s is %i",
                     source_id, len(current_list))

    if len(current_list) > 0:

        #logger.debug("Number of files already copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(processed_list))
        logger_spec.debug(
            "Number of files already copied for trigger %s is %i", source_id,
            len(processed_list))
        listtoprocess = []
        listtoprocess = set(current_list) - set(processed_list)
        #logger.debug("Number of files to be copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(listtoprocess))
        logger_spec.info("Number of files to be copied for trigger %s is %i",
                         source_id, len(listtoprocess))
        if listtoprocess:
            logger_spec.debug("Loop on the found files.")
            for filename in list(listtoprocess):
                try:
                    result = get_file_from_url(
                        str(ftp_eumetcast_url) + os.path.sep + filename,
                        target_file=os.path.basename(filename),
                        target_dir=es_constants.ingest_dir,
                        userpwd=str(ftp_eumetcast_userpwd))
                    if not result:
                        logger_spec.info("File %s copied.", filename)
                        processed_list.append(filename)
                    else:
                        logger_spec.warning("File %s not copied: ", filename)
                except Exception:
                    logger_spec.warning("Problem while copying file: %s.",
                                        filename)
        else:
            logger.debug("Nothing to process - go to next trigger.")

    # Keep only entries still present on the server (don't remove from the list while iterating over it)
    processed_list = [infile for infile in processed_list if infile in current_list]

    functions.dump_obj_to_pickle(processed_list, processed_list_filename)
    functions.dump_obj_to_pickle(processed_info, processed_info_filename)
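
get_file_from_url() is the download primitive shared by these examples; note that callers treat a falsy return value as success. A simplified HTTP-only sketch (the real helper presumably also handles ftp:// URLs, e.g. via pycurl) could be:

import os
import requests

def get_file_from_url(remote_url, target_file=None, target_dir='.', userpwd=':'):
    # Fetch remote_url into target_dir/target_file and return 0 on success;
    # the callers above treat a falsy result as success. HTTP(S) only in this
    # sketch - it does not cover the ftp:// case seen in the examples.
    user, _, pwd = userpwd.partition(':')
    response = requests.get(remote_url, auth=(user, pwd) if user else None, timeout=120)
    response.raise_for_status()
    target = os.path.join(target_dir, target_file or os.path.basename(remote_url))
    with open(target, 'wb') as out:
        out.write(response.content)
    return 0
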
Example #5
def loop_eumetcast(dry_run=False):

    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving EUMETCast data.")

    while True:
        logger.debug("Check if the EUMETCast input directory : %s exists.",
                     input_dir)
        if not os.path.exists(input_dir):
            logger.error(
                "The EUMETCast input directory : %s is not yet mounted.",
                input_dir)

        logger.debug("Check if the Ingest Server input directory : %s exists.",
                     output_dir)
        if not os.path.exists(output_dir):
            logger.fatal(
                "The Ingest Server input directory : %s doesn't exists.",
                output_dir)
            exit(1)

        if not os.path.exists(es_constants.base_tmp_dir):
            os.mkdir(es_constants.base_tmp_dir)

        if not os.path.exists(es_constants.processed_list_base_dir):
            os.mkdir(es_constants.processed_list_base_dir)

        if not os.path.exists(es_constants.processed_list_eum_dir):
            os.mkdir(es_constants.processed_list_eum_dir)

        while 1:
            try:
                time_sleep = user_def_sleep
                logger.debug("Sleep time set to : %s.", time_sleep)
            except NameError:
                logger.warning(
                    "Sleep time not defined. Setting to default=1min. Continue."
                )
                time_sleep = 60

            # try:
            logger.debug("Reading active EUMETCAST data sources from database")
            eumetcast_sources_list = querydb.get_eumetcast_sources()
            logger.debug("N. %i active EUMETCAST data sources found",
                         len(eumetcast_sources_list))

            # Get the EUMETCast MESA_JRC files
            try:
                get_archives_eumetcast()
            except Exception:
                logger.error(
                    "Error in executing get_archives_eumetcast. Continue")

            # Loop over active triggers
            for eumetcast_source in eumetcast_sources_list:

                # Define a file_handler logger 'source-specific' (for GUI)
                logger_spec = log.my_logger('apps.get_eumetcast.' +
                                            eumetcast_source.eumetcast_id)
                logger.info("Processing eumetcast source  %s.",
                            eumetcast_source.eumetcast_id)

                if sys.platform == 'win32':  # Pierluigi
                    processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + str(
                        eumetcast_source.eumetcast_id).replace(':',
                                                               '_') + '.list'
                    processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + str(
                        eumetcast_source.eumetcast_id).replace(':',
                                                               '_') + '.info'

                else:
                    processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + str(
                        eumetcast_source.eumetcast_id) + '.list'
                    processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + str(
                        eumetcast_source.eumetcast_id) + '.info'

                # Create objects for list and info
                processed_list = []
                processed_info = {
                    'length_proc_list': 0,
                    'time_latest_exec': datetime.datetime.now(),
                    'time_latest_copy': datetime.datetime.now()
                }

                logger.debug("Loading the processed file list for source %s ",
                             eumetcast_source.eumetcast_id)

                # Restore/Create List
                processed_list = functions.restore_obj_from_pickle(
                    processed_list, processed_list_filename)
                # Restore/Create Info
                processed_info = functions.restore_obj_from_pickle(
                    processed_info, processed_info_filename)
                # Update processing time (in case it is restored)
                processed_info['time_latest_exec'] = datetime.datetime.now()

                logger.debug(
                    "Create current list of file to process for trigger %s.",
                    eumetcast_source.eumetcast_id)
                current_list = find_files(
                    input_dir, eumetcast_source.filter_expression_jrc)
                #logger.debug("Number of files currently on PC1 for trigger %s is %i", eumetcast_source.eumetcast_id, len(current_list))
                logger_spec.info(
                    "Number of files currently on PC1 for trigger %s is %i",
                    eumetcast_source.eumetcast_id, len(current_list))
                if len(current_list) > 0:

                    #logger.debug("Number of files already copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(processed_list))
                    logger_spec.debug(
                        "Number of files already copied for trigger %s is %i",
                        eumetcast_source.eumetcast_id, len(processed_list))
                    listtoprocess = []
                    listtoprocess = set(current_list) - set(processed_list)
                    #logger.debug("Number of files to be copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(listtoprocess))
                    logger_spec.debug(
                        "Number of files to be copied for trigger %s is %i",
                        eumetcast_source.eumetcast_id, len(listtoprocess))
                    if listtoprocess:
                        logger_spec.debug("Loop on the found files.")
                        for filename in list(listtoprocess):
                            if os.path.isfile(os.path.join(
                                    input_dir, filename)):
                                if os.stat(os.path.join(
                                        input_dir, filename)).st_mtime < int(
                                            time.time()):
                                    logger_spec.debug(
                                        "Processing file: " +
                                        os.path.basename(filename))
                                    if not dry_run:
                                        if subprocess.getstatusoutput(
                                                "cp " + filename + " " +
                                                output_dir + os.sep +
                                                os.path.basename(filename)
                                        )[0] == 0:
                                            logger_spec.info(
                                                "File %s copied.", filename)
                                            processed_list.append(filename)
                                            # Update processing info
                                            processed_info[
                                                'time_latest_copy'] = datetime.datetime.now(
                                                )
                                            processed_info[
                                                'length_proc_list'] = len(
                                                    processed_list)
                                        else:
                                            logger_spec.warning(
                                                "Problem while copying file: %s.",
                                                filename)
                                    else:
                                        logger_spec.info(
                                            'Dry_run is set: do not get files')
                            else:
                                logger_spec.error(
                                    "File %s removed by the system before being processed.",
                                    filename)
                    else:
                        logger.debug(
                            "Nothing to process - go to next trigger.")

                # Keep only entries that still exist (don't remove from the list while iterating over it)
                processed_list = [infile for infile in processed_list if os.path.exists(infile)]

                if not dry_run:
                    functions.dump_obj_to_pickle(processed_list,
                                                 processed_list_filename)
                    functions.dump_obj_to_pickle(processed_info,
                                                 processed_info_filename)

            logger.info("End of Get EUMETCast loop. Sleep")
            time.sleep(float(time_sleep))

    exit(0)
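
find_files(), used above to scan the EUMETCast input directory, is not included either. A minimal sketch, assuming it matches file basenames against the regex-style filter expressions seen in these examples, is:

import os
import re

def find_files(directory, regex):
    # Return full paths under 'directory' whose basename matches 'regex'
    # (assumed behaviour; the real helper may match full paths instead).
    pattern = re.compile(regex)
    matched = []
    for root, dirs, files in os.walk(directory):
        for name in files:
            if pattern.match(name):
                matched.append(os.path.join(root, name))
    return matched
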
Example #6
def loop_eumetcast_ftp(dry_run=False):

    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving EUMETCast data.")

    ftp_eumetcast_url = es_constants.es2globals['ftp_eumetcast_url']
    ftp_eumetcast_userpwd = es_constants.es2globals['ftp_eumetcast_userpwd']

    while True:

        logger.debug("Check if the Ingest Server input directory : %s exists.",
                     output_dir)
        if not os.path.exists(output_dir):
            logger.fatal(
                "The Ingest Server input directory : %s doesn't exists.",
                output_dir)
            exit(1)

        if not os.path.exists(es_constants.base_tmp_dir):
            os.mkdir(es_constants.base_tmp_dir)

        if not os.path.exists(es_constants.processed_list_base_dir):
            os.mkdir(es_constants.processed_list_base_dir)

        if not os.path.exists(es_constants.processed_list_eum_dir):
            os.mkdir(es_constants.processed_list_eum_dir)

        while 1:
            try:
                time_sleep = user_def_sleep
                logger.debug("Sleep time set to : %s.", time_sleep)
            except NameError:
                logger.warning(
                    "Sleep time not defined. Setting to default=1min. Continue."
                )
                time_sleep = 60

            # try:
            logger.debug("Reading active EUMETCAST data sources from database")
            eumetcast_sources_list = querydb.get_eumetcast_sources()
            logger.debug("N. %i active EUMETCAST data sources found",
                         len(eumetcast_sources_list))

            # Get the EUMETCast MESA_JRC files
            try:
                get_archives_eumetcast_ftp()
            except Exception:
                logger.error(
                    "Error in executing get_archives_eumetcast_ftp. Continue")

            # Loop over active triggers
            for eumetcast_source in eumetcast_sources_list:

                # Define a file_handler logger 'source-specific' (for GUI)
                logger_spec = log.my_logger('apps.get_eumetcast.' +
                                            eumetcast_source.eumetcast_id)
                logger.info("Processing eumetcast source  %s.",
                            eumetcast_source.eumetcast_id)

                if sys.platform == 'win32':  #Pierluigi
                    processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + str(
                        eumetcast_source.eumetcast_id).replace(':',
                                                               '_') + '.list'
                    processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + str(
                        eumetcast_source.eumetcast_id).replace(':',
                                                               '_') + '.info'
                else:
                    processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + str(
                        eumetcast_source.eumetcast_id) + '.list'
                    processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + str(
                        eumetcast_source.eumetcast_id) + '.info'

                # Create objects for list and info
                processed_list = []
                processed_info = {
                    'length_proc_list': 0,
                    'time_latest_exec': datetime.datetime.now(),
                    'time_latest_copy': datetime.datetime.now()
                }

                logger.debug("Loading the processed file list for source %s ",
                             eumetcast_source.eumetcast_id)

                # Restore/Create List
                processed_list = functions.restore_obj_from_pickle(
                    processed_list, processed_list_filename)
                # Restore/Create Info
                processed_info = functions.restore_obj_from_pickle(
                    processed_info, processed_info_filename)
                # Update processing time (in case it is restored)
                processed_info['time_latest_exec'] = datetime.datetime.now()

                logger.debug(
                    "Create current list of file to process for trigger %s.",
                    eumetcast_source.eumetcast_id)
                try:
                    current_list = get_list_matching_files(
                        ftp_eumetcast_url,
                        ftp_eumetcast_userpwd,
                        eumetcast_source.filter_expression_jrc,
                        'ftp',
                        my_logger=logger_spec)
                except Exception:
                    logger.error(
                        "Cannot connect to the PC1 via ftp. Wait 1 minute")
                    current_list = []
                    time.sleep(60)

                if len(current_list) > 0:
                    # See ES2-204
                    logger_spec.debug(
                        "Number of files currently on PC1 for trigger %s is %i",
                        eumetcast_source.eumetcast_id, len(current_list))

                    #logger.debug("Number of files already copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(processed_list))
                    logger_spec.debug(
                        "Number of files already copied for trigger %s is %i",
                        eumetcast_source.eumetcast_id, len(processed_list))
                    listtoprocess = []
                    listtoprocess = set(current_list) - set(processed_list)
                    #logger.debug("Number of files to be copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(listtoprocess))
                    logger_spec.debug(
                        "Number of files to be copied for trigger %s is %i",
                        eumetcast_source.eumetcast_id, len(listtoprocess))
                    if listtoprocess:
                        logger_spec.debug("Loop on the found files.")
                        for filename in list(listtoprocess):
                            try:
                                result = get_file_from_url(
                                    str(ftp_eumetcast_url) + os.path.sep +
                                    filename,
                                    target_file=os.path.basename(filename),
                                    target_dir=es_constants.ingest_dir,
                                    userpwd=str(ftp_eumetcast_userpwd))
                                if not result:
                                    logger_spec.info("File %s copied.",
                                                     filename)
                                    processed_list.append(filename)
                                else:
                                    logger_spec.warning(
                                        "File %s not copied.", filename)
                            except Exception:
                                logger_spec.warning(
                                    "Problem while copying file: %s.",
                                    filename)
                    else:
                        logger.debug(
                            "Nothing to process - go to next trigger.")

                # Keep only entries still present on the server (don't remove from the list while iterating over it)
                processed_list = [infile for infile in processed_list if infile in current_list]

                if not dry_run:
                    functions.dump_obj_to_pickle(processed_list,
                                                 processed_list_filename)
                    functions.dump_obj_to_pickle(processed_info,
                                                 processed_info_filename)

            logger.info("End of Get EUMETCast loop. Sleep")
            time.sleep(float(time_sleep))

    exit(0)
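
Both ftp examples read their credentials from es_constants.es2globals. The expected entries, with placeholder values (only the key names are taken from the lookups above), would look like:

# Placeholder settings; the values shown are illustrative, not real.
es2globals = {
    'ftp_eumetcast_url': 'ftp://ftp.example.org/eumetcast/',
    'ftp_eumetcast_userpwd': 'username:password',
}
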
Example #7
def get_archives_eumetcast():

    input_dir = '/eumetcast_test/'

    # Ad-hoc definitions (to be copied to settings file)
    source_id = 'MESA:JRC:Archives'
    filter_expression_mesa_jrc = 'MESA_JRC_.*.tif'

    # Define a file_handler logger 'source-specific' (for GUI)
    logger_spec = log.my_logger('apps.get_archives_eumetcast')
    logger.info("Retrieving MESA_JRC files from PC1.")

    processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + str(
        source_id) + '.list'
    processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + str(
        source_id) + '.info'

    # Create objects for list and info
    processed_list = []
    processed_info = {
        'length_proc_list': 0,
        'time_latest_exec': datetime.datetime.now(),
        'time_latest_copy': datetime.datetime.now()
    }

    logger.warning(
        "Input DIR for get_archives_eumetcast is defined as: *** %s ***",
        input_dir)
    logger.debug("Loading the processed file list for source %s ", source_id)

    # Restore/Create List
    processed_list = functions.restore_obj_from_pickle(
        processed_list, processed_list_filename)
    # Restore/Create Info
    processed_info = functions.restore_obj_from_pickle(
        processed_info, processed_info_filename)
    # Update processing time (in case it is restored)
    processed_info['time_latest_exec'] = datetime.datetime.now()

    logger.debug("Create current list of file to process for trigger %s.",
                 source_id)
    try:
        current_list = find_files(input_dir, filter_expression_mesa_jrc)
    except Exception:
        logger.error("Cannot read the input directory. Wait 1 minute")
        current_list = []
        time.sleep(60)

    logger_spec.info("Number of files currently on PC1 for trigger %s is %i",
                     source_id, len(current_list))
    if len(current_list) > 0:

        #logger.debug("Number of files already copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(processed_list))
        logger_spec.debug(
            "Number of files already copied for trigger %s is %i", source_id,
            len(processed_list))
        listtoprocess = []
        listtoprocess = set(current_list) - set(processed_list)
        #logger.debug("Number of files to be copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(listtoprocess))
        logger_spec.info("Number of files to be copied for trigger %s is %i",
                         source_id, len(listtoprocess))
        if listtoprocess:
            logger_spec.debug("Loop on the found files.")
            for filename in list(listtoprocess):
                try:
                    if subprocess.getstatusoutput(
                            "cp " + filename + " " + output_dir + os.sep +
                            os.path.basename(filename))[0] == 0:
                        logger_spec.info("File %s copied.", filename)
                        processed_list.append(filename)
                        # Update processing info
                        processed_info[
                            'time_latest_copy'] = datetime.datetime.now()
                        processed_info['length_proc_list'] = len(
                            processed_list)
                    else:
                        logger_spec.warning("Problem while copying file: %s.",
                                            filename)
                except Exception:
                    logger_spec.warning("Problem while copying file: %s.",
                                        filename)
        else:
            logger.debug("Nothing to process - go to next trigger.")

    # Keep only entries still present in the input directory (don't remove from the list while iterating over it)
    processed_list = [infile for infile in processed_list if infile in current_list]

    functions.dump_obj_to_pickle(processed_list, processed_list_filename)
    functions.dump_obj_to_pickle(processed_info, processed_info_filename)
Example #8
def loop_get_datastore(dry_run=False, test_one_source=False, my_source=None):
    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving data from INTERNET.")

    b_loop = True  # to exit loops in testing mode
    b_error = False  # checking files download - for testing mode

    while b_loop:
        output_dir = es_constants.get_internet_output_dir
        logger.debug(
            "Check if the Datastore Server input directory : %s exists.",
            output_dir)
        if not os.path.exists(output_dir):
            # ToDo: create output_dir - ingest directory
            logger.fatal(
                "The Ingest Server input directory : %s doesn't exists.",
                output_dir)
            if test_one_source:
                return 1
            else:
                exit(1)

        if not os.path.exists(es_constants.processed_list_datastore_dir):
            os.mkdir(es_constants.processed_list_datastore_dir)

        while b_loop:

            # # Check internet connection (or continue)
            # if not functions.internet_on():  # disabled: JEodesk doesn't detect the internet connection properly
            #     logger.error("The computer is not currently connected to the internet. Wait 1 minute.")
            #     b_error = True
            #     time.sleep(60)
            #
            # else:

            logger.info("Reading active Datastore data sources from database")
            # internet_sources_list = querydb.get_active_internet_sources()
            datastore_sources_list = querydb.get_active_datastore_sources()

            # Loop over active triggers
            for datastore_source in datastore_sources_list:
                try:
                    # In case of test_one_source, skip all other sources
                    if test_one_source:
                        if (datastore_source.internet_id != test_one_source):
                            logger.debug(
                                "Running in test mode, and source is not %s. Continue.",
                                test_one_source)
                            continue
                        else:
                            # Overwrite DB definitions with the passed object (if defined - for testing purposes)
                            if my_source:
                                datastore_source = my_source

                    execute_trigger = True
                    # Get this from the pads database table (move from internet_source 'pull_frequency' to the pads table,
                    # so that it can be exploited by eumetcast triggers as well). It is in minute
                    pull_frequency = datastore_source.pull_frequency

                    # Manage the case of files to be continuously downloaded (delay < 0)
                    if pull_frequency < 0:
                        do_not_consider_processed_list = True
                        delay_time_source_minutes = -pull_frequency
                    else:
                        do_not_consider_processed_list = False
                        delay_time_source_minutes = pull_frequency

                    internet_id = str(datastore_source.internet_id).replace(':', '_')

                    logger_spec = log.my_logger('apps.get_internet.' +
                                                internet_id)
                    logger.info("Processing internet source  %s.",
                                datastore_source.descriptive_name)

                    # Build the filename for the info pickle
                    processed_info_filename = es_constants.get_datastore_processed_list_prefix + str(
                        internet_id) + '.info'

                    # Restore/Create Info
                    processed_info = None
                    processed_info = functions.restore_obj_from_pickle(
                        processed_info, processed_info_filename)
                    if processed_info is not None:
                        # Check the delay since the latest execution; use total_seconds(),
                        # since .seconds alone wraps around after one day
                        current_delta = datetime.datetime.now() - processed_info['time_latest_exec']
                        current_delta_minutes = int(current_delta.total_seconds() // 60)
                        if current_delta_minutes < delay_time_source_minutes:
                            logger.debug(
                                "Still within the %i-minute delay since the latest execution. Skip.",
                                delay_time_source_minutes)
                            execute_trigger = False
                    else:
                        # Create processed_info object
                        processed_info = {
                            'length_proc_list': 0,
                            'time_latest_exec': datetime.datetime.now(),
                            'time_latest_copy': datetime.datetime.now()
                        }
                        execute_trigger = True

                    if execute_trigger:
                        # Restore/Create List
                        processed_list = []
                        if not do_not_consider_processed_list:
                            processed_list_filename = es_constants.get_datastore_processed_list_prefix + internet_id + '.list'
                            # processed_list = functions.restore_obj_from_json(processed_list,
                            #                                                    processed_list_filename)

                        processed_info[
                            'time_latest_exec'] = datetime.datetime.now()

                        logger.debug(
                            "Create current list of files to process for source %s.",
                            datastore_source.internet_id)
                        if datastore_source.user_name is None:
                            user_name = "anonymous"
                        else:
                            user_name = datastore_source.user_name

                        if datastore_source.password is None:
                            password = "******"
                        else:
                            password = datastore_source.password

                        usr_pwd = str(user_name) + ':' + str(password)

                        logger_spec.debug("              Url is %s.",
                                          datastore_source.url)
                        logger_spec.debug("              usr/pwd is %s.",
                                          usr_pwd)
                        logger_spec.debug(
                            "              regex   is %s.",
                            datastore_source.include_files_expression)

                        internet_type = datastore_source.type

                        if internet_type == 'cds_api':
                            current_list = cds_api_loop_internet(
                                datastore_source)
                        elif internet_type == 'iri_api':
                            current_list = iri_api_loop_internet(
                                datastore_source)

                        else:
                            logger.debug(
                                "Unknown datastore source type: %s",
                                internet_type)
                            current_list = []

                        logger_spec.debug(
                            "Number of files currently available for source %s is %i",
                            internet_id, len(current_list))

                        if not dry_run:
                            # functions.dump_obj_to_json(processed_list, processed_list_filename)
                            functions.dump_obj_to_pickle(
                                processed_info, processed_info_filename)

                    # if test_one_source:
                    #     b_loop = False
                    # else:
                    #     sleep(float(user_def_sleep))
                # # Loop over sources
                except Exception as inst:
                    logger.error("Error while processing source %s: %s. Continue.",
                                 datastore_source.descriptive_name, inst)
                    b_error = True
            sleep(float(user_def_sleep))
    if not test_one_source:
        exit(0)
    else:
        return b_error
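
The loop above persists its state with a pair of pickle round-trip helpers. As a rough sketch of what functions.restore_obj_from_pickle and functions.dump_obj_to_pickle plausibly do (an assumption for illustration; the project's actual implementations in its functions module may differ):

import os
import pickle

def restore_obj_from_pickle(obj, filename):
    # Assumed behaviour: return the unpickled object if the file exists,
    # otherwise fall back to the default object passed in
    if os.path.exists(filename):
        with open(filename, 'rb') as f:
            obj = pickle.load(f)
    return obj

def dump_obj_to_pickle(obj, filename):
    # Assumed behaviour: overwrite the file with the pickled object
    with open(filename, 'wb') as f:
        pickle.dump(obj, f)
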
Exemple #9
0
def loop_eumetcast(dry_run=False):

    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving EUMETCast data.")

    while True:
        logger.debug("Check if the EUMETCast input directory : %s exists.", input_dir)
        if not os.path.exists(input_dir):
            logger.error("The EUMETCast input directory : %s is not yet mounted.", input_dir)

        logger.debug("Check if the Ingest Server input directory : %s exists.", output_dir)
        if not os.path.exists(output_dir):
            logger.fatal("The Ingest Server input directory : %s doesn't exists.", output_dir)
            # TODO Jurvtk: Create the Ingest Server output directory if it doesn't exist!
            exit(1)

        if not os.path.exists(es_constants.base_tmp_dir):
            os.mkdir(es_constants.base_tmp_dir)

        if not os.path.exists(es_constants.processed_list_base_dir):
            os.mkdir(es_constants.processed_list_base_dir)

        if not os.path.exists(es_constants.processed_list_eum_dir):
            os.mkdir(es_constants.processed_list_eum_dir)

        while 1:
            try:
                time_sleep = user_def_sleep
                logger.debug("Sleep time set to : %s.", time_sleep)
            except NameError:
                logger.warning("Sleep time not defined. Setting to default=1min. Continue.")
                time_sleep = 60

            # try:
            logger.debug("Reading active EUMETCAST data sources from database")
            eumetcast_sources_list = querydb.get_eumetcast_sources(echo=echo_query)
            logger.debug("N. %i active EUMETCAST data sources found", len(eumetcast_sources_list))

            # Loop over active triggers
            for eumetcast_source in eumetcast_sources_list:

                # Define a file_handler logger 'source-specific' (for GUI)
                logger_spec = log.my_logger('apps.get_eumetcast.'+eumetcast_source.eumetcast_id)
                logger.debug("Processing eumetcast source  %s.", eumetcast_source.eumetcast_id)

                processed_list_filename = es_constants.get_eumetcast_processed_list_prefix+str(eumetcast_source.eumetcast_id)+'.list'
                processed_info_filename = es_constants.get_eumetcast_processed_list_prefix+str(eumetcast_source.eumetcast_id)+'.info'

                # Create objects for list and info
                processed_list = []
                processed_info = {'length_proc_list': 0,
                                  'time_latest_exec': datetime.datetime.now(),
                                  'time_latest_copy': datetime.datetime.now()}

                logger.debug("Loading the processed file list for source %s ", eumetcast_source.eumetcast_id)

                # Restore/Create List
                processed_list=functions.restore_obj_from_pickle(processed_list, processed_list_filename)
                # Restore/Create Info
                processed_info=functions.restore_obj_from_pickle(processed_info, processed_info_filename)
                # Update processing time (in case it is restored)
                processed_info['time_latest_exec']=datetime.datetime.now()

                logger.debug("Create current list of file to process for trigger %s.", eumetcast_source.eumetcast_id)
                current_list = find_files(input_dir, eumetcast_source.filter_expression_jrc)
                #logger.debug("Number of files currently on PC1 for trigger %s is %i", eumetcast_source.eumetcast_id, len(current_list))
                logger_spec.debug("Number of files currently on PC1 for trigger %s is %i", eumetcast_source.eumetcast_id, len(current_list))
                if len(current_list) > 0:

                    #logger.debug("Number of files already copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(processed_list))
                    logger_spec.debug("Number of files already copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(processed_list))
                    listtoprocess = []
                    listtoprocess = set(current_list) - set(processed_list)
                    #logger.debug("Number of files to be copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(listtoprocess))
                    logger_spec.debug("Number of files to be copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(listtoprocess))
                    if listtoprocess != set([]):
                        logger_spec.debug("Loop on the found files.")
                        for filename in list(listtoprocess):
                            if os.path.isfile(os.path.join(input_dir, filename)):
                                if os.stat(os.path.join(input_dir, filename)).st_mtime < int(time.time()):
                                    logger_spec.debug("Processing file: "+os.path.basename(filename))
                                    if not dry_run:
                                        if commands.getstatusoutput("cp " + filename + " " + output_dir + os.sep + os.path.basename(filename))[0] == 0:
                                            logger.info("File %s copied.", filename)
                                            processed_list.append(filename)
                                            # Update processing info
                                            processed_info['time_latest_copy']=datetime.datetime.now()
                                            processed_info['length_proc_list']=len(processed_list)
                                        else:
                                            logger_spec.warning("Problem while copying file: %s.", filename)
                                    else:
                                        logger_spec.info('Dry_run is set: do not get files')
                            else:
                                logger_spec.error("File %s removed by the system before being processed.", filename)
                    else:
                        logger.debug("Nothing to process - go to next trigger.")

                # Purge entries that no longer exist on disk. Build a new list instead of
                # removing items while iterating, which would skip elements.
                processed_list = [infile for infile in processed_list if os.path.exists(infile)]

                if not dry_run:
                    functions.dump_obj_to_pickle(processed_list, processed_list_filename)
                    functions.dump_obj_to_pickle(processed_info, processed_info_filename)

            time.sleep(10)

        # except Exception, e:
        #     logger.fatal(str(e))
        #     exit(1)
    exit(0)
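
At its core, loop_eumetcast applies an incremental-copy pattern: diff the files currently on disk against a persisted processed list, and copy only files whose modification time shows they are fully written. A stripped-down, self-contained sketch of that pattern (directory and function names here are illustrative, not the project's):

import os
import shutil
import time

def copy_new_files(input_dir, output_dir, processed_list):
    # Files present in the input directory right now
    current = {f for f in os.listdir(input_dir)
               if os.path.isfile(os.path.join(input_dir, f))}
    # Copy only what has not been processed yet
    for name in sorted(current - set(processed_list)):
        src = os.path.join(input_dir, name)
        # Skip files whose mtime is not yet in the past (still being written)
        if os.stat(src).st_mtime < int(time.time()):
            shutil.copy(src, os.path.join(output_dir, name))
            processed_list.append(name)
    return processed_list
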
Exemple #10
0
    def test_restore_obj_from_pickle(self):
        functions.dump_obj_to_pickle(self.processed_info,
                                     self.processed_info_filename)
        result = functions.restore_obj_from_pickle(
            self.processed_info, self.processed_info_filename)
        self.assertEqual(result, self.processed_info)
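
This test references self.processed_info and self.processed_info_filename, presumably prepared in the test case's setUp. A hypothetical minimal fixture (names chosen to match the test; not taken from the project):

import datetime
import os
import tempfile
import unittest

class TestPickleRoundTrip(unittest.TestCase):

    def setUp(self):
        # Hypothetical fixture: a small status dict and a throw-away pickle path
        self.processed_info = {'length_proc_list': 0,
                               'time_latest_exec': datetime.datetime.now(),
                               'time_latest_copy': datetime.datetime.now()}
        self.processed_info_filename = os.path.join(tempfile.gettempdir(),
                                                    'processed_info_test.pck')

    def tearDown(self):
        # Clean up the pickle written by the round-trip test
        if os.path.exists(self.processed_info_filename):
            os.remove(self.processed_info_filename)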