Example #1
0
    def Test_get_active_internet_sources(self):
        """Smoke-test ``querydb.get_active_internet_sources``.

        Fetches the active internet sources, logs the returned collection and
        prints the ``url`` of every entry.  The final assertion is a
        placeholder that always passes, so this test only fails when the
        query or the iteration over its result raises.
        """
        internet_sources = querydb.get_active_internet_sources()
        logger.info("Internet sources are: %s", internet_sources)
        for internet_source in internet_sources:
            # Parenthesised single-argument form: prints the same text under
            # Python 2 and is also valid syntax under Python 3.
            print(internet_source.url)

        self.assertEqual(1, 1)
Example #2
0
    def Test_get_active_internet_sources(self):
        """Smoke-test ``querydb.get_active_internet_sources``.

        Fetches the active internet sources, logs the returned collection and
        prints the ``url`` of every entry.  The final assertion is a
        placeholder that always passes, so this test only fails when the
        query or the iteration over its result raises.
        """
        internet_sources = querydb.get_active_internet_sources()
        logger.info("Internet sources are: %s", internet_sources)
        for internet_source in internet_sources:
            # Parenthesised single-argument form: prints the same text under
            # Python 2 and is also valid syntax under Python 3.
            print(internet_source.url)

        self.assertEqual(1, 1)
Example #3
0
    def testLocal_EOS_JEODESK_OLCI(self):
        """Run the internet download loop for each active S3 OLCI WRR source.

        For every id in the list, the matching record is looked up among the
        active internet sources.  When one is found, a ``SourceEOS`` is built
        from that record (with the relative start/end values -5 and -1) and
        ``get_internet.loop_get_internet`` is run restricted to that id.
        Ids that have no active source are skipped.

        The closing assertion is a placeholder that always passes: the test
        only fails when one of the calls raises.
        """
        list_internet_id = ['EOS:S3A:OLCI:WRR', 'EOS:S3B:OLCI:WRR']
        start_date_dyn = -5
        end_date_dyn = -1

        for internet_id in list_internet_id:
            # Start every id from a clean slate.  Carrying the lookup result
            # over from the previous id would pair this id with the other
            # source's url/credentials whenever this one is not active.
            internet_source = None
            for s in querydb.get_active_internet_sources():
                if s.internet_id == internet_id:
                    # No break: the last matching record wins.
                    internet_source = s

            if internet_source is None:
                continue

            # NOTE(review): the object is built but not handed to
            # loop_get_internet below; the construction is kept in case the
            # SourceEOS constructor validates its arguments - confirm.
            my_source = SourceEOS(
                internet_id=internet_id,
                url=internet_source.url,
                descriptive_name="OLCI WRR",
                include_files_expression=internet_source.
                include_files_expression,
                pull_frequency=internet_source.pull_frequency,
                user_name=internet_source.user_name,
                password=internet_source.password,
                start_date=start_date_dyn,
                end_date=end_date_dyn,
                frequency_id=internet_source.frequency_id,
                type=internet_source.type,
                files_filter_expression=internet_source.
                files_filter_expression,
                https_params=internet_source.https_params)

            get_internet.loop_get_internet(test_one_source=internet_id)
            self.assertEqual(0, 0)
Example #4
0
    def testLocal_EOS_JEODESK_SLSTR(self):
        """Run the internet download loop for each active S3 SLSTR WST source.

        The active internet sources are read once.  For every id in the list,
        the matching record is looked up; when one is found, a ``SourceEOS``
        is built from it (with the relative start/end values -5 and -1) and
        ``get_internet.loop_get_internet`` is run restricted to that id.
        Ids that have no active source are skipped.

        The closing assertion is a placeholder that always passes: the test
        only fails when one of the calls raises.
        """
        list_internet_id = ['EOS:S3A:SLSTR:WST', 'EOS:S3B:SLSTR:WST']
        start_date_dyn = -5
        end_date_dyn = -1

        internet_sources = querydb.get_active_internet_sources()

        for internet_id in list_internet_id:
            # Start every id from a clean slate.  Carrying the lookup result
            # over from the previous id would pair this id with the other
            # source's url/credentials whenever this one is not active.
            internet_source = None
            for s in internet_sources:
                if s.internet_id == internet_id:
                    # No break: the last matching record wins.
                    internet_source = s

            if internet_source is None:
                continue

            # NOTE(review): the object is built but not handed to
            # loop_get_internet below; the construction is kept in case the
            # SourceEOS constructor validates its arguments - confirm.
            my_source = SourceEOS(
                internet_id=internet_id,
                url=internet_source.url,
                descriptive_name='sentinel',
                include_files_expression=internet_source.
                include_files_expression,
                pull_frequency=internet_source.pull_frequency,
                user_name=internet_source.user_name,
                password=internet_source.password,
                start_date=start_date_dyn,
                end_date=end_date_dyn,
                frequency_id=internet_source.frequency_id,
                type=internet_source.type,
                files_filter_expression=internet_source.
                files_filter_expression,
                https_params=internet_source.https_params)

            # Test download (dynamic dates)
            get_internet.loop_get_internet(test_one_source=internet_id)
            self.assertEqual(0, 0)
Example #5
0
def testLocal_EOS_JEODESK_SLSTR(self):
    """Download test for the 'EOS:S3A:SLSTR:WST' internet source.

    The source is looked up among the active internet sources; when it is
    not active the test ends without asserting anything.  Otherwise a
    ``Source`` is built from the database record, using the relative
    start/end values -5 and -3, ``loop_get_internet`` is run for that single
    source and its return value is expected to be 0.
    """
    internet_id = 'EOS:S3A:SLSTR:WST'

    # Fixed-date window and reference file: assigned but not used below.
    start_date_fixed = 20200301
    end_date_fixed = 20200310
    file_to_check = '32e61b08-0bcb-4d0a-a06e-f3d499dfb5fc/S3A_SL_2_WST____20200310T073813_20200310T091913_20200311T185257_6059_056_006______MAR_O_NT_003'

    start_date_dyn = -5
    end_date_dyn = -3

    # Same selection as a full scan without break: the last active record
    # carrying this id is the one that is used.
    matches = [candidate
               for candidate in querydb.get_active_internet_sources()
               if candidate.internet_id == internet_id]
    if not matches:
        return
    internet_source = matches[-1]

    settings = {
        'internet_id': internet_id,
        'url': internet_source.url,
        'descriptive_name': 'sentinel',
        'include_files_expression': internet_source.include_files_expression,
        'pull_frequency': internet_source.pull_frequency,
        'user_name': internet_source.user_name,
        'password': internet_source.password,
        'start_date': start_date_dyn,
        'end_date': end_date_dyn,
        'frequency_id': internet_source.frequency_id,
        'type': internet_source.type,
        'files_filter_expression': internet_source.files_filter_expression,
        'https_params': internet_source.https_params,
    }
    my_source = Source(**settings)

    # Product descriptor: built but not consumed by the call below.
    product = {"productcode": 'slstr-sst', "version": '1.0'}

    # Test download (dynamic dates)
    result = loop_get_internet(test_one_source=internet_id,
                               my_source=my_source)
    self.assertEqual(result, 0)
Example #6
0
def testLocal_EOS_JEODESK_OLCI(self):
    """Download test for the 'EOS:S3A:OLCI:WRR' internet source.

    The source is looked up among the active internet sources; when it is
    not active the test ends without asserting anything.  Otherwise a
    ``Source`` is built from the database record, using the relative
    start/end values -2 and -1, ``loop_get_internet`` is run for that single
    source and its return value is expected to be 0.
    """
    internet_id = 'EOS:S3A:OLCI:WRR'

    # Fixed-date window and reference file: assigned but not used below.
    start_date_fixed = 20200301
    end_date_fixed = 20200310
    file_to_check = '44c285d7-3809-4810-836e-510ee52f326a/S3A_OL_2_WRR____20200310T065044_20200310T073438_20200311T133228_2634_056_006______MAR_O_NT_002'

    start_date_dyn = -2
    end_date_dyn = -1

    # Same selection as a full scan without break: the last active record
    # carrying this id is the one that is used.
    matches = [candidate
               for candidate in querydb.get_active_internet_sources()
               if candidate.internet_id == internet_id]
    if not matches:
        return
    internet_source = matches[-1]

    settings = {
        'internet_id': internet_id,
        'url': internet_source.url,
        'descriptive_name': "OLCI WRR",
        'include_files_expression': internet_source.include_files_expression,
        'pull_frequency': internet_source.pull_frequency,
        'user_name': internet_source.user_name,
        'password': internet_source.password,
        'start_date': start_date_dyn,
        'end_date': end_date_dyn,
        'frequency_id': internet_source.frequency_id,
        'type': internet_source.type,
        'files_filter_expression': internet_source.files_filter_expression,
        'https_params': internet_source.https_params,
    }
    my_source = Source(**settings)

    # Product descriptor: built but not consumed by the call below.
    product = {"productcode": 'olci-wrr', "version": 'V02.0'}

    result = loop_get_internet(test_one_source=internet_id,
                               my_source=my_source)
    self.assertEqual(result, 0)
Example #7
0
def loop_get_internet(dry_run=False):
    """Poll the active internet sources forever and fetch their new files.

    On every pass the active sources are read from the database.  For each
    source the processed list/info objects are restored from their pickle
    files, the files currently matching the source are listed (types 'ftp'
    and 'http_tmpl'), and the ones that are not yet in the processed list
    are fetched into ``es_constants.ingest_dir``.  A failure while handling
    one source is logged and the remaining sources are still processed.

    The loop has no exit condition of its own: the function only ends
    through ``exit(1)`` (ingest directory missing) or an exception that is
    not handled here.

    ``processed_list``, ``processed_info`` and the two file names are module
    globals (presumably read by ``signal_handler``, which is not shown here).

    Args:
        dry_run: when true, files are listed but not fetched and the
            processed list/info pickles are not rewritten.
    """
    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving data from INTERNET.")

    output_dir = es_constants.ingest_dir
    logger.debug("Check if the Ingest Server input directory : %s exists.", output_dir)
    if not os.path.exists(output_dir):
        logger.fatal("The Ingest Server input directory : %s doesn't exists.", output_dir)
        exit(1)

    if not os.path.exists(es_constants.processed_list_int_dir):
        os.mkdir(es_constants.processed_list_int_dir)

    while True:
        try:
            time_sleep = user_def_sleep
        except NameError:
            logger.warning("Sleep time not defined. Setting to default=1min. Continue.")
            time_sleep = 60
        else:
            logger.debug("Sleep time set to : %s.", time_sleep)

        logger.debug("Reading active INTERNET data sources from database")
        internet_sources_list = querydb.get_active_internet_sources(echo=echo_query)

        # Loop over active triggers.  Each source has its own try block so
        # that one failing source does not prevent the others from running.
        for internet_source in internet_sources_list:
            try:
                logger.debug("Processing internet source  %s.", internet_source.descriptive_name)

                list_prefix = es_constants.get_internet_processed_list_prefix
                source_id = str(internet_source.internet_id)
                processed_list_filename = list_prefix + source_id + '.list'
                processed_info_filename = list_prefix + source_id + '.info'

                # Restore/Create list and info objects
                processed_list = functions.restore_obj_from_pickle([], processed_list_filename)
                processed_info = functions.restore_obj_from_pickle(
                    {'length_proc_list': 0,
                     'time_latest_exec': datetime.datetime.now(),
                     'time_latest_copy': datetime.datetime.now()},
                    processed_info_filename)
                # Update processing time (in case it is restored)
                processed_info['time_latest_exec'] = datetime.datetime.now()

                logger.debug("Create current list of file to process for source %s.", internet_source.internet_id)
                if internet_source.user_name is None:
                    user_name = "anonymous"
                else:
                    user_name = internet_source.user_name

                if internet_source.password is None:
                    password = "******"
                else:
                    password = internet_source.password

                usr_pwd = str(user_name) + ':' + str(password)

                logger.debug("              Url is %s.", internet_source.url)
                # The password is deliberately kept out of the log file.
                logger.debug("              usr/pwd is %s.", str(user_name) + ':<hidden>')
                logger.debug("              regex   is %s.", internet_source.include_files_expression)

                internet_type = internet_source.type

                # Always start from an empty list, so that a failed listing
                # or an unsupported type can neither reuse the previous
                # source's files nor hit an undefined name.
                current_list = []

                if internet_type == 'ftp':
                    # Note that the following list might contain sub-dirs (it reflects full_regex)
                    current_list = get_list_matching_files_dir_ftp(
                        str(internet_source.url),
                        str(usr_pwd),
                        str(internet_source.include_files_expression))

                elif internet_type == 'http_tmpl':
                    # start_date is mandatory: an invalid value aborts this source.
                    try:
                        start_date = str(internet_source.start_date)
                        if not functions.is_date_yyyymmdd(start_date, silent=True):
                            raise ValueError(start_date)
                        datetime_start = datetime.datetime.strptime(start_date, '%Y%m%d')
                    except Exception:
                        raise Exception("Start Date not valid")

                    # end_date is replaced by 'today' if missing/wrong.
                    datetime_end = datetime.datetime.today()
                    try:
                        end_date = str(internet_source.end_date)
                        if functions.is_date_yyyymmdd(end_date, silent=True):
                            datetime_end = datetime.datetime.strptime(end_date, '%Y%m%d')
                    except Exception:
                        logger.debug("End date not usable. Using today.")

                    # Create the full filenames from the url/expression template
                    try:
                        current_list = build_list_matching_for_http(
                            str(internet_source.url),
                            str(internet_source.include_files_expression),
                            datetime_start,
                            datetime_end,
                            str(internet_source.frequency_id))
                    except Exception:
                        logger.error("Error in creating date lists. Continue")

                else:
                    logger.error("No correct type for this internet source type: %s" % internet_type)

                logger.debug("Number of files currently available for source %s is %i", internet_source.internet_id, len(current_list))
                if current_list:
                    logger.debug("Number of files already copied for trigger %s is %i", internet_source.internet_id, len(processed_list))
                    # Full names (sub-dirs included) are compared, as they
                    # are what is saved in the .list file.
                    already_copied = set(processed_list)
                    listtoprocess = [current_file for current_file in current_list
                                     if current_file not in already_copied]

                    logger.debug("Number of files to be copied for trigger %s is %i", internet_source.internet_id, len(listtoprocess))
                    if listtoprocess:
                        logger.debug("Loop on the found files.")
                        if dry_run:
                            logger.info('Dry_run is set: do not get files')
                        else:
                            for filename in listtoprocess:
                                logger.debug("Processing file: " + str(internet_source.url) + os.path.sep + filename)
                                # 'except Exception' (not a bare except) so
                                # that SystemExit/KeyboardInterrupt are not
                                # swallowed while a file is being fetched.
                                try:
                                    result = get_file_from_url(
                                        str(internet_source.url) + os.path.sep + filename,
                                        target_file=os.path.basename(filename),
                                        target_dir=es_constants.ingest_dir,
                                        userpwd=str(usr_pwd))
                                    # A falsy result is treated as success.
                                    if not result:
                                        logger.info("File %s copied.", filename)
                                        processed_list.append(filename)
                                except Exception:
                                    logger.warning("Problem while copying file: %s.", filename)

                if not dry_run:
                    functions.dump_obj_to_pickle(processed_list, processed_list_filename)
                    functions.dump_obj_to_pickle(processed_info, processed_info_filename)

            except Exception as inst:
                logger.error("Error while processing source %s. Continue" % internet_source.descriptive_name)
                logger.debug("Exception was: %s", inst)

        # Use the value resolved above, so the 1-minute fallback is honoured.
        sleep(float(time_sleep))
Example #8
0
def loop_get_internet(dry_run=False, test_one_source=False):
    """Poll the active internet sources forever and fetch their new files.

    On every pass the internet connection is checked (the pass is retried
    after one minute when it is down) and the active sources are read from
    the database.  A source is executed when no '.info' pickle exists for it
    yet, or when at least ``abs(pull_frequency)`` minutes have passed since
    the execution time stored in that pickle.  An executed source has its
    available files listed according to its type, and the files that are
    not in its processed list are fetched into ``es_constants.ingest_dir``.
    A negative ``pull_frequency`` means the processed list is not restored,
    so every listed file is fetched again.

    The loop has no exit condition of its own: the function only ends
    through ``exit(1)`` (output directory missing) or an exception that is
    not handled here.

    ``processed_list``, ``processed_info`` and the two file names are module
    globals (presumably read by ``signal_handler``, which is not shown here).

    Args:
        dry_run: when true, files are listed but not fetched and the
            processed list/info pickles are not rewritten.
        test_one_source: when truthy, only the source whose ``internet_id``
            equals this value is processed; all the others are skipped.
    """
    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving data from INTERNET.")

    output_dir = es_constants.get_internet_output_dir
    logger.debug("Check if the Ingest Server input directory : %s exists.",
                 output_dir)
    if not os.path.exists(output_dir):
        # ToDo: create output_dir - ingest directory
        logger.fatal(
            "The Ingest Server input directory : %s doesn't exists.",
            output_dir)
        exit(1)

    if not os.path.exists(es_constants.processed_list_int_dir):
        os.mkdir(es_constants.processed_list_int_dir)

    while True:
        # Check internet connection (or wait and try again)
        if not functions.internet_on():
            logger.error(
                "The computer is not currently connected to the internet. Wait 1 minute."
            )
            time.sleep(60)
            continue

        try:
            time_sleep = user_def_sleep
        except NameError:
            logger.warning(
                "Sleep time not defined. Setting to default=1min. Continue.")
            time_sleep = 60
        else:
            logger.debug("Sleep time set to : %s.", time_sleep)

        logger.info("Reading active INTERNET data sources from database")
        internet_sources_list = querydb.get_active_internet_sources()

        # Loop over active triggers
        for internet_source in internet_sources_list:
            try:
                if test_one_source and (internet_source.internet_id !=
                                        test_one_source):
                    logger.info(
                        "Running in test mode, and source is not %s. Continue.",
                        test_one_source)
                    continue

                # Get this from the pads database table (move from
                # internet_source 'pull_frequency' to the pads table, so that
                # it can be exploited by eumetcast triggers as well).
                # It is in minutes.
                pull_frequency = internet_source.pull_frequency

                # Files to be continuously downloaded have a delay < 0: the
                # processed list is then ignored and the absolute value is
                # used as delay.
                do_not_consider_processed_list = pull_frequency < 0
                delay_time_source_minutes = abs(pull_frequency)

                # The id is used below to build file names; on win32 the
                # ':' characters are replaced by '_'.
                if sys.platform == 'win32':
                    internet_id = str(
                        internet_source.internet_id).replace(':', '_')
                else:
                    internet_id = str(internet_source.internet_id)

                logger_spec = log.my_logger('apps.get_internet.' + internet_id)
                logger.info("Processing internet source  %s.",
                            internet_source.descriptive_name)

                # Restore/Create Info
                processed_info_filename = (
                    es_constants.get_internet_processed_list_prefix +
                    str(internet_id) + '.info')
                processed_info = functions.restore_obj_from_pickle(
                    None, processed_info_filename)

                execute_trigger = True
                if processed_info is not None:
                    # Check the delay.  total_seconds() is used because
                    # timedelta.seconds leaves out the whole days of the
                    # difference.  A negative difference (stored time in the
                    # future) does not block the execution.
                    current_delta = (datetime.datetime.now() -
                                     processed_info['time_latest_exec'])
                    current_delta_minutes = int(
                        current_delta.total_seconds() // 60)
                    if 0 <= current_delta_minutes < delay_time_source_minutes:
                        logger.debug(
                            "Still waiting up to %i minute - since latest execution.",
                            delay_time_source_minutes)
                        execute_trigger = False
                else:
                    # Create processed_info object.  The 'lenght_proc_list'
                    # spelling (sic) is kept: the key is stored in the pickle.
                    processed_info = {
                        'lenght_proc_list': 0,
                        'time_latest_exec': datetime.datetime.now(),
                        'time_latest_copy': datetime.datetime.now()
                    }

                if execute_trigger:
                    # Restore/Create List.  The file name is set for every
                    # source, so that the dump below never writes this
                    # source's list into the file of a previous source.
                    processed_list = []
                    processed_list_filename = (
                        es_constants.get_internet_processed_list_prefix +
                        internet_id + '.list')
                    if not do_not_consider_processed_list:
                        processed_list = functions.restore_obj_from_pickle(
                            processed_list, processed_list_filename)

                    processed_info['time_latest_exec'] = datetime.datetime.now()

                    logger.debug(
                        "Create current list of file to process for source %s.",
                        internet_source.internet_id)
                    if internet_source.user_name is None:
                        user_name = "anonymous"
                    else:
                        user_name = internet_source.user_name

                    if internet_source.password is None:
                        password = "******"
                    else:
                        password = internet_source.password

                    usr_pwd = str(user_name) + ':' + str(password)

                    logger_spec.debug("              Url is %s.",
                                      internet_source.url)
                    # The password is deliberately kept out of the log file.
                    logger_spec.debug("              usr/pwd is %s.",
                                      str(user_name) + ':<hidden>')
                    logger_spec.debug("              regex   is %s.",
                                      internet_source.include_files_expression)

                    internet_type = internet_source.type

                    current_list = _list_available_files(
                        internet_source, internet_type, usr_pwd)
                    if current_list is None:
                        # Listing failed (already logged): go to next source.
                        continue

                    logger_spec.debug(
                        "Number of files currently available for source %s is %i",
                        internet_id, len(current_list))

                    if current_list:
                        logger_spec.debug(
                            "Number of files already copied for trigger %s is %i",
                            internet_id, len(processed_list))
                        # Full names (sub-dirs included) are compared, as
                        # they are what is saved in the .list file.
                        already_copied = set(processed_list)
                        listtoprocess = [
                            current_file for current_file in current_list
                            if current_file not in already_copied
                        ]

                        logger_spec.debug(
                            "Number of files to be copied for trigger %s is %i",
                            internet_id, len(listtoprocess))
                        if listtoprocess:
                            logger_spec.debug("Loop on the found files.")
                            if dry_run:
                                logger_spec.info(
                                    'Dry_run is set: do not get files')
                            else:
                                for filename in listtoprocess:
                                    logger_spec.debug(
                                        "Processing file: " +
                                        str(internet_source.url) +
                                        os.path.sep + filename)
                                    # 'except Exception' (not a bare except)
                                    # so that SystemExit/KeyboardInterrupt
                                    # are not swallowed during a transfer.
                                    try:
                                        result = _retrieve_file(
                                            internet_source, internet_type,
                                            filename, usr_pwd)
                                        # A falsy result is treated as success.
                                        if not result:
                                            logger_spec.info(
                                                "File %s copied.", filename)
                                            processed_list.append(filename)
                                        else:
                                            logger_spec.warning(
                                                "File %s not copied: ",
                                                filename)
                                    except Exception:
                                        logger_spec.warning(
                                            "Problem while copying file: %s.",
                                            filename)

                    if not dry_run:
                        functions.dump_obj_to_pickle(processed_list,
                                                     processed_list_filename)
                        functions.dump_obj_to_pickle(processed_info,
                                                     processed_info_filename)

                # Use the value resolved above, so the 1-minute fallback is
                # honoured.
                sleep(float(time_sleep))

            except Exception as inst:
                logger.error("Error while processing source %s. Continue" %
                             internet_source.descriptive_name)
                logger.debug("Exception was: %s", inst)

        sleep(float(time_sleep))


def _list_available_files(internet_source, internet_type, usr_pwd):
    """Return the files currently offered by one source, or None on failure.

    The listing function is chosen from ``internet_type``.  Types 'offline'
    and any unknown type yield an empty list.  When the listing function
    raises, the error is logged and None is returned.
    """
    if internet_type == 'ftp' or internet_type == 'http':
        # Manage the end_date (added for MODIS_FIRMS)
        if internet_source.end_date != '':
            end_date = internet_source.end_date
        else:
            end_date = None
        # Note that the following list might contain sub-dirs (it reflects full_regex)
        try:
            return get_list_matching_files(
                str(internet_source.url),
                str(usr_pwd),
                str(internet_source.include_files_expression),
                internet_type,
                end_date=end_date)
        except Exception:
            logger.error("Error in creating file lists. Continue")
            return None

    if internet_type == 'http_tmpl':
        # Create the full filenames from the url/expression template
        try:
            return build_list_matching_files_tmpl(
                str(internet_source.url),
                str(internet_source.include_files_expression),
                internet_source.start_date,
                internet_source.end_date,
                str(internet_source.frequency_id))
        except Exception:
            logger.error("Error in creating date lists. Continue")
            return None

    if internet_type == 'motu_client':
        try:
            return build_list_matching_files_motu(
                str(internet_source.url),
                str(internet_source.include_files_expression),
                internet_source.start_date,
                internet_source.end_date,
                str(internet_source.frequency_id),
                str(internet_source.user_name),
                str(internet_source.password),
                str(internet_source.files_filter_expression),
            )
        except Exception:
            logger.error("Error in creating motu_client lists. Continue")
            return None

    if internet_type == 'local':
        logger.info(
            "This internet source is meant to copy data on local filesystem")
        try:
            return get_list_matching_files_dir_local(
                str(internet_source.url),
                str(internet_source.include_files_expression))
        except Exception:
            logger.error("Error in creating date lists. Continue")
            return None

    if internet_type == 'offline':
        logger.info(
            "This internet source is meant to work offline (GoogleDrive)")
        return []

    logger.error("No correct type for this internet source type: %s" %
                 internet_type)
    return []


def _retrieve_file(internet_source, internet_type, filename, usr_pwd):
    """Copy or download one file into ``es_constants.ingest_dir``.

    Returns 0 for a local copy, otherwise whatever the download function
    returns; the caller treats a falsy value as success.
    """
    if internet_type == 'local':
        # Attribute access, like every other read of the source record in
        # this module section; the target is built with os.path.join so it
        # does not depend on ingest_dir ending with a separator.
        shutil.copyfile(
            str(internet_source.url) + os.path.sep + filename,
            os.path.join(es_constants.ingest_dir,
                         os.path.basename(filename)))
        return 0

    if internet_type == 'motu_client':
        return get_file_from_motu_command(
            str(filename),
            target_dir=es_constants.ingest_dir,
            userpwd=str(usr_pwd))

    return get_file_from_url(
        str(internet_source.url) + os.path.sep + filename,
        target_file=os.path.basename(filename),
        target_dir=es_constants.ingest_dir,
        userpwd=str(usr_pwd))