Beispiel #1
0
def process_channel(channel_id, aims_xml_info, level_qc):
    """ Downloads all the data available for one channel_id, checks the
    NetCDF file and hands it over to move_to_incoming

    The channel is processed when it has never been downloaded, or when its
    'thru_date' is later than the last date already downloaded. The first
    check that fails logs an error, removes the temporary download directory
    and aborts the processing of the channel.

    channel_id(str)     : key of the channel in aims_xml_info
    aims_xml_info(dict) : aims_xml_info[channel_id] must provide 'from_date'
                          and 'thru_date'; 'thru_date' is parsed with the
                          format "%Y-%m-%dT%H:%M:%SZ"
    level_qc(int)       : QC level, passed on to the download helpers

    return: True if the file passed every check and was given to
            move_to_incoming, False otherwise (channel already processed,
            invalid download or failed check)
    """
    date_format = "%Y-%m-%dT%H:%M:%SZ"
    channel_id_info = aims_xml_info[channel_id]
    from_date = channel_id_info['from_date']
    thru_date = channel_id_info['thru_date']

    thru_date_already_downloaded = get_last_downloaded_date_channel(channel_id, level_qc, from_date)

    already_downloaded = has_channel_already_been_downloaded(channel_id, level_qc)
    # parsed and compared once: the result decides whether the channel is
    # processed and whether the "new data" message is logged
    new_data_available = datetime.strptime(thru_date, date_format) > \
        datetime.strptime(thru_date_already_downloaded, date_format)

    if already_downloaded and not new_data_available:
        logger.info('>> QC%s - Channel %s already processed' % (str(level_qc),
                                                                str(channel_id)))
        return False

    logger.info('>> QC%s - Processing channel %s' % (str(level_qc),
                                                     str(channel_id)))

    if new_data_available:
        logger.info('>> QC%s - New data available for channel %s.\nLatest date downloaded: %s'
                    ' \nNew date available: %s' % (str(level_qc),
                                                   str(channel_id),
                                                   thru_date_already_downloaded,
                                                   thru_date))

    netcdf_tmp_file_path = download_channel(channel_id, from_date,
                                            thru_date, level_qc)
    contact_aims_msg = "Process of channel aborted - CONTACT AIMS"

    if netcdf_tmp_file_path is None:
        # no file path was returned, so there is no temporary directory to remove
        logger.error('   Channel %s - not valid zip file - %s'
                     % (str(channel_id), contact_aims_msg))
        return False

    tmp_dir = os.path.dirname(netcdf_tmp_file_path)

    # NOTE: the messages below are built from adjacent string literals. A
    # backslash continuation inside a literal would embed the indentation of
    # the following source line in the logged message.
    if is_no_data_found(netcdf_tmp_file_path):
        logger.error('   Channel %s - NO_DATA_FOUND file in Zip file - %s'
                     % (str(channel_id), contact_aims_msg))
        shutil.rmtree(tmp_dir)
        return False

    if not modify_soop_trv_netcdf(netcdf_tmp_file_path, channel_id_info):
        logger.error('   Channel %s - Could not modify the NetCDF file - %s'
                     % (str(channel_id), contact_aims_msg))
        shutil.rmtree(tmp_dir)
        return False

    main_var = get_main_soop_trv_var(netcdf_tmp_file_path)
    if has_var_only_fill_value(netcdf_tmp_file_path, main_var):
        logger.error('   Channel %s - _Fillvalues only in main variable - %s'
                     % (str(channel_id), contact_aims_msg))
        shutil.rmtree(tmp_dir)
        return False

    if _is_lat_lon_values_outside_boundaries(netcdf_tmp_file_path):
        logger.error('   Channel %s - Lat/Lon values outside of boundaries '
                     '-%s' % (str(channel_id), contact_aims_msg))
        shutil.rmtree(tmp_dir)
        return False

    if not is_time_monotonic(netcdf_tmp_file_path):
        logger.error('   Channel %s - TIME value is not strickly monotonic '
                     '- %s' % (str(channel_id), contact_aims_msg))
        shutil.rmtree(tmp_dir)
        return False

    checker_retval = pass_netcdf_checker(netcdf_tmp_file_path, tests=['cf:latest', 'imos:1.3'])
    if not checker_retval:
        wip_path = os.environ.get('data_wip_path')
        logger.error('   Channel %s - File does not pass CF/IMOS '
                     'compliance checker - %s'
                     % (str(channel_id), contact_aims_msg))
        # keep a copy of the rejected file before its directory is removed
        shutil.copy(netcdf_tmp_file_path, os.path.join(wip_path, 'errors'))
        logger.error('   File copied to %s for debugging'
                     % (os.path.join(wip_path, 'errors',
                                     os.path.basename(netcdf_tmp_file_path))))
        shutil.rmtree(tmp_dir)
        return False

    move_to_incoming(netcdf_tmp_file_path)
    return True
Beispiel #2
0
def process_monthly_channel(channel_id, aims_xml_info, level_qc):
    """ Fetches one channel period by period, checks every file and stages the valid ones

    channel_id(str)
    aims_xml_info : indexed by channel_id; the entry supplies 'from_date' and 'thru_date'
    level_qc(int)

    aims_service codes, kept for reference:
                   1   -> FAIMMS data
                   100 -> SOOP TRV data
                   300 -> NRS DATA
    for monthly data download, only 1 and 300 should be used

    Each (start, end) period returned by create_list_of_dates_to_download is
    downloaded in turn. A period without data is skipped and recorded with
    save_channel_info. The first file that fails a check stops the loop. The
    logger is closed before returning. Nothing is returned.
    """
    qc_label = str(level_qc)
    channel_label = str(channel_id)
    logger.info('>> QC%s - Processing channel %s' % (qc_label, channel_label))

    channel_id_info = aims_xml_info[channel_id]
    from_date = channel_id_info['from_date']
    thru_date = channel_id_info['thru_date']
    start_dates, end_dates = create_list_of_dates_to_download(channel_id, level_qc, from_date, thru_date)

    if len(start_dates) == 0:
        logger.info('QC%s - Channel %s already up to date' % (qc_label, channel_label))
        close_logger(logger)
        return

    abort_msg = "Process of channel aborted - CONTACT AIMS"

    def drop_tmp_dir(nc_path):
        # remove the temporary directory holding the downloaded file
        shutil.rmtree(os.path.dirname(nc_path))

    def quarantine(nc_path, wip_dir):
        # copy the rejected file to the 'errors' folder, then remove its temporary directory
        shutil.copy(nc_path, os.path.join(wip_dir, 'errors'))
        logger.error('   File copied to %s for debugging' % (os.path.join(wip_dir, 'errors', os.path.basename(nc_path))))
        drop_tmp_dir(nc_path)

    def first_problem(nc_path):
        # run the content checks in order; return the error message of the first failure, or None
        if is_time_var_empty(nc_path):
            return '   Channel %s - No values in TIME variable - %s' % (channel_label, abort_msg)

        if not modify_anmn_nrs_netcdf(nc_path, channel_id_info):
            return '   Channel %s - Could not modify the NetCDF file - Process of channel aborted' % channel_label

        main_var = get_main_netcdf_var(nc_path)
        if has_var_only_fill_value(nc_path, main_var):
            return '   Channel %s - _Fillvalues only in main variable - %s' % (channel_label, abort_msg)

        if get_anmn_nrs_site_name(nc_path) == []:
            return '   Channel %s - Unknown site_code gatt value - %s' % (channel_label, abort_msg)

        if not is_time_monotonic(nc_path):
            return '   Channel %s - TIME value is not strickly monotonic - %s' % (channel_label, abort_msg)

        return None

    # download monthly file
    for period_start, period_end in zip(start_dates, end_dates):
        start_date = period_start.strftime("%Y-%m-%dT%H:%M:%SZ")
        end_date = period_end.strftime("%Y-%m-%dT%H:%M:%SZ")
        nc_path = download_channel(channel_id, start_date, end_date, level_qc)

        if nc_path is None:
            logger.error('   Channel %s - not valid zip file - %s' % (channel_label, abort_msg))
            break

        # NO_DATA_FOUND file only means there is no data for the selected time period. Could be some data afterwards
        if is_no_data_found(nc_path):
            logger.warning('   Channel %s - No data for the time period:%s - %s' % (channel_label, start_date, end_date))
            drop_tmp_dir(nc_path)
            save_channel_info(channel_id, aims_xml_info, level_qc, end_date)
            continue

        problem = first_problem(nc_path)
        if problem is not None:
            logger.error(problem)
            drop_tmp_dir(nc_path)
            break

        # check every single file of the list. We don't assume that if one passes, all pass ... past proved this
        wip_path = os.environ.get('data_wip_path')
        if not pass_netcdf_checker(nc_path, tests=['cf:latest', 'imos:1.3']):
            logger.error('   Channel %s - File does not pass CF/IMOS compliance checker - Process of channel aborted' % channel_label)
            quarantine(nc_path, wip_path)
            break

        nc_path = fix_data_code_from_filename(nc_path)
        nc_path = fix_provider_code_from_filename(nc_path, 'IMOS_ANMN')

        if re.search('IMOS_ANMN_[A-Z]{1}_', nc_path) is None:
            logger.error('   Channel %s - File name Data code does not pass REGEX - Process of channel aborted' % channel_label)
            quarantine(nc_path, wip_path)
            break

        move_to_tmp_incoming(nc_path)

        if TESTING:
            # testing only: record the first period and stop, so a single month is downloaded per channel
            save_channel_info(channel_id, aims_xml_info, level_qc, end_date)
            break

        save_channel_info(channel_id, aims_xml_info, level_qc, end_date)

    close_logger(logger)
Beispiel #3
0
def process_channel(channel_id, aims_xml_info, level_qc):
    """ Downloads all the data available for one channel_id, checks the
    NetCDF file and hands it over to move_to_incoming

    The channel is processed when it has never been downloaded, or when its
    'thru_date' is later than the last date already downloaded. Once a file
    has been downloaded, its temporary directory is always removed before
    returning, whether the checks passed, failed or raised.

    channel_id(str)     : key of the channel in aims_xml_info
    aims_xml_info(dict) : aims_xml_info[channel_id] must provide 'from_date'
                          and 'thru_date'; 'thru_date' is parsed with the
                          format "%Y-%m-%dT%H:%M:%SZ"
    level_qc(int)       : QC level, passed on to the download helpers

    return: True if the file passed every check and was given to
            move_to_incoming, False otherwise (channel already processed,
            invalid download or failed check)
    raise:  OSError from the final cleanup, unless the directory is already
            gone (ENOENT)
    """
    date_format = "%Y-%m-%dT%H:%M:%SZ"
    channel_id_info = aims_xml_info[channel_id]
    from_date = channel_id_info['from_date']
    thru_date = channel_id_info['thru_date']

    thru_date_already_downloaded = get_last_downloaded_date_channel(
        channel_id, level_qc, from_date)

    already_downloaded = has_channel_already_been_downloaded(channel_id,
                                                             level_qc)
    # parsed and compared once: the result decides whether the channel is
    # processed and whether the "new date available" message is logged
    new_data_available = (
        datetime.strptime(thru_date, date_format) >
        datetime.strptime(thru_date_already_downloaded, date_format))

    if already_downloaded and not new_data_available:
        logger.info(
            'QC{level_qc} - Channel {channel_id}: already processed'.format(
                channel_id=str(channel_id), level_qc=str(level_qc)))
        return False

    logger.info('QC{level_qc} - Processing channel {channel_id}'.format(
        channel_id=str(channel_id), level_qc=str(level_qc)))

    if new_data_available:
        logger.info('QC%s - Channel %s: Latest date downloaded: [%s]'
                    ' - New date available: [%s]' %
                    (str(level_qc), str(channel_id),
                     thru_date_already_downloaded, thru_date))

    netcdf_tmp_file_path = download_channel(channel_id, from_date,
                                            thru_date, level_qc)

    contact_aims_msg = "Process of channel aborted - CONTACT AIMS"

    if not netcdf_tmp_file_path:
        logger.error(
            'Channel {channel_id}: not valid zip file - {message}'.format(
                channel_id=str(channel_id), message=contact_aims_msg))
        return False

    # NOTE: the messages below are built from adjacent string literals. A
    # backslash continuation inside a literal would embed the indentation of
    # the following source line in the logged message.
    try:
        if is_no_data_found(netcdf_tmp_file_path):
            logger.error(
                'Channel {channel_id}: NO_DATA_FOUND file in Zip file -{message}'
                .format(channel_id=str(channel_id),
                        message=contact_aims_msg))
            return False

        if not modify_soop_trv_netcdf(netcdf_tmp_file_path,
                                      channel_id_info):
            logger.error(
                'Channel {channel_id}:Could not modify the NetCDF file - '
                '{message}'.format(channel_id=str(channel_id),
                                   message=contact_aims_msg))
            return False

        main_var = get_main_soop_trv_var(netcdf_tmp_file_path)
        if has_var_only_fill_value(netcdf_tmp_file_path, main_var):
            logger.error(
                'Channel {channel_id}: _Fillvalues only in main variable - '
                '{message}'.format(channel_id=str(channel_id),
                                   message=contact_aims_msg))
            return False

        if _is_lat_lon_values_outside_boundaries(netcdf_tmp_file_path):
            logger.error(
                'Channel {channel_id}: Lat/Lon values outside of boundaries '
                '- {message}'.format(channel_id=str(channel_id),
                                     message=contact_aims_msg))
            return False

        if not is_time_monotonic(netcdf_tmp_file_path):
            logger.error(
                'Channel {channel_id}: TIME value is not strickly monotonic '
                '- {message}'.format(channel_id=str(channel_id),
                                     message=contact_aims_msg))
            return False

        checker_retval = pass_netcdf_checker(
            netcdf_tmp_file_path, tests=['cf:latest', 'imos:1.3'])
        if not checker_retval:
            wip_path = os.environ.get('data_wip_path')
            logger.error(
                'Channel {channel_id}: File does not pass CF/IMOS '
                'compliance checker - {message}'.format(
                    channel_id=str(channel_id), message=contact_aims_msg))
            # keep a copy of the rejected file; the cleanup below removes
            # the temporary directory
            shutil.copy(netcdf_tmp_file_path,
                        os.path.join(wip_path, 'errors'))
            logger.error(
                'File copied to %s for debugging' %
                (os.path.join(wip_path, 'errors',
                              os.path.basename(netcdf_tmp_file_path))))
            return False

        move_to_incoming(netcdf_tmp_file_path)
        return True
    finally:
        # ensure the temporary file and its parent directory are removed;
        # a directory that no longer exists is not an error
        try:
            shutil.rmtree(os.path.dirname(netcdf_tmp_file_path))
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
Beispiel #4
0
def process_monthly_channel(channel_id, aims_xml_info, level_qc):
    """ Fetches one channel period by period, checks every file and stages the valid ones

    channel_id(str)
    aims_xml_info : indexed by channel_id; the entry supplies 'from_date' and 'thru_date'
    level_qc(int)

    aims_service codes, kept for reference:
                   1   -> FAIMMS data
                   100 -> SOOP TRV data
                   300 -> NRS DATA
    for monthly data download, only 1 and 300 should be used

    Each (start, end) period returned by create_list_of_dates_to_download is
    downloaded in turn. A period without data is skipped and recorded with
    save_channel_info. The first file that fails a check stops the loop. The
    logger is closed before returning. Nothing is returned.
    """
    qc_label = str(level_qc)
    channel_label = str(channel_id)
    logger.info('>> QC%s - Processing channel %s' % (qc_label, channel_label))

    channel_id_info = aims_xml_info[channel_id]
    from_date = channel_id_info['from_date']
    thru_date = channel_id_info['thru_date']
    start_dates, end_dates = create_list_of_dates_to_download(channel_id, level_qc, from_date, thru_date)

    if len(start_dates) == 0:
        logger.info('QC%s - Channel %s already up to date' % (qc_label, channel_label))
        close_logger(logger)
        return

    abort_msg = "Process of channel aborted - CONTACT AIMS"

    def drop_tmp_dir(nc_path):
        # remove the temporary directory holding the downloaded file
        shutil.rmtree(os.path.dirname(nc_path))

    def quarantine(nc_path, wip_dir):
        # copy the rejected file to the 'errors' folder, then remove its temporary directory
        shutil.copy(nc_path, os.path.join(wip_dir, 'errors'))
        logger.error('   File copied to %s for debugging' % (os.path.join(wip_dir, 'errors', os.path.basename(nc_path))))
        drop_tmp_dir(nc_path)

    def first_problem(nc_path):
        # run the content checks in order; return the error message of the first failure, or None
        if is_time_var_empty(nc_path):
            return '   Channel %s - No values in TIME variable - %s' % (channel_label, abort_msg)

        if not modify_faimms_netcdf(nc_path, channel_id_info):
            return '   Channel %s - Could not modify the NetCDF file - Process of channel aborted' % channel_label

        main_var = get_main_faimms_var(nc_path)
        if has_var_only_fill_value(nc_path, main_var):
            return '   Channel %s - _Fillvalues only in main variable - %s' % (channel_label, abort_msg)

        if get_faimms_site_name(nc_path) == [] or get_faimms_platform_type(nc_path) == []:
            return '   Channel %s - Unknown site_code gatt value - %s' % (channel_label, abort_msg)

        if not is_time_monotonic(nc_path):
            return '   Channel %s - TIME value is not strickly monotonic - %s' % (channel_label, abort_msg)

        return None

    # download monthly file
    for period_start, period_end in zip(start_dates, end_dates):
        start_date = period_start.strftime("%Y-%m-%dT%H:%M:%SZ")
        end_date = period_end.strftime("%Y-%m-%dT%H:%M:%SZ")
        nc_path = download_channel(channel_id, start_date, end_date, level_qc)

        if nc_path is None:
            logger.error('   Channel %s - not valid zip file - %s' % (channel_label, abort_msg))
            break

        # NO_DATA_FOUND file only means there is no data for the selected time period. Could be some data afterwards
        if is_no_data_found(nc_path):
            logger.warning('   Channel %s - No data for the time period:%s - %s' % (channel_label, start_date, end_date))
            drop_tmp_dir(nc_path)
            save_channel_info(channel_id, aims_xml_info, level_qc, end_date)
            continue

        problem = first_problem(nc_path)
        if problem is not None:
            logger.error(problem)
            drop_tmp_dir(nc_path)
            break

        # check every single file of the list. We don't assume that if one passes, all pass ... past proved this
        wip_path = DATA_WIP_PATH
        if not pass_netcdf_checker(nc_path, tests=['cf:latest', 'imos:1.3']):
            logger.error('   Channel %s - File does not pass CF/IMOS compliance checker - Process of channel aborted' % channel_label)
            quarantine(nc_path, wip_path)
            break

        nc_path = fix_data_code_from_filename(nc_path)
        nc_path = fix_provider_code_from_filename(nc_path, 'IMOS_FAIMMS')

        if re.search('IMOS_FAIMMS_[A-Z]{1}_', nc_path) is None:
            logger.error('   Channel %s - File name Data code does not pass REGEX - Process of channel aborted' % channel_label)
            quarantine(nc_path, wip_path)
            break

        move_to_tmp_incoming(nc_path)

        if TESTING:
            # testing only: record the first period and stop, so a single month is downloaded per channel
            save_channel_info(channel_id, aims_xml_info, level_qc, end_date)
            break

        save_channel_info(channel_id, aims_xml_info, level_qc, end_date)

    close_logger(logger)
Beispiel #5
0
def process_channel(channel_id, aims_xml_info, level_qc):
    """ Downloads all the data available for one channel_id, checks the
    NetCDF file and hands it over to move_to_incoming

    The channel is processed when it has never been downloaded, or when its
    'thru_date' is later than the last date already downloaded. The first
    check that fails logs an error, removes the temporary download directory
    and aborts the processing of the channel.

    channel_id(str)     : key of the channel in aims_xml_info
    aims_xml_info(dict) : aims_xml_info[channel_id] must provide 'from_date'
                          and 'thru_date'; 'thru_date' is parsed with the
                          format "%Y-%m-%dT%H:%M:%SZ"
    level_qc(int)       : QC level, passed on to the download helpers

    return: True if the file passed every check and was given to
            move_to_incoming, False otherwise (channel already processed,
            invalid download or failed check)
    """
    date_format = "%Y-%m-%dT%H:%M:%SZ"
    channel_id_info = aims_xml_info[channel_id]
    from_date = channel_id_info['from_date']
    thru_date = channel_id_info['thru_date']

    thru_date_already_downloaded = get_last_downloaded_date_channel(
        channel_id, level_qc, from_date)

    already_downloaded = has_channel_already_been_downloaded(channel_id,
                                                             level_qc)
    # parsed and compared once: the result decides whether the channel is
    # processed and whether the "new data" message is logged
    new_data_available = (
        datetime.strptime(thru_date, date_format) >
        datetime.strptime(thru_date_already_downloaded, date_format))

    if already_downloaded and not new_data_available:
        logger.info('>> QC%s - Channel %s already processed' %
                    (str(level_qc), str(channel_id)))
        return False

    logger.info('>> QC%s - Processing channel %s' %
                (str(level_qc), str(channel_id)))

    if new_data_available:
        logger.info(
            '>> QC%s - New data available for channel %s.\nLatest date downloaded: %s'
            ' \nNew date available: %s' %
            (str(level_qc), str(channel_id), thru_date_already_downloaded,
             thru_date))

    netcdf_tmp_file_path = download_channel(channel_id, from_date,
                                            thru_date, level_qc)
    contact_aims_msg = "Process of channel aborted - CONTACT AIMS"

    if netcdf_tmp_file_path is None:
        # no file path was returned, so there is no temporary directory to remove
        logger.error('   Channel %s - not valid zip file - %s' %
                     (str(channel_id), contact_aims_msg))
        return False

    tmp_dir = os.path.dirname(netcdf_tmp_file_path)

    # NOTE: the messages below are built from adjacent string literals. A
    # backslash continuation inside a literal would embed the indentation of
    # the following source line in the logged message.
    if is_no_data_found(netcdf_tmp_file_path):
        logger.error(
            '   Channel %s - NO_DATA_FOUND file in Zip file - %s' %
            (str(channel_id), contact_aims_msg))
        shutil.rmtree(tmp_dir)
        return False

    if not modify_soop_trv_netcdf(netcdf_tmp_file_path, channel_id_info):
        logger.error('   Channel %s - Could not modify the NetCDF file - %s' %
                     (str(channel_id), contact_aims_msg))
        shutil.rmtree(tmp_dir)
        return False

    main_var = get_main_soop_trv_var(netcdf_tmp_file_path)
    if has_var_only_fill_value(netcdf_tmp_file_path, main_var):
        logger.error('   Channel %s - _Fillvalues only in main variable - %s' %
                     (str(channel_id), contact_aims_msg))
        shutil.rmtree(tmp_dir)
        return False

    if _is_lat_lon_values_outside_boundaries(netcdf_tmp_file_path):
        logger.error(
            '   Channel %s - Lat/Lon values outside of boundaries '
            '-%s' % (str(channel_id), contact_aims_msg))
        shutil.rmtree(tmp_dir)
        return False

    if not is_time_monotonic(netcdf_tmp_file_path):
        logger.error(
            '   Channel %s - TIME value is not strickly monotonic '
            '- %s' % (str(channel_id), contact_aims_msg))
        shutil.rmtree(tmp_dir)
        return False

    checker_retval = pass_netcdf_checker(netcdf_tmp_file_path,
                                         tests=['cf:latest', 'imos:1.3'])
    if not checker_retval:
        wip_path = os.environ.get('data_wip_path')
        logger.error('   Channel %s - File does not pass CF/IMOS '
                     'compliance checker - %s' %
                     (str(channel_id), contact_aims_msg))
        # keep a copy of the rejected file before its directory is removed
        shutil.copy(netcdf_tmp_file_path, os.path.join(wip_path, 'errors'))
        logger.error('   File copied to %s for debugging' % (os.path.join(
            wip_path, 'errors', os.path.basename(netcdf_tmp_file_path))))
        shutil.rmtree(tmp_dir)
        return False

    move_to_incoming(netcdf_tmp_file_path)
    return True