コード例 #1
0
ファイル: scrape.py プロジェクト: opencadc/neossat2caom2
def list_for_validate(config):
    """
    Refresh the source listing stored in the working directory and derive
    the two look-up tables used for validation.

    :param config: object providing ``working_directory``, where the
        listing file (NEOSSAT_SOURCE_LIST) is read from and written to.
    :return: A tuple ``(result, validator_list)``. Both are dicts keyed by
        bare file name (the last '/'-separated component of each listing
        key). ``result`` values are the values stored in the listing for
        that entry (timestamps for the files at the CSA site, per the
        listing format). ``validator_list`` values are the fully-qualified
        names at the CSA site, suitable for providing 'pull' task type
        content for a todo file.
    """
    list_fqn = os.path.join(config.working_directory, NEOSSAT_SOURCE_LIST)
    if os.path.exists(list_fqn):
        logging.debug(f'Retrieve content from existing file {list_fqn}')
        previous = mc.read_as_yaml(list_fqn)
        # each cached entry is a two-element list:
        # 0 - False indicates a file, True indicates a directory
        # 1 - timestamp
        cached = {}
        for fqn, timestamp in previous.items():
            cached[fqn] = [False, timestamp]
    else:
        # no listing file yet, fall back to whatever _read_cache provides
        # (the original author notes this is empty if there's no cache)
        cached = _read_cache(config.working_directory)

    start_s = mc.make_seconds(NEOSSAT_START_DATE)
    # the second returned value (a max date) is not needed here
    listing, _ = _append_source_listing(
        start_s, config.working_directory, cached)
    mc.write_as_yaml(listing, list_fqn)

    # strip the path from every listing key, and build both dictionaries
    # in one pass: file name -> listing value, and file name ->
    # fully-qualified name at the FTP site
    validator_list = {}
    result = {}
    for fqn in listing:
        f_name = fqn.split('/')[-1]
        validator_list[f_name] = fqn
        result[f_name] = listing[fqn]
    return result, validator_list
コード例 #2
0
def _execute_and_check_list_for_validate(ftp_mock, source_list_fqn,
                                         result_count, cache_count):
    """
    Run scrape.list_for_validate against a mocked FTP connection and check
    both the listing file it writes and the directory cache afterwards.

    :param ftp_mock: mock standing in for the FTP client factory; its
        context-manager value gets ``listdir`` and ``stat`` side effects.
    :param source_list_fqn: fully-qualified name of the YAML listing file
        expected to exist after the call.
    :param result_count: expected number of entries in that listing.
    :param cache_count: expected number of entries returned by
        scrape._read_cache.
    """
    # put the canned directory listing where the scrape module looks for
    # NEOSSAT_DIR_LIST (presumably the file _read_cache loads - confirm)
    data_dir = test_main_app.TEST_DATA_DIR
    dir_listing_fqn = os.path.join(data_dir, scrape.NEOSSAT_DIR_LIST)
    canned_fqn = os.path.join(data_dir, 'test_source_dir_listing.csv')
    shutil.copy(canned_fqn, dir_listing_fqn)

    # the object handed out by "with <ftp client> as ..." in the code
    # under test
    ftp_client = ftp_mock.return_value.__enter__.return_value
    ftp_client.listdir.side_effect = _list_dirs
    ftp_client.stat.side_effect = _entry_stats

    # pretend the current directory is the test data directory, and make
    # sure the real os.getcwd is restored no matter how the checks end
    original_getcwd = os.getcwd
    os.getcwd = Mock(return_value=test_main_app.TEST_DATA_DIR)
    try:
        test_config = mc.Config()
        test_config.get_executors()
        scrape.list_for_validate(test_config)

        listing = mc.read_as_yaml(source_list_fqn)
        assert listing is not None, 'expect a file record'
        assert len(listing) == result_count, 'wrong number of entries'
        expected_listing_key = f'{MOCK_DIR}/NEOS_SCI_2017213215701_cord.fits'
        assert expected_listing_key in listing, 'wrong content'

        cache_content = scrape._read_cache(test_config.working_directory)
        assert cache_content is not None, 'expected return value'
        assert len(cache_content) == cache_count, \
            'wrong number of cached entries'
        expected_cache_key = f'{MOCK_DIR}/NEOS_SCI_2017213215701.fits'
        assert expected_cache_key in cache_content, 'wrong content'
    finally:
        os.getcwd = original_getcwd
コード例 #3
0
def test_capture_failure(test_config):
    """
    Check the files written by OrganizeExecutesWithDoOne after two
    capture_failure calls, one capture_success call and finish_run: the
    success, failure, retry and rejected files must exist and hold the
    expected content.

    :param test_config: configuration fixture; its logging-related
        attributes are overwritten here so output lands in
        ``<tc.THIS_DIR>/logs``.
    """
    # NOTE(review): utcnow() is naive, and timestamp() treats naive values
    # as local time, so this is not true epoch seconds on a non-UTC host.
    # Left unchanged because how capture_success uses start_s is not
    # visible here - confirm before changing.
    start_s = datetime.utcnow().timestamp()
    test_obs_id = 'test_obs_id'
    test_obs_id_2 = 'test_obs_id_2'
    log_file_directory = os.path.join(tc.THIS_DIR, 'logs')
    test_config.log_to_file = True
    test_config.log_file_directory = log_file_directory
    test_config.success_log_file_name = 'success_log.txt'
    test_config.failure_log_file_name = 'failure_log.txt'
    test_config.retry_file_name = 'retries.txt'
    test_config.rejected_file_name = 'rejected.yml'

    # clean up from last execution
    if not os.path.exists(log_file_directory):
        os.mkdir(log_file_directory)
    for stale_fqn in (test_config.success_fqn,
                      test_config.failure_fqn,
                      test_config.retry_fqn,
                      test_config.rejected_fqn):
        if os.path.exists(stale_fqn):
            os.remove(stale_fqn)

    test_oe = ec.OrganizeExecutesWithDoOne(test_config, 'command', [], [])
    # this message is expected to land the obs id in the rejected file
    # under 'bad_metadata' rather than in the retry file (see the
    # assertions below)
    test_sname = tc.TestStorageName(obs_id=test_obs_id_2)
    test_oe.capture_failure(test_sname, 'Cannot build an observation')
    test_sname = tc.TestStorageName(obs_id=test_obs_id)
    test_oe.capture_failure(test_sname, 'exception text')
    test_oe.capture_success(test_obs_id, 'C121212_01234_CAL.fits.gz', start_s)
    test_oe.finish_run(test_config)

    assert os.path.exists(test_config.success_fqn)
    assert os.path.exists(test_config.failure_fqn)
    assert os.path.exists(test_config.retry_fqn)
    assert os.path.exists(test_config.rejected_fqn), test_config.rejected_fqn

    # read each file inside a context manager so the handle is closed even
    # when a following assertion fails
    with open(test_config.success_fqn) as f_in:
        success_content = f_in.read()
    assert ('test_obs_id C121212_01234_CAL.fits.gz'
            in success_content), 'wrong content'

    with open(test_config.retry_fqn) as f_in:
        retry_content = f_in.read()
    assert retry_content == 'test_obs_id\n'

    with open(test_config.failure_fqn) as f_in:
        failure_content = f_in.read()
    assert failure_content.endswith(
        'Unknown error. Check specific log.\n'), failure_content

    rejected_content = mc.read_as_yaml(test_config.rejected_fqn)
    assert rejected_content is not None, 'expect a result'
    test_result = rejected_content.get('bad_metadata')
    assert test_result is not None, 'wrong result'
    assert len(test_result) == 1, 'wrong number of entries'
    assert test_result[0] == test_obs_id, 'wrong entry'
コード例 #4
0
def read_file_url_list_from_nrao(nrao_state_fqn):
    """
    Load the NRAO URL listing, from the cache file when there is one,
    otherwise by building it and writing the cache file, then reduce it
    with get_file_url_list_max_versions.

    :param nrao_state_fqn: str cache file name
    :return: tuple (result, validate_dict) where
        result dict key is file_name, value is timestamp from NRAO site
        of file
        validate_dict key is file_name, value is NRAO URL of file
    """
    if not os.path.exists(nrao_state_fqn):
        # no cache yet: build the listing from the earliest start date and
        # store it for the next call
        earliest = scrape.make_date_time('01Jan1990 00:00')
        url_listing = scrape.build_url_list(earliest)
        mc.write_as_yaml(url_listing, nrao_state_fqn)
    else:
        url_listing = mc.read_as_yaml(nrao_state_fqn)
    timestamps, urls = get_file_url_list_max_versions(url_listing)
    return timestamps, urls
コード例 #5
0
def read_list_from_nrao(nrao_state_fqn):
    """
    Load the NRAO listing, from the cache file when there is one, otherwise
    by building it and writing the cache file, and flatten it into two
    dicts keyed by file name.

    :param nrao_state_fqn: str cache file name
    :return: tuple (result, validate_dict) where
        result dict key is file_name (last '/'-separated part of a URL),
        value is the listing key the URL was stored under
        validate_dict key is file_name, value is the URL itself
    """
    if not os.path.exists(nrao_state_fqn):
        earliest = scrape.make_date_time('01Jan1990 00:00')
        # the second returned value is not used here
        listing, _ = scrape.build_file_url_list(earliest)
        mc.write_as_yaml(listing, nrao_state_fqn)
    else:
        listing = mc.read_as_yaml(nrao_state_fqn)

    # flatten once, keeping the listing order, so that a file name seen
    # more than once ends up with its last occurrence in both dicts
    entries = [
        (url.split('/')[-1], listing_key, url)
        for listing_key, urls in listing.items()
        for url in urls
    ]
    result = {f_name: listing_key for f_name, listing_key, _ in entries}
    validate_dict = {f_name: url for f_name, _, url in entries}
    return result, validate_dict