def list_for_validate(config):
    """
    Refresh the source listing file in the working directory and derive
    the two look-ups used for validation against the CSA Open Data ftp
    site.

    :param config: supplies working_directory, where the source listing
        file and the cache are located.
    :return: two dicts, both keyed by file name (the text after the last
        '/' of each listing entry):
        - the first holds the value stored for the entry in the listing,
          described by the original documentation as the timestamp for
          the file at the CSA site.
        - the second holds the fully-qualified name at the CSA site,
          suitable for providing 'pull' task type content for a todo
          file.
    """
    list_fqn = os.path.join(config.working_directory, NEOSSAT_SOURCE_LIST)
    if not os.path.exists(list_fqn):
        # no listing file yet, so seed from the cache
        # NOTE(review): the cache is expected to be empty when there is
        # no cache file - confirm against _read_cache
        cached = _read_cache(config.working_directory)
    else:
        logging.debug(f'Retrieve content from existing file {list_fqn}')
        previous = mc.read_as_yaml(list_fqn)
        # each cached value is a two-element list:
        # 0 - False indicates a file, True indicates a directory
        # 1 - timestamp
        cached = {}
        for key, value in previous.items():
            cached[key] = [False, value]

    ts_s = mc.make_seconds(NEOSSAT_START_DATE)
    # the second returned value is not needed here
    listing, _max_date = _append_source_listing(
        ts_s, config.working_directory, cached
    )
    mc.write_as_yaml(listing, list_fqn)

    # strip the fully-qualified path from each entry, so that the bare
    # file name is the key of both dictionaries
    validator_list = {}
    result = {}
    for fqn in listing:
        f_name = fqn.rpartition('/')[2]
        validator_list[f_name] = fqn
        result[f_name] = listing[fqn]
    return result, validator_list
def _execute_and_check_list_for_validate(
    ftp_mock, source_list_fqn, result_count, cache_count
):
    """
    Run scrape.list_for_validate against a mocked ftp client, then check
    the content of the resulting source list file and of the cache.

    :param ftp_mock: mock whose context-manager value stands in for the
        ftp client; its listdir and stat calls are redirected to
        _list_dirs and _entry_stats.
    :param source_list_fqn: file to read back with mc.read_as_yaml after
        the call.
    :param result_count: expected number of entries in source_list_fqn.
    :param cache_count: expected number of entries from scrape._read_cache.
    """
    # copy the directory listing fixture to the NEOSSAT_DIR_LIST name in
    # the test data directory
    shutil.copy(
        os.path.join(
            test_main_app.TEST_DATA_DIR, 'test_source_dir_listing.csv'
        ),
        os.path.join(test_main_app.TEST_DATA_DIR, scrape.NEOSSAT_DIR_LIST),
    )

    ftp_client = ftp_mock.return_value.__enter__.return_value
    ftp_client.listdir.side_effect = _list_dirs
    ftp_client.stat.side_effect = _entry_stats

    # os.getcwd is replaced for the duration of the checks, and restored
    # in the finally clause, even if an assertion fails
    original_getcwd = os.getcwd
    os.getcwd = Mock(return_value=test_main_app.TEST_DATA_DIR)
    try:
        config = mc.Config()
        config.get_executors()
        scrape.list_for_validate(config)

        listed = mc.read_as_yaml(source_list_fqn)
        assert listed is not None, 'expect a file record'
        assert len(listed) == result_count, 'wrong number of entries'
        expected_listed = f'{MOCK_DIR}/NEOS_SCI_2017213215701_cord.fits'
        assert expected_listed in listed, 'wrong content'

        from_cache = scrape._read_cache(config.working_directory)
        assert from_cache is not None, 'expected return value'
        assert (
            len(from_cache) == cache_count
        ), 'wrong number of cached entries'
        expected_cached = f'{MOCK_DIR}/NEOS_SCI_2017213215701.fits'
        assert expected_cached in from_cache, 'wrong content'
    finally:
        os.getcwd = original_getcwd
def test_capture_failure(test_config):
    """
    Capture two failures and one success with OrganizeExecutesWithDoOne,
    finish the run, and check the content of the success, failure, retry
    and rejected files.

    :param test_config: configuration fixture; its logging-related
        attributes are overwritten so that all output goes to the 'logs'
        directory under tc.THIS_DIR.
    """
    start_s = datetime.utcnow().timestamp()
    test_obs_id = 'test_obs_id'
    test_obs_id_2 = 'test_obs_id_2'
    log_file_directory = os.path.join(tc.THIS_DIR, 'logs')
    test_config.log_to_file = True
    test_config.log_file_directory = log_file_directory
    test_config.success_log_file_name = 'success_log.txt'
    test_config.failure_log_file_name = 'failure_log.txt'
    test_config.retry_file_name = 'retries.txt'
    test_config.rejected_file_name = 'rejected.yml'

    # clean up from last execution
    if not os.path.exists(log_file_directory):
        os.mkdir(log_file_directory)
    for stale_fqn in (
        test_config.success_fqn,
        test_config.failure_fqn,
        test_config.retry_fqn,
        test_config.rejected_fqn,
    ):
        if os.path.exists(stale_fqn):
            os.remove(stale_fqn)

    test_oe = ec.OrganizeExecutesWithDoOne(test_config, 'command', [], [])
    test_sname = tc.TestStorageName(obs_id=test_obs_id_2)
    test_oe.capture_failure(test_sname, 'Cannot build an observation')
    test_sname = tc.TestStorageName(obs_id=test_obs_id)
    test_oe.capture_failure(test_sname, 'exception text')
    test_oe.capture_success(
        test_obs_id, 'C121212_01234_CAL.fits.gz', start_s
    )
    test_oe.finish_run(test_config)

    assert os.path.exists(test_config.success_fqn)
    assert os.path.exists(test_config.failure_fqn)
    assert os.path.exists(test_config.retry_fqn)
    assert os.path.exists(test_config.rejected_fqn), test_config.rejected_fqn

    # read each file inside a context manager, so that the handle is
    # closed before the content is checked
    with open(test_config.success_fqn) as f:
        success_content = f.read()
    assert (
        'test_obs_id C121212_01234_CAL.fits.gz' in success_content
    ), 'wrong content'

    with open(test_config.retry_fqn) as f:
        retry_content = f.read()
    assert retry_content == 'test_obs_id\n'

    with open(test_config.failure_fqn) as f:
        failure_content = f.read()
    assert failure_content.endswith(
        'Unknown error. Check specific log.\n'
    ), failure_content

    rejected_content = mc.read_as_yaml(test_config.rejected_fqn)
    assert rejected_content is not None, 'expect a result'
    test_result = rejected_content.get('bad_metadata')
    assert test_result is not None, 'wrong result'
    assert len(test_result) == 1, 'wrong number of entries'
    assert test_result[0] == test_obs_id, 'wrong entry'
def read_file_url_list_from_nrao(nrao_state_fqn):
    """
    Obtain the NRAO listing, from the cache file when it exists, and
    otherwise from scrape.build_url_list, in which case the cache file is
    written.

    :param nrao_state_fqn: str cache file name
    :return: result dict key is file_name, value is timestamp from NRAO
        site of file
        validate_dict key is file_name, value is NRAO URL of file
    """
    if not os.path.exists(nrao_state_fqn):
        # no cached state: build the listing from the start date, and
        # cache it for the next call
        earliest = scrape.make_date_time('01Jan1990 00:00')
        url_list = scrape.build_url_list(earliest)
        mc.write_as_yaml(url_list, nrao_state_fqn)
    else:
        url_list = mc.read_as_yaml(nrao_state_fqn)

    file_timestamps, file_urls = get_file_url_list_max_versions(url_list)
    return file_timestamps, file_urls
def read_list_from_nrao(nrao_state_fqn):
    """
    Obtain the NRAO listing, from the cache file when it exists, and
    otherwise from scrape.build_file_url_list, in which case the cache
    file is written. Flatten the listing into two dicts keyed by file
    name.

    :param nrao_state_fqn: str cache file name
    :return: result dict key is file_name (the text after the last '/' of
        a URL), value is the listing key the URL was found under
        validate_dict key is file_name, value is the URL
    """
    if not os.path.exists(nrao_state_fqn):
        earliest = scrape.make_date_time('01Jan1990 00:00')
        # the second returned value is not needed here
        listing, _unused_date = scrape.build_file_url_list(earliest)
        mc.write_as_yaml(listing, nrao_state_fqn)
    else:
        listing = mc.read_as_yaml(nrao_state_fqn)

    result = {}
    validate_dict = {}
    for listing_key, urls in listing.items():
        for url in urls:
            # a file name seen more than once keeps the last occurrence
            file_name = url.rpartition('/')[2]
            result[file_name] = listing_key
            validate_dict[file_name] = url
    return result, validate_dict