def list_for_validate(config):
    """
    Build the validation listings for the CSA Open Data ftp site.

    :return: A two-tuple of dicts, both keyed by file name. The first maps
        each file name to its timestamp at the CSA site; the second maps
        each file name to its fully-qualified name at the CSA site,
        suitable for providing 'pull' task type content for a todo file.
    """
    source_list_fqn = os.path.join(
        config.working_directory, NEOSSAT_SOURCE_LIST
    )
    if os.path.exists(source_list_fqn):
        logging.debug(
            f'Retrieve content from existing file {source_list_fqn}'
        )
        previous = mc.read_as_yaml(source_list_fqn)
        # 0 - False indicates a file, True indicates a directory
        # 1 - timestamp
        cached = {f_name: [False, f_ts] for f_name, f_ts in previous.items()}
    else:
        # cached will be empty if there's no cache
        cached = _read_cache(config.working_directory)
    start_s = mc.make_seconds(NEOSSAT_START_DATE)
    listing, ignore_max_date = _append_source_listing(
        start_s, config.working_directory, cached
    )
    mc.write_as_yaml(listing, source_list_fqn)
    # strip the fully-qualified path names down to bare file names while
    # building two dictionaries: file name -> FTP fully-qualified name, and
    # file name -> timestamp
    validator_list = {fqn.split('/')[-1]: fqn for fqn in listing}
    result = {fqn.split('/')[-1]: listing[fqn] for fqn in listing}
    return result, validator_list
def _write_state(prior_timestamp=None, end_timestamp=None):
    """
    Write a test state file (STATE_FILE) containing a 'gemini_timestamp'
    bookmark.

    :param prior_timestamp: None, float seconds since the epoch, or a value
        understood by make_seconds; becomes the bookmark 'last_record'.
        Defaults to 15 minutes before 'now'.
    :param end_timestamp: optional datetime recorded as 'end_timestamp'.
    :raises AssertionError: if end_timestamp is provided but is not a
        datetime.
    """
    # to ensure at least one spin through the execution loop, test case
    # must have a starting time greater than one config.interval prior
    # to 'now', default interval is 10 minutes
    if prior_timestamp is None:
        prior_s = datetime.utcnow().timestamp() - 15 * 60
    elif isinstance(prior_timestamp, float):
        # was 'type(prior_timestamp) is float' - isinstance is the
        # idiomatic (and subclass-safe) type check
        prior_s = prior_timestamp
    else:
        prior_s = make_seconds(prior_timestamp)
    test_start_time = datetime.fromtimestamp(prior_s).isoformat()
    # informational trace only - was logging.error, which mis-labelled a
    # normal progress message as an error
    logging.debug(f'test_start_time {test_start_time}')
    # build the bookmark once, conditionally adding the end timestamp,
    # instead of duplicating the whole nested dict literal
    record = {'last_record': test_start_time}
    if end_timestamp is not None:
        assert isinstance(end_timestamp, datetime), 'end_timestamp wrong type'
        record['end_timestamp'] = end_timestamp
    test_bookmark = {'bookmarks': {'gemini_timestamp': record}}
    write_as_yaml(test_bookmark, STATE_FILE)
def mock_write_state(start_time):
    """Write a test state file to STATE_FILE with start_time recorded as
    the GEM_BOOKMARK 'last_record' value."""
    state_content = {
        'bookmarks': {
            data_source.GEM_BOOKMARK: {'last_record': start_time},
        },
    }
    mc.write_as_yaml(state_content, STATE_FILE)
def mock_write_state(start_time):
    """Write a test state file to STATE_FILE with start_time recorded as
    the GEM_BOOKMARK 'last_record' value."""
    state_content = {
        'bookmarks': {
            composable.GEM_BOOKMARK: {'last_record': start_time},
        },
    }
    mc.write_as_yaml(state_content, STATE_FILE)
def _write_state(start_time):
    """Replace STATE_FILE with a fresh state file whose TEST_BOOKMARK
    'last_record' is start_time."""
    # remove any stale state from a previous test run first
    if os.path.exists(STATE_FILE):
        os.unlink(STATE_FILE)
    state_content = {
        'bookmarks': {
            TEST_BOOKMARK: {'last_record': start_time},
        },
    }
    mc.write_as_yaml(state_content, STATE_FILE)
def _write_state(start_time_str):
    """Write a NEOSSat test state file to STATE_FILE, with a NEOS_BOOKMARK
    'last_record' parsed from start_time_str plus a NEOS_CONTEXT listing."""
    last_record = datetime.strptime(start_time_str, mc.ISO_8601_FORMAT)
    state_content = {
        'bookmarks': {
            NEOS_BOOKMARK: {'last_record': last_record},
        },
        'context': {
            scrape.NEOS_CONTEXT: ['NEOSS', '2017', '2018', '2019'],
        },
    }
    mc.write_as_yaml(state_content, STATE_FILE)
def read_file_url_list_from_nrao(nrao_state_fqn):
    """
    :param nrao_state_fqn: str cache file name
    :return: result dict key is file_name, value is timestamp from NRAO
        site of file
        validate_dict key is file_name, value is NRAO URL of file
    """
    if not os.path.exists(nrao_state_fqn):
        # no cached listing yet - scrape the NRAO site from the earliest
        # date of interest and cache the outcome for later runs
        start_date = scrape.make_date_time('01Jan1990 00:00')
        vlass_list = scrape.build_url_list(start_date)
        mc.write_as_yaml(vlass_list, nrao_state_fqn)
    else:
        vlass_list = mc.read_as_yaml(nrao_state_fqn)
    return get_file_url_list_max_versions(vlass_list)
def read_list_from_nrao(nrao_state_fqn):
    """Read (or build and cache) the NRAO file listing.

    :param nrao_state_fqn: str cache file name
    :return: two dicts keyed by file name - the first maps to the listing
        key (timestamp), the second to the NRAO URL of the file.
    """
    if os.path.exists(nrao_state_fqn):
        vlass_list = mc.read_as_yaml(nrao_state_fqn)
    else:
        # no cache yet - scrape the site and store the listing for reuse
        start_date = scrape.make_date_time('01Jan1990 00:00')
        vlass_list, vlass_date = scrape.build_file_url_list(start_date)
        mc.write_as_yaml(vlass_list, nrao_state_fqn)
    # flatten the {key: [url, ...]} listing; later duplicates of a file
    # name overwrite earlier ones, matching the original loop order
    flattened = [
        (url.split('/')[-1], key, url)
        for key, urls in vlass_list.items()
        for url in urls
    ]
    result = {f_name: key for f_name, key, _ in flattened}
    validate_dict = {f_name: url for f_name, _, url in flattened}
    return result, validate_dict
def mock_write_state2(prior_timestamp=None):
    """Write a test state file to STATE_FILE with a GEM_BOOKMARK
    'last_record' derived from prior_timestamp (default: 15 minutes ago).
    """
    # to ensure at least one spin through the execution loop, test case
    # must have a starting time greater than one config.interval prior
    # to 'now', default interval is 10 minutes
    if prior_timestamp is None:
        start_s = datetime.utcnow().timestamp() - 15 * 60
    else:
        start_s = mc.make_seconds(prior_timestamp)
    state_content = {
        'bookmarks': {
            data_source.GEM_BOOKMARK: {
                'last_record': datetime.fromtimestamp(start_s),
            },
        },
    }
    mc.write_as_yaml(state_content, STATE_FILE)
def _write_state(start_time_str):
    """Write a VLASS test state file to STATE_FILE: a 'vlass_timestamp'
    bookmark parsed from start_time_str, plus the per-epoch context."""
    state_content = {
        'bookmarks': {
            'vlass_timestamp': {
                'last_record': scrape.make_date_time(start_time_str),
            },
        },
        'context': {
            'vlass_context': {
                'VLASS1.1': '01-Jan-2018 00:00',
                'VLASS1.2': '01-Nov-2018 00:00',
                'VLASS2.1': '01-Jul-2020 00:00',
            },
        },
    }
    mc.write_as_yaml(state_content, STATE_FILE)
def _write_rejected(test_obs_id):
    """Record test_obs_id under the 'bad_metadata' reason in REJECTED_FILE."""
    write_as_yaml({'bad_metadata': [test_obs_id]}, REJECTED_FILE)
def test_run_by_incremental_reproduce(
    access_mock,
    query_mock,
    header_mock,
    data_client_mock,
    meta_client_mock,
    pi_mock,
    svo_mock,
    http_get_mock,
    reader_mock,
):
    """End-to-end check of the incremental (state-driven) run, reproducing
    a Gemini incremental query and verifying the repo calls it triggers."""
    # https://archive.gemini.edu/jsonsummary/canonical/NotFail/notengineering/
    # entrytimedaterange=
    # 2022-03-14T17:30:05.000006%202022-03-14T17:31:05.000006/
    # ?orderby=entrytime
    # get results
    query_mock.side_effect = gem_mocks.mock_query_endpoint_reproduce
    access_mock.return_value = 'https://localhost:2022'
    test_header = Header()
    test_header['INSTRUME'] = 'GMOS-S'
    header_mock.return_value = [test_header]
    data_client_mock.get_head.return_value = [test_header]
    meta_client_mock.read.return_value = None
    pi_mock.return_value = None
    svo_mock.return_value = None

    def _repo_create_mock(observation):
        # every created observation must carry exactly one plane with
        # exactly one artifact
        plane_count = 0
        artifact_count = 0
        for plane in observation.planes.values():
            plane_count += 1
            for _ in plane.artifacts.values():
                artifact_count += 1
        assert plane_count == 1, 'wrong plane count'
        assert artifact_count == 1, 'wrong artifact count'

    meta_client_mock.create = _repo_create_mock
    getcwd_orig = os.getcwd
    cwd = os.getcwd()
    with TemporaryDirectory() as tmp_dir_name:
        os.chdir(tmp_dir_name)
        test_config = Config()
        test_config.working_directory = tmp_dir_name
        test_config.logging_level = 'INFO'
        test_config.proxy_file_name = 'cadcproxy.pem'
        test_config.proxy_fqn = f'{tmp_dir_name}/cadcproxy.pem'
        test_config.state_file_name = 'state.yml'
        test_config.task_types = [TaskType.VISIT]
        test_config.features.supports_latest_client = True
        test_config.interval = 70
        Config.write_to_file(test_config)
        with open(test_config.proxy_fqn, 'w') as f:
            f.write('test content')
        test_bookmark = {
            'bookmarks': {
                GEM_BOOKMARK: {
                    'last_record': datetime.now() - timedelta(hours=1),
                },
            },
        }
        write_as_yaml(test_bookmark, test_config.state_fqn)
        os.getcwd = Mock(return_value=tmp_dir_name)
        try:
            # execution
            composable._run_state()
            assert meta_client_mock.read.called, 'should have been called'
            assert (
                meta_client_mock.read.call_count == 2
            ), f'wrong call count {meta_client_mock.read.call_count}'
            # assert_called_with raises on mismatch by itself; the original
            # appended ", 'wrong run args'" which only built a dead tuple
            meta_client_mock.read.assert_called_with(
                'GEMINI', 'GN-CAL20220314-18-090'
            )
            # these two checks were bare tuple expressions in the original,
            # so they never asserted anything - add the missing 'assert'
            assert reader_mock.called, 'reset called'
            assert reader_mock.call_count == 1, 'reset call count'
        finally:
            os.getcwd = getcwd_orig
            os.chdir(cwd)