def test_make_seconds():
    t1 = '2017-06-26T17:07:21.527+00'
    t1_dt = mc.make_seconds(t1)
    assert t1_dt is not None, 'expect a result'
    assert t1_dt == 1498496841.527, 'wrong result'
    t2 = '2017-07-26T17:07:21.527'
    t2_dt = mc.make_seconds(t2)
    assert t2_dt is not None, 'expect a result'
    assert t2_dt == 1501088841.527, 'wrong result'
    t3 = '16-Jul-2019 09:08'
    t3_dt = mc.make_seconds(t3)
    assert t3_dt is not None, 'expect a result'
    assert t3_dt == 1563268080.0, 'wrong result'
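
# For context, a minimal sketch of what mc.make_seconds could look like to
# satisfy the assertions above. This is an assumption, not the actual
# caom2pipe implementation: it parses the timestamp formats the test
# exercises with dateutil and treats timezone-naive values as UTC, which
# is what the expected epoch values imply.
from datetime import timezone

from dateutil import parser


def make_seconds_sketch(value):
    """Convert a timestamp string to float seconds since the epoch,
    treating naive timestamps as UTC."""
    dt = parser.parse(value)
    if dt.tzinfo is None:
        # naive timestamps are interpreted as UTC
        dt = dt.replace(tzinfo=timezone.utc)
    return dt.timestamp()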
def list_for_validate(config):
    """
    :return: A tuple of two dicts. In the first, keys are file names
        available from the CSA Open Data ftp site, and values are the
        timestamps for the files at the CSA site. In the second, keys
        are the same file names, and values are the fully-qualified
        names at the CSA site, suitable for providing 'pull' task type
        content for a todo file.
    """
    list_fqn = os.path.join(config.working_directory, NEOSSAT_SOURCE_LIST)
    if os.path.exists(list_fqn):
        logging.debug(f'Retrieve content from existing file {list_fqn}')
        temp = mc.read_as_yaml(list_fqn)
        # 0 - False indicates a file, True indicates a directory
        # 1 - timestamp
        cached = {key: [False, value] for key, value in temp.items()}
    else:
        # cached will be empty if there's no cache
        cached = _read_cache(config.working_directory)
    ts_s = mc.make_seconds(NEOSSAT_START_DATE)
    temp, ignore_max_date = _append_source_listing(
        ts_s, config.working_directory, cached)
    mc.write_as_yaml(temp, list_fqn)
    # remove the fully-qualified path names from the validator list
    # while creating a dictionary where the file name is the key, and the
    # fully-qualified file name at the FTP site is the value
    validator_list = {ii.split('/')[-1]: ii for ii in temp}
    result = {ii.split('/')[-1]: temp[ii] for ii in temp}
    return result, validator_list
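
# A minimal, self-contained demonstration of the reshaping at the end of
# list_for_validate; the path and timestamp below are made up for
# illustration.
def _demo_list_for_validate_reshape():
    temp = {'/astro/neossat/2019/NEOS_SCI_2019001.fits': 1563268080.0}
    validator_list = {ii.split('/')[-1]: ii for ii in temp}
    result = {ii.split('/')[-1]: temp[ii] for ii in temp}
    assert result == {'NEOS_SCI_2019001.fits': 1563268080.0}
    assert validator_list == {
        'NEOS_SCI_2019001.fits':
            '/astro/neossat/2019/NEOS_SCI_2019001.fits'
    }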
def _write_state(prior_timestamp=None, end_timestamp=None):
    # to ensure at least one spin through the execution loop, the test
    # case must have a starting time greater than one config.interval
    # prior to 'now'; the default interval is 10 minutes
    if prior_timestamp is None:
        prior_s = datetime.utcnow().timestamp() - 15 * 60
    else:
        if isinstance(prior_timestamp, float):
            prior_s = prior_timestamp
        else:
            prior_s = make_seconds(prior_timestamp)
    test_start_time = datetime.fromtimestamp(prior_s).isoformat()
    logging.error(f'test_start_time {test_start_time}')
    if end_timestamp is None:
        test_bookmark = {
            'bookmarks': {
                'gemini_timestamp': {
                    'last_record': test_start_time,
                },
            },
        }
    else:
        assert isinstance(end_timestamp, datetime), \
            'end_timestamp wrong type'
        test_bookmark = {
            'bookmarks': {
                'gemini_timestamp': {
                    'last_record': test_start_time,
                    'end_timestamp': end_timestamp,
                },
            },
        }
    write_as_yaml(test_bookmark, STATE_FILE)
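
# With the defaults, _write_state produces a STATE_FILE along these lines
# (a sketch; the exact last_record value depends on the wall clock at call
# time, and YAML quoting may vary):
#
# bookmarks:
#   gemini_timestamp:
#     last_record: '2019-07-16T08:53:00'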
def _initialize_content(self, fqn):
    """Initialize the internal data structures that represent the query
    list from the Gemini Science Archive.
    """
    result = self._read_file(fqn)
    # result row structure:
    # 0 = data label
    # 1 = timestamp
    # 2 = file name
    temp_content = {}
    logging.info('Progress - file read ....')
    for ii in result:
        # re-organize to be able to answer list_observations queries
        ol_key = mc.make_seconds(ii[1])
        if ol_key in temp_content:
            if ii[0] not in temp_content[ol_key]:
                temp_content[ol_key].append(ii[0])
        else:
            temp_content[ol_key] = [ii[0]]
        # re-organize to be able to answer get_observation queries
        self.id_list[ii[0]].append(ii[2])
        file_id = gem_name.GemName.remove_extensions(ii[2])
        self.name_list[file_id].append([ii[0], ol_key])
    # this structure means an observation ID occurs more than once with
    # different last modified times
    self.time_list = collections.OrderedDict(
        sorted(temp_content.items(), key=lambda t: t[0]))
    self.logger.info('Observation list initialized in memory.')
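
# A sketch of how the time_list ordering pays off for incremental queries:
# because the keys are epoch seconds sorted ascending, a "records since"
# lookup is a simple scan. _list_since is a hypothetical helper, not part
# of the original class.
def _list_since(time_list, start_s):
    """Return the data labels with a last-modified timestamp >= start_s."""
    results = []
    for ts, obs_ids in time_list.items():
        if ts >= start_s:
            results.extend(obs_ids)
    return results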
def get_provenance(almaca_name):
    # HK 14-08-19
    # provenance: version - capture the information on what version of
    # CASA was used to run the calibration script. We might appreciate
    # having that information saved later on (as might an advanced user).
    # This would be possible to capture from the 'casa[date].log' file
    # generated automatically during processing - the second line
    # includes 'CASA version XXX'.
    version_result = None
    last_result = None
    log_dir = almaca_name.log_dir
    logging.error(f'checking {log_dir}')
    if os.path.exists(log_dir):
        log_dir_contents = os.listdir(log_dir)
        for ii in log_dir_contents:
            if ii.startswith('casa-') and ii.endswith('.log'):
                log_fqn = os.path.join(log_dir, ii)
                if os.path.exists(log_fqn):
                    with open(log_fqn, 'r') as f:
                        temp = f.readlines()
                        for jj in temp:
                            if 'CASA Version' in jj:
                                version_result = \
                                    jj.split('CASA Version ')[1]
                    # get the timestamp from the file name, use it as
                    # the 'last_executed' value
                    temp = ii.replace('casa-', '').replace('.log', '')
                    last_result = datetime.fromtimestamp(
                        mc.make_seconds(temp))
    # TODO time.Time(override.get('casa_run_date')).datetime
    #
    # The rest of the MAG seemed less concerned about the various OUS IDs
    # being searchable within the archive. I think it would still be best
    # to include the information somewhere, just in case. My guess is
    # that the ASDM UID is the most important one to be searchable, and
    # that it would also be quite appropriate to be listed as the
    # 'reference' under 'provenance'. (It might even eventually be linked
    # directly to the associated raw data file.) The rest of the
    # science/group/member OUS IDs could perhaps be listed within the
    # keywords section like this:
    #
    # ScienceGoalOUSID: [ugly string]; GroupOUSID: [ugly string#2];
    # MemberOUSID: [ugly string#3] (or whatever formatting will work
    # within the keyword field).
    provenance = Provenance(name='CASA',
                            version=version_result,
                            last_executed=last_result,
                            reference='https://casa.nrao.edu/')
    provenance.keywords.add(
        f'ScienceGoalOUSID: {almaca_name._science_goal_id}')
    provenance.keywords.add(f'GroupOUSID: {almaca_name._group_id}')
    provenance.keywords.add(f'MemberOUSID: {almaca_name._mous_id}')
    provenance.keywords.add(f'ASDM ID: {almaca_name._asdm_id}')
    return provenance
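
# A self-contained check of the version-extraction logic above, with a
# made-up log line of the form HK's comment describes. Note that the
# newline survives the split, so callers may want to strip the result
# before storing it.
def _demo_casa_version_parse():
    line = 'CASA Version 5.4.0-70\n'
    version = line.split('CASA Version ')[1]
    # the trailing newline survives the split
    assert version == '5.4.0-70\n'
    assert version.strip() == '5.4.0-70'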
def _update_cache(self, file_id, obs_id, dt_str):
    dt_s = mc.make_seconds(dt_str)
    # name_list:
    # key is file_id
    # value is an array, with contents:
    # 0 - data label / observation ID
    # 1 - timestamp
    self.name_list[file_id].append([obs_id, dt_s])
    repaired_obs_id = repair_data_label(file_id, obs_id)
    return repaired_obs_id
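
# The append-on-missing-key usage here (and in _initialize_content above)
# implies name_list is a collections.defaultdict(list); that initializer
# is an assumption, since the class constructor is not shown. A minimal
# demonstration with made-up identifiers:
import collections


def _demo_name_list():
    name_list = collections.defaultdict(list)
    name_list['N20190716S0001'].append(['GN-2019B-Q-1-1', 1563268080.0])
    assert name_list['N20190716S0001'] == [['GN-2019B-Q-1-1', 1563268080.0]]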
def mock_write_state2(prior_timestamp=None):
    # to ensure at least one spin through the execution loop, the test
    # case must have a starting time greater than one config.interval
    # prior to 'now'; the default interval is 10 minutes
    if prior_timestamp is None:
        prior_s = datetime.utcnow().timestamp() - 15 * 60
    else:
        prior_s = mc.make_seconds(prior_timestamp)
    test_start_time = datetime.fromtimestamp(prior_s)
    test_bookmark = {
        'bookmarks': {
            data_source.GEM_BOOKMARK: {
                'last_record': test_start_time,
            },
        },
    }
    mc.write_as_yaml(test_bookmark, STATE_FILE)
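
# Note the difference from _write_state above: this mock stores
# last_record as a datetime rather than an ISO-8601 string. A quick
# sanity check of the round trip through YAML, assuming STATE_FILE is
# writable in the test environment:
def _demo_state_round_trip():
    mock_write_state2('2019-07-16T09:08:00')
    state = mc.read_as_yaml(STATE_FILE)
    assert data_source.GEM_BOOKMARK in state['bookmarks']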