def visit(observation, **kwargs):
    """
    Clean up the issue described here (multiple planes for the same photons):
    https://github.com/opencadc-metadata-curation/omm2caom2/issues/3

    When the observation has more than one plane, the plane owning the
    non-preview artifact with the most recent ``lastmod`` value reported by
    the CADC client is kept. Every other plane that has at least one
    non-preview artifact found in storage is removed from the observation,
    and ``_send_slack_message`` is called for each removed plane.

    :param observation: Observation instance to clean up; modified in place.
    :param kwargs: expected to carry 'cadc_client', an object whose
        ``info(uri)`` returns storage metadata for an artifact URI, or None
        when the file is not found. Without a client nothing is removed.
    :return: the (possibly modified) observation.
    """
    mc.check_param(observation, Observation)
    logging.info(f'Begin cleanup augmentation for '
                 f'{observation.observation_id}')
    cadc_client = kwargs.get('cadc_client')
    if cadc_client is None:
        logging.warning(
            'Stopping. Need a CADC Client for cleanup augmentation.')
    elif len(observation.planes) > 1:
        # from Daniel, Sylvie - 21-05-20
        # How to figure out which plane is newer:
        # SB - I do not think that we should use the “VERSION” keyword.
        # I think we must go with the ingested date.
        #
        # Daniel Durand
        # Might be better indeed. Need to compare the SCI and the REJECT
        # file and see which one is the latest
        latest_plane_id = None
        latest_timestamp = None
        superseded = set()
        for plane in observation.planes.values():
            for artifact in plane.artifacts.values():
                if OmmName.is_preview(artifact.uri):
                    continue
                meta = cadc_client.info(artifact.uri)
                if meta is None:
                    logging.warning(
                        f'Did not find {artifact.uri} in CADC storage.')
                    continue
                current_timestamp = mc.make_time(meta.lastmod)
                if latest_plane_id is None:
                    # First file found in storage: the starting candidate.
                    latest_plane_id = plane.product_id
                    latest_timestamp = current_timestamp
                elif current_timestamp > latest_timestamp:
                    # A newer file displaces the previous candidate.
                    superseded.add(latest_plane_id)
                    latest_plane_id = plane.product_id
                    latest_timestamp = current_timestamp
                else:
                    # Ties keep the plane that was seen first.
                    superseded.add(plane.product_id)

        # A plane with several non-preview artifacts can be recorded as
        # superseded by one of its own files. Never remove the plane that
        # holds the most recent file, otherwise the observation could be
        # left with no planes at all.
        superseded.discard(latest_plane_id)

        # Sorted so that removals and notifications happen in a stable order.
        for entry in sorted(superseded):
            logging.warning(f'Removing plane {entry} from observation '
                            f'{observation.observation_id}. There are '
                            f'duplicate photons.')
            observation.planes.pop(entry)
            _send_slack_message(entry)

    logging.info(f'Completed cleanup augmentation for '
                 f'{observation.observation_id}')
    return observation
def _update_release_date(plane, max_meta_release, headers): logging.debug(f'Begin _update_release_date for {plane.product_id}') if plane.meta_release is None: plane.meta_release = mc.make_time(_get_keyword(headers, 'DATE')) if plane.meta_release is None: plane.meta_release = mc.make_time( _get_keyword(headers, 'REL_DATE')) if plane.meta_release is not None: max_meta_release = max(max_meta_release, plane.meta_release) if plane.data_release is None and plane.meta_release is not None: plane.data_release = plane.meta_release logging.debug('End _update_release_date') return max_meta_release
def test_make_time():
    """Exercise mc.make_time with differently formatted timestamp strings."""
    # (input string, expected datetime) pairs, checked in this order.
    expectations = [
        ('2012-12-12T12:13:15', datetime(2012, 12, 12, 12, 13, 15)),
        # %b %d %H:%M
        # NOTE(review): the input carries no year, yet 2021 is expected -
        # confirm this does not depend on the date the test is run.
        ('Mar 12 12:12', datetime(2021, 3, 12, 12, 12)),
        # %Y-%m-%dHST%H:%M:%S
        # The expected value is ten hours later than the time in the string.
        ('2020-12-12HST12:12:12', datetime(2020, 12, 12, 22, 12, 12)),
    ]

    for text, expected in expectations:
        actual = mc.make_time(text)
        assert actual is not None, 'expect a result'
        assert isinstance(actual, datetime), 'wrong result type'
        assert actual == expected, f'wrong result {actual} want {expected}'
def get_time_box_work(self, prev_exec_time, exec_time):
    """
    Find the artifacts of single-artifact planes, in the configured
    collection, whose plane data release falls within a time window.

    :param prev_exec_time: start of the window (exclusive). It is handed to
        datetime.fromtimestamp, so it is a POSIX timestamp, not a datetime.
    :param exec_time: end of the window (inclusive), same representation.
    :return: a deque of dsc.StateRunnerMeta, one per query row, each built
        from the file name of the artifact URI and the artifact's
        lastModified value converted to a timestamp.
    """
    self._logger.debug('Entering get_time_box_work')

    # Both window bounds are rendered as UTC text, in a format like
    # 2019-12-01T00:00:00.000000
    window_start, window_end = (
        datetime.fromtimestamp(seconds, tz=timezone.utc).strftime(
            mc.ISO_8601_FORMAT
        )
        for seconds in (prev_exec_time, exec_time)
    )

    # The sub-select limits the result to planes with exactly one artifact.
    query = (
        f"SELECT A.uri, A.lastModified "
        f"FROM caom2.Observation AS O "
        f"JOIN caom2.Plane AS P ON O.obsID = P.obsID "
        f"JOIN caom2.Artifact AS A ON P.planeID = A.planeID "
        f"WHERE P.planeID IN ( "
        f" SELECT A.planeID "
        f" FROM caom2.Observation AS O "
        f" JOIN caom2.Plane AS P ON O.obsID = P.obsID "
        f" JOIN caom2.Artifact AS A ON P.planeID = A.planeID "
        f" WHERE O.collection = '{self._config.collection}' "
        f" GROUP BY A.planeID "
        f" HAVING COUNT(A.artifactID) = 1 ) "
        f"AND P.dataRelease > '{window_start}' "
        f"AND P.dataRelease <= '{window_end}' "
        f"ORDER BY O.maxLastModified ASC "
        ""
    )
    rows = clc.query_tap_client(query, self._query_client)

    # results look like:
    # gemini:GEM/N20191202S0125.fits, ISO 8601
    return deque(
        dsc.StateRunnerMeta(
            mc.CaomName(row['uri']).file_name,
            mc.make_time(row['lastModified']).timestamp(),
        )
        for row in rows
    )
def get_data_release_date(self, ext):
    """Use the 'DATE' keyword for the release date, if the 'RELEASE' keyword
    does not exist.

    Called to fill a blueprint value, must have a parameter named ext for
    import_module loading and execution.

    :param ext: index into self._headers of the header to read.
    :return: the 'RELEASE' value as found in the header when it is present.
        Otherwise, for science observations with a 'DATE' value, that date
        plus two years as returned by mc.make_time. Otherwise the 'DATE'
        value as found in the header, which is None when that keyword is
        missing as well.
    """
    header = self._headers[ext]
    rel_date = header.get('RELEASE')
    if rel_date is not None:
        return rel_date

    rel_date = header.get('DATE')
    intent = self.get_obs_intent(ext)
    if rel_date is None or intent is not ObservationIntentType.SCIENCE:
        return rel_date

    # DD, SB - slack - 19-03-20 - if release is not in the header
    # observation date plus two years. This only applies to
    # science observations.
    temp = mc.make_time(rel_date)
    release_year = temp.year + 2
    try:
        return temp.replace(year=release_year)
    except ValueError:
        # 29 February never exists two years after a leap year, so clamp
        # to the last day of February instead of failing.
        return temp.replace(year=release_year, day=28)
def build_observation(db_content, observation, md_name):
    """
    Add a calibrated visibility plane, with a single tar artifact, to an
    observation, creating the observation first when none is given.

    :param db_content: table-like content; the 'Release date' column is
        read at the row index returned by _get_index.
    :param observation: existing observation to extend, or None to have one
        built by _build_obs.
    :param md_name: name handed to read_md_pk to load the override
        metadata, which supplies 'fqn' among other values.
    :return: the observation with the new plane added.
    :raises mc.CadcException: when there is no release date for the file.
    """
    override = read_md_pk(md_name)
    fqn = override.get('fqn')
    almaca_name = AlmacaName(fname_on_disk=fqn)
    field_index = _get_index(almaca_name, db_content)

    if observation is None:
        observation = _build_obs(override, db_content, fqn, field_index,
                                 almaca_name, md_name)

    provenance = get_provenance(almaca_name)
    # TODO hard-coded
    provenance.inputs.add(
        PlaneURI('caom:ALMA/A001_X88b_X23/A001_X88b_X23-raw'))

    # HK 07-02-20
    # I'm looking at the very first entry, A002_Xb999fd_X602.SCI.J1851+0035.
    # The time bounds listed under all of the second-level planes correspond
    # to a date of Oct 20, 2016, which agrees with the observing date I pull
    # up on listobs. But in the top level plane, the metaRelease date is
    # listed as Oct 12, 2016. As we discussed earlier this week, it doesn't
    # make sense to have the meta data released before the observation was
    # even taken. Using the 'end time' of the observation that's already
    # pulled for a lower plane, and putting that as the metaRelease date in
    # the top level would be a good solution. NB: since all spws are observed
    # simultaneously, you'll get the same answer for whichever of the
    # [high/low]res_spw[X] entries that you pull the information from.
    input_meta_data = read_md_pk(almaca_name.input_ms_metadata)
    # 'end_date' is interpreted as an MJD value and re-expressed as an ISO
    # string before being turned into a datetime.
    meta_release = mc.to_float(input_meta_data.get('end_date'))
    meta_release = time.Time(meta_release, format='mjd')
    meta_release.format = 'isot'
    meta_release_dt = mc.make_time(meta_release.value)

    release_date = db_content['Release date'][field_index]
    if release_date is None:
        raise mc.CadcException(f'No release date for {fqn}')
    release_date = time.Time(release_date).to_datetime()

    # Routine progress message, so it is reported at INFO rather than ERROR.
    logging.info(
        f'Add plane {almaca_name.product_id} to {almaca_name.obs_id}')
    plane = Plane(product_id=almaca_name.product_id,
                  data_release=release_date,
                  meta_release=meta_release_dt,
                  provenance=provenance)

    plane.position = build_position(db_content, field_index, md_name)
    plane.energy = build_energy(override)
    plane.polarization = None
    plane.time = build_time(override, almaca_name)

    # HK 14-08-2019
    # dataProductType should be 'visibility'
    plane.data_product_type = DataProductType.VISIBILITY
    plane.calibration_level = CalibrationLevel.CALIBRATED

    observation.planes.add(plane)
    observation.meta_release = plane.meta_release
    # TODO hard-coded
    observation.members.add(ObservationURI('caom:ALMA/A001_X88b_X23'))

    # HK 29-07-19
    # qa/ contains images, plots, and web page status views generated
    # during the original (non-CANFAR) calibration of the raw data.
    # We may want to consider retaining these files as well, as they give
    # a more advanced user an easier way to check on data quality,
    # potential issues with calibration, etc. I believe they come
    # packaged with the rest of the 'products' tarball on the archive,
    # so they would be obtainable even if we do not keep a copy. These
    # files are fairly small.

    # TODO override.get('artifact_uri')
    artifact = Artifact(uri=almaca_name.uri,
                        product_type=almaca_name.intent,
                        release_type=ReleaseType.DATA,
                        content_type='application/x-tar',
                        content_length=None)
    plane.artifacts.add(artifact)
    return observation