Esempio n. 1
0
    def get_time_box_work(self, prev_exec_time, exec_time):
        """
        Get a set of file names from an archive. Limit the entries by
        time-boxing on ingestDate, and don't include previews.

        :param prev_exec_time: float POSIX timestamp, start of the
            time-boxed chunk
        :param exec_time: float POSIX timestamp, end of the time-boxed
            chunk
        :return: a deque of StateRunnerMeta instances in the CADC storage
            system
        """
        # container timezone is UTC, ad timezone is Pacific
        db_fmt = '%Y-%m-%d %H:%M:%S.%f'
        # BUG FIX: the previous utcfromtimestamp(...).astimezone(...) form
        # built a *naive* datetime, which astimezone() interprets as the
        # host's local time - correct only when the host runs in UTC.
        # fromtimestamp(ts, tz=...) converts the POSIX timestamp directly
        # to the target zone, on any host.
        pacific = tz.gettz('US/Pacific')
        prev_exec_time_pz = datetime.fromtimestamp(
            prev_exec_time, tz=pacific).strftime(db_fmt)
        exec_time_pz = datetime.fromtimestamp(
            exec_time, tz=pacific).strftime(db_fmt)
        self._logger.debug('Begin get_work.')
        query = f"SELECT fileName, ingestDate FROM archive_files WHERE " \
                f"archiveName = '{self._config.archive}' " \
                f"AND fileName NOT LIKE '%{self._preview_suffix}' " \
                f"AND ingestDate > '{prev_exec_time_pz}' " \
                f"AND ingestDate <= '{exec_time_pz}' " \
                "ORDER BY ingestDate ASC "
        self._logger.debug(query)
        rows = mc.query_tap_client(query, self._client)
        result = deque()
        for row in rows:
            result.append(StateRunnerMeta(row['fileName'], row['ingestDate']))
        return result
Esempio n. 2
0
    def get_time_box_work(self, prev_exec_time, exec_time):
        """
        Get the files, for single-Artifact planes only, whose dataRelease
        falls inside the time-boxed interval.

        :param prev_exec_time: float POSIX timestamp, start of the
            time-boxed chunk (the previous docstring said datetime, but the
            value is passed to datetime.fromtimestamp)
        :param exec_time: float POSIX timestamp, end of the time-boxed chunk
        :return: a deque of dsc.StateRunnerMeta instances - file names with
            the time they were modified in /ams
        """

        self._logger.debug('Entering get_time_box_work')
        # datetime format 2019-12-01T00:00:00.000000
        prev_dt_str = datetime.fromtimestamp(
            prev_exec_time, tz=timezone.utc).strftime(mc.ISO_8601_FORMAT)
        exec_dt_str = datetime.fromtimestamp(
            exec_time, tz=timezone.utc).strftime(mc.ISO_8601_FORMAT)
        # the sub-query restricts results to planes with exactly one
        # Artifact; the outer query time-boxes those planes on dataRelease
        query = f"SELECT A.uri, A.lastModified " \
                f"FROM caom2.Observation AS O " \
                f"JOIN caom2.Plane AS P ON O.obsID = P.obsID " \
                f"JOIN caom2.Artifact AS A ON P.planeID = A.planeID " \
                f"WHERE P.planeID IN ( " \
                f"  SELECT A.planeID " \
                f"  FROM caom2.Observation AS O " \
                f"  JOIN caom2.Plane AS P ON O.obsID = P.obsID " \
                f"  JOIN caom2.Artifact AS A ON P.planeID = A.planeID " \
                f"  WHERE O.collection = '{self._config.collection}' " \
                f"  GROUP BY A.planeID " \
                f"  HAVING COUNT(A.artifactID) = 1 ) " \
                f"AND P.dataRelease > '{prev_dt_str}' " \
                f"AND P.dataRelease <= '{exec_dt_str}' " \
                f"ORDER BY O.maxLastModified ASC "
        result = mc.query_tap_client(query, self._client)
        # results look like:
        # gemini:GEM/N20191202S0125.fits, ISO 8601

        entries = deque()
        for row in result:
            entries.append(
                dsc.StateRunnerMeta(
                    mc.CaomName(row['uri']).file_name,
                    mc.make_time(row['lastModified']).timestamp()))
        return entries
def get_obs_id_from_cadc(file_id, tap_client, collection='GEMINI',
                         update_cache=None):
    """Resolve a file ID to its CAOM observation ID via a TAP query.

    :param file_id: file identifier to resolve; converted to a file name
        for the URI match
    :param tap_client: CadcTapClient - used to execute the query
    :param collection: str CAOM collection that scopes the query
    :param update_cache: optional callable, invoked with
        (file_id, observation_id, lastModified) when exactly one row matches
    :return: the observation ID string, or None when the query does not
        return exactly one row
    """
    logging.debug(f'Begin get_obs_id_from_cadc for {file_id}')
    file_name = gem_name.GemName.get_file_name_from(file_id)
    query_string = f"""
    SELECT O.observationID, A.lastModified
    FROM caom2.Observation AS O
    JOIN caom2.Plane AS P on P.obsID = O.obsID
    JOIN caom2.Artifact AS A on A.planeID = P.planeID
    WHERE A.uri LIKE '%{file_name}' 
    AND O.collection = '{collection}'
    """
    rows = mc.query_tap_client(query_string, tap_client)
    obs_id = None
    # only a unique match is trusted; zero or multiple rows yield None
    if len(rows) == 1:
        obs_id = rows[0]['observationID']
        if update_cache is not None:
            update_cache(file_id, obs_id, rows[0]['lastModified'])
    logging.debug(f'End get_obs_id_from_cadc {obs_id}')
    return obs_id
Esempio n. 4
0
def get_obs_id_from_cadc(artifact_uri, tap_client):
    """
    Query CAOM using TAP for the observation ID, given an Artifact URI.

    :param artifact_uri: an Artifact URI that may or may not exist in CAOM
    :param tap_client: CadcTapClient - used to execute the query
    :return: string representing an observation ID, or None, if no entry
        is found.
    """
    logging.debug(f'Begin get_obs_id_from_cadc for {artifact_uri}')
    query_string = f"""
    SELECT DISTINCT O.observationID 
    FROM caom2.Observation AS O
    JOIN caom2.Plane AS P on P.obsID = O.obsID
    JOIN caom2.Artifact AS A on A.planeID = P.planeID
    WHERE A.uri = '{artifact_uri}'
    """
    rows = mc.query_tap_client(query_string, tap_client)
    # take the first row when any match exists; SELECT DISTINCT keeps
    # duplicate observationID values out of the result
    obs_id = rows[0]['observationID'] if len(rows) >= 1 else None
    logging.debug(f'End get_obs_id_from_cadc {obs_id}')
    return obs_id
Esempio n. 5
0
def build_temporal_wcs_bounds(tap_client, plane, collection):
    """Assemble a bounds/sample time definition, based on the inputs
    identified in the header.

    :param tap_client: CadcTapClient for querying existing CAOM records for
        time metadata
    :param plane: CAOM Plane instance, contains input metadata in the
        provenance.inputs list, and chunks will be updated with the resulting
        temporal WCS information
    :param collection: str to scope the query
    """
    logging.debug('Begin build_temporal_wcs_bounds.')
    # 'plane_input' - the original loop variable shadowed the builtin
    # 'input'
    product_ids = [
        plane_input.get_product_id()
        for plane_input in plane.provenance.inputs]
    logging.info(f'Finding temporal inputs for {len(product_ids)} inputs.')

    inputs = []
    # this query makes the assumption that there's a remarkable resemblance
    # between product_id values, and file names, minus all the extensions
    for product_id in product_ids:
        query_string = f"""
        SELECT C.time_axis_function_refCoord_val AS val,
               C.time_axis_function_delta AS delta,
               C.time_axis_axis_cunit AS cunit,
               C.time_axis_function_naxis AS naxis
        FROM caom2.Observation AS O
        JOIN caom2.Plane AS P on P.obsID = O.obsID
        JOIN caom2.Artifact AS A on P.planeID = A.planeID
        JOIN caom2.Part AS PT on A.artifactID = PT.artifactID
        JOIN caom2.Chunk AS C on PT.partID = C.partID
        WHERE A.uri like '%{product_id}%'
        AND O.collection = '{collection}'
        """

        table_result = mc.query_tap_client(query_string, tap_client)
        if len(table_result) > 0:
            for row in table_result:
                # only day-unit, single-axis time functions are usable here
                if row['cunit'] == 'd' and row['naxis'] == 1:
                    inputs.append([row['val'], row['delta']])
                else:
                    logging.warning(f'Could not make use of values for '
                                    f'{product_id}.NAXISi is {row["naxis"]} '
                                    f'and CUNITi is {row["cunit"]}')
        else:
            logging.warning(f'No CAOM record for {product_id} found at CADC.')
    logging.info(f'Building temporal bounds for {len(inputs)} inputs.')

    # fold each [start, delta] pair into a single accumulated TemporalWCS
    temporal_wcs = None
    for ip in inputs:
        temporal_wcs = build_temporal_wcs_append_sample(temporal_wcs,
                                                        lower=ip[0],
                                                        upper=(ip[0] + ip[1]))

    # attach the accumulated WCS to every chunk of every SCIENCE part
    for artifact in plane.artifacts.values():
        for part in artifact.parts.values():
            if part.product_type == ProductType.SCIENCE:
                for chunk in part.chunks:
                    logging.debug(f'Adding TemporalWCS to chunks in artifact '
                                  f'{artifact.uri}, part {part.name}.')
                    chunk.time = temporal_wcs
    logging.debug('End build_temporal_wcs_bounds.')