Example #1
0
 def get_time_box_work(self, prev_exec_time, exec_time):
     """
     Collect the entries under /caom2pipe_test whose modification time
     falls within the time-box.

     :param prev_exec_time start of the time-box, inclusive, compared
         against os.stat st_mtime values
     :param exec_time end of the time-box, inclusive, compared against
         os.stat st_mtime values
     :return: a list of StateRunnerMeta instances, one per matching entry,
         each holding the entry path and its modification time
     """
     found = []
     for path in glob.glob('/caom2pipe_test/*'):
         modified = os.stat(path).st_mtime
         if not (prev_exec_time <= modified <= exec_time):
             # modified outside of the time-box
             continue
         found.append(dsc.StateRunnerMeta(path, modified))
     return found
Example #2
0
    def get_time_box_work(self, prev_exec_time, exec_time):
        """
        Query archive.gemini.edu for the JSON summary records with an
        entrytime in the time-box, ordered by entrytime.

        :param prev_exec_time float timestamp start of the time-boxed chunk
        :param exec_time float timestamp end of the time-boxed chunk
        :return: a deque of StateRunnerMeta instances, one per record
            returned by the query, each built from the record's 'name' and
            the timestamp of its 'entrytime'. The deque is empty when the
            query could not be made, or when it returned no records.
        """

        self._logger.debug(f'Begin get_time_box_work from {prev_exec_time} to '
                           f'{exec_time}.')
        # datetime format 2019-12-01T00:00:00.000000
        prev_dt_str = mc.make_time_tz(prev_exec_time).strftime(
            mc.ISO_8601_FORMAT)
        exec_dt_str = mc.make_time_tz(exec_time).strftime(mc.ISO_8601_FORMAT)
        url = f'https://archive.gemini.edu/jsonsummary/canonical/' \
              f'NotFail/notengineering/' \
              f'entrytimedaterange={prev_dt_str}%20{exec_dt_str}/' \
              f'?orderby=entrytime'

        # needs to be ordered by timestamps when processed
        self._logger.info(f'Querying {url}')
        entries = deque()
        response = None
        try:
            response = mc.query_endpoint(url)
            if response is None:
                # report through the instance logger, like every other
                # message in this method
                self._logger.warning(f'Could not query {url}.')
            else:
                metadata = response.json()
                if metadata:
                    for entry in metadata:
                        file_name = entry.get('name')
                        entrytime = mc.make_time_tz(entry.get('entrytime'))
                        entries.append(
                            dsc.StateRunnerMeta(file_name,
                                                entrytime.timestamp()))
                elif metadata is not None:
                    self._logger.warning(f'No query results returned for '
                                         f'interval from {prev_exec_time} '
                                         f'to {exec_time}.')
        finally:
            # the only place the response is released, whether or not the
            # JSON handling above succeeded
            if response is not None:
                response.close()
        if len(entries) == 10000:
            # NOTE(review): 10000 looks like the maximum number of records
            # a single query returns, so the time-box may hold more records
            # than were retrieved - confirm against the archive service.
            self._max_records_encountered = True
            self._encounter_start = prev_exec_time
            self._encounter_end = exec_time
        self._logger.debug('End get_time_box_work.')
        return entries
Example #3
0
    def get_time_box_work(self, prev_exec_time, exec_time):
        """
        Select the to-do list entries that fall within the time-box. The
        to-do list is keyed by timestamp, and each value is the collection
        of entries for that timestamp.

        :param prev_exec_time timestamp start of the time-box, exclusive
        :param exec_time timestamp end of the time-box, inclusive
        :return: a list of StateRunnerMeta instances, one per entry, each
            holding the entry and the timestamp it is listed under
        """
        selected = []
        for modified, names in self._todo_list.items():
            if not (prev_exec_time < modified <= exec_time):
                # listed outside of the time-box
                continue
            selected.extend(
                dsc.StateRunnerMeta(name, modified) for name in names
            )
        return selected
Example #4
0
    def get_time_box_work(self, prev_exec_time, exec_time):
        """
        Query for the artifacts of the configured collection's
        single-artifact planes, where the plane dataRelease falls within
        the time-box.

        :param prev_exec_time timestamp start of the time-box, exclusive,
            as accepted by datetime.fromtimestamp
        :param exec_time timestamp end of the time-box, inclusive, as
            accepted by datetime.fromtimestamp
        :return: a deque of StateRunnerMeta instances, one per query result
            row, each built from the file name of the row's 'uri' and the
            timestamp of the row's 'lastModified'
        """

        self._logger.debug('Entering get_time_box_work')
        # datetime format 2019-12-01T00:00:00.000000, always in UTC
        window_start, window_end = (
            datetime.fromtimestamp(moment, tz=timezone.utc).strftime(
                mc.ISO_8601_FORMAT
            )
            for moment in (prev_exec_time, exec_time)
        )
        # the sub-select limits the results to planes that have exactly one
        # artifact
        query = (
            "SELECT A.uri, A.lastModified "
            "FROM caom2.Observation AS O "
            "JOIN caom2.Plane AS P ON O.obsID = P.obsID "
            "JOIN caom2.Artifact AS A ON P.planeID = A.planeID "
            "WHERE P.planeID IN ( "
            "  SELECT A.planeID "
            "  FROM caom2.Observation AS O "
            "  JOIN caom2.Plane AS P ON O.obsID = P.obsID "
            "  JOIN caom2.Artifact AS A ON P.planeID = A.planeID "
            f"  WHERE O.collection = '{self._config.collection}' "
            "  GROUP BY A.planeID "
            "  HAVING COUNT(A.artifactID) = 1 ) "
            f"AND P.dataRelease > '{window_start}' "
            f"AND P.dataRelease <= '{window_end}' "
            "ORDER BY O.maxLastModified ASC "
        )
        rows = clc.query_tap_client(query, self._query_client)
        # results look like:
        # gemini:GEM/N20191202S0125.fits, ISO 8601
        return deque(
            dsc.StateRunnerMeta(
                mc.CaomName(row['uri']).file_name,
                mc.make_time(row['lastModified']).timestamp(),
            )
            for row in rows
        )
Example #5
0
    def get_time_box_work(self, prev_exec_time, exec_time):
        """
        Select the to-do list entries that fall within the time-box. The
        to-do list is a dict, where the keys are the entries to retrieve,
        and the values are the timestamps associated with the respective
        entries.

        :param prev_exec_time start of the time-box, exclusive, comparable
            with the to-do list timestamps
        :param exec_time end of the time-box, inclusive, comparable with
            the to-do list timestamps
        :return: a list of StateRunnerMeta instances, one per selected
            entry, each holding the entry and its timestamp
        """
        self._logger.debug('Entering get_time_box_work')

        return [
            dsc.StateRunnerMeta(name, modified)
            for name, modified in self._todo_list.items()
            if prev_exec_time < modified <= exec_time
        ]