Beispiel #1
0
    def _get_observations(self,
                          collection,
                          start=None,
                          end=None,
                          obs_file=None):
        """
        Request one listing of observations for a collection and return the
        observation ids found in it.

        The request is sent through ``self._repo_client.get`` with MAXREC set
        to BATCH_SIZE, plus START/END when the matching argument is given.
        Each line of the response text is split on tab characters: lines with
        at least three columns contribute their second column (the
        observation id); shorter lines are logged as incomplete and skipped.

        Side effect: when at least one complete line was read, the third
        column of the last complete line is parsed with ``util.str2ivoa`` and
        stored in ``self._start``.

        :param collection: name of the collection (must not be None)
        :param start: earliest observation; sent as START when not None
        :param end: latest observation; sent as END when not None
        :param obs_file: not used by this method
        :return: list of observation ids, in the order they were listed
        """
        assert collection is not None
        params = {'MAXREC': BATCH_SIZE}
        if start is not None:
            params['START'] = util.date2ivoa(start)
        if end is not None:
            params['END'] = util.date2ivoa(end)

        response = self._repo_client.get(
            (CAOM2REPO_OBS_CAPABILITY_ID, collection), params=params)

        observations = []
        last_datetime = None
        for line in response.text.splitlines():
            columns = line.split('\t')
            if len(columns) < 3:
                # Logger.warn is a deprecated alias; warning() is the
                # supported spelling
                self.logger.warning(
                    'Incomplete listing line: {}'.format(line))
                continue
            observations.append(columns[1])
            last_datetime = columns[2]
        if last_datetime is not None:
            self._start = util.str2ivoa(last_datetime)
        return observations
Beispiel #2
0
    def _get_obs_from_file(self, obs_file, start, end, halt_on_error):
        """
        Collect observation ids from an iterable of text lines.

        Every line is split on whitespace; blank lines are ignored. The
        remaining lines are handled according to their number of tokens:

        * one token: the observation id, always accepted;
        * two tokens: observation id followed by its last modification
          date. The id is accepted unless the date is before ``start`` or
          after ``end`` (a falsy bound is not applied); rejected ids are
          logged at info level;
        * a date that ``util.str2ivoa`` cannot parse, or more than two
          tokens: the line is a failure, logged at error level.

        :param obs_file: iterable yielding lines, e.g. an open text file
        :param start: lower bound for the last modification date, or None
        :param end: upper bound for the last modification date, or None
        :param halt_on_error: when true, the first failure is re-raised
            after being logged; otherwise processing continues with the
            next line
        :return: list of accepted observation ids, in file order
        :raises Exception: the first failure, only if halt_on_error is true
        """
        accepted = []
        for line in obs_file:
            tokens = line.split()
            if not tokens:
                # blank line
                continue
            obs_id = tokens[0]
            if len(tokens) == 1:
                # line contains the observation id only
                accepted.append(obs_id)
                continue
            try:
                # the date is parsed before the token count is checked, so a
                # bad date is reported in preference to an extra token
                last_mod_datetime = util.str2ivoa(tokens[1])
                if len(tokens) > 2:
                    raise Exception('Extra token one line: {}'.format(line))
                if (start and last_mod_datetime < start) or \
                        (end and last_mod_datetime > end):
                    self.logger.info(
                        'last modified date is out of start/end range: {}'
                        .format(line))
                else:
                    accepted.append(obs_id)
            except Exception as e:
                self.logger.error('FAILED {} - Reason: {}'.format(obs_id, e))
                if halt_on_error:
                    raise
        return accepted
Beispiel #3
0
    def test_get_obs_from_file(self):
        """
        Run CAOM2RepoClient._get_obs_from_file on data/obs_id.txt with
        different start/end limits and compare the leading ids it returns;
        then confirm that data/obs_id_error.txt raises when halt_on_error
        is True.
        """
        client = CAOM2RepoClient(auth.Subject(), logging.DEBUG)
        listing = os.path.join(THIS_DIR, 'data/obs_id.txt')

        def check_leading_ids(actual, *expected):
            # position-by-position comparison of the first returned ids
            for position, obs_id in enumerate(expected):
                self.assertEqual(obs_id, actual[position])

        # neither limit given
        with open(listing) as obs_file:
            ids = client._get_obs_from_file(obs_file, None, None, False)
            check_leading_ids(ids, 'obs_id_1', 'obs_id_2', 'obs_id_3')

        # start limit only
        with open(listing) as obs_file:
            lower = util.str2ivoa('2000-10-11T12:30:00.333')
            ids = client._get_obs_from_file(obs_file, lower, None, False)
            check_leading_ids(ids, 'obs_id_1')

        # start limit and an end limit in 2016
        with open(listing) as obs_file:
            lower = util.str2ivoa('2000-10-9T12:30:00.333')
            upper = util.str2ivoa('2016-10-11T12:30:00.333')
            ids = client._get_obs_from_file(obs_file, lower, upper, False)
            check_leading_ids(ids, 'obs_id_1', 'obs_id_2')

        # same start limit, end limit moved to 2017
        with open(listing) as obs_file:
            lower = util.str2ivoa('2000-10-9T12:30:00.333')
            upper = util.str2ivoa('2017-10-11T12:30:00.333')
            ids = client._get_obs_from_file(obs_file, lower, upper, False)
            check_leading_ids(ids, 'obs_id_1', 'obs_id_2', 'obs_id_3')

        # listing with errors, halting on the first one
        broken = os.path.join(THIS_DIR, 'data/obs_id_error.txt')
        with open(broken) as obs_file:
            with self.assertRaises(Exception):
                lower = util.str2ivoa('2000-10-9T12:30:00.333')
                upper = util.str2ivoa('2016-10-11T12:30:00.333')
                ids = client._get_obs_from_file(obs_file, lower, upper, True)
Beispiel #4
0
    def test_get_obs_from_file(self):
        """
        Check the ids returned by CAOM2RepoClient._get_obs_from_file for the
        data/obs_id.txt listing under several start/end limits, and that
        data/obs_id_error.txt raises when halt_on_error is True.

        Uses assertEqual throughout: assertEquals is a deprecated alias that
        no longer exists in recent unittest versions.
        """
        level = logging.DEBUG
        visitor = CAOM2RepoClient(auth.Subject(), level)

        # no start or end
        with open(os.path.join(THIS_DIR, 'data/obs_id.txt')) as obs_file:
            obs_id_list = visitor._get_obs_from_file(obs_file, None, None,
                                                     False)
            self.assertEqual('obs_id_1', obs_id_list[0])
            self.assertEqual('obs_id_2', obs_id_list[1])
            self.assertEqual('obs_id_3', obs_id_list[2])

        # start limit only
        with open(os.path.join(THIS_DIR, 'data/obs_id.txt')) as obs_file:
            obs_id_list = visitor._get_obs_from_file(obs_file, util.str2ivoa(
                '2000-10-11T12:30:00.333'), None, False)
            self.assertEqual('obs_id_1', obs_id_list[0])

        # start limit and an end limit in 2016
        with open(os.path.join(THIS_DIR, 'data/obs_id.txt')) as obs_file:
            obs_id_list = visitor._get_obs_from_file(
                obs_file, util.str2ivoa('2000-10-9T12:30:00.333'),
                util.str2ivoa('2016-10-11T12:30:00.333'), False)
            self.assertEqual('obs_id_1', obs_id_list[0])
            self.assertEqual('obs_id_2', obs_id_list[1])

        # same start limit, end limit moved to 2017
        with open(os.path.join(THIS_DIR, 'data/obs_id.txt')) as obs_file:
            obs_id_list = visitor._get_obs_from_file(
                obs_file, util.str2ivoa('2000-10-9T12:30:00.333'),
                util.str2ivoa('2017-10-11T12:30:00.333'), False)
            self.assertEqual('obs_id_1', obs_id_list[0])
            self.assertEqual('obs_id_2', obs_id_list[1])
            self.assertEqual('obs_id_3', obs_id_list[2])

        # error in file: with halt_on_error=True the call must raise
        with open(os.path.join(THIS_DIR, 'data/obs_id_error.txt')) as obs_file:
            with self.assertRaises(Exception):
                visitor._get_obs_from_file(
                    obs_file, util.str2ivoa('2000-10-9T12:30:00.333'),
                    util.str2ivoa('2016-10-11T12:30:00.333'), True)
Beispiel #5
0
    def test_process(self):
        """
        Drive CAOM2RepoClient.visit with mocked repository access and check
        the (visited, updated, skipped, failed) lists it returns.

        Scenarios, in order: id batches served by a mocked
        _get_observations with passplugin.py; mixed outcomes with
        errorplugin.py; halt_on_error=True; and finally a mocked repository
        client, to check the START/END/MAXREC parameters of each listing
        request.
        """
        # core.BATCH_SIZE is module-level state: register a cleanup that
        # puts the current value back, so the override below does not leak
        # into other tests
        self.addCleanup(setattr, core, 'BATCH_SIZE', core.BATCH_SIZE)
        core.BATCH_SIZE = 3  # size of the batch is 3

        pass_plugin = os.path.join(THIS_DIR, 'passplugin.py')
        error_plugin = os.path.join(THIS_DIR, 'errorplugin.py')

        def make_observations(*obs_ids):
            # one SimpleObservation of collection TEST per requested id
            return [
                SimpleObservation(collection='TEST', observation_id=obs_id)
                for obs_id in obs_ids
            ]

        obs = [['a', 'b', 'c'], ['d'], []]
        level = logging.DEBUG
        visitor = CAOM2RepoClient(auth.Subject(), level)
        visitor.get_observation = MagicMock(return_value=MagicMock(
            spec=SimpleObservation))
        visitor.post_observation = MagicMock()
        visitor._get_observations = MagicMock(side_effect=obs)

        (visited, updated, skipped,
         failed) = visitor.visit(pass_plugin, 'cfht')
        self.assertEqual(4, len(visited))
        self.assertEqual(4, len(updated))
        self.assertEqual(0, len(skipped))
        self.assertEqual(0, len(failed))

        obs = [['a', 'b', 'c'], ['d', 'e', 'f'], []]
        visitor._get_observations = MagicMock(side_effect=obs)
        (visited, updated, skipped,
         failed) = visitor.visit(pass_plugin, 'cfht')
        self.assertEqual(6, len(visited))
        self.assertEqual(6, len(updated))
        self.assertEqual(0, len(skipped))
        self.assertEqual(0, len(failed))

        # make it return different status. errorplugin returns according to the
        # id of the observation: True for 'UPDATE', False for 'SKIP' and
        # raises exception for 'ERROR'
        obs_ids = [['UPDATE', 'SKIP', 'ERROR'], []]
        obs = make_observations('UPDATE', 'SKIP', 'ERROR')
        visitor._get_observations = MagicMock(side_effect=obs_ids)
        visitor.get_observation = MagicMock(side_effect=obs)
        (visited, updated, skipped,
         failed) = visitor.visit(error_plugin, 'cfht')
        self.assertEqual(3, len(visited))
        self.assertEqual(1, len(updated))
        self.assertEqual(1, len(skipped))
        self.assertEqual(1, len(failed))

        # repeat with other obs
        obs_ids = [['UPDATE', 'SKIP', 'ERROR'], ['UPDATE', 'SKIP']]
        obs = make_observations('UPDATE', 'SKIP', 'ERROR', 'UPDATE', 'SKIP')
        visitor._get_observations = MagicMock(side_effect=obs_ids)
        visitor.get_observation = MagicMock(side_effect=obs)
        (visited, updated, skipped,
         failed) = visitor.visit(error_plugin, 'cfht')
        self.assertEqual(5, len(visited))
        self.assertEqual(2, len(updated))
        self.assertEqual(2, len(skipped))
        self.assertEqual(1, len(failed))

        # repeat but halt on first ERROR -> process only 3 observations
        obs_ids = [['UPDATE', 'SKIP', 'ERROR'], ['UPDATE', 'SKIP']]
        obs = make_observations('UPDATE', 'SKIP', 'ERROR', 'UPDATE', 'SKIP')
        visitor._get_observations = MagicMock(side_effect=obs_ids)
        visitor.get_observation = MagicMock(side_effect=obs)
        with self.assertRaises(SystemError):
            visitor.visit(error_plugin, 'cfht', halt_on_error=True)

        # test with time boundaries
        core.BATCH_SIZE = 3  # size of the batch is 3
        response = MagicMock()
        response.text = """ARCHIVE\ta\t2011-01-01T11:00:00.000
                           ARCHIVE\tb\t211-01-01T11:00:10.000
                           ARCHIVE\tc\t2011-01-01T12:00:00.000"""
        response2 = MagicMock()
        response2.text = """ARCHIVE\td\t2011-02-02T11:00:00.000"""
        level = logging.DEBUG
        visitor = CAOM2RepoClient(auth.Subject(), level)
        visitor.get_observation = MagicMock(return_value=MagicMock(
            spec=SimpleObservation))
        visitor.post_observation = MagicMock()
        visitor._repo_client.get = MagicMock(side_effect=[response, response2])

        start = '2010-10-10T12:00:00.000'
        end = '2012-12-12T11:11:11.000'
        (visited, updated, skipped,
         failed) = visitor.visit(pass_plugin,
                                 'cfht',
                                 start=util.str2ivoa(start),
                                 end=util.str2ivoa(end))

        self.assertEqual(4, len(visited))
        self.assertEqual(4, len(updated))
        self.assertEqual(0, len(skipped))
        self.assertEqual(0, len(failed))
        calls = [
            call((core.CURRENT_CAOM2REPO_OBS_CAPABILITY_ID, 'cfht'),
                 params={
                     'START': start,
                     'END': end,
                     'MAXREC': 3
                 }),
            call(
                (core.CURRENT_CAOM2REPO_OBS_CAPABILITY_ID, 'cfht'),
                params={
                    'START': '2011-01-01T12:00:00.000',
                    # datetime of the last record in the batch
                    'END': end,
                    'MAXREC': 3
                })
        ]
        visitor._repo_client.get.assert_has_calls(calls)
Beispiel #6
0
    def test_process(self):
        """
        Drive CAOM2RepoClient.visit with mocked repository access and check
        the (visited, updated, skipped, failed) lists it returns.

        Scenarios, in order: id batches served by a mocked
        _get_observations with passplugin.py; mixed outcomes with
        errorplugin.py; halt_on_error=True; and finally a mocked repository
        client, to check the START/END/MAXREC parameters of each listing
        request.
        """
        # core.BATCH_SIZE is module-level state: register a cleanup that
        # puts the current value back, so the override below does not leak
        # into other tests
        self.addCleanup(setattr, core, 'BATCH_SIZE', core.BATCH_SIZE)
        core.BATCH_SIZE = 3  # size of the batch is 3

        pass_plugin = os.path.join(THIS_DIR, 'passplugin.py')
        error_plugin = os.path.join(THIS_DIR, 'errorplugin.py')

        def make_observations(*obs_ids):
            # one SimpleObservation of collection TEST per requested id
            return [SimpleObservation(collection='TEST',
                                      observation_id=obs_id)
                    for obs_id in obs_ids]

        obs = [['a', 'b', 'c'], ['d'], []]
        level = logging.DEBUG
        visitor = CAOM2RepoClient(auth.Subject(), level)
        visitor.get_observation = MagicMock(
            return_value=MagicMock(spec=SimpleObservation))
        visitor.post_observation = MagicMock()
        visitor._get_observations = MagicMock(side_effect=obs)

        (visited, updated, skipped, failed) = visitor.visit(
            pass_plugin, 'cfht')
        self.assertEqual(4, len(visited))
        self.assertEqual(4, len(updated))
        self.assertEqual(0, len(skipped))
        self.assertEqual(0, len(failed))

        obs = [['a', 'b', 'c'], ['d', 'e', 'f'], []]
        visitor._get_observations = MagicMock(side_effect=obs)
        (visited, updated, skipped, failed) = visitor.visit(
            pass_plugin, 'cfht')
        self.assertEqual(6, len(visited))
        self.assertEqual(6, len(updated))
        self.assertEqual(0, len(skipped))
        self.assertEqual(0, len(failed))

        # make it return different status. errorplugin returns according to the
        # id of the observation: True for 'UPDATE', False for 'SKIP' and
        # raises exception for 'ERROR'
        obs_ids = [['UPDATE', 'SKIP', 'ERROR'], []]
        obs = make_observations('UPDATE', 'SKIP', 'ERROR')
        visitor._get_observations = MagicMock(side_effect=obs_ids)
        visitor.get_observation = MagicMock(side_effect=obs)
        (visited, updated, skipped, failed) = visitor.visit(
            error_plugin, 'cfht')
        self.assertEqual(3, len(visited))
        self.assertEqual(1, len(updated))
        self.assertEqual(1, len(skipped))
        self.assertEqual(1, len(failed))

        # repeat with other obs
        obs_ids = [['UPDATE', 'SKIP', 'ERROR'], ['UPDATE', 'SKIP']]
        obs = make_observations('UPDATE', 'SKIP', 'ERROR', 'UPDATE', 'SKIP')
        visitor._get_observations = MagicMock(side_effect=obs_ids)
        visitor.get_observation = MagicMock(side_effect=obs)
        (visited, updated, skipped, failed) = visitor.visit(
            error_plugin, 'cfht')
        self.assertEqual(5, len(visited))
        self.assertEqual(2, len(updated))
        self.assertEqual(2, len(skipped))
        self.assertEqual(1, len(failed))

        # repeat but halt on first ERROR -> process only 3 observations
        obs_ids = [['UPDATE', 'SKIP', 'ERROR'], ['UPDATE', 'SKIP']]
        obs = make_observations('UPDATE', 'SKIP', 'ERROR', 'UPDATE', 'SKIP')
        visitor._get_observations = MagicMock(side_effect=obs_ids)
        visitor.get_observation = MagicMock(side_effect=obs)
        with self.assertRaises(SystemError):
            visitor.visit(error_plugin, 'cfht', halt_on_error=True)

        # test with time boundaries
        core.BATCH_SIZE = 3  # size of the batch is 3
        response = MagicMock()
        response.text = """ARCHIVE\ta\t2011-01-01T11:00:00.000
                           ARCHIVE\tb\t211-01-01T11:00:10.000
                           ARCHIVE\tc\t2011-01-01T12:00:00.000"""
        response2 = MagicMock()
        response2.text = """ARCHIVE\td\t2011-02-02T11:00:00.000"""
        level = logging.DEBUG
        visitor = CAOM2RepoClient(auth.Subject(), level)
        visitor.get_observation = MagicMock(
            return_value=MagicMock(spec=SimpleObservation))
        visitor.post_observation = MagicMock()
        visitor._repo_client.get = MagicMock(side_effect=[response, response2])

        start = '2010-10-10T12:00:00.000'
        end = '2012-12-12T11:11:11.000'
        (visited, updated, skipped, failed) = visitor.visit(
            pass_plugin, 'cfht',
            start=util.str2ivoa(start),
            end=util.str2ivoa(end))

        self.assertEqual(4, len(visited))
        self.assertEqual(4, len(updated))
        self.assertEqual(0, len(skipped))
        self.assertEqual(0, len(failed))
        calls = [call((core.CAOM2REPO_OBS_CAPABILITY_ID, 'cfht'),
                      params={'START': start, 'END': end, 'MAXREC': 3}),
                 call((core.CAOM2REPO_OBS_CAPABILITY_ID, 'cfht'),
                      params={'START': '2011-01-01T12:00:00.000',
                              # datetime of the last record in the batch
                              'END': end,
                              'MAXREC': 3})]
        visitor._repo_client.get.assert_has_calls(calls)