def test_job_metric_consumers(self):
    """Push a metric message through the message router and verify it lands
    in both the per-job metric list and the project metric-name set in redis.

    Fixes two idiom issues from the original: `job_metrics` is already a
    list, so the redundant `list(...)` wrapper before indexing is dropped,
    and `set([...])` is replaced by a set literal.
    """
    from foundations_contrib.global_state import message_router
    from foundations_internal.fast_serializer import deserialize
    from foundations_contrib.utils import byte_string
    from time import time

    project_name = self._str_random_uuid()
    job_id = self._str_random_uuid()
    key = "best_metric_ever"
    value = 42
    message = {
        "project_name": project_name,
        "job_id": job_id,
        "key": key,
        "value": value,
    }
    message_router.push_message("job_metrics", message)
    current_time = time()

    job_metrics_key = "jobs:{}:metrics".format(job_id)
    job_metrics = self._redis.lrange(job_metrics_key, 0, -1)
    job_metrics = [deserialize(data) for data in job_metrics]

    # job_metrics is already a list — index directly (no list() wrapper).
    first_job_metric = job_metrics[0]
    # Entry layout observed below: (timestamp, key, value).
    self.assertLess(current_time - first_job_metric[0], 5)
    self.assertEqual(key, first_job_metric[1])
    self.assertEqual(value, first_job_metric[2])

    project_metrics_key = "project:{}:metrics".format(project_name)
    project_metric_name = self._redis.smembers(project_metrics_key)
    # Set literal instead of set([...]) (ruff C405).
    self.assertEqual(project_metric_name, {byte_string(key)})
def test_produces_completed_job_data(self):
    """Deploy a successful fixture job, then verify the redis bookkeeping:
    the three logged metrics, the project metric-name set, the job's state,
    project, user, timestamps, and its membership in the running-jobs set.
    """
    from foundations_internal.fast_serializer import deserialize
    from time import time

    self._deploy_job_file('acceptance/fixtures/job_data_production', entrypoint='success.py')
    current_time = time()

    raw_entries = self._redis.lrange(f'jobs:{self.job_id}:metrics', 0, -1)
    deserialized = [deserialize(entry) for entry in raw_entries]
    # Unpacking enforces that exactly three metrics were recorded.
    first, second, third = deserialized

    # Each entry is (timestamp, key, value); all were logged moments ago.
    for entry, (expected_key, expected_value) in (
        (first, ('hello', 1)),
        (second, ('hello', 2)),
        (third, ('world', 3)),
    ):
        self.assertTrue(current_time - entry[0] < 2)
        self.assertEqual(expected_key, entry[1])
        self.assertEqual(expected_value, entry[2])

    stored_metric_names = {
        entry.decode()
        for entry in self._redis.smembers('project:job_data_production:metrics')
    }
    self.assertEqual({'hello', 'world'}, stored_metric_names)

    # Local helper: fetch a redis string key and decode it to str.
    def decoded(redis_key):
        return self._redis.get(redis_key).decode()

    self.assertEqual('completed', decoded(f'jobs:{self.job_id}:state'))
    self.assertEqual('job_data_production', decoded(f'jobs:{self.job_id}:project'))
    self.assertEqual(self.user_name, decoded(f'jobs:{self.job_id}:user'))

    completed_time = float(decoded(f'jobs:{self.job_id}:completed_time'))
    self.assertTrue(current_time - completed_time < 2)

    start_time = float(decoded(f'jobs:{self.job_id}:start_time'))
    self.assertTrue(current_time - start_time > 0.01)
    self.assertTrue(current_time - start_time < 10)

    creation_time = float(decoded(f'jobs:{self.job_id}:creation_time'))
    self.assertTrue(current_time - creation_time > 0.01)
    self.assertTrue(current_time - creation_time < 120)

    running_jobs = {
        entry.decode()
        for entry in self._redis.smembers('project:job_data_production:jobs:running')
    }
    self.assertIn(self.job_id, running_jobs)
def test_metrics_are_logged(self):
    """Verify the first metric recorded for this job is ('ugh', 10)."""
    from foundations_internal.fast_serializer import deserialize

    metrics_list_key = f'jobs:{self.job_id}:metrics'
    first_entry = redis_connection.lrange(metrics_list_key, 0, -1)[0]
    # Entry layout is (timestamp, key, value); the timestamp is irrelevant here.
    _timestamp, logged_key, logged_value = deserialize(first_entry)
    self.assertEqual('ugh', logged_key)
    self.assertEqual(10, logged_value)
def test_log_metric_stores_singleton_list_metric_through_job_metric_consumer(
        self):
    """Logging a one-element list stores that single element as the metric value."""
    self._run_job_and_log_metric('loss', [2])

    stored_entry = self._redis.rpop(f'jobs:{self.fake_job_id}:metrics')
    deserialized_entry = deserialize(stored_entry)
    # Index 0 is the timestamp; compare only the (key, value) tail.
    self.assertEqual(('loss', 2), deserialized_entry[1:])
def test_log_metric_stores_metric_through_job_metric_consumer(self):
    """A logged scalar metric round-trips through redis as (key, value)."""
    self._run_job_and_log_metric(self.fake_metric_name, self.fake_metric_value)

    stored_entry = self._redis.rpop(f'jobs:{self.fake_job_id}:metrics')
    deserialized_entry = deserialize(stored_entry)
    # Index 0 is the timestamp; compare only the (key, value) tail.
    self.assertEqual(
        (self.fake_metric_name, self.fake_metric_value),
        deserialized_entry[1:],
    )
def _format_redis_data(self, redis_data):
    """Deserialize a stored record and return it as a plain dict with the
    date stringified and the author id resolved to a display name.
    """
    from foundations_internal.fast_serializer import deserialize

    record = deserialize(redis_data)
    return {
        "date": str(record["date"]),
        "message": record["message"],
        "author": self._author_name_from_id(record["author"]),
    }
def deserialize(serialized_value):
    """Decode a serialized value.

    None passes through unchanged. Payloads whose first four bytes match
    HEADER_MAGIC are handed to the pickle serializer (header stripped);
    everything else is decoded to text and parsed as JSON.
    """
    if serialized_value is None:
        return None
    if serialized_value[:4] == HEADER_MAGIC:
        return pickle_serializer.deserialize(serialized_value[4:])
    return json.loads(string_from_bytes(serialized_value))
def test_log_metric_stores_list_metric_through_job_metric_consumer(self):
    """Logging a multi-element list stores one metric entry per element,
    each sharing the same key.
    """
    self._run_job_and_log_metric('loss', ["this", "that", "the other"])

    stored_entries = self._redis.lrange(
        f'jobs:{self.fake_job_id}:metrics', 0, 4)
    # Drop the timestamp at index 0 of each entry, keeping (key, value).
    observed_pairs = {deserialize(entry)[1:] for entry in stored_entries}
    self.assertEqual(
        {('loss', 'this'), ('loss', 'that'), ('loss', 'the other')},
        observed_pairs,
    )
def _deserialize_set_members(self, param_set):
    """Deserialize every member of a redis set.

    Args:
        param_set: iterable of serialized members, or None.

    Returns:
        list: the deserialized members; an empty list when param_set is None.
    """
    from foundations_internal.fast_serializer import deserialize

    if param_set is None:
        return []
    # Idiom fix: comprehension instead of a manual append loop (PERF401).
    return [deserialize(param) for param in param_set]
def _project_metrics(self):
    """Yield one dict per stored project metric.

    Each redis hash field is named '<job_id>:<metric_name>' (bytes) and its
    value deserializes to a (timestamp, value) pair.
    """
    from foundations_internal.fast_serializer import deserialize

    serialized_metrics = self._serialized_project_metrics()
    for raw_key, raw_metric in serialized_metrics.items():
        # Split only on the first ':' — metric names may contain ':'.
        job_id, metric_name = raw_key.decode().split(":", 1)
        timestamp, value = deserialize(raw_metric)
        yield {
            "job_id": job_id,
            "metric_name": metric_name,
            "timestamp": timestamp,
            "value": value,
        }
def _assert_expected_metrics_for_project_metrics(self, expected_metrics):
    """Assert that the set of metric values stored in the project metrics
    hash matches the values in expected_metrics (a (name, value) iterable).
    """
    expected_values = set()
    observed_values = set()
    # Single pass: collect the expected value and fetch the stored one
    # for each metric name.
    for metric_name, metric_value in expected_metrics:
        expected_values.add(metric_value)
        stored_entry = self._redis.hget(
            f'projects:{self.fake_project_name}:metrics',
            f'{self.fake_job_id}:{metric_name}')
        # Stored entries deserialize to (timestamp, value); keep the value.
        observed_values.add(deserialize(stored_entry)[1])
    self.assertEqual(expected_values, observed_values)
def test_log_metric_can_log_multiple_messages_through_job_metric_consumer(
        self):
    """Two logged metrics both appear in the job's metric list."""
    self._run_job_and_log_two_metrics()

    stored_entries = self._redis.lrange(
        f'jobs:{self.fake_job_id}:metrics', 0, 2)
    # Drop each entry's timestamp (index 0), keeping the (key, value) pairs.
    observed_pairs = {deserialize(entry)[1:] for entry in stored_entries}
    expected_pairs = {
        (self.fake_metric_name, self.fake_metric_value),
        (self.fake_metric_name_2, self.fake_metric_value_2),
    }
    self.assertEqual(expected_pairs, observed_pairs)
def test_call_adds_new_metric_to_redis_with_project_name_and_job_id(self):
    """Calling the consumer stores the metric under the project-and-key hash,
    keyed by job id, as a (timestamp, value) pair.
    """
    from foundations_internal.fast_serializer import deserialize

    self.consumer.call(
        {
            'key': self.metric_key,
            'value': self.metric_value,
            'job_id': self.job_id,
            'project_name': self.project_name,
        },
        self.random_timestamp,
        None,
    )

    stored_hash = self.mock_redis.hgetall(
        f'projects:{self.project_name}:metrics:{self.metric_key}')
    # Hash fields are byte-encoded job ids.
    stored_metric = deserialize(stored_hash[self.job_id.encode()])
    self.assertEqual((self.random_timestamp, self.metric_value), stored_metric)
def _get_and_deserialize(self, key):
    """Fetch a redis key and return its deserialized value."""
    from foundations_internal.foundations_serializer import deserialize

    return deserialize(self._redis.get(key))
def _deserialize_or_default(self, data, default):
    """Deserialize data, substituting default when the result is falsy."""
    from foundations_internal.foundations_serializer import deserialize

    deserialized = deserialize(data)
    if deserialized:
        return deserialized
    return default
def _foundations_deserialize(self, serialized_value):
    """Thin wrapper around the foundations serializer's deserialize."""
    from foundations_internal.foundations_serializer import deserialize

    return deserialize(serialized_value)