def post(self):
  dao = MetricsBucketDao()
  resource = request.get_data()
  if resource:
    resource_json = json.loads(resource)
    start_date_str = resource_json.get('start_date')
    end_date_str = resource_json.get('end_date')
    if not start_date_str or not end_date_str:
      raise BadRequest("Start date and end date should not be empty")
    try:
      start_date = datetime.datetime.strptime(start_date_str, DATE_FORMAT).date()
    except ValueError:
      raise BadRequest("Invalid start date: %s" % start_date_str)
    try:
      end_date = datetime.datetime.strptime(end_date_str, DATE_FORMAT).date()
    except ValueError:
      raise BadRequest("Invalid end date: %s" % end_date_str)
    date_diff = abs((end_date - start_date).days)
    if date_diff > DAYS_LIMIT:
      raise BadRequest("Difference between start date and end date "
                       "should not be greater than %s days" % DAYS_LIMIT)
    buckets = dao.get_active_buckets(start_date, end_date)
    if buckets is None:
      return []
    return [dao.to_client_json(bucket) for bucket in buckets]
  else:
    raise BadRequest("Request data is empty")
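For reference, a minimal sketch of a request body this handler accepts. The concrete dates are illustrative, and DATE_FORMAT is assumed here to be ISO-8601 ('%Y-%m-%d'), as suggested by the tests below passing date.isoformat() values:

import json

# Illustrative payload only; DATE_FORMAT assumed to be '%Y-%m-%d'. Omitting
# either date is rejected, and the window between the two dates must not
# exceed DAYS_LIMIT days or the handler raises BadRequest.
request_body = json.dumps({
  'start_date': '2017-01-01',
  'end_date': '2017-01-31',
})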
def reduce_hpo_date_metric_counts_to_database_buckets(reducer_key, reducer_values,
                                                      version_id=None):
  """Emits a metrics bucket with counts for metrics for a given hpoId + date to SQL.

  Args:
    reducer_key: hpoId|date ('*' for hpoId for cross-HPO counts)
    reducer_values: list of participant_type|metric|count strings
  """
  metrics_dict = collections.defaultdict(lambda: 0)
  (hpo_id, date_str) = parse_tuple(reducer_key)
  if hpo_id == '*':
    hpo_id = ''
  date = datetime.strptime(date_str, DATE_FORMAT)
  for reducer_value in reducer_values:
    (participant_type, metric_key, count) = parse_tuple(reducer_value)
    if metric_key == PARTICIPANT_KIND:
      if participant_type == _REGISTERED_PARTICIPANT:
        metrics_dict[metric_key] += int(count)
    else:
      kind = FULL_PARTICIPANT_KIND if participant_type == _FULL_PARTICIPANT else PARTICIPANT_KIND
      metrics_dict['%s.%s' % (kind, metric_key)] += int(count)
  version_id = version_id or context.get().mapreduce_spec.mapper.params.get('version_id')
  bucket = MetricsBucket(metricsVersionId=version_id,
                         date=date,
                         hpoId=hpo_id,
                         metrics=json.dumps(metrics_dict))
  # Use upsert here; when reducer shards retry, we will just replace any metrics bucket that was
  # written before, rather than failing.
  MetricsBucketDao().upsert(bucket)
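A standalone sketch of the counting rule above may help; the pipe-delimited formats come from the docstring, but the constant values below are illustrative assumptions, not the project's actual definitions:

import collections

_REGISTERED_PARTICIPANT = 'registered'     # assumed value
_FULL_PARTICIPANT = 'full'                 # assumed value
PARTICIPANT_KIND = 'Participant'           # assumed value
FULL_PARTICIPANT_KIND = 'FullParticipant'  # assumed value

def count_metrics(reducer_values):
  metrics_dict = collections.defaultdict(int)
  for reducer_value in reducer_values:
    participant_type, metric_key, count = reducer_value.split('|')
    if metric_key == PARTICIPANT_KIND:
      # Plain participant totals only count registered participants.
      if participant_type == _REGISTERED_PARTICIPANT:
        metrics_dict[metric_key] += int(count)
    else:
      # All other metrics get prefixed with the participant kind.
      kind = FULL_PARTICIPANT_KIND if participant_type == _FULL_PARTICIPANT else PARTICIPANT_KIND
      metrics_dict['%s.%s' % (kind, metric_key)] += int(count)
  return dict(metrics_dict)

print(count_metrics(['registered|Participant|2', 'full|hpoId.PITT|1']))
# -> {'Participant': 2, 'FullParticipant.hpoId.PITT': 1}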
def post(self):
  dao = MetricsBucketDao()
  resource = request.get_data()
  start_date = None
  end_date = None
  if resource:
    resource_json = json.loads(resource)
    start_date_str = resource_json.get('start_date')
    end_date_str = resource_json.get('end_date')
    if start_date_str:
      try:
        start_date = datetime.datetime.strptime(start_date_str, DATE_FORMAT).date()
      except ValueError:
        raise BadRequest("Invalid start date: %s" % start_date_str)
    if end_date_str:
      try:
        end_date = datetime.datetime.strptime(end_date_str, DATE_FORMAT).date()
      except ValueError:
        raise BadRequest("Invalid end date: %s" % end_date_str)
  buckets = dao.get_active_buckets(start_date, end_date)
  if buckets is None:
    return []
  return [dao.to_client_json(bucket) for bucket in buckets]
class MetricsApiTest(FlaskTestBase):
  def setUp(self):
    super(MetricsApiTest, self).setUp()
    self.version_dao = MetricsVersionDao()
    self.bucket_dao = MetricsBucketDao()
    self.today = datetime.date.today()
    self.tomorrow = self.today + datetime.timedelta(days=1)
    self.expected_bucket_1 = {'facets': {'date': self.today.isoformat()},
                              'entries': {'x': 'a'}}
    self.expected_bucket_2 = {'facets': {'date': self.today.isoformat(), 'hpoId': 'PITT'},
                              'entries': {'x': 'b'}}
    self.expected_bucket_3 = {'facets': {'date': self.tomorrow.isoformat()},
                              'entries': {'y': 'c'}}

  def test_get_metrics_no_data(self):
    response = self.send_post('Metrics', {'start_date': self.today.isoformat(),
                                          'end_date': self.tomorrow.isoformat()})
    self.assertEquals([], response)

  def test_get_metrics_no_buckets(self):
    self.version_dao.set_pipeline_in_progress()
    self.version_dao.set_pipeline_finished(True)
    response = self.send_post('Metrics', {'start_date': self.today.isoformat(),
                                          'end_date': self.tomorrow.isoformat()})
    self.assertEquals([], response)

  def setup_buckets(self):
    self.version_dao.set_pipeline_in_progress()
    metrics_bucket_1 = MetricsBucket(metricsVersionId=1, date=self.today, hpoId='',
                                     metrics='{ "x": "a" }')
    metrics_bucket_2 = MetricsBucket(metricsVersionId=1, date=self.today, hpoId='PITT',
                                     metrics='{ "x": "b" }')
    metrics_bucket_3 = MetricsBucket(metricsVersionId=1, date=self.tomorrow, hpoId='',
                                     metrics='{ "y": "c" }')
    self.bucket_dao.insert(metrics_bucket_1)
    self.bucket_dao.insert(metrics_bucket_2)
    self.bucket_dao.insert(metrics_bucket_3)
    self.version_dao.set_pipeline_finished(True)

  def test_get_metrics_with_buckets_and_today_start_date_tomorrow_end_date(self):
    self.setup_buckets()
    response = self.send_post('Metrics', {'start_date': self.today.isoformat(),
                                          'end_date': self.tomorrow.isoformat()})
    self.assertEquals([self.expected_bucket_1, self.expected_bucket_2, self.expected_bucket_3],
                      response)

  def test_get_metrics_with_buckets_and_today_start_end_date(self):
    self.setup_buckets()
    response = self.send_post('Metrics', {'start_date': self.today.isoformat(),
                                          'end_date': self.today.isoformat()})
    self.assertEquals([self.expected_bucket_1, self.expected_bucket_2], response)

  def test_get_metrics_with_buckets_and_tomorrow_start_end_date(self):
    self.setup_buckets()
    response = self.send_post('Metrics', {'start_date': self.tomorrow.isoformat(),
                                          'end_date': self.tomorrow.isoformat()})
    self.assertEquals([self.expected_bucket_3], response)

  def test_get_metrics_with_no_buckets_and_tomorrow_start_date_today_end_date(self):
    self.setup_buckets()
    response = self.send_post('Metrics', {'start_date': self.tomorrow.isoformat(),
                                          'end_date': self.today.isoformat()})
    self.assertEquals([], response)
class MetricsDaoTest(SqlTestBase):
  def setUp(self):
    super(MetricsDaoTest, self).setUp()
    self.metrics_version_dao = MetricsVersionDao()
    self.metrics_bucket_dao = MetricsBucketDao()

  def test_get_before_insert(self):
    self.assertIsNone(self.metrics_version_dao.get(1))
    self.assertIsNone(self.metrics_version_dao.get_with_children(1))
    self.assertIsNone(self.metrics_bucket_dao.get([1, TIME, None]))
    self.assertIsNone(self.metrics_bucket_dao.get([1, TIME, PITT]))
    self.assertIsNone(self.metrics_version_dao.get_version_in_progress())
    self.assertIsNone(self.metrics_version_dao.get_serving_version())
    self.assertIsNone(self.metrics_bucket_dao.get_active_buckets())

  def test_set_pipeline_in_progress(self):
    with FakeClock(TIME):
      self.metrics_version_dao.set_pipeline_in_progress()
    expected_mv = MetricsVersion(metricsVersionId=1, inProgress=True, complete=False, date=TIME,
                                 dataVersion=SERVING_METRICS_DATA_VERSION)
    self.assertEquals(expected_mv.asdict(), self.metrics_version_dao.get(1).asdict())
    self.assertEquals(expected_mv.asdict(),
                      self.metrics_version_dao.get_version_in_progress().asdict())
    self.assertIsNone(self.metrics_version_dao.get_serving_version())
    self.assertIsNone(self.metrics_bucket_dao.get_active_buckets())

  def test_set_pipeline_in_progress_while_in_progress(self):
    with FakeClock(TIME):
      self.metrics_version_dao.set_pipeline_in_progress()
    with FakeClock(TIME_2):
      with self.assertRaises(PreconditionFailed):
        # Until a day passes, setting the pipeline in progress will raise an error.
        self.metrics_version_dao.set_pipeline_in_progress()
    # After a day passes, break the lock.
    with FakeClock(TIME_3):
      self.metrics_version_dao.set_pipeline_in_progress()
    expected_mv = MetricsVersion(metricsVersionId=1, inProgress=False, complete=False, date=TIME,
                                 dataVersion=SERVING_METRICS_DATA_VERSION)
    self.assertEquals(expected_mv.asdict(), self.metrics_version_dao.get(1).asdict())
    expected_mv2 = MetricsVersion(metricsVersionId=2, inProgress=True, complete=False, date=TIME_3,
                                  dataVersion=SERVING_METRICS_DATA_VERSION)
    self.assertEquals(expected_mv2.asdict(), self.metrics_version_dao.get(2).asdict())

  def test_set_pipeline_finished_not_in_progress(self):
    self.metrics_version_dao.set_pipeline_finished(True)

  def test_set_pipeline_finished_in_progress_no_buckets(self):
    with FakeClock(TIME):
      self.metrics_version_dao.set_pipeline_in_progress()
    with FakeClock(TIME_2):
      self.metrics_version_dao.set_pipeline_finished(True)
    expected_mv = MetricsVersion(metricsVersionId=1, inProgress=False, complete=True, date=TIME,
                                 dataVersion=SERVING_METRICS_DATA_VERSION)
    self.assertEquals(expected_mv.asdict(), self.metrics_version_dao.get(1).asdict())
    self.assertEquals([], self.metrics_bucket_dao.get_active_buckets())

  def test_set_pipeline_finished_in_progress_with_buckets(self):
    with FakeClock(TIME):
      self.metrics_version_dao.set_pipeline_in_progress()
    metrics_bucket_1 = MetricsBucket(metricsVersionId=1, date=datetime.date.today(), hpoId='',
                                     metrics='foo')
    metrics_bucket_2 = MetricsBucket(metricsVersionId=1, date=datetime.date.today(), hpoId=PITT,
                                     metrics='bar')
    tomorrow = datetime.date.today() + datetime.timedelta(days=1)
    metrics_bucket_3 = MetricsBucket(metricsVersionId=1, date=tomorrow, hpoId='', metrics='baz')
    self.metrics_bucket_dao.insert(metrics_bucket_1)
    self.metrics_bucket_dao.insert(metrics_bucket_2)
    self.metrics_bucket_dao.insert(metrics_bucket_3)
    with FakeClock(TIME_2):
      self.metrics_version_dao.set_pipeline_finished(True)
    expected_mv = MetricsVersion(metricsVersionId=1, inProgress=False, complete=True, date=TIME,
                                 dataVersion=SERVING_METRICS_DATA_VERSION)
    self.assertEquals(expected_mv.asdict(),
                      self.metrics_version_dao.get_serving_version().asdict())
    active_buckets = self.metrics_bucket_dao.get_active_buckets()
    self.assertEquals(3, len(active_buckets))
    self.assertEquals(metrics_bucket_1.asdict(), active_buckets[0].asdict())
    self.assertEquals(metrics_bucket_2.asdict(), active_buckets[1].asdict())
    self.assertEquals(metrics_bucket_3.asdict(), active_buckets[2].asdict())

    # Filter on start date.
    active_buckets = self.metrics_bucket_dao.get_active_buckets(start_date=datetime.date.today())
    self.assertEquals(3, len(active_buckets))
    self.assertEquals(metrics_bucket_1.asdict(), active_buckets[0].asdict())
    self.assertEquals(metrics_bucket_2.asdict(), active_buckets[1].asdict())
    self.assertEquals(metrics_bucket_3.asdict(), active_buckets[2].asdict())

    active_buckets = self.metrics_bucket_dao.get_active_buckets(start_date=tomorrow)
    self.assertEquals(1, len(active_buckets))
    self.assertEquals(metrics_bucket_3.asdict(), active_buckets[0].asdict())

    # Filter on end date.
    active_buckets = self.metrics_bucket_dao.get_active_buckets(end_date=tomorrow)
    self.assertEquals(3, len(active_buckets))
    self.assertEquals(metrics_bucket_1.asdict(), active_buckets[0].asdict())
    self.assertEquals(metrics_bucket_2.asdict(), active_buckets[1].asdict())
    self.assertEquals(metrics_bucket_3.asdict(), active_buckets[2].asdict())

    active_buckets = self.metrics_bucket_dao.get_active_buckets(end_date=datetime.date.today())
    self.assertEquals(2, len(active_buckets))
    self.assertEquals(metrics_bucket_1.asdict(), active_buckets[0].asdict())
    self.assertEquals(metrics_bucket_2.asdict(), active_buckets[1].asdict())

  def test_insert_duplicate_bucket(self):
    with FakeClock(TIME):
      self.metrics_version_dao.set_pipeline_in_progress()
    metrics_bucket_1 = MetricsBucket(metricsVersionId=1, date=datetime.date.today(), hpoId=PITT,
                                     metrics='foo')
    metrics_bucket_2 = MetricsBucket(metricsVersionId=1, date=datetime.date.today(), hpoId=PITT,
                                     metrics='bar')
    self.metrics_bucket_dao.insert(metrics_bucket_1)
    with self.assertRaises(IntegrityError):
      self.metrics_bucket_dao.insert(metrics_bucket_2)

    # Upsert should work, and replace the bucket.
    self.metrics_bucket_dao.upsert(metrics_bucket_2)
    self.assertEquals(metrics_bucket_2.asdict(),
                      self.metrics_bucket_dao.get([1, datetime.date.today(), PITT]).asdict())

  def test_delete_old_metrics(self):
    with FakeClock(TIME):
      self.metrics_version_dao.set_pipeline_in_progress()
    metrics_bucket_1 = MetricsBucket(metricsVersionId=1, date=datetime.date.today(), hpoId='',
                                     metrics='foo')
    metrics_bucket_2 = MetricsBucket(metricsVersionId=1, date=datetime.date.today(), hpoId=PITT,
                                     metrics='bar')
    self.metrics_bucket_dao.insert(metrics_bucket_1)
    self.metrics_bucket_dao.insert(metrics_bucket_2)

    # For up to 3 days, the metrics stay around.
    with FakeClock(TIME_4):
      self.metrics_version_dao.delete_old_versions()
      expected_mv = MetricsVersion(metricsVersionId=1, inProgress=True, complete=False, date=TIME,
                                   dataVersion=SERVING_METRICS_DATA_VERSION)
      expected_mv.buckets.append(metrics_bucket_1)
      expected_mv.buckets.append(metrics_bucket_2)
      self.assertEquals(expected_mv.asdict(follow=['buckets']),
                        self.metrics_version_dao.get_with_children(1).asdict(follow=['buckets']))

    # After 3 days, the metrics are gone.
    with FakeClock(TIME_5):
      self.metrics_version_dao.delete_old_versions()
      self.assertIsNone(self.metrics_version_dao.get_with_children(1))
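test_insert_duplicate_bucket above relies on MetricsBucketDao.upsert replacing a row that shares the composite key (metricsVersionId, date, hpoId). The DAO implementation isn't shown here; one plausible sketch, assuming the base DAO exposes a session() context manager over a SQLAlchemy session:

# Hypothetical sketch of upsert(), not the actual DAO code. session.merge()
# loads any existing row with the same primary key and copies the new state
# onto it, so a reducer-shard retry overwrites the earlier bucket instead of
# raising IntegrityError the way insert() does.
def upsert(self, bucket):
  with self.session() as session:
    session.merge(bucket)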