def testCountLocalDateMetrics(self):
    """Check the MONTHLY 'count' summary queried by local date (2015-08 to 2015-09).

    Asserts exact per-mode counts for ``self.testUUID`` and lower bounds on
    the corresponding aggregate counts.
    """
    range_start = ecwl.LocalDate({'year': 2015, 'month': 8})
    range_end = ecwl.LocalDate({'year': 2015, 'month': 9})
    summary = metrics.summarize_by_local_date(
        self.testUUID, range_start, range_end, 'MONTHLY', ['count'], True)
    self.assertEqual(list(summary.keys()),
                     ['aggregate_metrics', 'user_metrics'])

    # Index 0 presumably selects the results for the only requested
    # metric ('count') -- confirm against summarize_by_local_date.
    user_series = summary['user_metrics'][0]
    agg_series = summary['aggregate_metrics'][0]
    logging.debug(summary)

    # local timezone means that we only have one entry
    self.assertEqual(len(user_series), 1)
    user_entry = user_series[0]
    self.assertEqual(user_entry.nUsers, 1)
    for mode, expected_count in (('ON_FOOT', 6),
                                 ('BICYCLING', 4),
                                 ('IN_VEHICLE', 5)):
        self.assertEqual(getattr(user_entry, mode), expected_count)

    # Exact aggregate values are not asserted because they are affected by
    # other entries in the database; we only expect them to be at least the
    # user's value plus the trips contributed by the "21s" data.
    self.assertEqual(len(agg_series), 1)
    agg_entry = agg_series[0]
    self.assertEqual(agg_entry.nUsers, 2)
    for mode, extra_trips in (
            ('BICYCLING', 1),    # 21s has one bike trip
            ('ON_FOOT', 3),      # presumably: 21s has three on-foot trips
            ('IN_VEHICLE', 3)):  # 21s has three motorized trips
        self.assertGreaterEqual(getattr(agg_entry, mode),
                                getattr(user_entry, mode) + extra_trips)
def testCountLocalDateMetrics(self):
    """Check the MONTHLY 'count' summary queried by local date (2015-08 to 2015-09).

    Asserts exact WALKING/BUS counts (and the absence of BICYCLING) for
    ``self.testUUID``, plus lower bounds on the aggregate counts.
    """
    first_month = ecwl.LocalDate({'year': 2015, 'month': 8})
    last_month = ecwl.LocalDate({'year': 2015, 'month': 9})
    summary = metrics.summarize_by_local_date(
        self.testUUID, first_month, last_month, 'MONTHLY', ['count'], True)

    expected_keys = ['aggregate_metrics', 'user_metrics']
    self.assertEqual(list(summary.keys()), expected_keys)

    # Index 0 presumably selects the results for the only requested
    # metric ('count') -- confirm against summarize_by_local_date.
    user_series = summary['user_metrics'][0]
    agg_series = summary['aggregate_metrics'][0]
    logging.debug(summary)

    # local timezone means that we only have one entry
    self.assertEqual(len(user_series), 1)
    user_entry = user_series[0]
    self.assertEqual(user_entry.nUsers, 1)
    self.assertEqual(user_entry.WALKING, 12)
    self.assertNotIn('BICYCLING', user_entry)
    self.assertEqual(user_entry.BUS, 4)

    # Exact aggregate values are not asserted because they are affected by
    # other entries in the database; we only expect them to be at least the
    # user's value plus a fixed number of extra trips.
    self.assertEqual(len(agg_series), 1)
    agg_entry = agg_series[0]
    self.assertEqual(agg_entry.nUsers, 2)

    # NOTE(review): the earlier comment here read "21s has three bike trips",
    # but the bound below is 5 extra WALKING trips -- confirm the source.
    min_walking = user_entry.WALKING + 5
    self.assertGreaterEqual(agg_entry.WALKING, min_walking)

    # NOTE(review): the earlier comment here read "21s has three motorized
    # trips", but the bound below is 2 extra BUS trips -- confirm the source.
    min_bus = user_entry.BUS + 2
    self.assertGreaterEqual(agg_entry.BUS, min_bus)
def is_matched_user(user_id, spec):
    """Return True if the user's metrics satisfy every check in ``spec``.

    The metrics are summarized over the range given in ``spec``:

    * ``from_local_date`` / ``to_local_date`` -> ``summarize_by_local_date``
    * ``start_time`` / ``end_time`` -> ``summarize_by_timestamp``
    * neither -> ``summarize_by_timestamp`` from 0 to the current time,
      which is only allowed when ``spec['time_type'] == "timestamp"``.

    A check counts as satisfied when it matches in *any* grouped time
    period of the range; the user matches only if *all* checks are
    satisfied.

    :param user_id: id of the user whose metrics are summarized
    :param spec: dict with at least ``checks``, ``time_type`` and ``freq``,
        plus optionally one of the two range key pairs listed above
    :return: plain ``bool``
    :raises AssertionError: if no range is given and ``time_type`` is not
        ``"timestamp"``, or if the summary lacks ``user_metrics``
    """
    metric_list = get_metric_list(spec["checks"])
    time_type = spec['time_type']
    if 'from_local_date' in spec and 'to_local_date' in spec:
        freq_metrics = enam.summarize_by_local_date(
            user_id, spec["from_local_date"], spec["to_local_date"],
            spec["freq"], metric_list, include_aggregate=False)
    elif 'start_time' in spec and 'end_time' in spec:
        freq_metrics = enam.summarize_by_timestamp(
            user_id, spec["start_time"], spec["end_time"],
            spec["freq"], metric_list, include_aggregate=False)
    else:
        # If no start and end times are specified, we assume that this is a
        # timestamp query because we can come up with a reasonable start and
        # end time for timestamps but not for local_dates, which are
        # basically a filter. So if we ran a local_date query on the first of
        # a month, for example, we wouldn't find anything, which seems bogus
        # and not what people would expect.
        assert time_type == "timestamp", \
            "time_type = %s, expected timestamp" % time_type
        freq_metrics = enam.summarize_by_timestamp(
            user_id, 0, time.time(), spec["freq"], metric_list,
            include_aggregate=False)

    assert freq_metrics is not None
    assert 'user_metrics' in freq_metrics
    curr_user_metrics = freq_metrics['user_metrics']
    checks = spec['checks']

    # One boolean flag per check (previously a float array holding 0./1.).
    check_results = np.zeros(len(checks), dtype=bool)
    for i, check in enumerate(checks):
        # curr_metric_result is iterated as a sequence of per-period
        # summaries (presumably ModeStatTimeSummary objects), one for each
        # grouped time interval in the range,
        # e.g. for daily: 2017-01-19, 2017-01-20, 2017-01-21, ...
        curr_metric_result = curr_user_metrics[i]
        # A check is true if it is true for _any_ grouped time period in the
        # range, so stop scanning at the first match.
        check_results[i] = any(matches_check(check, msts)
                               for msts in curr_metric_result)

    # Convert numpy.bool_ to a plain bool for callers.
    all_matched = bool(np.all(check_results))
    logging.info("For user_id %s, check result array = %s, all? %s",
                 user_id, check_results, all_matched)
    return all_matched
def testCountNoEntries(self):
    """Summaries over a range with no entries must not crash.

    Both the local-date and the timestamp query are expected to return
    empty lists rather than raising, unlike in
    https://amplab.cs.berkeley.edu/jenkins/job/e-mission-server-prb/591/
    """
    expected_keys = ['aggregate_metrics', 'user_metrics']

    # Local-date query over a year range
    ld_from = ecwl.LocalDate({'year': 2000})
    ld_to = ecwl.LocalDate({'year': 2001})
    ld_summary = metrics.summarize_by_local_date(
        self.testUUID, ld_from, ld_to, 'MONTHLY', ['count'], True)
    self.assertEqual(list(ld_summary.keys()), expected_keys)
    self.assertEqual(ld_summary['aggregate_metrics'][0], [])
    self.assertEqual(ld_summary['user_metrics'][0], [])

    # Timestamp query over the same calendar range.
    # NOTE(review): arrow >= 1.0 turned `.timestamp` into a method --
    # confirm the arrow version this test runs against.
    ts_from = arrow.get(2000, 1, 1).timestamp
    ts_to = arrow.get(2001, 1, 1).timestamp
    ts_summary = metrics.summarize_by_timestamp(
        self.testUUID, ts_from, ts_to, 'm', ['count'], True)
    self.assertEqual(list(ts_summary.keys()), expected_keys)
    self.assertEqual(ts_summary['aggregate_metrics'][0], [])
    self.assertEqual(ts_summary['user_metrics'][0], [])
def testCountNoEntries(self):
    """Summaries over a range with no entries must not crash.

    Both the local-date and the timestamp query are expected to return
    empty lists rather than raising, unlike in
    https://amplab.cs.berkeley.edu/jenkins/job/e-mission-server-prb/591/
    """
    met_result_ld = metrics.summarize_by_local_date(
        self.testUUID,
        ecwl.LocalDate({'year': 2000}),
        ecwl.LocalDate({'year': 2001}),
        'MONTHLY', ['count'], True)
    # keys() is a view object on Python 3 and never compares equal to a
    # list, so materialize it before comparing.
    self.assertEqual(list(met_result_ld.keys()),
                     ['aggregate_metrics', 'user_metrics'])
    self.assertEqual(met_result_ld['aggregate_metrics'][0], [])
    self.assertEqual(met_result_ld['user_metrics'][0], [])

    # NOTE(review): arrow >= 1.0 turned `.timestamp` into a method --
    # confirm the arrow version this test runs against.
    met_result_ts = metrics.summarize_by_timestamp(
        self.testUUID,
        arrow.get(2000, 1, 1).timestamp,
        arrow.get(2001, 1, 1).timestamp,
        'm', ['count'], True)
    self.assertEqual(list(met_result_ts.keys()),
                     ['aggregate_metrics', 'user_metrics'])
    self.assertEqual(met_result_ts['aggregate_metrics'][0], [])
    self.assertEqual(met_result_ts['user_metrics'][0], [])
def is_matched_user(user_id, spec):
    """Return True if the user's metrics satisfy every check in ``spec``.

    The metrics are summarized over the range given in ``spec``:

    * ``from_local_date`` / ``to_local_date`` -> ``summarize_by_local_date``
    * ``start_time`` / ``end_time`` -> ``summarize_by_timestamp``
    * neither -> ``summarize_by_timestamp`` from 0 to the current time,
      which is only allowed when ``spec['time_type'] == "timestamp"``.

    A check counts as satisfied when it matches in *any* grouped time
    period of the range; the user matches only if *all* checks are
    satisfied.

    :param user_id: id of the user whose metrics are summarized
    :param spec: dict with at least ``checks``, ``time_type`` and ``freq``,
        plus optionally one of the two range key pairs listed above
    :return: plain ``bool``
    :raises AssertionError: if no range is given and ``time_type`` is not
        ``"timestamp"``, or if the summary lacks ``user_metrics``
    """
    metric_list = get_metric_list(spec["checks"])
    time_type = spec['time_type']
    if 'from_local_date' in spec and 'to_local_date' in spec:
        freq_metrics = enam.summarize_by_local_date(
            user_id, spec["from_local_date"], spec["to_local_date"],
            spec["freq"], metric_list, include_aggregate=False)
    elif 'start_time' in spec and 'end_time' in spec:
        freq_metrics = enam.summarize_by_timestamp(
            user_id, spec["start_time"], spec["end_time"],
            spec["freq"], metric_list, include_aggregate=False)
    else:
        # If no start and end times are specified, we assume that this is a
        # timestamp query because we can come up with a reasonable start and
        # end time for timestamps but not for local_dates, which are
        # basically a filter. So if we ran a local_date query on the first of
        # a month, for example, we wouldn't find anything, which seems bogus
        # and not what people would expect.
        assert time_type == "timestamp", \
            "time_type = %s, expected timestamp" % time_type
        freq_metrics = enam.summarize_by_timestamp(
            user_id, 0, time.time(), spec["freq"], metric_list,
            include_aggregate=False)

    assert freq_metrics is not None
    assert 'user_metrics' in freq_metrics
    curr_user_metrics = freq_metrics['user_metrics']
    checks = spec['checks']

    # One boolean flag per check (previously a float array holding 0./1.).
    check_results = np.zeros(len(checks), dtype=bool)
    for i, check in enumerate(checks):
        # curr_metric_result is iterated as a sequence of per-period
        # summaries (presumably ModeStatTimeSummary objects), one for each
        # grouped time interval in the range,
        # e.g. for daily: 2017-01-19, 2017-01-20, 2017-01-21, ...
        curr_metric_result = curr_user_metrics[i]
        # A check is true if it is true for _any_ grouped time period in the
        # range, so stop scanning at the first match.
        check_results[i] = any(matches_check(check, msts)
                               for msts in curr_metric_result)

    # Convert numpy.bool_ to a plain bool for callers.
    all_matched = bool(np.all(check_results))
    logging.info("For user_id %s, check result array = %s, all? %s",
                 user_id, check_results, all_matched)
    return all_matched
def testCountLocalDateMetrics(self):
    """Check the MONTHLY 'count' summary queried by local date (2015-08 to 2015-09).

    Asserts exact per-mode counts for ``self.testUUID`` and lower bounds on
    the corresponding aggregate counts.
    """
    met_result = metrics.summarize_by_local_date(
        self.testUUID,
        ecwl.LocalDate({'year': 2015, 'month': 8}),
        ecwl.LocalDate({'year': 2015, 'month': 9}),
        'MONTHLY', 'count')
    # keys() is a view object on Python 3 and never compares equal to a
    # list, so materialize it before comparing.
    self.assertEqual(list(met_result.keys()),
                     ['aggregate_metrics', 'user_metrics'])
    user_met_result = met_result['user_metrics']
    agg_met_result = met_result['aggregate_metrics']
    logging.debug(met_result)

    # local timezone means that we only have one entry
    self.assertEqual(len(user_met_result), 1)
    self.assertEqual(user_met_result[0].nUsers, 1)
    self.assertEqual(user_met_result[0].ON_FOOT, 6)
    self.assertEqual(user_met_result[0].BICYCLING, 4)
    self.assertEqual(user_met_result[0].IN_VEHICLE, 5)

    # Exact aggregate values are not asserted because they are affected by
    # other entries in the database; we only expect them to be at least the
    # user's value plus the trips contributed by the "21s" data.
    self.assertEqual(len(agg_met_result), 1)
    self.assertEqual(agg_met_result[0].nUsers, 2)
    # 21s has one bike trip
    self.assertGreaterEqual(agg_met_result[0].BICYCLING,
                            user_met_result[0].BICYCLING + 1)
    # presumably: 21s has three on-foot trips (the old comment repeated
    # "one bike trip" here)
    self.assertGreaterEqual(agg_met_result[0].ON_FOOT,
                            user_met_result[0].ON_FOOT + 3)
    # presumably: 21s has three motorized trips (the old comment repeated
    # "one bike trip" here)
    self.assertGreaterEqual(agg_met_result[0].IN_VEHICLE,
                            user_met_result[0].IN_VEHICLE + 3)