Exemple #1
0
def is_matched_user(user_id, spec):
    """Return whether ``user_id`` satisfies every check in ``spec``.

    The user's metrics are summarized over the range described by ``spec``:

    * by local date, when both ``from_local_date`` and ``to_local_date``
      are present;
    * by timestamp, when both ``start_time`` and ``end_time`` are present;
    * otherwise by timestamp from 0 up to the current time, which is only
      allowed when ``spec['time_type']`` is ``"timestamp"``.

    A check counts as satisfied when ``matches_check`` accepts at least one
    of the grouped time periods summarized for it.

    :param user_id: user whose metrics are summarized
    :param spec: mapping with the keys ``checks``, ``time_type`` and
        ``freq``, plus optionally one of the range key pairs listed above
    :return: ``np.all`` over the per-check results
    :raises AssertionError: if no range is given and ``time_type`` is not
        ``"timestamp"``, or if the summary is ``None`` or has no
        ``user_metrics`` entry
    """
    checks = spec["checks"]
    metric_list = get_metric_list(checks)
    time_type = spec['time_type']
    if 'from_local_date' in spec and 'to_local_date' in spec:
        freq_metrics = enam.summarize_by_local_date(user_id,
                                                    spec["from_local_date"],
                                                    spec["to_local_date"],
                                                    spec["freq"],
                                                    metric_list,
                                                    include_aggregate=False)
    elif 'start_time' in spec and 'end_time' in spec:
        freq_metrics = enam.summarize_by_timestamp(user_id,
                                                   spec["start_time"],
                                                   spec["end_time"],
                                                   spec["freq"],
                                                   metric_list,
                                                   include_aggregate=False)
    else:
        # With no explicit range we can only fall back to a timestamp query:
        # "everything up to now" is a reasonable timestamp range, whereas
        # local dates act as a filter. Running a local date query on the
        # first of a month, for example, would find nothing, which is not
        # what people would expect.
        assert time_type == "timestamp", "time_type = %s, expected timestamp" % time_type
        freq_metrics = enam.summarize_by_timestamp(user_id,
                                                   0,
                                                   time.time(),
                                                   spec["freq"],
                                                   metric_list,
                                                   include_aggregate=False)

    assert freq_metrics is not None
    assert 'user_metrics' in freq_metrics
    curr_user_metrics = freq_metrics['user_metrics']

    # Kept as a float array of zeros so that the logged representation of the
    # results is unchanged.
    check_results = np.zeros(len(checks))
    for i, check in enumerate(checks):
        # curr_user_metrics[i] is a list of ModeStatTimeSummary objects, one
        # for each grouped time interval in the range,
        # e.g. for daily, 2017-01-19, 2017-01-20, 2017-01-21, ....
        # presumably entry i corresponds to checks[i] - confirm against
        # get_metric_list.
        #
        # A check is true if it is true for _any_ grouped time period in the
        # range, so any() lets us stop at the first match instead of scanning
        # the remaining periods.
        check_results[i] = any(matches_check(check, msts)
                               for msts in curr_user_metrics[i])

    all_matched = np.all(check_results)
    # Pass the values as logging arguments so the message is only formatted
    # when INFO records are actually emitted.
    logging.info("For user_id %s, check result array = %s, all? %s",
                 user_id, check_results, all_matched)
    return all_matched
    def testCountTimestampMetrics(self):
        # Summarize the test user's daily ('d') 'count' metrics between
        # aug_start_ts and aug_end_ts, then check the per-user and the
        # aggregate results.
        summary = metrics.summarize_by_timestamp(
            self.testUUID, self.aug_start_ts, self.aug_end_ts,
            'd', ['count'], True)
        logging.debug(summary)

        expected_sections = ['aggregate_metrics', 'user_metrics']
        self.assertEqual(list(summary.keys()), expected_sections)

        # One metric was requested, so only the first entry of each section
        # is inspected.
        user_days = summary['user_metrics'][0]
        agg_days = summary['aggregate_metrics'][0]

        # Per-user results: two daily entries, on the 27th and the 28th.
        self.assertEqual(len(user_days), 2)
        self.assertEqual([day.nUsers for day in user_days], [1,1])
        first_day, second_day = user_days
        self.assertEqual(first_day.local_dt.day, 27)
        self.assertEqual(second_day.local_dt.day, 28)
        self.assertEqual(first_day.ON_FOOT, 4)
        self.assertEqual(first_day.BICYCLING, 2)
        # IN_VEHICLE was changed from 3 to 4 - investigation at
        # https://github.com/e-mission/e-mission-server/issues/288#issuecomment-242531798
        self.assertEqual(first_day.IN_VEHICLE, 4)

        # Aggregate results: the individual values depend on whatever else
        # is in the database, so only the number of daily bins and the user
        # count per bin are checked.
        self.assertEqual(len(agg_days), 8)
        # Users do not overlap at the daily level, and because the binning
        # works by range there is a run of intermediate bins with no users.
        self.assertEqual([day.nUsers for day in agg_days], [1,1,0,0,0,0,1,1])
        # A bin without users carries no value for any of the mode fields;
        # since these are never negative, their sum is implicitly zero.
        userless_bin = agg_days[2]
        self.assertTrue(all(mode not in userless_bin
                            for mode in ('ON_FOOT', 'BICYCLING', 'IN_VEHICLE')))
    def testCountTimestampMetrics(self):
        # Summarize the test user's daily ('d') 'count' metric between
        # aug_start_ts and aug_end_ts, then check the per-user and the
        # aggregate results.
        # NOTE(review): this call passes a single metric name and no
        # aggregate flag, and the results are not indexed per metric;
        # confirm this matches the summarize_by_timestamp signature in use.
        met_result = metrics.summarize_by_timestamp(self.testUUID,
                                                    self.aug_start_ts,
                                                    self.aug_end_ts, 'd',
                                                    'count')
        logging.debug(met_result)

        # On Python 3, keys() returns a view object that never compares
        # equal to a list, so it has to be materialized before comparing.
        self.assertEqual(list(met_result.keys()),
                         ['aggregate_metrics', 'user_metrics'])
        user_met_result = met_result['user_metrics']
        agg_met_result = met_result['aggregate_metrics']

        # Per-user results: two daily entries, on the 27th and the 28th.
        self.assertEqual(len(user_met_result), 2)
        self.assertEqual([m.nUsers for m in user_met_result], [1, 1])
        self.assertEqual(user_met_result[0].local_dt.day, 27)
        self.assertEqual(user_met_result[1].local_dt.day, 28)
        self.assertEqual(user_met_result[0].ON_FOOT, 4)
        self.assertEqual(user_met_result[0].BICYCLING, 2)
        self.assertEqual(user_met_result[0].IN_VEHICLE, 3)

        # The aggregate values are affected by other entries in the
        # database, so only the number of daily bins and the user count per
        # bin are checked.
        self.assertEqual(len(agg_met_result), 8)
        # no overlap between users at the daily level
        # bunch of intermediate entries with no users since this binning works
        # by range
        self.assertEqual([m.nUsers for m in agg_met_result],
                         [1, 1, 0, 0, 0, 0, 1, 1])
        # If there are no users, there are no values for any of the fields
        # since these are never negative, it implies that their sum is zero.
        # One assertNotIn per field reports exactly which field was present
        # if the check fails.
        for mode in ('ON_FOOT', 'BICYCLING', 'IN_VEHICLE'):
            self.assertNotIn(mode, agg_met_result[2])
def is_matched_user(user_id, spec):
  """Return whether ``user_id`` satisfies every check in ``spec``.

  The user's metrics are summarized over the range described by ``spec``:

  * by local date, when both ``from_local_date`` and ``to_local_date``
    are present;
  * by timestamp, when both ``start_time`` and ``end_time`` are present;
  * otherwise by timestamp from 0 up to the current time, which is only
    allowed when ``spec['time_type']`` is ``"timestamp"``.

  A check counts as satisfied when ``matches_check`` accepts at least one
  of the grouped time periods summarized for it.

  :param user_id: user whose metrics are summarized
  :param spec: mapping with the keys ``checks``, ``time_type`` and
      ``freq``, plus optionally one of the range key pairs listed above
  :return: ``np.all`` over the per-check results
  :raises AssertionError: if no range is given and ``time_type`` is not
      ``"timestamp"``, or if the summary is ``None`` or has no
      ``user_metrics`` entry
  """
  checks = spec["checks"]
  metric_list = get_metric_list(checks)
  time_type = spec['time_type']
  if 'from_local_date' in spec and 'to_local_date' in spec:
    freq_metrics = enam.summarize_by_local_date(user_id,
        spec["from_local_date"], spec["to_local_date"],
        spec["freq"], metric_list, include_aggregate=False)
  elif 'start_time' in spec and 'end_time' in spec:
    freq_metrics = enam.summarize_by_timestamp(user_id,
        spec["start_time"], spec["end_time"],
        spec["freq"], metric_list, include_aggregate=False)
  else:
    # With no explicit range we can only fall back to a timestamp query:
    # "everything up to now" is a reasonable timestamp range, whereas
    # local dates act as a filter. Running a local date query on the
    # first of a month, for example, would find nothing, which is not
    # what people would expect.
    assert time_type == "timestamp", "time_type = %s, expected timestamp" % time_type
    freq_metrics = enam.summarize_by_timestamp(user_id,
        0, time.time(), spec["freq"], metric_list, include_aggregate=False)

  assert freq_metrics is not None
  assert 'user_metrics' in freq_metrics
  curr_user_metrics = freq_metrics['user_metrics']

  # Kept as a float array of zeros so that the logged representation of the
  # results is unchanged.
  check_results = np.zeros(len(checks))
  for i, check in enumerate(checks):
    # curr_user_metrics[i] is a list of ModeStatTimeSummary objects, one for
    # each grouped time interval in the range,
    # e.g. for daily, 2017-01-19, 2017-01-20, 2017-01-21, ....
    # presumably entry i corresponds to checks[i] - confirm against
    # get_metric_list.
    #
    # A check is true if it is true for _any_ grouped time period in the
    # range, so any() lets us stop at the first match instead of scanning
    # the remaining periods.
    check_results[i] = any(matches_check(check, msts)
                           for msts in curr_user_metrics[i])

  all_matched = np.all(check_results)
  # Pass the values as logging arguments so the message is only formatted
  # when INFO records are actually emitted.
  logging.info("For user_id %s, check result array = %s, all? %s",
               user_id, check_results, all_matched)
  return all_matched
    def testCountNoEntries(self):
        # Summarizing a range with no entries must not crash; every section
        # should come back with an empty list for the requested metric.
        # Unlike in https://amplab.cs.berkeley.edu/jenkins/job/e-mission-server-prb/591/
        expected_sections = ['aggregate_metrics', 'user_metrics']

        # Local date flavour: monthly bins between the years 2000 and 2001.
        by_local_date = metrics.summarize_by_local_date(
            self.testUUID,
            ecwl.LocalDate({'year': 2000}),
            ecwl.LocalDate({'year': 2001}),
            'MONTHLY', ['count'], True)
        self.assertEqual(list(by_local_date.keys()), expected_sections)
        for section in expected_sections:
            self.assertEqual(by_local_date[section][0], [])

        # Timestamp flavour: 'm' bins between 2000-01-01 and 2001-01-01.
        by_timestamp = metrics.summarize_by_timestamp(
            self.testUUID,
            arrow.get(2000,1,1).timestamp,
            arrow.get(2001,1,1).timestamp,
            'm', ['count'], True)
        self.assertEqual(list(by_timestamp.keys()), expected_sections)
        for section in expected_sections:
            self.assertEqual(by_timestamp[section][0], [])
Exemple #6
0
    def testCountNoEntries(self):
        # Ensure that we don't crash if we don't find any entries
        # Should return empty array instead
        # Unlike in https://amplab.cs.berkeley.edu/jenkins/job/e-mission-server-prb/591/
        met_result_ld = metrics.summarize_by_local_date(self.testUUID,
                                                     ecwl.LocalDate({'year': 2000}),
                                                     ecwl.LocalDate({'year': 2001}),
                                                     'MONTHLY', ['count'], True)
        # On Python 3, keys() returns a view object that never compares
        # equal to a list, so it has to be materialized before comparing.
        self.assertEqual(list(met_result_ld.keys()), ['aggregate_metrics', 'user_metrics'])
        self.assertEqual(met_result_ld['aggregate_metrics'][0], [])
        self.assertEqual(met_result_ld['user_metrics'][0], [])

        # Same check for the timestamp-based summary over the same empty
        # range.
        met_result_ts = metrics.summarize_by_timestamp(self.testUUID,
                                                       arrow.get(2000,1,1).timestamp,
                                                       arrow.get(2001,1,1).timestamp,
                                                        'm', ['count'], True)
        self.assertEqual(list(met_result_ts.keys()), ['aggregate_metrics', 'user_metrics'])
        self.assertEqual(met_result_ts['aggregate_metrics'][0], [])
        self.assertEqual(met_result_ts['user_metrics'][0], [])
Exemple #7
0
    def testCountTimestampMetrics(self):
        # Summarize the test user's daily ('d') 'count' metrics between
        # aug_start_ts and aug_end_ts, then check the per-user and the
        # aggregate results.
        summary = metrics.summarize_by_timestamp(
            self.testUUID, self.aug_start_ts, self.aug_end_ts,
            'd', ['count'], True)

        import json
        import bson.json_util as bju

        # Dump the whole result as JSON for debugging, using the bson
        # helper as the fallback serializer.
        serialized = json.dumps(summary, default=bju.default)
        logging.debug(serialized)

        expected_sections = ['aggregate_metrics', 'user_metrics']
        self.assertEqual(list(summary.keys()), expected_sections)

        # One metric was requested, so only the first entry of each section
        # is inspected.
        user_days = summary['user_metrics'][0]
        agg_days = summary['aggregate_metrics'][0]

        # Per-user results: two daily entries, on the 27th and the 28th.
        self.assertEqual(len(user_days), 2)
        self.assertEqual([day.nUsers for day in user_days], [1, 1])
        first_day, second_day = user_days
        self.assertEqual(first_day.local_dt.day, 27)
        self.assertEqual(second_day.local_dt.day, 28)
        self.assertEqual(first_day.WALKING, 7)
        self.assertNotIn("BICYCLING", first_day)
        # Changed from 3 to 4 - investigation at
        # https://github.com/e-mission/e-mission-server/issues/288#issuecomment-242531798
        self.assertEqual(first_day.BUS, 4)

        # Aggregate results: the individual values depend on whatever else
        # is in the database, so only the number of daily bins and the user
        # count per bin are checked.
        self.assertEqual(len(agg_days), 8)
        # Users do not overlap at the daily level, and because the binning
        # works by range there is a run of intermediate bins with no users.
        self.assertEqual([day.nUsers for day in agg_days],
                         [1, 1, 0, 0, 0, 0, 1, 1])
        # A bin without users carries no value for any of the mode fields;
        # since these are never negative, their sum is implicitly zero.
        userless_bin = agg_days[2]
        self.assertTrue(all(mode not in userless_bin
                            for mode in ('WALKING', 'BICYCLING', 'IN_VEHICLE')))