Example #1
    def clearRelatedDb(self):
        edb.get_timeseries_db().remove({"user_id": self.androidUUID})
        edb.get_analysis_timeseries_db().remove({"user_id": self.androidUUID})
        edb.get_pipeline_state_db().remove({"user_id": self.androidUUID})
        edb.get_timeseries_db().remove({"user_id": self.iosUUID})
        edb.get_analysis_timeseries_db().remove({"user_id": self.iosUUID})
        edb.get_pipeline_state_db().remove({"user_id": self.iosUUID})
Example #2
    def testEmptyCall(self):
        # Check call to the entire filter accuracy with a zero length timeseries
        import emission.core.get_database as edb
        edb.get_timeseries_db().remove({"user_id": self.testUUID})
        # We expect that this should not throw
        eaicf.filter_accuracy(self.testUUID)
        self.assertEqual(len(self.ts.get_data_df("background/location")), 0)
Example #3
def move_all_filters_to_data():
    for entry in edb.get_timeseries_db().find():
        if "filter" in entry["metadata"]:
            curr_filter = entry["metadata"]["filter"]
            if is_location_entry(entry):
                entry["data"]["filter"] = curr_filter
                logging.debug("for entry %s, found key %s, moved filter %s into data" % 
                                (entry["_id"], get_curr_key(entry), curr_filter))

            # For all cases, including the location one, we want to delete the filter from metadata
            del entry["metadata"]["filter"]
            edb.get_timeseries_db().save(entry)
            logging.debug("for entry %s, for key %s, deleted filter %s from metadata" % 
                            (entry["_id"], get_curr_key(entry), curr_filter))
        else:
            pass
            # logging.warning("No filter found for entry %s, skipping" % entry)

        if "filter" not in entry["data"] and is_location_entry(entry):
            # This must be an entry from before the time that we started sending
            # entries to the server. At that time, we only sent time entries,
            # so set it to time in this case
            entry["data"]["filter"] = "time"
            logging.debug("No entry found in either data or metadata, for key %s setting to 'time'" % entry["metadata"]["key"])
            edb.get_timeseries_db().save(entry)
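A quick sanity check after running the migration above; this helper is a sketch (not part of the original script), assuming the same `edb` handle and the pymongo-2-era cursor API used throughout these examples:

def check_no_metadata_filters():
    # after move_all_filters_to_data() runs, no timeseries entry should
    # still carry a metadata-level filter
    remaining = edb.get_timeseries_db().find(
        {"metadata.filter": {"$exists": True}}).count()
    logging.info("entries with metadata filter remaining = %d" % remaining)
    return remaining == 0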
Example #4
    def tearDown(self):
        edb.get_timeseries_db().remove({"user_id": self.testUUID})
        edb.get_analysis_timeseries_db().remove({"user_id": self.testUUID})
        edb.get_timeseries_db().remove({"user_id": "new_fake"})
        edb.get_analysis_timeseries_db().remove({"user_id": "new_fake"})
        edb.get_common_trip_db().drop()
        edb.get_common_place_db().drop()
Example #5
    def tearDown(self):
        edb.get_timeseries_db().remove({"user_id": self.androidUUID})
        edb.get_timeseries_db().remove({"user_id": self.iosUUID})
        edb.get_usercache_db().remove({"user_id": self.androidUUID})
        edb.get_usercache_db().remove({"user_id": self.iosUUID})
        edb.get_place_db().remove()
        edb.get_trip_new_db().remove()
Example #6
    def clearRelatedDb(self):
        edb.get_timeseries_db().remove()
        edb.get_place_db().remove()
        edb.get_stop_db().remove()

        edb.get_trip_new_db().remove()
        edb.get_section_new_db().remove()
Example #7
    def clearRelatedDb(self):
        edb.get_timeseries_db().delete_many({"user_id": self.testUUID})
        edb.get_analysis_timeseries_db().delete_many({"user_id": self.testUUID})
        edb.get_pipeline_state_db().delete_many({"user_id": self.testUUID})
        edb.get_timeseries_db().delete_many({"user_id": self.testUUID1})
        edb.get_analysis_timeseries_db().delete_many({"user_id": self.testUUID1})
        edb.get_pipeline_state_db().delete_many({"user_id": self.testUUID1})
Example #8
    def clearRelatedDb(self):
        edb.get_timeseries_db().remove({"user_id": self.androidUUID})
        edb.get_analysis_timeseries_db().remove({"user_id": self.androidUUID})
        edb.get_pipeline_state_db().remove({"user_id": self.androidUUID})
        edb.get_timeseries_db().remove({"user_id": self.iosUUID})
        edb.get_analysis_timeseries_db().remove({"user_id": self.iosUUID})
        edb.get_pipeline_state_db().remove({"user_id": self.iosUUID})
Example #9
    def tearDown(self):
        edb.get_timeseries_db().remove({"user_id": self.androidUUID})
        edb.get_timeseries_db().remove({"user_id": self.iosUUID})
        edb.get_usercache_db().remove({"user_id": self.androidUUID})
        edb.get_usercache_db().remove({"user_id": self.iosUUID})
        edb.get_analysis_timeseries_db().remove({"user_id": self.androidUUID})
        edb.get_analysis_timeseries_db().remove({"user_id": self.iosUUID})
Example #10
def getPublicData():
    ids = request.json['phone_ids']
    all_uuids = map(lambda id: UUID(id), ids)
    uuids = [uuid for uuid in all_uuids if uuid in estag.TEST_PHONE_IDS]

    from_ts = request.query.from_ts
    to_ts = request.query.to_ts

    time_range = estt.TimeQuery("metadata.write_ts", float(from_ts),
                                float(to_ts))
    time_query = time_range.get_query()

    user_queries = map(lambda id: {'user_id': id}, uuids)

    for q in user_queries:
        q.update(time_query)

    num_entries = map(lambda q: edb.get_timeseries_db().find(q).count(),
                      user_queries)
    total_entries = sum(num_entries)
    logging.debug("Total entries requested: %d" % total_entries)

    threshold = 200000
    if total_entries > threshold:
        data_list = None
    else:
        data_list = map(
            lambda q: list(edb.get_timeseries_db().find(q).sort(
                "metadata.write_ts")), user_queries)

    return {'phone_data': data_list}
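Note that this snippet relies on Python 2 semantics, where `map` returns a list. Under Python 3, `map` returns a one-shot iterator, so reusing the results, or concatenating two of them as the variant in Example #36 below does with `num_entries_ts + num_entries_uc`, would break. A minimal Python 3-safe sketch of the counting step, using the same names:

    # inside getPublicData, replacing the map-based section
    user_queries = []
    for u in uuids:
        q = {'user_id': u}
        q.update(time_query)
        user_queries.append(q)
    num_entries = [edb.get_timeseries_db().find(q).count() for q in user_queries]
    total_entries = sum(num_entries)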
Example #11
    def testEmptyCall(self):
        # Check call to the entire filter accuracy with a zero length timeseries
        import emission.core.get_database as edb
        edb.get_timeseries_db().remove({"user_id": self.testUUID})
        # We expect that this should not throw
        eaicf.filter_accuracy(self.testUUID)
        self.assertEqual(len(self.ts.get_data_df("background/location")), 0)
Example #12
    def clearRelatedDb(self):
        edb.get_timeseries_db().remove({'user_id': self.testUUID})
        edb.get_place_db().remove({'user_id': self.testUUID})
        edb.get_stop_db().remove({'user_id': self.testUUID})

        edb.get_trip_new_db().remove({'user_id': self.testUUID})
        edb.get_section_new_db().remove({'user_id': self.testUUID})
Example #13
    def clearRelatedDb(self):
        edb.get_timeseries_db().remove({"user_id": {"$in": self.testUUIDList}})
        edb.get_analysis_timeseries_db().remove({"user_id": {"$in": self.testUUIDList}})
        edb.get_usercache_db().remove({"user_id": {"$in": self.testUUIDList}})
Example #14
    def testLocalRangeRolloverQuery(self):
        """
        Search for all entries between 8:18 and 9:08 local time, both inclusive
        """
        start_local_dt = ecwl.LocalDate({
            'year': 2015,
            'month': 8,
            'hour': 8,
            'minute': 18
        })
        end_local_dt = ecwl.LocalDate({
            'year': 2015,
            'month': 8,
            'hour': 9,
            'minute': 8
        })
        final_query = {"user_id": self.testUUID}
        final_query.update(
            esdl.get_range_query("data.local_dt", start_local_dt,
                                 end_local_dt))
        entries = edb.get_timeseries_db().find(final_query).sort(
            'data.ts', pymongo.ASCENDING)
        self.assertEqual(448,
                         edb.get_timeseries_db().count_documents(final_query))

        entries_list = list(entries)

        # Note that since this is a set of filters, as opposed to a range, this
        # returns all entries between 18 and 8 in both hours.
        # so 8:18 is valid, but so is 9:57
        self.assertEqual(ecwe.Entry(entries_list[0]).data.local_dt.hour, 8)
        self.assertEqual(ecwe.Entry(entries_list[0]).data.local_dt.minute, 18)
        self.assertEqual(ecwe.Entry(entries_list[-1]).data.local_dt.hour, 9)
        self.assertEqual(ecwe.Entry(entries_list[-1]).data.local_dt.minute, 57)
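The comments above are the key subtlety, and easy to confirm directly: because the local_dt query is a set of per-component filters rather than a true range, minutes are matched independently of hours, so 9:57 is returned even though it is "after" 9:08 as a time of day. A small hedged check, reusing names from the test:

        # every returned entry falls in hour 8 or 9; the minute component is
        # filtered separately, which is why 9:57 matches
        for e in entries_list:
            dt = ecwe.Entry(e).data.local_dt
            self.assertIn(dt.hour, [8, 9])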
Example #15
    def tearDown(self):
        edb.get_timeseries_db().remove({"user_id": self.androidUUID})
        edb.get_timeseries_db().remove({"user_id": self.iosUUID})
        edb.get_usercache_db().remove({"user_id": self.androidUUID})
        edb.get_usercache_db().remove({"user_id": self.iosUUID})
        edb.get_place_db().remove()
        edb.get_trip_new_db().remove()
Example #16
    def clearRelatedDb(self):
        edb.get_timeseries_db().remove()
        edb.get_place_db().remove()
        edb.get_stop_db().remove()

        edb.get_trip_new_db().remove()
        edb.get_section_new_db().remove()
Example #17
    def tearDown(self):
        os.remove(self.analysis_conf_path)
        edb.get_timeseries_db().remove({"user_id": self.androidUUID})
        edb.get_timeseries_db().remove({"user_id": self.iosUUID})
        edb.get_pipeline_state_db().remove({"user_id": self.androidUUID})
        edb.get_pipeline_state_db().remove({"user_id": self.iosUUID})
        edb.get_analysis_timeseries_db().remove({"user_id": self.androidUUID})
        edb.get_analysis_timeseries_db().remove({"user_id": self.iosUUID})
Example #18
    def loadPointsForTrip(self, trip_id):
        import emission.core.get_database as edb

        entries = json.load(open("emission/tests/data/smoothing_data/%s" % trip_id),
                                 object_hook=bju.object_hook)
        for entry in entries:
            entry["user_id"] = self.testUUID
            edb.get_timeseries_db().save(entry)
Example #19
    def loadPointsForTrip(self, trip_id):
        import emission.core.get_database as edb

        entries = json.load(open("emission/tests/data/smoothing_data/%s" %
                                 trip_id),
                            object_hook=bju.object_hook)
        for entry in entries:
            entry["user_id"] = self.testUUID
            edb.get_timeseries_db().save(entry)
Example #20
    def clearRelatedDb(self):
        edb.get_timeseries_db().delete_many({"user_id": self.testUUID})
        edb.get_analysis_timeseries_db().delete_many({"user_id": self.testUUID})
        edb.get_pipeline_state_db().delete_many({"user_id": self.testUUID})
        edb.get_timeseries_db().delete_many({"user_id": self.testUUID1})
        edb.get_analysis_timeseries_db().delete_many({"user_id": self.testUUID1})
        edb.get_pipeline_state_db().delete_many({"user_id": self.testUUID1})
Example #21
def convert_collection(collection, key):
    result_cursor = collection.find()
    logging.info("About to convert %s entries" % result_cursor.count())
    for i, wrapper in enumerate(result_cursor):
        entry = convert_wrapper_to_entry(key, wrapper)
        if entry.get_id() != wrapper["_id"]:
            logging.warn("entry.id = %s, wrapper.id = %s" % (entry.get_id(), wrapper["_id"]))
        if i % 10000 == 0:
            print("converted %s -> %s" % (wrapper, entry))
        edb.get_timeseries_db().insert(entry)
Example #22
    def testInsertFilters(self):
        edb.get_timeseries_db().remove({"user_id": self.testUUID,
                                        "metadata.key": "background/filtered_location"})
        for entry in edb.get_timeseries_db().find({'user_id': self.testUUID,
            'metadata.filter': 'time',
            "metadata.key": "background/location"}):
            del entry["_id"]
            del entry["metadata"]["filter"]
            entry["metadata"]["key"] = "background/filtered_location"
            edb.get_timeseries_db().insert(entry)

        # At this point, all the filtered_location entries will not have any filters
        self.assertEquals(edb.get_timeseries_db().find({'user_id': self.testUUID,
            'metadata.filter': 'time',
            "metadata.key": "background/filtered_location"}).count(), 0)

        self.assertEquals(edb.get_timeseries_db().find({'user_id': self.testUUID,
            'metadata.filter': 'time',
            "metadata.key": "background/filtered_location"}).count(), 0)

        # Now, move all filters
        estfm.move_all_filters_to_data()

        # The entries should now be set to "time"
        # At this point, all the filtered_location entries will not have any filters
        self.assertEquals(edb.get_timeseries_db().find({'user_id': self.testUUID,
            'data.filter': 'distance',
            "metadata.key": "background/filtered_location"}).count(), 0)

        self.assertEquals(edb.get_timeseries_db().find({'user_id': self.testUUID,
            'data.filter': 'time',
            "metadata.key": "background/filtered_location"}).count(), 738)
Example #23
    def testOneOverride(self):
        cfg_1 = copy.copy(self.dummy_config)
        cfg_1['metadata']['write_ts'] = 1440700000
        edb.get_timeseries_db().insert(cfg_1)

        tq = estt.TimeQuery("metadata.write_ts", 1440658800, 1440745200)
        eacc.save_all_configs(self.androidUUID, tq)
        saved_entries = list(edb.get_usercache_db().find({'user_id': self.androidUUID, 'metadata.key': 'config/sensor_config'}))
        self.assertEqual(len(saved_entries), 1)
        logging.debug(saved_entries[0])
        self.assertEqual(saved_entries[0]['data']['is_duty_cycling'], cfg_1['data']['is_duty_cycling'])
Example #24
    def insert(self, entry):
        """
        Insert the given entry, filling in the user_id if it is missing and
        rejecting entries that belong to a different user.
        """
        logging.debug("insert called")
        if "user_id" not in entry:
            entry["user_id"] = self.user_id
        elif entry["user_id"] != self.user_id:
            raise AttributeError("Saving entry for %s in timeseries for %s" % (entry["user_id"], self.user_id))
        else:
            logging.debug("entry was fine, no need to fix it")

        logging.debug("Inserting entry %s into timeseries" % entry)
        edb.get_timeseries_db().insert(entry)
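A minimal usage sketch for the method above; `some_uuid` and `other_uuid` are placeholders, and the constructor is the one shown in Examples #43 and #46:

    # ts = BuiltinTimeSeries(some_uuid)
    # ts.insert({"metadata": {...}, "data": {...}})         # user_id filled in automatically
    # ts.insert({"user_id": some_uuid, "metadata": {...}})  # accepted, already correct
    # ts.insert({"user_id": other_uuid, "data": {...}})     # raises AttributeError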
Example #25
def setupRealExampleWithEntries(testObj):
    tsdb = edb.get_timeseries_db()
    for entry in testObj.entries:
        entry["user_id"] = testObj.testUUID
        # print "Saving entry with write_ts = %s and ts = %s" % (entry["metadata"]["write_fmt_time"],
        #                                                        entry["data"]["fmt_time"])
        edb.save(tsdb, entry)
        
    logging.info("After loading, timeseries db size = %s" % edb.get_timeseries_db().count())
    logging.debug("First few entries = %s" % 
                    [e["data"]["fmt_time"] if "fmt_time" in e["data"] else e["metadata"]["write_fmt_time"] for e in 
                        list(edb.get_timeseries_db().find({"user_id": testObj.testUUID}).sort("data.write_ts",
                                                                                       pymongo.ASCENDING).limit(10))])
Example #26
    def testOldOverride(self):
        cfg_1 = copy.copy(self.dummy_config)
        cfg_1['metadata']['write_ts'] = 1440500000
        edb.get_timeseries_db().insert(cfg_1)

        cfg_2 = copy.copy(self.dummy_config)
        cfg_2['metadata']['write_ts'] = 1440610000
        edb.get_timeseries_db().insert(cfg_2)

        tq = estt.TimeQuery("metadata.write_ts", 1440658800, 1440745200)
        eacc.save_all_configs(self.androidUUID, tq)
        saved_entries = list(edb.get_usercache_db().find({'user_id': self.androidUUID, 'metadata.key': 'config/sensor_config'}))
        self.assertEqual(len(saved_entries), 0)
Example #27
def setupRealExampleWithEntries(testObj):
    tsdb = edb.get_timeseries_db()
    for entry in testObj.entries:
        entry["user_id"] = testObj.testUUID
        # print "Saving entry with write_ts = %s and ts = %s" % (entry["metadata"]["write_fmt_time"],
        #                                                        entry["data"]["fmt_time"])
        tsdb.save(entry)
        
    logging.info("After loading, timeseries db size = %s" % edb.get_timeseries_db().count())
    logging.debug("First few entries = %s" % 
                    [e["data"]["fmt_time"] if "fmt_time" in e["data"] else e["metadata"]["write_fmt_time"] for e in 
                        list(edb.get_timeseries_db().find({"user_id": testObj.testUUID}).sort("data.write_ts",
                                                                                       pymongo.ASCENDING).limit(10))])
Example #28
    def setUp(self):
        # We need to access the database directly sometimes in order to
        # forcibly insert entries for the tests to pass. But we put the import
        # in here to reduce the temptation to use the database directly elsewhere.
        import emission.core.get_database as edb
        import uuid

        self.testUUID = uuid.uuid4()
        self.entries = json.load(open("emission/tests/data/smoothing_data/tablet_2015-11-03"),
                                 object_hook=bju.object_hook)
        for entry in self.entries:
            entry["user_id"] = self.testUUID
            edb.get_timeseries_db().save(entry)
        self.ts = esta.TimeSeries.get_time_series(self.testUUID)
Example #29
    def clearRelatedDb(self):
        edb.get_timeseries_db().delete_many({"user_id": {"$in": self.testUUIDList}})
        edb.get_analysis_timeseries_db().delete_many({"user_id": {"$in": self.testUUIDList}})
        edb.get_usercache_db().delete_many({"user_id": {"$in": self.testUUIDList}})
        edb.get_uuid_db().delete_many({"user_id": {"$in": self.testUUIDList}})
Example #30
    def setUp(self):
        # We need to access the database directly sometimes in order to
        # forcibly insert entries for the tests to pass. But we put the import
        # in here to reduce the temptation to use the database directly elsewhere.
        import emission.core.get_database as edb
        import uuid

        self.testUUID = uuid.uuid4()
        self.entries = json.load(
            open("emission/tests/data/smoothing_data/tablet_2015-11-03"),
            object_hook=bju.object_hook)
        for entry in self.entries:
            entry["user_id"] = self.testUUID
            edb.get_timeseries_db().save(entry)
        self.ts = esta.TimeSeries.get_time_series(self.testUUID)
Example #31
def setupRealExample(testObj, dump_file):
    logging.info("Before loading, timeseries db size = %s" % edb.get_timeseries_db().count())
    testObj.entries = json.load(open(dump_file), object_hook = bju.object_hook)
    testObj.testUUID = uuid.uuid4()
    for entry in testObj.entries:
        entry["user_id"] = testObj.testUUID
        # print "Saving entry with write_ts = %s and ts = %s" % (entry["metadata"]["write_fmt_time"],
        #                                                        entry["data"]["fmt_time"])
        edb.get_timeseries_db().save(entry)
        
    logging.info("After loading, timeseries db size = %s" % edb.get_timeseries_db().count())
    logging.debug("First few entries = %s" % 
                    [e["data"]["fmt_time"] for e in 
                        list(edb.get_timeseries_db().find({"user_id": testObj.testUUID}).sort("data.write_ts",
                                                                                       pymongo.ASCENDING).limit(10))])
Example #32
def post_check(unique_user_list, all_rerun_list):
    import emission.core.get_database as edb
    import numpy as np

    logging.info(
        "For %s users, loaded %s raw entries, %s processed entries and %s pipeline states"
        % (len(unique_user_list), edb.get_timeseries_db().count_documents(
            {"user_id": {
                "$in": list(unique_user_list)
            }}), edb.get_analysis_timeseries_db().count_documents(
                {"user_id": {
                    "$in": list(unique_user_list)
                }}), edb.get_pipeline_state_db().count_documents({
                    "user_id": {
                        "$in": list(unique_user_list)
                    }
                })))

    all_rerun_arr = np.array(all_rerun_list)

    # Check whether no entry needs a rerun; in that case, we are done.
    # "No entry needs a rerun" means every flag is False, i.e. all of the
    # negated flags are True.
    if np.all(np.logical_not(all_rerun_list)):
        logging.info(
            "all entries in the timeline contain analysis results, no need to run the intake pipeline"
        )
    # if all entries need to be re-run, we must have had raw data throughout
    elif np.all(all_rerun_list):
        logging.info(
            "all entries in the timeline contain only raw data, need to run the intake pipeline"
        )
    else:
        logging.info(
            "timeline contains a mixture of analysis results and raw data - complain to shankari!"
        )
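The three branches hinge on `np.all`; a tiny worked example of the same logic with toy rerun flags:

import numpy as np

assert np.all(np.logical_not([False, False]))  # nothing needs a rerun -> done
assert np.all([True, True])                    # everything needs a rerun -> raw data only
mixed = [True, False]                          # mixture -> "complain to shankari!"
assert not np.all(mixed) and not np.all(np.logical_not(mixed))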
Example #33
def fix_trips_or_sections(collection):
    tsdb = edb.get_timeseries_db()
    for entry in collection.find():
        start_loc_entry = tsdb.find_one({'user_id': entry['user_id'],
            'metadata.key': 'background/location', 'data.ts': entry['start_ts']})
        end_loc_entry = tsdb.find_one({'user_id': entry['user_id'],
            'metadata.key': 'background/location', 'data.ts': entry['end_ts']})

        if start_loc_entry is not None:
            start_tz = start_loc_entry['metadata']['time_zone']
        else:
            logging.warn("No start_loc_entry found for trip %s, returning default" % entry)
            start_tz = "America/Los_Angeles"

        if end_loc_entry is not None:
            end_tz = end_loc_entry['metadata']['time_zone']
        else:
            logging.warn("No end_loc_entry found for trip %s, returning default" % entry)
            end_tz = "America/Los_Angeles"

        logging.debug("Found entries with metadata = %s, %s" % (start_tz, end_tz))

        entry['start_local_dt'] = get_local_date(entry['start_fmt_time'], start_tz)
        entry['end_local_dt'] = get_local_date(entry['end_fmt_time'], end_tz)

        collection.save(entry)
Example #34
def fix_stops_or_places(collection):
    tsdb = edb.get_timeseries_db()
    for entry in collection.find():
        if 'enter_ts' in entry:
            enter_loc_entry = tsdb.find_one({'user_id': entry['user_id'],
                'metadata.key': 'background/location', 'data.ts': entry['enter_ts']})
            if enter_loc_entry is not None:
                enter_tz = enter_loc_entry['metadata']['time_zone']
            else:
                enter_tz = "America/Los_Angeles"
            logging.debug("entry metadata timezone = %s" % enter_tz)
            entry['enter_local_dt'] = get_local_date(entry['enter_fmt_time'],
                enter_tz)
        else:
            logging.warning("No entry timestamp found, skipping")
        
        if 'exit_ts' in entry:
            exit_loc_entry = tsdb.find_one({'user_id': entry['user_id'],
                'metadata.key': 'background/location', 'data.ts': entry['exit_ts']})
            if exit_loc_entry is not None:
                exit_tz = exit_loc_entry['metadata']['time_zone']
            else:
                exit_tz = "America/Los_Angeles"
            logging.debug("exit metadata timezone = %s" % exit_tz)
            entry['exit_local_dt'] = get_local_date(entry['exit_fmt_time'], exit_tz)
        else:
            logging.warning("No exit timestamp found, skipping")

        collection.save(entry)
Example #35
    def setUp(self):
        etc.setupRealExample(
            self, "emission/tests/data/real_examples/shankari_2015-aug-27")
        # eaicf.filter_accuracy(self.testUUID)
        etc.runIntakePipeline(self.testUUID)
        # estfm.move_all_filters_to_data()
        logging.info("After loading, timeseries db size = %s" %
                     edb.get_timeseries_db().count())
        self.day_start_ts = 1440658800
        self.day_end_ts = 1440745200
        self.day_start_dt = esdldq.get_local_date(self.day_start_ts,
                                                  "America/Los_Angeles")
        self.day_end_dt = esdldq.get_local_date(self.day_end_ts,
                                                "America/Los_Angeles")

        # If we don't delete the time components, we end up with the upper and
        # lower bounds = 0, which basically matches nothing.
        del self.day_start_dt['hour']
        del self.day_end_dt['hour']

        del self.day_start_dt['minute']
        del self.day_end_dt['minute']

        del self.day_start_dt['second']
        del self.day_end_dt['second']
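After the deletions, both bounds are day-granularity LocalDate objects; for this dump (2015-08-27, Pacific time) the start bound conceptually looks like:

        # self.day_start_dt now resembles
        #   {'year': 2015, 'month': 8, 'day': 27, ...}
        # with no hour/minute/second components, so a local_dt range built
        # from it can match entries at any time of day on 2015-08-27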
Example #36
def getPublicData():
  ids = request.json['phone_ids']
  all_uuids = map(lambda id: UUID(id), ids)
  uuids = [uuid for uuid in all_uuids if uuid in estag.TEST_PHONE_IDS]

  from_ts = request.query.from_ts
  to_ts = request.query.to_ts

  time_range = estt.TimeQuery("metadata.write_ts", float(from_ts), float(to_ts))
  time_query = time_range.get_query()

  user_queries = map(lambda id: {'user_id': id}, uuids)

  for q in user_queries:
    q.update(time_query)

  num_entries_ts = map(lambda q: edb.get_timeseries_db().find(q).count(), user_queries)
  num_entries_uc = map(lambda q: edb.get_usercache_db().find(q).count(), user_queries)
  total_entries = sum(num_entries_ts + num_entries_uc)
  logging.debug("Total entries requested: %d" % total_entries)

  threshold = 200000
  if total_entries > threshold:
    data_list = None
  else:
    data_list = map(lambda u: esdc.find_entries(u, None, time_range), all_uuids)

  return {'phone_data': data_list}
Example #37
def fix_stops_or_places(collection):
    tsdb = edb.get_timeseries_db()
    for entry in collection.find():
        # Initialize both lookups so that the logging and the conversions
        # below are safe even when a timestamp is missing or unmatched.
        enter_loc_entry = None
        exit_loc_entry = None

        if 'enter_ts' in entry:
            enter_loc_entry = tsdb.find_one({
                'user_id': entry['user_id'],
                'metadata.key': 'background/location',
                'data.ts': entry['enter_ts']
            })
        else:
            logging.info("No enter timestamp found, skipping")

        if 'exit_ts' in entry:
            exit_loc_entry = tsdb.find_one({
                'user_id': entry['user_id'],
                'metadata.key': 'background/location',
                'data.ts': entry['exit_ts']
            })
        else:
            logging.info("No exit timestamp found, skipping")

        if enter_loc_entry is not None and exit_loc_entry is not None:
            logging.debug("Found entries with metadata = %s, %s" %
                          (enter_loc_entry['metadata']['time_zone'],
                           exit_loc_entry['metadata']['time_zone']))

        if 'enter_local_dt' in entry and enter_loc_entry is not None:
            entry['enter_local_dt'] = get_local_date(
                entry['enter_fmt_time'],
                enter_loc_entry['metadata']['time_zone'])
        if 'exit_local_dt' in entry and exit_loc_entry is not None:
            entry['exit_local_dt'] = get_local_date(
                entry['exit_fmt_time'],
                exit_loc_entry['metadata']['time_zone'])

        collection.save(entry)
Example #38
def export_timeline(user_id_str, day_str, file_name):
    logging.info(
        "Extracting timeline for user %s day %s and saving to file %s" %
        (user_id_str, day_str, file_name))

    # day_dt = pydt.datetime.strptime(day_str, "%Y-%m-%d").date()
    day_dt = pydt.datetime.strptime(day_str, "%Y-%m-%d")
    logging.debug("day_dt is %s" % day_dt)
    day_end_dt = day_dt + pydt.timedelta(days=1)
    # TODO: Convert to call to get_timeseries once we get that working
    # Or should we even do that?
    entry_list = list(edb.get_timeseries_db().find({
        'user_id':
        uuid.UUID(user_id_str),
        'metadata.write_local_dt': {
            '$gt': day_dt,
            "$lt": day_end_dt
        }
    }))
    logging.info("Found %d entries" % len(entry_list))
    json.dump(entry_list,
              open(file_name, "w"),
              default=bju.default,
              allow_nan=False,
              indent=4)
Example #39
def setupRealExample(testObj, dump_file):
    logging.info("Before loading, timeseries db size = %s" % edb.get_timeseries_db().count())
    with open(dump_file) as dfp:
        testObj.entries = json.load(dfp, object_hook = bju.object_hook)
        testObj.testUUID = uuid.uuid4()
        print("Setting up real example for %s" % testObj.testUUID)
        setupRealExampleWithEntries(testObj)
Example #40
def fix_stops_or_places(collection):
    tsdb = edb.get_timeseries_db()
    for entry in collection.find():
        # Initialize both lookups so the logging and conversions below are
        # safe even when a timestamp is missing or unmatched.
        enter_loc_entry = None
        exit_loc_entry = None

        if 'enter_ts' in entry:
            enter_loc_entry = tsdb.find_one({'user_id': entry['user_id'],
                'metadata.key': 'background/location', 'data.ts': entry['enter_ts']})
        else:
            logging.info("No enter timestamp found, skipping")

        if 'exit_ts' in entry:
            exit_loc_entry = tsdb.find_one({'user_id': entry['user_id'],
                'metadata.key': 'background/location', 'data.ts': entry['exit_ts']})
        else:
            logging.info("No exit timestamp found, skipping")

        if enter_loc_entry is not None and exit_loc_entry is not None:
            logging.debug("Found entries with metadata = %s, %s" %
                (enter_loc_entry['metadata']['time_zone'],
                 exit_loc_entry['metadata']['time_zone']))

        if 'enter_local_dt' in entry and enter_loc_entry is not None:
            entry['enter_local_dt'] = get_local_date(entry['enter_fmt_time'],
                enter_loc_entry['metadata']['time_zone'])
        if 'exit_local_dt' in entry and exit_loc_entry is not None:
            entry['exit_local_dt'] = get_local_date(entry['exit_fmt_time'],
                exit_loc_entry['metadata']['time_zone'])

        collection.save(entry)
Example #41
def export_timeline(user_id_str, day_str, file_name):
    logging.info(
        "Extracting timeline for user %s day %s and saving to file %s" %
        (user_id_str, day_str, file_name))

    # day_dt = pydt.datetime.strptime(day_str, "%Y-%m-%d").date()
    day_dt = pydt.datetime.strptime(day_str, "%Y-%m-%d")
    logging.debug("day_dt is %s" % day_dt)
    # TODO: Convert to call to get_timeseries once we get that working
    # Or should we even do that?
    user_query = {'user_id': uuid.UUID(user_id_str)}
    date_query = {
        'metadata.write_local_dt.year': day_dt.year,
        'metadata.write_local_dt.month': day_dt.month,
        'metadata.write_local_dt.day': day_dt.day
    }
    final_query = user_query
    final_query.update(date_query)
    entry_list = list(edb.get_timeseries_db().find(final_query))
    logging.info("Found %d entries" % len(entry_list))
    json.dump(entry_list,
              open(file_name, "w"),
              default=bju.default,
              allow_nan=False,
              indent=4)
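For a concrete sense of the merged filter, with `day_str = "2015-08-27"` the `find()` call above would receive (UUID value elided):

# {
#     'user_id': UUID('...'),
#     'metadata.write_local_dt.year': 2015,
#     'metadata.write_local_dt.month': 8,
#     'metadata.write_local_dt.day': 27
# }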
Example #42
def purge_entries_for_user(curr_uuid, is_purge_state, db_array=None):
    logging.info("For uuid = %s, deleting entries from the timeseries" % curr_uuid)
    if db_array is not None:
        [ts_db, ats_db, udb, psdb] = db_array
        logging.debug("db_array passed in with databases %s" % db_array)
    else:
        import emission.core.get_database as edb

        ts_db = edb.get_timeseries_db()
        ats_db = edb.get_analysis_timeseries_db()
        udb = edb.get_uuid_db()
        psdb = edb.get_pipeline_state_db()
        logging.debug("db_array not passed in, looking up databases")

    timeseries_del_result = ts_db.remove({"user_id": curr_uuid})
    logging.info("result = %s" % timeseries_del_result)

    logging.info("For uuid = %s, deleting entries from the analysis_timeseries" % curr_uuid)
    analysis_timeseries_del_result = ats_db.remove({"user_id": curr_uuid})
    logging.info("result = %s" % analysis_timeseries_del_result)

    logging.info("For uuid %s, deleting entries from the user_db" % curr_uuid)
    user_db_del_result = udb.remove({"uuid": curr_uuid})
    logging.info("result = %s" % user_db_del_result)

    if is_purge_state:
        logging.info("For uuid %s, deleting entries from the pipeline_state_db" % curr_uuid)
        psdb_del_result = psdb.remove({"user_id": curr_uuid})
        logging.info("result = %s" % psdb_del_result)
Example #43
    def __init__(self, user_id):
        super(BuiltinTimeSeries, self).__init__(user_id)
        self.key_query = lambda key: {"metadata.key": key}
        self.type_query = lambda entry_type: {"metadata.type": entry_type}
        self.user_query = {"user_id": self.user_id} # UUID is mandatory for this version
        self.timeseries_db = edb.get_timeseries_db()
        self.analysis_timeseries_db = edb.get_analysis_timeseries_db()
        self.ts_map = {
                "background/location": self.timeseries_db,
                "background/filtered_location": self.timeseries_db,
                "background/motion_activity": self.timeseries_db,
                "background/battery": self.timeseries_db,
                "statemachine/transition": self.timeseries_db,
                "config/sensor_config": self.timeseries_db,
                "segmentation/raw_trip": self.analysis_timeseries_db,
                "segmentation/raw_place": self.analysis_timeseries_db,
                "segmentation/raw_section": self.analysis_timeseries_db,
                "segmentation/raw_stop": self.analysis_timeseries_db,
                "analysis/smoothing": self.analysis_timeseries_db,
                "analysis/cleaned_trip": self.analysis_timeseries_db,
                "analysis/cleaned_place": self.analysis_timeseries_db,
                "analysis/cleaned_section": self.analysis_timeseries_db,
                "analysis/cleaned_stop": self.analysis_timeseries_db,
                "analysis/recreated_location": self.analysis_timeseries_db,
            }
Example #44
def setupRealExample(testObj, dump_file):
    logging.info("Before loading, timeseries db size = %s" %
                 edb.get_timeseries_db().count())
    with open(dump_file) as dfp:
        testObj.entries = json.load(dfp, object_hook=bju.object_hook)
        testObj.testUUID = uuid.uuid4()
        setupRealExampleWithEntries(testObj)
Example #45
    def testMoveToLongTerm(self):
        # 5 mins of data, every 30 secs = 10 entries per entry type. There are
        # 3 entry types, so 30 entries

        # First all the entries are in the usercache
        self.assertEqual(len(self.uc1.getMessage()), 30)
        self.assertEqual(len(list(self.ts1.find_entries())), 0)

        self.assertEqual(len(self.uc2.getMessage()), 30)
        self.assertEqual(len(list(self.ts2.find_entries())), 0)
        
        self.assertEqual(len(self.ucios.getMessage()), 30)
        self.assertEqual(len(list(self.tsios.find_entries())), 0)


        # Then we move entries for user1 into longterm
        enuah.UserCacheHandler.getUserCacheHandler(self.testUserUUID1).moveToLongTerm()

        # So we end up with all user1 entries in longterm
        self.assertEqual(len(self.uc1.getMessage()), 0)
        self.assertEqual(len(list(self.ts1.find_entries())), 30)
        
        # Then, we move entries for the ios user into longterm
        enuah.UserCacheHandler.getUserCacheHandler(self.testUserUUIDios).moveToLongTerm()
        
        self.assertEqual(len(self.ucios.getMessage()), 0)
        self.assertEqual(len(list(self.tsios.find_entries())), 30)
        
        # 30 entries from android + 30 entries from ios = 60
        self.assertEqual(edb.get_timeseries_db().find().count(), 60)
        self.assertEqual(edb.get_timeseries_error_db().find().count(), 0)

        # But all existing entries still in usercache for the second user
        self.assertEqual(len(self.uc2.getMessage()), 30)
        self.assertEqual(len(list(self.ts2.find_entries())), 0)
Example #46
    def __init__(self, user_id):
        super(BuiltinTimeSeries, self).__init__(user_id)
        self.key_query = lambda key: {"metadata.key": key}
        self.type_query = lambda entry_type: {"metadata.type": entry_type}
        self.user_query = {
            "user_id": self.user_id
        }  # UUID is mandatory for this version
        self.timeseries_db = edb.get_timeseries_db()
        self.analysis_timeseries_db = edb.get_analysis_timeseries_db()
        self.ts_map = {
            "background/location": self.timeseries_db,
            "background/filtered_location": self.timeseries_db,
            "background/motion_activity": self.timeseries_db,
            "background/battery": self.timeseries_db,
            "statemachine/transition": self.timeseries_db,
            "config/sensor_config": self.timeseries_db,
            "segmentation/raw_trip": self.analysis_timeseries_db,
            "segmentation/raw_place": self.analysis_timeseries_db,
            "segmentation/raw_section": self.analysis_timeseries_db,
            "segmentation/raw_stop": self.analysis_timeseries_db,
            "analysis/smoothing": self.analysis_timeseries_db,
            "analysis/cleaned_trip": self.analysis_timeseries_db,
            "analysis/cleaned_place": self.analysis_timeseries_db,
            "analysis/cleaned_section": self.analysis_timeseries_db,
            "analysis/cleaned_stop": self.analysis_timeseries_db,
            "analysis/recreated_location": self.analysis_timeseries_db,
        }
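The `ts_map` above is a plain dict dispatch from metadata key to backing collection. A minimal illustration of how a lookup might be wrapped; the helper name and the fallback for unmapped keys are assumptions, and the real class may raise or route differently:

    def get_ts_db_for_key_sketch(self, key):
        # hypothetical helper: route a metadata key to its collection,
        # assuming unmapped keys default to the raw timeseries
        return self.ts_map.get(key, self.timeseries_db)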
Example #47
def request_data(server_url, from_ts, to_ts, phone_ids, debug):
    url = server_url + "/eval/publicData/timeseries?from_ts=" + str(from_ts) + "&to_ts=" + str(to_ts)
    ids = {'phone_ids': phone_ids}
    headers = {'Content-Type': 'application/json'}

    r = requests.get(url, data=json.dumps(ids), headers=headers)

    dic = json.loads(r.text, object_hook=bju.object_hook)
    phone_list = dic['phone_data']

    if phone_list is None:
        print("Requested amount of data exceeds the threshold value.")
    else:
        # Load data to the local server
        tsdb = edb.get_timeseries_db()

        for index, entry_list in enumerate(phone_list):
            if debug:
                logging.debug("phone" + str(index+1) + " first entry (in Pacific Time):")

                if len(entry_list) == 0:
                    logging.debug("...has no data...")
                else:
                    logging.debug(str(entry_list[0].get('metadata').get('write_fmt_time')))

            for entry in entry_list:
                tsdb.save(entry)
Example #48
    def setUp(self):
        self.clearRelatedDb()
        etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27")
        eaicf.filter_accuracy(self.testUUID)
        estfm.move_all_filters_to_data()
        logging.info("After loading, timeseries db size = %s" % edb.get_timeseries_db().count())
        self.day_start_ts = 1440658800
        self.day_end_ts = 1440745200
Example #49
def fix_key(check_field, new_key):
    print("First entry for "+new_key+" is %s" % list(edb.get_timeseries_db().find(
                                    {"metadata.key": "config/sensor_config",
                                    check_field: {"$exists": True}}).sort(
                                        "metadata.write_ts").limit(1)))
    udb = edb.get_usercache_db()
    tdb = edb.get_timeseries_db()
    for i, entry in enumerate(edb.get_timeseries_db().find(
                                    {"metadata.key": "config/sensor_config",
                                    check_field: {"$exists": True}})):
        entry["metadata"]["key"] = new_key
        if i % 10000 == 0:
            print(udb.insert(entry))
            print(tdb.remove(entry["_id"]))
        else:
            udb.insert(entry)
            tdb.remove(entry["_id"])
Example #50
    def get_data_df(self, key, time_query=None):
        sort_key = self._get_sort_key(time_query)
        logging.debug("curr_query = %s, sort_key = %s" % (self._get_query([key], time_query), sort_key))
        result_it = edb.get_timeseries_db().find(self._get_query([key], time_query), {"data": True,
                "metadata.write_ts": True}).sort(sort_key, pymongo.ASCENDING)
        logging.debug("Found %s results" % result_it.count())
        # Dataframe doesn't like to work off an iterator - it wants everything in memory
        return pd.DataFrame([BuiltinTimeSeries._to_df_entry(e) for e in list(result_it)])
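Typical call pattern for the accessor above, assuming a timeseries obtained as in Example #28 and an optional TimeQuery like the ones used elsewhere in these examples:

    # ts = esta.TimeSeries.get_time_series(user_id)
    # tq = estt.TimeQuery("metadata.write_ts", start_ts, end_ts)
    # df = ts.get_data_df("background/filtered_location", time_query=tq)
    # len(df) == number of matching entries; columns come from entry["data"]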