Example #1
def tearDown(self):
    edb.get_timeseries_db().remove({"user_id": self.androidUUID})
    edb.get_timeseries_db().remove({"user_id": self.iosUUID})
    edb.get_usercache_db().remove({"user_id": self.androidUUID})
    edb.get_usercache_db().remove({"user_id": self.iosUUID})
    edb.get_place_db().remove()
    edb.get_trip_new_db().remove()
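These fixture methods assume a standard unittest.TestCase whose setUp creates the UUIDs being cleaned up. A minimal sketch of that context follows; the class name is hypothetical, and edb is the emission.core.get_database module that every snippet on this page relies on.

import unittest
import uuid

import emission.core.get_database as edb

class TestPipelineFixture(unittest.TestCase):  # hypothetical class name
    def setUp(self):
        self.androidUUID = uuid.uuid4()
        self.iosUUID = uuid.uuid4()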
Example #3
    def clearRelatedDb(self):
        edb.get_timeseries_db().remove()
        edb.get_place_db().remove()
        edb.get_stop_db().remove()

        edb.get_trip_new_db().remove()
        edb.get_section_new_db().remove()
def testSavePlace(self):
    new_place = esdp.create_new_place(self.testUserId)
    new_place.enter_ts = 5
    esdp.save_place(new_place)
    self.assertEqual(edb.get_place_db().find({"enter_ts": 5}).count(), 1)
    self.assertEqual(edb.get_place_db().find_one({"enter_ts": 5})["_id"], new_place.get_id())
    self.assertEqual(edb.get_place_db().find_one({"enter_ts": 5})["user_id"], self.testUserId)
    def clearRelatedDb(self):
        edb.get_timeseries_db().remove({'user_id': self.testUUID})
        edb.get_place_db().remove({'user_id': self.testUUID})
        edb.get_stop_db().remove({'user_id': self.testUUID})

        edb.get_trip_new_db().remove({'user_id': self.testUUID})
        edb.get_section_new_db().remove({'user_id': self.testUUID})
def del_objects(args):
    del_query = {}
    if args.user_id != "all":
        del_query['user_id'] = uuid.UUID(args.user_id)

    if args.date is None:
        trip_query = del_query
        place_query = del_query
    else:
        day_dt = pydt.datetime.strptime(args.date, "%Y-%m-%d")
        logging.debug("day_dt is %s" % day_dt)
        day_ts = time.mktime(day_dt.timetuple())
        logging.debug("day_ts is %s" % day_ts)
        trip_query = copy.copy(del_query)
        trip_query.update({"start_ts": {"$gt": day_ts}})
        place_query = copy.copy(del_query)
        place_query.update({"exit_ts": {"$gt": day_ts}})

    print("trip_query = %s" % trip_query)
    print("place_query = %s" % place_query)

    # Since sections have the same basic structure as trips, and stops have the
    # same basic structure as places, we can reuse the queries
    print("Deleting trips for %s after %s" % (args.user_id, args.date))
    print(edb.get_trip_new_db().remove(trip_query))
    print("Deleting sections for %s after %s" % (args.user_id, args.date))
    print(edb.get_section_new_db().remove(trip_query))
    print("Deleting places for %s after %s" % (args.user_id, args.date))
    print(edb.get_place_db().remove(place_query))
    print("Deleting stops for %s after %s" % (args.user_id, args.date))
    print(edb.get_stop_db().remove(place_query))
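A minimal sketch of how del_objects might be wired to the command line. The parser below is an assumption, not part of the original snippet, but the argument names match the fields the function reads (args.user_id and args.date).

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("user_id",
        help="user UUID as a string, or 'all' to match every user")
    parser.add_argument("--date", default=None,
        help="YYYY-MM-DD; only delete objects started/exited after this day")
    del_objects(parser.parse_args())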
Example #9
def set_up_trips(list_of_cluster_data, user_id):
    # Import in here to avoid recursive imports
    # TODO: This should really be moved to a separate class that creates the
    # entire graph at one time
    import emission.storage.decorations.common_place_queries as esdcpq
    clear_existing_trips(user_id)
    for dct in list_of_cluster_data:
        start_loc = gj.Point(dct['start_coords'].coordinate_list())
        end_loc = gj.Point(dct['end_coords'].coordinate_list())
        start_place_id = esdcpq.get_common_place_at_location(
            start_loc).get_id()
        end_place_id = esdcpq.get_common_place_at_location(end_loc).get_id()
        #print 'dct["sections"].trip_id %s is' % dct["sections"][0]
        probabilites = np.zeros((DAYS_IN_WEEK, HOURS_IN_DAY))
        for sec in dct["sections"]:
            probabilites[get_day(sec), get_start_hour(sec)] += 1

        trip = make_new_common_trip()
        trip.user_id = user_id
        trip.start_place = start_place_id
        trip.end_place = end_place_id
        trip.start_loc = start_loc
        trip.end_loc = end_loc
        trip.probabilites = probabilites
        trip.trips = [unc_trip.get_id() for unc_trip in dct["sections"]]
        place_db = edb.get_place_db()

        save_common_trip(trip)
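For reference, a hypothetical entry of list_of_cluster_data, inferred from the attribute accesses above. The _Coord stub is an assumption standing in for whatever coordinate wrapper the pipeline actually uses; real entries would also carry section objects consumable by get_day() and get_start_hour().

class _Coord:  # hypothetical stand-in exposing coordinate_list()
    def __init__(self, lng, lat):
        self.lng, self.lat = lng, lat
    def coordinate_list(self):
        return [self.lng, self.lat]

cluster_entry = {
    "start_coords": _Coord(-122.27, 37.87),
    "end_coords": _Coord(-122.26, 37.88),
    "sections": [],  # section objects with get_id() in real data
}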
Example #10
def get_aggregate_places(time_query, box=None):
    curr_query = _get_ts_query(time_query)
    if box:
        curr_query.update({"location": {"$geoWithin": {"$box": box}}})
    place_doc_cursor = edb.get_place_db().find(curr_query).sort(
        time_query.timeType, pymongo.ASCENDING)
    return [ecwp.Place(doc) for doc in place_doc_cursor]
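A usage sketch for the aggregate query. MongoDB's $geoWithin/$box filter takes the lower-left and upper-right corners as [longitude, latitude] pairs; the coordinates below are illustrative, and time_query is the same kind of object get_places receives (see the sketch after the get_places example below).

berkeley_box = [[-122.30, 37.85], [-122.25, 37.90]]
places_in_box = get_aggregate_places(time_query, box=berkeley_box)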
Example #12
def get_places(user_id, time_query):
    curr_query = _get_ts_query(time_query)
    curr_query.update({"user_id": user_id})
    place_doc_cursor = edb.get_place_db().find(curr_query).sort(
        time_query.timeType, pymongo.ASCENDING)
    logging.debug("%d places found in database" % place_doc_cursor.count())
    # TODO: Fix "TripIterator" and return it instead of this list
    return [ecwp.Place(doc) for doc in place_doc_cursor]
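A minimal usage sketch, assuming the TimeQuery wrapper from emission.storage.timeseries.timequery; the import path and constructor signature are assumptions, and any object exposing a timeType attribute plus the fields _get_ts_query reads would do.

import emission.storage.timeseries.timequery as estt

tq = estt.TimeQuery("enter_ts", 1440658800, 1440745200)  # one day, epoch seconds
places = get_places(user_id, tq)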
Example #14
def get_all_place_objs(common_place):
    # common_place.trips is assumed to hold trip objects carrying
    # start_place/end_place ids (see set_up_trips above)
    place_db = edb.get_place_db()
    start_places = []
    end_places = []
    for t in common_place.trips:
        start = place_db.find_one({"_id": t.start_place})
        end = place_db.find_one({"_id": t.end_place})
        start_places.append(start)
        end_places.append(end)
    return start_places, end_places
Example #16
def get_last_place(user_id):
    """
    There are many ways to find the last place.  One would be to find the one
    with the max enter_ts.  But that is not performant because we would need to
    retrieve all the enter_ts and find their max, which is expensive. Instead, we
    use the property that we process data in chunks of trips, so the last place
    would have been created and entered but not exited.
    """
    ret_place_doc = edb.get_place_db().find_one({
        'user_id': user_id,
        'exit_ts': {
            '$exists': False
        }
    })
    logging.debug("last place doc = %s" % ret_place_doc)
    if ret_place_doc is None:
        return None
    ret_place = ecwp.Place(ret_place_doc)
    assert ('exit_ts' not in ret_place)
    assert ('exit_fmt_time' not in ret_place)
    assert ('starting_trip' not in ret_place)
    return ret_place
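The docstring's "created and entered but not exited" invariant suggests the call pattern below. Pairing get_last_place with create_new_place (shown in a later example on this page) is a sketch of the intended flow, not code from the source.

last_place = get_last_place(user_id)
if last_place is None:
    # no open place yet, e.g. the first chunk of data for this user
    last_place = create_new_place(user_id)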
    logging.info("About to convert %s entries" % result_cursor.count())
    for i, wrapper in enumerate(result_cursor):
        entry = convert_wrapper_to_entry(key, wrapper)
        if entry.get_id() != wrapper["_id"]:
            logging.warn("entry.id = %s, wrapper.id = %s" % (entry.get_id(), wrapper["_id"]))
        if i % 10000 == 0:
            print "converted %s -> %s" % (wrapper, entry)
        edb.get_timeseries_db().insert(entry)
        collection.remove(wrapper)

def move_ts_entries(key):
    tdb = edb.get_timeseries_db()
    atdb = edb.get_analysis_timeseries_db()

    result_cursor = tdb.find({'metadata.key': key})
    logging.info("About to convert %s entries" % result_cursor.count())

    for i, entry_doc in enumerate(result_cursor):
        if i % 10000 == 0:
            print "moved %s from one ts to the other" % (entry_doc)
        atdb.insert(entry_doc)
        tdb.remove(entry_doc)

if __name__ == '__main__':
    # No arguments - muahahahaha. Just going to copy known fields over.
    convert_collection(edb.get_trip_new_db(), "segmentation/raw_trip")
    convert_collection(edb.get_place_db(), "segmentation/raw_place")
    convert_collection(edb.get_section_new_db(), "segmentation/raw_section")
    convert_collection(edb.get_stop_db(), "segmentation/raw_stop")
    move_ts_entries("analysis/smoothing")
Example #18
def create_new_place(user_id):
    _id = edb.get_place_db().save({'user_id': user_id})
    logging.debug("Created new place %s for user %s" % (_id, user_id))
    return ecwp.Place({"_id": _id, 'user_id': user_id})
Example #21
def save_place(place):
    edb.get_place_db().save(place)
Example #23
def get_place(place_id):
    return ecwp.Place(edb.get_place_db().find_one({"_id": place_id}))
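Taken together with create_new_place and save_place above, a hedged round-trip sketch; esdp stands for the decorations module these helpers live in, as in the testSavePlace example near the top of the page.

place = esdp.create_new_place(user_id)
place.enter_ts = 5
esdp.save_place(place)
assert esdp.get_place(place.get_id()).get_id() == place.get_id()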
            else:
                exit_tz = "America/Los_Angeles"
            logging.debug("exit metadata timezone = %s" % exit_tz)
            entry['exit_local_dt'] = get_local_date(entry['exit_fmt_time'], exit_tz)
        else:
            logging.warning("No exit timestamp found, skipping")

        collection.save(entry)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("key",
        help="the key representing the stream that we want to fix")
    parser.add_argument("-f", "--filename",
        help="a saved timeline whose local_dt needs to be fixed. If this is specified, key is ignored")

    args = parser.parse_args()
    if args.filename is not None:
        fix_file(args.filename)
    elif args.key == "trips":
        fix_trips_or_sections(edb.get_trip_new_db())
    elif args.key == "sections":
        fix_trips_or_sections(edb.get_section_new_db())
    elif args.key == "places":
        fix_stops_or_places(edb.get_place_db())
    elif args.key == "stops":
        fix_stops_or_places(edb.get_stop_db())
    else:
        fix_timeseries(args.key)

def setUp(self):
    self.testUserId = uuid.uuid4()
    edb.get_place_db().remove()