Ejemplo n.º 1
0
def create_places_and_trips(user_id, segmentation_points, segmentation_method_name):
    """
    Create raw trip and raw place entries for newly detected segments.

    For every (start, end) pair in segmentation_points, a new
    "segmentation/raw_trip" entry and a new "segmentation/raw_place" entry are
    created. The trip is stitched to the previous place at its start and to
    the new place at its end, and the new place becomes the previous place for
    the next pair. The chain starts from the entry returned by
    esdp.get_last_place_entry or, if there is none, from a place built by
    start_new_chain.

    Only the easy case is handled: the trip is continuous and there is no gap
    between the last place and the start of the trip. Gaps in tracking are
    not dealt with here.

    :param user_id: the user whose time series is read and written
    :param segmentation_points: sized iterable of (start_loc_doc, end_loc_doc)
        pairs; each element is converted with ts.df_row_to_entry, so these are
        presumably rows of the filtered location dataframe (confirm with caller)
    :param segmentation_method_name: stored as `source` on every place and
        trip created here
    :return: None, the results are saved to the user's time series
    """
    ts = esta.TimeSeries.get_time_series(user_id)

    def new_entry(key, wrapper):
        # Tag the freshly built object with the segmentation method and wrap
        # it in an entry with a newly created id
        wrapper.source = segmentation_method_name
        return ecwe.Entry.create_entry(user_id, key, wrapper, create_id=True)

    def loc_for_row(row):
        # Defined once, outside the loop, since it does not depend on the
        # current segment
        return ts.df_row_to_entry("background/filtered_location", row).data

    # Retrieve the last place so that we can stitch it to the first newly
    # created trip. If there is none, start a new chain.
    last_place_entry = esdp.get_last_place_entry(esda.RAW_PLACE_KEY, user_id)
    if last_place_entry is None:
        last_place_entry = new_entry("segmentation/raw_place",
                                     start_new_chain(user_id))

    # Theoretically, we can do some sanity checks here to make sure
    # that we are fairly close to the last point. Maybe mark some kind
    # of confidence level based on that?
    logging.debug("segmentation_point_list has length %s",
                  len(segmentation_points))
    for (start_loc_doc, end_loc_doc) in segmentation_points:
        logging.debug("start_loc_doc = %s, end_loc_doc = %s",
                      start_loc_doc, end_loc_doc)
        start_loc = loc_for_row(start_loc_doc)
        end_loc = loc_for_row(end_loc_doc)
        logging.debug("start_loc = %s, end_loc = %s", start_loc, end_loc)

        # Stitch together the last place and the current trip
        curr_trip_entry = new_entry("segmentation/raw_trip", ecwrt.Rawtrip())
        new_place_entry = new_entry("segmentation/raw_place", ecwrp.Rawplace())

        stitch_together_start(last_place_entry, curr_trip_entry, start_loc)
        stitch_together_end(new_place_entry, curr_trip_entry, end_loc)

        ts.insert(curr_trip_entry)
        # The previous place was handed to stitch_together_start above, so
        # save it again with update; the place ending this trip is saved
        # later, once it has been stitched to the next trip (or after the loop)
        ts.update(last_place_entry)
        last_place_entry = new_place_entry

    # The last last_place hasn't been stitched together yet, but we
    # need to save it so that it can be the last_place for the next run
    # NOTE(review): if segmentation_points is empty and the last place came
    # from get_last_place_entry, this inserts an entry that was read from the
    # store - presumably callers never pass an empty list; confirm.
    ts.insert(last_place_entry)
def create_and_link_timeline(tl, user_id, trip_map):
    """
    Build the cleaned places for a raw timeline and link them to cleaned trips.

    Walks tl.trips in order. A raw trip whose id is a key in trip_map has a
    cleaned representation: its start is linked to the current cleaned start
    place, a new cleaned place is created from its raw end place and linked as
    its end, and that new place becomes the current cleaned start place. A raw
    trip that is not in trip_map is a squished trip: its raw start place is
    merged into the current cleaned start place.

    :param tl: raw timeline; must provide first_place(), trips and get_object()
    :param user_id: user whose last cleaned place is looked up
    :param trip_map: mapping from raw trip id to the cleaned trip entry
    :return: (last_cleaned_place, timeline) where last_cleaned_place is the
        entry returned by esdp.get_last_place_entry (None if there was none)
        and timeline holds the places created here plus the cleaned trips
        that were linked; (None, None) if no starting place could be found
    """
    last_cleaned_place = esdp.get_last_place_entry(esda.CLEANED_PLACE_KEY, user_id)
    cleaned_places = []
    curr_cleaned_start_place = last_cleaned_place
    if curr_cleaned_start_place is None:
        # If it is not present - maybe this user is getting started for the first
        # time, we create an entry based on the first trip from the timeline
        curr_cleaned_start_place = get_filtered_place(tl.first_place())
        if curr_cleaned_start_place is None:
            # If the timeline has no entries, we give up and return.
            # This has to be checked before the place is logged or stored,
            # otherwise the get_id() call below fails on None.
            return (None, None)
        logging.debug("no last cleaned place found, created place with id %s",
                      curr_cleaned_start_place.get_id())
        # We just created this place here, so lets add it to the created places
        # and insert rather than update it
        cleaned_places.append(curr_cleaned_start_place)
    else:
        logging.debug("Cleaned place %s found, using it",
                      curr_cleaned_start_place.get_id())

    unsquished_trips = []

    for raw_trip in tl.trips:
        if raw_trip.get_id() in trip_map:
            # there is a clean representation for this trip, so we can link its
            # start to the curr_cleaned_start_place
            curr_cleaned_trip = trip_map[raw_trip.get_id()]
            raw_start_place = tl.get_object(raw_trip.data.start_place)
            link_trip_start(curr_cleaned_trip, curr_cleaned_start_place, raw_start_place)

            raw_end_place = tl.get_object(raw_trip.data.end_place)
            curr_cleaned_end_place = get_filtered_place(raw_end_place)
            cleaned_places.append(curr_cleaned_end_place)
            link_trip_end(curr_cleaned_trip, curr_cleaned_end_place, raw_end_place)

            curr_cleaned_start_place = curr_cleaned_end_place
            logging.debug("Found mapping %s -> %s, added links",
                          raw_trip.get_id(), curr_cleaned_trip.get_id())
            unsquished_trips.append(curr_cleaned_trip)
        else:
            # This is a squished trip, so we combine its start place with the
            # current cleaned start place. We do not need to combine both start
            # and end places, since the end place of one trip is the start
            # place of another. We combine start places instead of end places
            # because when the squishy part ends, we combine the start place of
            # the un-squished trip with the existing cleaned start and create a
            # new entry for the un-squished end.
            logging.debug("Found squished trip, linking raw start place %s to new cleaned place %s",
                          raw_trip.data.start_place, curr_cleaned_start_place.get_id())
            link_squished_place(curr_cleaned_start_place,
                                tl.get_object(raw_trip.data.start_place))

    # Report the trips that are actually returned, not the size of trip_map
    logging.debug("Finished creating and linking timeline, returning %d places and %d trips",
                  len(cleaned_places), len(unsquished_trips))
    return (last_cleaned_place, esdtl.Timeline(esda.CLEANED_PLACE_KEY,
                                               esda.CLEANED_TRIP_KEY,
                                               cleaned_places,
                                               unsquished_trips))
    def testGetLastPlace(self):
        """
        A raw place saved without an exit_ts is expected to be found as the
        last place; once its exit_ts is set and saved, the lookup must return
        None.
        """
        user = self.testUserId

        # Store a place that has been entered but not exited
        open_place = ecwrp.Rawplace()
        open_place.enter_ts = 5
        insert_series = esta.TimeSeries.get_time_series(user)
        open_place_id = insert_series.insert_data(
            user, "segmentation/raw_place", open_place)

        # Read it back as an entry and save it again
        stored_entry = esda.get_entry(esda.RAW_PLACE_KEY, open_place_id)
        logging.debug("old place entry is %s " % stored_entry)
        resave_series = esta.TimeSeries.get_time_series(user)
        resave_series.update(stored_entry)

        # No exit_ts has been set so far, so this lookup should return the
        # place stored above. Close it by setting exit_ts and save it.
        found_entry = esdp.get_last_place_entry(esda.RAW_PLACE_KEY, user)
        found_entry["data"]["exit_ts"] = 6
        logging.debug("About to update entry to %s" % found_entry)
        close_series = esta.TimeSeries.get_time_series(user)
        close_series.update(found_entry)

        # With the exit_ts saved, there is no last place any more
        found_after_exit = esdp.get_last_place_entry(esda.RAW_PLACE_KEY, user)
        self.assertIsNone(found_after_exit)
Ejemplo n.º 4
0
    def testGetLastPlace(self):
        """
        Save a raw place that has no exit_ts, look it up as the last place,
        set its exit_ts, and check that no last place is found afterwards.
        """
        uid = self.testUserId

        def user_series():
            # A fresh time series handle for the test user on every use
            return esta.TimeSeries.get_time_series(uid)

        place_without_exit = ecwrp.Rawplace()
        place_without_exit.enter_ts = 5
        saved_id = user_series().insert_data(
            uid, "segmentation/raw_place", place_without_exit)
        saved_entry = esda.get_entry(esda.RAW_PLACE_KEY, saved_id)
        logging.debug("old place entry is %s " % saved_entry)
        user_series().update(saved_entry)

        # The place saved above has no exit_ts set, so it is the last place
        current_last = esdp.get_last_place_entry(esda.RAW_PLACE_KEY, uid)
        current_last["data"]["exit_ts"] = 6
        logging.debug("About to update entry to %s" % current_last)
        user_series().update(current_last)

        # After the exit_ts has been set and saved, the lookup finds nothing
        self.assertIsNone(
            esdp.get_last_place_entry(esda.RAW_PLACE_KEY, uid))
Ejemplo n.º 5
0
def create_and_link_timeline(tl, user_id, trip_map):
    """
    Create cleaned places for the raw timeline `tl` and link them to the
    cleaned trips in `trip_map` (raw trip id -> cleaned trip).

    Returns (last_cleaned_place, timeline): the entry found by
    esdp.get_last_place_entry (None if there was none) and a timeline built
    from the places created here and trip_map.values(). Returns (None, None)
    when no starting place is available.
    """
    last_cleaned_place = esdp.get_last_place_entry(esda.CLEANED_PLACE_KEY,
                                                   user_id)
    new_places = []
    chain_place = last_cleaned_place
    if chain_place is None:
        # Nothing stored yet, maybe this user is getting started for the
        # first time: derive the starting place from the timeline itself
        chain_place = get_filtered_place(tl.first_place())
        if chain_place is None:
            # The timeline has no entries either, so give up
            return (None, None)
        # This place was created here, so it goes into the list of created
        # places to be inserted rather than updated
        new_places.append(chain_place)

    for raw_trip in tl.trips:
        if raw_trip.get_id() not in trip_map:
            # Squished trip: merge its raw start place into the place the
            # chain is currently at. Only start places are merged, since the
            # end place of one trip is the start place of the next; when the
            # squished run ends, the start of the next un-squished trip is
            # linked to this same place and a new place is made for its end.
            squished_start = tl.get_object(raw_trip.data.start_place)
            link_squished_place(chain_place, squished_start)
            continue

        # The trip has a cleaned representation: link its start to the
        # place the chain is currently at
        cleaned_trip = trip_map[raw_trip.get_id()]
        link_trip_start(cleaned_trip, chain_place,
                        tl.get_object(raw_trip.data.start_place))

        # Create the cleaned place for the end of the trip, link it, and
        # move the chain forward to it
        raw_end = tl.get_object(raw_trip.data.end_place)
        end_place = get_filtered_place(raw_end)
        new_places.append(end_place)
        link_trip_end(cleaned_trip, end_place, raw_end)
        chain_place = end_place

    cleaned_timeline = esdtl.Timeline(esda.CLEANED_PLACE_KEY,
                                      esda.CLEANED_TRIP_KEY,
                                      new_places, trip_map.values())
    return (last_cleaned_place, cleaned_timeline)
def create_places_and_trips(user_id, segmentation_points,
                            segmentation_method_name):
    """
    Create raw trip, raw place and untracked-time entries for new segments.

    For every (start, end) pair in segmentation_points, a new
    "segmentation/raw_trip" entry and a new "segmentation/raw_place" entry are
    created and passed to _link_and_save together with the previous place.
    When found_untracked_period reports a gap between the previous place and
    the start of the trip, a "segmentation/raw_untracked" entry and an extra
    place are linked in first, so that the chain has no hole. The chain starts
    from the entry returned by esdp.get_last_place_entry or, if there is none,
    from a place built by start_new_chain.

    :param user_id: the user whose time series is read and written
    :param segmentation_points: sized iterable of (start_loc_doc, end_loc_doc)
        pairs; each element is converted with ts.df_row_to_entry, so these are
        presumably rows of the filtered location dataframe (confirm with caller)
    :param segmentation_method_name: stored as `source` on every object
        created here
    :return: None, the results are saved to the user's time series
    """
    ts = esta.TimeSeries.get_time_series(user_id)

    def new_entry(key, wrapper):
        # Tag the freshly built object with the segmentation method and wrap
        # it in an entry with a newly created id
        wrapper.source = segmentation_method_name
        return ecwe.Entry.create_entry(user_id, key, wrapper, create_id=True)

    def loc_for_row(row):
        # Defined once, outside the loop, since it does not depend on the
        # current segment
        return ts.df_row_to_entry("background/filtered_location", row).data

    # Retrieve the last place so that we can stitch it to the first newly
    # created trip. If there is none, start a new chain.
    last_place_entry = esdp.get_last_place_entry(esda.RAW_PLACE_KEY, user_id)
    if last_place_entry is None:
        last_place_entry = new_entry("segmentation/raw_place",
                                     start_new_chain(user_id))

    # Theoretically, we can do some sanity checks here to make sure
    # that we are fairly close to the last point. Maybe mark some kind
    # of confidence level based on that?
    logging.debug("segmentation_point_list has length %s",
                  len(segmentation_points))
    for (start_loc_doc, end_loc_doc) in segmentation_points:
        logging.debug("start_loc_doc = %s, end_loc_doc = %s",
                      start_loc_doc, end_loc_doc)
        start_loc = loc_for_row(start_loc_doc)
        end_loc = loc_for_row(end_loc_doc)
        logging.debug("start_loc = %s, end_loc = %s", start_loc, end_loc)

        # Entries for the current trip and for the place where it ends
        curr_trip_entry = new_entry("segmentation/raw_trip", ecwrt.Rawtrip())
        new_place_entry = new_entry("segmentation/raw_place", ecwrp.Rawplace())

        if found_untracked_period(ts, last_place_entry.data, start_loc):
            # Fill in the gap in the chain with an untracked period
            curr_untracked_entry = new_entry("segmentation/raw_untracked",
                                             ecwut.Untrackedtime())
            restarted_place_entry = new_entry("segmentation/raw_place",
                                              ecwrp.Rawplace())

            # The untracked period starts at the location recorded when the
            # last place was entered, with its timestamp moved forward by
            # epq.END_FUZZ_AVOID_LTE
            untracked_start_loc = ecwe.Entry(
                ts.get_entry_at_ts("background/filtered_location", "data.ts",
                                   last_place_entry.data.enter_ts)).data
            untracked_start_loc["ts"] = (untracked_start_loc.ts +
                                         epq.END_FUZZ_AVOID_LTE)
            _link_and_save(ts, last_place_entry, curr_untracked_entry,
                           restarted_place_entry, untracked_start_loc,
                           start_loc)
            logging.debug("Created untracked period %s from %s to %s",
                          curr_untracked_entry.get_id(),
                          curr_untracked_entry.data.start_ts,
                          curr_untracked_entry.data.end_ts)
            logging.debug("Resetting last_place_entry from %s to %s",
                          last_place_entry, restarted_place_entry)
            # The trip below now starts from the place after the gap
            last_place_entry = restarted_place_entry

        _link_and_save(ts, last_place_entry, curr_trip_entry, new_place_entry,
                       start_loc, end_loc)
        last_place_entry = new_place_entry

    # The last last_place hasn't been stitched together yet, but we
    # need to save it so that it can be the last_place for the next run
    # NOTE(review): if segmentation_points is empty and the last place came
    # from get_last_place_entry, this inserts an entry that was read from the
    # store - presumably callers never pass an empty list; confirm.
    ts.insert(last_place_entry)
def create_places_and_trips(user_id, segmentation_points, segmentation_method_name):
    """
    Turn newly detected segments into a chain of raw places and raw trips.

    The chain continues from the entry returned by esdp.get_last_place_entry,
    or from a place built by start_new_chain when there is no such entry. For
    each (start, end) pair a "segmentation/raw_trip" entry and a
    "segmentation/raw_place" entry are created and handed to _link_and_save
    along with the previous place. If found_untracked_period reports a gap
    before the trip, a "segmentation/raw_untracked" entry followed by an
    additional place is linked in ahead of the trip.

    :param user_id: the user whose time series is read and written
    :param segmentation_points: sized iterable of (start_loc_doc, end_loc_doc)
        pairs; each element is converted with ts.df_row_to_entry, so these are
        presumably rows of the filtered location dataframe (confirm with caller)
    :param segmentation_method_name: stored as `source` on every object
        created here
    :return: None, the results are saved to the user's time series
    """
    ts = esta.TimeSeries.get_time_series(user_id)

    def new_entry(key, wrapper):
        # Set the segmentation method as the source of the new object and
        # wrap it in an entry with a newly created id
        wrapper.source = segmentation_method_name
        return ecwe.Entry.create_entry(user_id, key, wrapper, create_id=True)

    def loc_for_row(row):
        # Built once instead of once per segment: it only depends on ts
        return ts.df_row_to_entry("background/filtered_location", row).data

    # First, retrieve the last place so that we can stitch it to the newly
    # created trip, starting a new chain if there is no last place
    last_place_entry = esdp.get_last_place_entry(esda.RAW_PLACE_KEY, user_id)
    if last_place_entry is None:
        last_place_entry = new_entry("segmentation/raw_place", start_new_chain(user_id))

    # Theoretically, we can do some sanity checks here to make sure
    # that we are fairly close to the last point. Maybe mark some kind
    # of confidence level based on that?
    logging.debug("segmentation_point_list has length %s", len(segmentation_points))
    for (start_loc_doc, end_loc_doc) in segmentation_points:
        logging.debug("start_loc_doc = %s, end_loc_doc = %s", start_loc_doc, end_loc_doc)
        start_loc = loc_for_row(start_loc_doc)
        end_loc = loc_for_row(end_loc_doc)
        logging.debug("start_loc = %s, end_loc = %s", start_loc, end_loc)

        # The current trip and the place at which it ends
        curr_trip_entry = new_entry("segmentation/raw_trip", ecwrt.Rawtrip())
        new_place_entry = new_entry("segmentation/raw_place", ecwrp.Rawplace())

        if found_untracked_period(ts, last_place_entry.data, start_loc):
            # Fill in the gap in the chain with an untracked period
            curr_untracked_entry = new_entry("segmentation/raw_untracked", ecwut.Untrackedtime())
            restarted_place_entry = new_entry("segmentation/raw_place", ecwrp.Rawplace())

            # Start of the untracked period: the location recorded at the
            # time the last place was entered, shifted by
            # epq.END_FUZZ_AVOID_LTE
            untracked_start_loc = ecwe.Entry(ts.get_entry_at_ts("background/filtered_location",
                                                     "data.ts", last_place_entry.data.enter_ts)).data
            untracked_start_loc["ts"] = untracked_start_loc.ts + epq.END_FUZZ_AVOID_LTE
            _link_and_save(ts, last_place_entry, curr_untracked_entry, restarted_place_entry,
                           untracked_start_loc, start_loc)
            logging.debug("Created untracked period %s from %s to %s",
                          curr_untracked_entry.get_id(),
                          curr_untracked_entry.data.start_ts,
                          curr_untracked_entry.data.end_ts)
            logging.debug("Resetting last_place_entry from %s to %s",
                          last_place_entry, restarted_place_entry)
            # The trip is linked to the place after the gap, not before it
            last_place_entry = restarted_place_entry

        _link_and_save(ts, last_place_entry, curr_trip_entry, new_place_entry, start_loc, end_loc)
        last_place_entry = new_place_entry

    # The last last_place hasn't been stitched together yet, but we
    # need to save it so that it can be the last_place for the next run
    # NOTE(review): if segmentation_points is empty and the last place came
    # from get_last_place_entry, this inserts an entry that was read from the
    # store - presumably callers never pass an empty list; confirm.
    ts.insert(last_place_entry)