Example #1
0
    def segment_into_sections(self, timeseries, time_query):
        """
        Determine locations within the specified time that represent segmentation points for a trip.

        :param timeseries: the time series for this user
        :param time_query: the range to consider for segmentation
        :return: a list of tuples [(start1, end1, type1), (start2, end2, type2), ...]
        where start/end are the first and last location rows that fall inside a
        motion change and type is the ``type`` of the motion that starts it.
        end[n] and start[n+1] are typically assumed to be adjacent. The list is
        empty when there are no location points; when there are no motion
        changes, it holds at most the single section returned by
        ``self.get_section_if_applicable``.
        """
        motion_changes = self.segment_into_motion_changes(timeseries, time_query)
        location_points = timeseries.get_data_df("background/filtered_location", time_query)
        if len(location_points) == 0:
            logging.debug("There are no points in the trip. How the heck did we segment it?")
            return []

        if len(motion_changes) == 0:
            dummy_sec = self.get_section_if_applicable(timeseries, time_query, location_points)
            return [dummy_sec] if dummy_sec is not None else []

        # Now, we know that we have location points and we have motion_changes.
        section_list = []
        # Sometimes, on iOS, we have no overlap between motion detection
        # and location points.
        # In a concrete example, the motion points are:
        # 13         100             high    10  2016-02-22T15:36:06.491621-08:00
        # 14         100             high     0  2016-02-22T15:36:09.353743-08:00
        # 15         100             high    10  2016-02-22T15:36:13.169997-08:00
        # 16          75           medium     0  2016-02-22T15:36:13.805993-08:00
        # while the trip points are 2016-02-22T15:36:00 and then
        # 2016-02-22T15:36:23. So there are no location points within
        # that very narrow range. And there are no more motion points
        # until the trip end at 15:37:35. This is because, unlike android,
        # we cannot specify a sampling frequency for the motion activity
        # So let us extend the first motion change to the beginning of the
        # trip, and the last motion change to the end of the trip
        first_start, first_end = motion_changes[0]
        motion_changes[0] = (
            self.extend_activity_to_location(first_start, location_points.iloc[0]),
            first_end)
        # Read the last pair only after the first one has been rewritten: with a
        # single motion change both refer to the same list slot.
        last_start, last_end = motion_changes[-1]
        motion_changes[-1] = (
            last_start,
            self.extend_activity_to_location(last_end, location_points.iloc[-1]))

        for (start_motion, end_motion) in motion_changes:
            logging.debug("Considering %s from %s -> %s" %
                          (start_motion.type, start_motion.fmt_time, end_motion.fmt_time))
            # Find points that correspond to this section
            raw_section_df = location_points[(location_points.ts >= start_motion.ts) &
                                             (location_points.ts <= end_motion.ts)]
            if len(raw_section_df) == 0:
                # logging.warn is a deprecated alias; logging.warning is the supported name
                logging.warning("Found no location points between %s and %s" % (start_motion, end_motion))
            else:
                logging.debug("with iloc, section start point = %s, section end point = %s" %
                              (ecwl.Location(raw_section_df.iloc[0]), ecwl.Location(raw_section_df.iloc[-1])))
                section_list.append((raw_section_df.iloc[0], raw_section_df.iloc[-1], start_motion.type))
            # if this lack of overlap is part of an existing set of sections,
            # then it is fine, because in the section segmentation code, we
            # will mark it as a transition
        return section_list
    def segment_into_sections(self, timeseries, distance_from_place, time_query):
        """
        Split the given time range of a trip into sections, one per motion change.

        :param timeseries: the time series for this user
        :param distance_from_place: compared against ``self.distance_threshold`` to
        decide whether a dummy section is created when there are no motion changes
        :param time_query: the range to consider for segmentation
        :return: a list of tuples [(start1, end1, type1), (start2, end2, type2), ...]
        where start/end are location rows and type is the motion type of the section.
        end[n] and start[n+1] are typically assumed to be adjacent.
        """
        motion_changes = self.segment_into_motion_changes(timeseries, time_query)
        location_points = timeseries.get_data_df("background/filtered_location", time_query)

        if len(location_points) == 0:
            logging.debug("No location points found for query %s, returning []" % time_query)
            return []

        trip_first_point = location_points.iloc[0]
        trip_last_point = location_points.iloc[-1]

        # Create sections for each motion. At this point, we need to decide a policy
        # on how to deal with the gaps. Let's pick a reasonable default for now.
        # TODO: Restructure into policy that can be passed in.
        sections = []
        for motion_pair in motion_changes:
            (start_motion, end_motion) = motion_pair
            logging.debug("Considering %s from %s -> %s" %
                          (start_motion.type, start_motion.fmt_time, end_motion.fmt_time))
            # Keep only the points whose timestamp lies inside this motion change
            not_before_start = location_points.ts >= start_motion.ts
            not_after_end = location_points.ts <= end_motion.ts
            matching_points = location_points[not_before_start & not_after_end]
            if len(matching_points) == 0:
                logging.info("Found no location points between %s and %s" % (start_motion, end_motion))
                continue
            logging.debug("with iloc, section start point = %s, section end point = %s" %
                          (ecwl.Location(matching_points.iloc[0]), ecwl.Location(matching_points.iloc[-1])))
            sections.append((matching_points.iloc[0], matching_points.iloc[-1], start_motion.type))

        logging.debug("len(section_list) == %s" % len(sections))
        if len(sections) != 0:
            return sections

        # Nothing matched: fall back to a single dummy section spanning the trip
        num_changes = len(motion_changes)
        if num_changes == 1:
            (start_motion, end_motion) = motion_changes[0]
            if start_motion.type == end_motion.type:
                logging.debug("No section because start_motion == end_motion, creating one dummy section")
                sections.append((trip_first_point, trip_last_point, start_motion.type))
        elif num_changes == 0:
            # there are no useful high confidence motions, so we add a section of type
            # UNKNOWN as long as it is a discernable trip (end != start) and not a
            # spurious trip
            if distance_from_place > self.distance_threshold:
                logging.debug("No high confidence motions, but "
                    "distance %s > threshold %s, creating dummy section of type UNKNOWN" %
                              (distance_from_place, self.distance_threshold))
                sections.append((trip_first_point, trip_last_point, ecwm.MotionTypes.UNKNOWN))

        return sections
Example #3
0
def create_measurement(coordinate, timestamp, velocity, altitude, user_id):
    """
    Build a "background/filtered_location" entry for a single measurement.

    :param coordinate: indexable whose element 0 is used as the latitude and
    element 1 as the longitude
    :param timestamp: stored as ``ts`` and also used to derive ``fmt_time`` and
    ``local_dt`` in UTC
    :param velocity: stored as ``sensed_speed``
    :param altitude: stored as ``altitude``
    :param user_id: the user the entry is created for
    :return: the entry, with ``metadata.type`` and ``metadata.platform`` filled in
    """
    # TODO: Rename to create_location_measurement
    lat = coordinate[0]
    lon = coordinate[1]
    location_fields = dict(
        ts=timestamp,
        latitude=lat,
        longitude=lon,
        sensed_speed=velocity,
        accuracy=0,
        bearing=0,
        filter='distance',
        fmt_time=arrow.get(timestamp).to('UTC').format(),
        # This should not be necessary. TODO: Figure out how we can avoid this.
        # Note that the point is built as (longitude, latitude).
        loc=gj.Point((lon, lat)),
        local_dt=ecwld.LocalDate.get_local_date(timestamp, 'UTC'),
        altitude=altitude,
    )
    entry = ecwe.Entry.create_entry(user_id,
                                    "background/filtered_location",
                                    ecwl.Location(**location_fields),
                                    create_id=True)
    # The 'type' field is required by the server when we push the entry to the
    # user cache, so we add it here. The platform is an arbitrarily chosen
    # formatter; in the future we might want a dedicated "fake user" formatter group.
    for meta_key, meta_value in (('type', 'sensor-data'), ('platform', 'android')):
        entry['metadata'][meta_key] = meta_value
    return entry
Example #4
0
 def testFeatureGenWithOnePoint(self):
     """
     Feature generation for a section whose start and end coincide.

     Inserts a single recreated location, builds a cleaned section that starts
     and ends at that point at the same timestamp, and checks that columns
     5..15 and 19..20 of the generated feature row stay zero.
     """
     # ensure that the start and end datetimes are the same, since the average calculation uses
     # the total distance and the total duration
     ts = esta.TimeSeries.get_time_series(self.testUUID)
     trackpoint1 = ecwlo.Location({u'coordinates': [0, 0], 'type': 'Point'})
     ts.insert_data(self.testUUID, "analysis/recreated_location",
                    trackpoint1)
     # Read the clock once: two separate arrow.now() calls can differ, which
     # would contradict the "same start and end" premise of this test.
     now_ts = arrow.now().timestamp
     testSeg = ecws.Section({
         "start_loc": trackpoint1,
         "end_loc": trackpoint1,
         "distance": 500,
         "sensed_mode": 1,
         "duration": 150,
         "start_ts": now_ts,
         "end_ts": now_ts,
         "_id": 2,
         "speeds": [],
         "distances": [],
     })
     testSegEntry = ecwe.Entry.create_entry(self.testUUID,
                                            "analysis/cleaned_section",
                                            testSeg)
     d = testSegEntry.data
     m = testSegEntry.metadata
     enufc.expand_start_end_data_times(d, m)
     testSegEntry["data"] = d
     testSegEntry["metadata"] = m
     ts.insert(testSegEntry)
     featureMatrix = np.zeros([1, len(self.pipeline.featureLabels)])
     self.pipeline.updateFeatureMatrixRowWithSection(
         featureMatrix, 0, testSegEntry)
     logging.debug("featureMatrix = %s" % featureMatrix)
     self.assertEqual(np.count_nonzero(featureMatrix[0][5:16]), 0)
     self.assertEqual(np.count_nonzero(featureMatrix[0][19:21]), 0)
Example #5
0
def _get_tz_ranges(loc_df):
    """
    Group consecutive location rows into ranges that share a timezone.

    :param loc_df: location dataframe with ``ts`` and ``local_dt`` columns, where
    each ``local_dt`` value has a "timezone" key
    :return: an empty list when ``loc_df`` has no rows; otherwise a dataframe
    with one row per range and the columns ``timezone``, ``start_ts``, ``end_ts``.
    A range ends at the ``ts`` of the first point with a different timezone, and
    the final range ends at the last ``ts`` in ``loc_df``.
    """
    if len(loc_df) == 0:
        return []

    # We know that there is at least one entry, so we can access it with impunity
    range_start_ts = loc_df.ts.iloc[0]
    active_tz = loc_df.local_dt.iloc[0]["timezone"]

    ranges = []
    for record in loc_df.to_dict('records'):
        point = ecwl.Location(record)
        point_tz = point.local_dt["timezone"]
        if point_tz == active_tz:
            continue
        # Timezone switched: close the running range at this point and open a new one
        ranges.append({
            'timezone': active_tz,
            'start_ts': range_start_ts,
            'end_ts': point.ts
        })
        range_start_ts = point.ts
        active_tz = point_tz

    # Always close the trailing range. For the common single-timezone case,
    # this is the only entry.
    ranges.append({
        'timezone': active_tz,
        'start_ts': range_start_ts,
        'end_ts': loc_df.ts.iloc[-1]
    })
    logging.debug("tz_ranges = %s" % ranges)
    return pd.DataFrame(ranges)
Example #6
0
def create_places_and_trips(user_id, segmentation_points,
                            segmentation_method_name):
    """
    Turn segmentation points into an alternating chain of places and trips.

    :param user_id: the user whose places and trips are created
    :param segmentation_points: iterable of (start_loc_doc, end_loc_doc) pairs,
    one per trip
    :param segmentation_method_name: stored as ``source`` on every object
    created here
    :return: None; places and trips are persisted through ``esdp.save_place``
    and ``esdt.save_trip``
    """
    # First, retrieve the last place so that we can stitch it to the newly
    # created trip. There are easy and hard cases. In the easy case, the trip
    # was continuous, was stopped when the trip end was detected, and there is
    # no gap between the start of the trip and the last place. But there can be
    # other issues caused by gaps in tracking. A more detailed description of
    # dealing with gaps in tracking can be found in the wiki.
    # Only the easy case is handled here.
    previous_place = esdp.get_last_place(user_id)
    if previous_place is None:
        previous_place = start_new_chain(user_id)
        previous_place.source = segmentation_method_name

    # Theoretically, we can do some sanity checks here to make sure
    # that we are fairly close to the last point. Maybe mark some kind
    # of confidence level based on that?
    logging.debug("segmentation_point_list has length %s" %
                  len(segmentation_points))
    for point_pair in segmentation_points:
        (start_loc_doc, end_loc_doc) = point_pair
        logging.debug("start_loc_doc = %s, end_loc_doc = %s" %
                      (start_loc_doc, end_loc_doc))
        start_loc = ecwl.Location(start_loc_doc)
        end_loc = ecwl.Location(end_loc_doc)
        logging.debug("start_loc = %s, end_loc = %s" % (start_loc, end_loc))

        # previous place -> trip -> next place
        trip = esdt.create_new_trip(user_id)
        trip.source = segmentation_method_name
        next_place = esdp.create_new_place(user_id)
        next_place.source = segmentation_method_name

        stitch_together_start(previous_place, trip, start_loc)
        stitch_together_end(next_place, trip, end_loc)

        esdp.save_place(previous_place)
        esdt.save_trip(trip)

        previous_place = next_place

    # The final place has no outgoing trip yet, but we need to save it so
    # that it can be the last place for the next run
    esdp.save_place(previous_place)
Example #7
0
def get_location_entry_list_from_df(loc_time_df, ts="ts", latitude="latitude", longitude="longitude", fmt_time="fmt_time"):
    """
    Convert every row of a dataframe into a "background/location" entry.

    :param loc_time_df: dataframe with one row per location
    :param ts: name of the column holding the timestamp
    :param latitude: name of the column holding the latitude
    :param longitude: name of the column holding the longitude
    :param fmt_time: name of the column holding the formatted time
    :return: a list of entries created for "dummy_user", one per row, in row
    order; the row index (as a string) becomes the location's ``_id``
    """
    def _row_to_entry(idx, row):
        # The geojson point is built as [longitude, latitude]
        location_doc = {
            "latitude": row[latitude],
            "longitude": row[longitude],
            "ts": row[ts],
            "_id": str(idx),
            "fmt_time": row[fmt_time],
            "loc": gj.Point(coordinates=[row[longitude], row[latitude]]),
        }
        return ecwe.Entry.create_entry(
            "dummy_user", "background/location", ecwl.Location(location_doc))

    return [_row_to_entry(idx, row) for idx, row in loc_time_df.iterrows()]
Example #8
0
def remove_outliers(raw_loc_entry_list, filtered_point_id_list):
    """
    Drop the location entries whose id is listed as filtered.

    :param raw_loc_entry_list: iterable of entries exposing ``get_id()`` and ``data``
    :param filtered_point_id_list: iterable of the ids to drop (list, tuple,
    pandas Series, ...); membership is decided on the iterated values
    :return: a list with one new ``ecwl.Location`` per retained entry, filled by
    ``_copy_non_excluded`` with ``filtered_location_excluded`` as the exclusion
    list, in input order
    """
    # Materialise the ids once. This makes each membership test O(1) instead of
    # a linear scan, and makes a pandas Series behave like a list of its values
    # (``x in series`` tests the index labels, not the values).
    try:
        excluded_ids = set(filtered_point_id_list)
    except TypeError:
        # Unhashable ids: fall back to a linear scan over the values
        excluded_ids = list(filtered_point_id_list)

    filtered_loc_list = []
    for loc_entry in raw_loc_entry_list:
        if loc_entry.get_id() in excluded_ids:
            continue
        filtered_location_data = ecwl.Location()
        _copy_non_excluded(old_data=loc_entry.data,
                           new_data=filtered_location_data,
                           excluded_list=filtered_location_excluded)
        filtered_loc_list.append(filtered_location_data)
    return filtered_loc_list
def get_all_points_for_range(user_id, key, start_ts, end_ts):
    """
    Return the feature list for all entries of ``key`` in a write_ts range.

    :param user_id: the user whose time series is queried
    :param key: the time series key to read
    :param start_ts: start of the range, matched against "write_ts"
    :param end_ts: end of the range, matched against "write_ts"
    :return: the result of ``get_feature_list_for_point_array`` on the matching
    entries, each wrapped as an ``ecwl.Location``
    """
    import emission.net.usercache.abstract_usercache as enua

    write_ts_query = enua.UserCache.TimeQuery("write_ts", start_ts, end_ts)
    user_ts = esta.TimeSeries.get_time_series(user_id)

    points = []
    for entry in user_ts.find_entries([key], write_ts_query):
        points.append(ecwl.Location(user_ts._to_df_entry(entry)))

    return get_feature_list_for_point_array(points)
Example #10
0
def create_end_location_from_leg(leg):
    """
    Build the end location of an OTP leg.

    :param leg: mapping with an 'endTime' value and a "to" mapping that has
    "lon" and "lat" values
    :return: an ``ecwl.Location`` with ``ts``, ``local_dt``, ``fmt_time`` (both
    in UTC) and ``loc`` (a point built as (lon, lat)) set
    """
    # TODO: Old function. Should be removed
    end_time = otp_time_to_ours(leg['endTime'])
    end_ts = end_time.timestamp
    return ecwl.Location(
        ts=end_ts,
        local_dt=ecwld.LocalDate.get_local_date(end_ts, 'UTC'),
        fmt_time=end_time.to("UTC").format(),
        loc=gj.Point(tuple(float(leg["to"][axis]) for axis in ("lon", "lat"))))
Example #11
0
def create_start_location_from_trip_plan(plan):
    """
    Build the start location of an OTP trip plan.

    :param plan: mapping with an 'itineraries' list, whose first element has a
    "startTime" value, and a "from" mapping that has "lon" and "lat" values
    :return: an ``ecwl.Location`` with ``ts``, ``local_dt``, ``fmt_time`` (both
    in UTC) and ``loc`` (a point built as (lon, lat)) set
    """
    # TODO: Old function. Should be removed
    first_itinerary = plan['itineraries'][0]
    departure_time = otp_time_to_ours(first_itinerary["startTime"])
    departure_ts = departure_time.timestamp
    return ecwl.Location(
        ts=departure_ts,
        local_dt=ecwld.LocalDate.get_local_date(departure_ts, 'UTC'),
        fmt_time=departure_time.to("UTC").format(),
        loc=gj.Point(tuple(float(plan["from"][axis]) for axis in ("lon", "lat"))))
    def segment_into_sections(self, timeseries, time_query):
        """
        Determine locations within the specified time that represent segmentation points for a trip.

        :param timeseries: the time series for this user
        :param time_query: the range to consider for segmentation
        :return: a list of tuples [(start1, end1, type1), (start2, end2, type2), ...]
        where start/end are the first and last location rows that fall inside a
        motion change and type is the ``type`` of the motion that starts it.
        end[n] and start[n+1] are typically assumed to be adjacent. Motion
        changes without any location point are skipped; the list is empty when
        there are no location points at all.
        """
        motion_changes = self.segment_into_motion_changes(
            timeseries, time_query)
        location_points = timeseries.get_data_df(
            "background/filtered_location", time_query)

        # Without any location point there is nothing to build a section from,
        # and an empty dataframe may not even have a ``ts`` column to filter on.
        if len(location_points) == 0:
            logging.debug("No location points found for query %s, returning []" % time_query)
            return []

        # Create sections for each motion. At this point, we need to decide a policy on how to deal with the gaps.
        # Let's pick a reasonable default for now.
        # TODO: Restructure into policy that can be passed in.
        section_list = []
        for (start_motion, end_motion) in motion_changes:
            logging.debug("Considering %s from %s -> %s" %
                          (start_motion.type, start_motion.fmt_time,
                           end_motion.fmt_time))
            # Find points that correspond to this section
            raw_section_df = location_points[
                (location_points.ts >= start_motion.ts)
                & (location_points.ts <= end_motion.ts)]
            if len(raw_section_df) == 0:
                # logging.warn is a deprecated alias; logging.warning is the supported name
                logging.warning("Found no location points between %s and %s" %
                                (start_motion, end_motion))
            else:
                logging.debug(
                    "with iloc, section start point = %s, section end point = %s"
                    % (ecwl.Location(raw_section_df.iloc[0]),
                       ecwl.Location(raw_section_df.iloc[-1])))
                section_list.append(
                    (raw_section_df.iloc[0], raw_section_df.iloc[-1],
                     start_motion.type))
        return section_list
Example #13
0
    def testRemoveOutliers(self):
        """
        Marking every stored point as filtered must leave no points behind.

        Inserts ten filtered locations with consecutive timestamps, reads them
        back both as entries and as a dataframe, and passes the dataframe's
        ``_id`` column as the list of points to remove.
        """
        first_ts = 12345
        location_key = "background/filtered_location"
        for offset in range(10):
            fake_loc = ecwl.Location({
                "ts": first_ts + offset,
                "lat": 50 + offset,
                "lng": 180 + offset
            })
            fake_entry = ecwe.Entry.create_entry(self.testUUID,
                                                 location_key,
                                                 fake_loc)
            self.ts.insert(fake_entry)

        # The query window is wider than the inserted range on both sides
        query = estt.TimeQuery("data.ts", first_ts - 10, first_ts + 10 + 10)
        stored_entries = list(self.ts.find_entries([location_key], query))
        stored_df = self.ts.get_data_df(location_key, query)
        remaining = eaicc.remove_outliers(stored_entries, stored_df["_id"])
        self.assertEqual(len(stored_entries), len(stored_df))
        self.assertEqual(len(remaining), 0)
Example #14
0
def segment_trip_into_sections(user_id, trip_entry, trip_source):
    """
    Split a raw trip into raw sections, linked by stops, and store them.

    :param user_id: the user the trip belongs to
    :param trip_entry: the trip entry to segment; its id, ``data.start_ts`` and
    ``data.end_ts`` are read
    :param trip_source: name of the filter that produced the trip, either
    "DwellSegmentationTimeFilter" or "DwellSegmentationDistFilter"; selects the
    section segmentation method
    :return: None; sections and stops are written through the user's time series
    :raises AssertionError: if ``trip_source`` is neither of the two known names
    """
    # One time series handle is enough for both the segmentation and the writes
    ts = esta.TimeSeries.get_time_series(user_id)
    trip_id = trip_entry.get_id()
    time_query = esda.get_time_query_for_trip_like(esda.RAW_TRIP_KEY, trip_id)
    distance_from_place = _get_distance_from_start_place_to_end(trip_entry)

    if (trip_source == "DwellSegmentationTimeFilter"):
        import emission.analysis.intake.segmentation.section_segmentation_methods.smoothed_high_confidence_motion as shcm
        shcmsm = shcm.SmoothedHighConfidenceMotion(60, 100, [
            ecwm.MotionTypes.TILTING, ecwm.MotionTypes.UNKNOWN,
            ecwm.MotionTypes.STILL
        ])
    else:
        assert (trip_source == "DwellSegmentationDistFilter"), \
            "Unknown trip_source %s" % trip_source
        import emission.analysis.intake.segmentation.section_segmentation_methods.smoothed_high_confidence_with_visit_transitions as shcmvt
        shcmsm = shcmvt.SmoothedHighConfidenceMotionWithVisitTransitions(
            49,
            50,
            [
                ecwm.MotionTypes.TILTING,
                ecwm.MotionTypes.UNKNOWN,
                ecwm.MotionTypes.STILL,
                ecwm.MotionTypes.NONE,  # iOS only
                ecwm.MotionTypes.STOPPED_WHILE_IN_VEHICLE
            ])  # iOS only

    segmentation_points = shcmsm.segment_into_sections(ts, distance_from_place,
                                                       time_query)

    # Since we are segmenting an existing trip into sections, we do not need to worry about linking with
    # a prior place, since it will be linked through the trip object.
    # So this is much simpler than the trip case.
    # Again, since this is segmenting a trip, we can just start with a section

    # TODO: Should we link the locations to the trips this way, or by using a foreign key?
    # If we want to use a foreign key, then we need to include the object id in the data df as well so that we can
    # set it properly.
    def get_loc_for_ts(time):
        # Location stored at exactly this data timestamp
        return ecwl.Location(
            ts.get_entry_at_ts("background/filtered_location", "data.ts",
                               time)["data"])

    def get_loc_for_row(row):
        # Location data for a dataframe row returned by the segmentation method
        return ts.df_row_to_entry("background/filtered_location", row).data

    trip_start_loc = get_loc_for_ts(trip_entry.data.start_ts)
    trip_end_loc = get_loc_for_ts(trip_entry.data.end_ts)
    logging.debug("trip_start_loc = %s, trip_end_loc = %s" %
                  (trip_start_loc, trip_end_loc))

    prev_section_entry = None
    last_idx = len(segmentation_points) - 1

    for (i, (start_loc_doc, end_loc_doc,
             sensed_mode)) in enumerate(segmentation_points):
        logging.debug("start_loc_doc = %s, end_loc_doc = %s" %
                      (start_loc_doc, end_loc_doc))
        start_loc = get_loc_for_row(start_loc_doc)
        end_loc = get_loc_for_row(end_loc_doc)
        logging.debug("start_loc = %s, end_loc = %s" % (start_loc, end_loc))

        section = ecwc.Section()
        section.trip_id = trip_id
        if prev_section_entry is None:
            # This is the first point, so we want to start from the start of the trip, not the start of this segment
            start_loc = trip_start_loc
        if i == last_idx:
            # This is the last point, so we want to end at the end of the trip, not at the end of this segment
            # Particularly in this case, if we don't do this, then the trip end may overshoot the section end
            end_loc = trip_end_loc

        fill_section(section, start_loc, end_loc, sensed_mode)
        # We create the entry after filling in the section so that we know
        # that the data is included properly
        section_entry = ecwe.Entry.create_entry(user_id,
                                                esda.RAW_SECTION_KEY,
                                                section,
                                                create_id=True)

        if prev_section_entry is not None:
            # If this is not the first section, create a stop to link the two sections together
            # The expectation is prev_section -> stop -> curr_section
            stop = ecws.Stop()
            stop.trip_id = trip_id
            stop_entry = ecwe.Entry.create_entry(user_id,
                                                 esda.RAW_STOP_KEY,
                                                 stop,
                                                 create_id=True)
            logging.debug("stop = %s, stop_entry = %s" % (stop, stop_entry))
            stitch_together(prev_section_entry, stop_entry, section_entry)
            ts.insert(stop_entry)
            # The previous section was already inserted; it is updated because
            # stitching has just linked it to the new stop
            ts.update(prev_section_entry)

        # Every section is inserted as soon as it is built, so the last one,
        # which has no ending stop, is stored too.
        ts.insert(section_entry)
        prev_section_entry = section_entry
Example #15
0
def segment_trip_into_sections(user_id, trip_id, trip_source):
    """
    Split an existing trip into sections, linked by stops, and save them.

    :param user_id: the user the trip belongs to
    :param trip_id: id of the trip to segment
    :param trip_source: name of the filter that produced the trip, either
    "DwellSegmentationTimeFilter" or "DwellSegmentationDistFilter"; selects the
    section segmentation method
    :return: None; sections and stops are persisted through ``esds.save_section``
    and ``esdst.save_stop``
    """
    ts = esta.TimeSeries.get_time_series(user_id)
    trip = esdt.get_trip(trip_id)
    time_query = esdt.get_time_query_for_trip(trip_id)

    if trip_source != "DwellSegmentationTimeFilter":
        assert (trip_source == "DwellSegmentationDistFilter")
        import emission.analysis.intake.segmentation.section_segmentation_methods.smoothed_high_confidence_with_visit_transitions as shcmvt
        ignored_modes = [
            ecwm.MotionTypes.TILTING,
            ecwm.MotionTypes.UNKNOWN,
            ecwm.MotionTypes.STILL,
            ecwm.MotionTypes.NONE,  # iOS only
            ecwm.MotionTypes.STOPPED_WHILE_IN_VEHICLE,  # iOS only
        ]
        segmenter = shcmvt.SmoothedHighConfidenceMotionWithVisitTransitions(
            49, ignored_modes)
    else:
        import emission.analysis.intake.segmentation.section_segmentation_methods.smoothed_high_confidence_motion as shcm
        ignored_modes = [
            ecwm.MotionTypes.TILTING,
            ecwm.MotionTypes.UNKNOWN,
            ecwm.MotionTypes.STILL,
        ]
        segmenter = shcm.SmoothedHighConfidenceMotion(60, ignored_modes)

    segmentation_points = segmenter.segment_into_sections(ts, time_query)

    # We are segmenting an existing trip, so there is no prior place to link to:
    # the sections are linked through the trip object, and we can simply start
    # with a section.

    # TODO: Should we link the locations to the trips this way, or by using a foreign key?
    # If we want to use a foreign key, then we need to include the object id in the data df as well so that we can
    # set it properly.
    def location_at(timestamp):
        # Location stored at exactly this data timestamp
        loc_doc = ts.get_entry_at_ts("background/filtered_location", "data.ts",
                                     timestamp)
        return ecwl.Location(loc_doc["data"])

    trip_start_loc = location_at(trip.start_ts)
    trip_end_loc = location_at(trip.end_ts)
    logging.debug("trip_start_loc = %s, trip_end_loc = %s" %
                  (trip_start_loc, trip_end_loc))

    previous = None
    for i, point in enumerate(segmentation_points):
        (start_loc_doc, end_loc_doc, sensed_mode) = point
        logging.debug("start_loc_doc = %s, end_loc_doc = %s" %
                      (start_loc_doc, end_loc_doc))
        start_loc = ecwl.Location(start_loc_doc)
        end_loc = ecwl.Location(end_loc_doc)
        logging.debug("start_loc = %s, end_loc = %s" % (start_loc, end_loc))

        current = esds.create_new_section(user_id, trip_id)
        is_first = previous is None
        if is_first:
            # The first section starts at the start of the trip, not at the
            # start of this segment
            start_loc = trip_start_loc
        if i + 1 == len(segmentation_points):
            # The last section ends at the end of the trip, not at the end of
            # this segment; otherwise the trip end may overshoot the section end
            end_loc = trip_end_loc

        fill_section(current, start_loc, end_loc, sensed_mode)

        if not is_first:
            # Link the two sections with a stop: previous -> stop -> current
            stop = esdst.create_new_stop(user_id, trip_id)
            stitch_together(previous, stop, current)
            esdst.save_stop(stop)
            # The previous section is now linked to the stop, so save it again
            esds.save_section(previous)

        esds.save_section(current)
        previous = current
def section_to_geojson(section, tl):
    """
    Convert a section into its geojson representation.

    This is the trickiest part of the visualization.
    The section is basically a collection of points with a line through them.
    So the representation is a feature collection with two members: a feature
    collection holding one point feature per location, and a single line
    feature through those locations, whose properties are a copy of the section.

    :param section: the section to be converted
    :param tl: object exposing ``get_object(id)``; used to look up the parent
    trip and its start place when this is the first section of the trip
    :return: a feature collection which is the geojson version of the section
    """

    ts = esta.TimeSeries.get_time_series(section.user_id)
    entry_it = ts.find_entries(["background/filtered_location"],
                               esds.get_time_query_for_section(
                                   section.get_id()))
    # points_df = ts.get_data_df("background/filtered_location", esds.get_time_query_for_section(section.get_id()))
    # points_df = points_df.drop("elapsedRealTimeNanos", axis=1)
    # logging.debug("points_df.columns = %s" % points_df.columns)

    # TODO: Decide whether we want to use Rewrite to use dataframes throughout instead of python arrays.
    # dataframes insert nans. We could use fillna to fill with default values, but if we are not actually
    # using dataframe features here, it is unclear how much that would help.
    feature_array = []
    section_location_array = [
        ecwl.Location(ts._to_df_entry(entry)) for entry in entry_it
    ]
    if len(section_location_array) != 0:
        logging.debug("first element in section_location_array = %s" %
                      section_location_array[0])

        # Fudge the end point so that we don't have a gap because of the ts != write_ts mismatch
        # TODO: Fix this once we are able to query by the data timestamp instead of the metadata ts
        if section_location_array[-1].loc != section.end_loc:
            last_loc_doc = ts.get_entry_at_ts("background/filtered_location",
                                              "data.ts", section.end_ts)
            # NOTE(review): this appended element is an Entry's data with "_id"
            # set by hand, while the other elements are ecwl.Location objects
            # built through _to_df_entry; the get_id() filter below presumably
            # works for both - confirm
            last_loc_data = ecwe.Entry(last_loc_doc).data
            last_loc_data["_id"] = last_loc_doc["_id"]
            section_location_array.append(last_loc_data)
            # After the append, [-2] is the point that used to be last
            logging.debug(
                "Adding new entry %s to fill the end point gap between %s and %s"
                % (last_loc_data.loc, section_location_array[-2].loc,
                   section.end_loc))

    # Find the list of points to filter
    filtered_points_entry_doc = ts.get_entry_at_ts("analysis/smoothing",
                                                   "data.section",
                                                   section.get_id())
    if filtered_points_entry_doc is None:
        logging.debug("No filtered_points_entry, returning unchanged array")
        filtered_section_location_array = section_location_array
    else:
        # TODO: Figure out how to make collections work for the wrappers and then change this to an Entry
        filtered_points_entry = ad.AttrDict(filtered_points_entry_doc)
        filtered_point_list = list(filtered_points_entry.data.deleted_points)
        logging.debug("deleting %s points from section points" %
                      len(filtered_point_list))
        # Keep only the locations whose id is not among the deleted points
        filtered_section_location_array = [
            l for l in section_location_array
            if l.get_id() not in filtered_point_list
        ]

    # NOTE(review): speed and distance are read before the emptiness check
    # below; presumably add_dist_heading_speed returns both columns even for
    # an empty frame - confirm
    with_speeds = eaicl.add_dist_heading_speed(
        pd.DataFrame(filtered_section_location_array))
    speeds = list(with_speeds.speed)
    distances = list(with_speeds.distance)

    if len(filtered_section_location_array) != 0:
        # Copy the computed values back onto the location objects; the
        # dataframe index label is used as the list position
        for idx, row in with_speeds.iterrows():
            # TODO: Remove instance of setting value without going through wrapper class
            filtered_section_location_array[idx]["speed"] = row["speed"]
            filtered_section_location_array[idx]["distance"] = row["distance"]

    points_feature_array = [
        location_to_geojson(l) for l in filtered_section_location_array
    ]

    points_line_feature = point_array_to_line(filtered_section_location_array)
    # If this is the first section, we already start from the trip start. But we actually need to start from the
    # prior place. Fudge this too. Note also that we may want to figure out how to handle this properly in the model
    # without needing fudging. TODO: Unclear how exactly to do this
    if section.start_stop is None:
        # This is the first section. So we need to find the start place of the parent trip
        parent_trip = tl.get_object(section.trip_id)
        start_place_of_parent_trip = tl.get_object(parent_trip.start_place)
        # Prepend the place's coordinates so that the line starts at the place
        points_line_feature.geometry.coordinates.insert(
            0, start_place_of_parent_trip.location.coordinates)

    # Record each point's position within the section
    for i, point_feature in enumerate(points_feature_array):
        point_feature.properties["idx"] = i

    points_line_feature.id = str(section.get_id())
    # Shallow copy, so that the properties set below do not modify the section
    points_line_feature.properties = copy.copy(section)
    points_line_feature.properties["feature_type"] = "section"
    points_line_feature.properties["sensed_mode"] = str(
        points_line_feature.properties.sensed_mode)
    # The distance is recomputed from the retained points
    points_line_feature.properties["distance"] = sum(distances)
    points_line_feature.properties["speeds"] = speeds
    points_line_feature.properties["distances"] = distances

    _del_non_derializable(points_line_feature.properties,
                          ["start_loc", "end_loc"])

    # Order matters to consumers: the point collection first, then the line
    feature_array.append(gj.FeatureCollection(points_feature_array))
    feature_array.append(points_line_feature)

    return gj.FeatureCollection(feature_array)
Example #17
0
def create_places_and_trips(user_id, segmentation_points,
                            segmentation_method_name):
    """
    Create one raw trip and one raw place per segmentation point, chaining
    each new trip onto the place that precedes it.

    The chain starts from the user's last raw place if one is found, and
    from ``start_new_chain(user_id)`` otherwise. Only the "easy" case is
    handled here: the trip is assumed to be continuous, with no gap between
    the last place and the start of the trip. Gaps in tracking (e.g. restart
    events) are not dealt with in this function; a more detailed description
    of dealing with them can be found in the wiki.

    :param user_id: the user whose trips and places are being created
    :param segmentation_points: a sized iterable of
        (start_loc_doc, end_loc_doc) pairs; each element of a pair is passed
        to ``ecwl.Location``
    :param segmentation_method_name: stored as the ``source`` of every trip
        and place created by this call
    :return: None. The results are written through the user's time series.
    """
    def _tagged_entry(key, payload):
        # Every object created here records which segmentation method
        # produced it before it is wrapped into an entry under `key`.
        payload.source = segmentation_method_name
        return ecwe.Entry.create_entry(user_id, key, payload, create_id=True)

    timeseries = esta.TimeSeries.get_time_series(user_id)

    # Retrieve the last place so that the first new trip can be stitched to it.
    prev_place_entry = esdp.get_last_place_entry(esda.RAW_PLACE_KEY, user_id)
    if prev_place_entry is not None:
        # The payload is read here but is not referenced again below.
        prev_place = prev_place_entry.data
    else:
        # No earlier place to attach to, so begin a fresh chain.
        prev_place_entry = _tagged_entry("segmentation/raw_place",
                                         start_new_chain(user_id))

    # Theoretically, some sanity checks could go here to make sure that we
    # are fairly close to the last point, perhaps recording a confidence
    # level based on that.
    logging.debug("segmentation_point_list has length %s" %
                  len(segmentation_points))

    for start_doc, end_doc in segmentation_points:
        logging.debug("start_loc_doc = %s, end_loc_doc = %s" %
                      (start_doc, end_doc))
        trip_start = ecwl.Location(start_doc)
        trip_end = ecwl.Location(end_doc)
        logging.debug("start_loc = %s, end_loc = %s" % (trip_start, trip_end))

        # The trip entry is created before the place entry; keep that order.
        trip_entry = _tagged_entry("segmentation/raw_trip", ecwrt.Rawtrip())
        next_place_entry = _tagged_entry("segmentation/raw_place",
                                         ecwrp.Rawplace())

        # previous place -> trip -> next place
        stitch_together_start(prev_place_entry, trip_entry, trip_start)
        stitch_together_end(next_place_entry, trip_entry, trip_end)

        timeseries.insert(trip_entry)
        # The previous place entry was handed to stitch_together_start above
        # (presumably modified there), so it has to be written back or the
        # change will be lost.
        timeseries.update(prev_place_entry)
        prev_place_entry = next_place_entry

    # The final place has not been stitched to a following trip yet, but it
    # must be saved so that it can act as the last place for the next run.
    # NOTE(review): if segmentation_points is empty and a last place already
    # existed, this inserts the entry that was just fetched - confirm that
    # callers never pass an empty list, or that insert tolerates it.
    timeseries.insert(prev_place_entry)