def segment_current_trips(user_id):
    """Segment a user's pending filtered locations into trips and places.

    Reads the ``background/filtered_location`` entries that fall in the
    time range returned by ``epq.get_time_range_for_segmentation``, chooses
    the dwell segmentation method that matches the ``filter`` value(s)
    present in that data, and stores the resulting places and trips.

    Every path that completes records its outcome through ``epq``:

    * no location points, or no segmentation points -> marked done with a
      ``None`` last-processed timestamp
    * segmentation returned ``None``, or trip creation raised -> marked failed
    * trips created -> marked done with ``get_last_ts_processed(...)``

    :param user_id: the user whose time series is segmented
    :return: None
    """
    ts = esta.TimeSeries.get_time_series(user_id)
    time_query = epq.get_time_range_for_segmentation(user_id)

    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_time_filter as dstf
    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_dist_filter as dsdf

    dstfsm = dstf.DwellSegmentationTimeFilter(
        time_threshold=5 * 60,  # 5 mins
        point_threshold=9,
        distance_threshold=100)  # 100 m

    dsdfsm = dsdf.DwellSegmentationDistFilter(
        time_threshold=10 * 60,  # 10 mins
        point_threshold=9,
        distance_threshold=50)  # 50 m

    # Both dicts are keyed by the value found in the "filter" column
    filter_methods = {"time": dstfsm, "distance": dsdfsm}
    filter_method_names = {
        "time": "DwellSegmentationTimeFilter",
        "distance": "DwellSegmentationDistFilter"
    }

    # We need to use the appropriate filter based on the incoming data
    # So let's read in the location points for the specified query
    loc_df = ts.get_data_df("background/filtered_location", time_query)
    if len(loc_df) == 0:
        # no new segments, no need to keep looking at these again
        logging.debug("len(loc_df) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
        return

    # Drop missing filter values before collecting the distinct ones: a NaN
    # entry is not a key of filter_methods, and it would also make a
    # single-filter dataset look like a mixed one.
    filters_in_df = loc_df["filter"].dropna().unique()
    logging.debug("Filters in the dataframe = %s" % filters_in_df)
    if len(filters_in_df) == 1:
        # Common case - let's make it easy
        segmentation_points = filter_methods[
            filters_in_df[0]].segment_into_trips(ts, time_query)
    else:
        segmentation_points = get_combined_segmentation_points(
            ts, loc_df, time_query, filters_in_df, filter_methods)

    # Create and store trips and places based on the segmentation points
    if segmentation_points is None:
        epq.mark_segmentation_failed(user_id)
    elif len(segmentation_points) == 0:
        # no new segments, no need to keep looking at these again
        logging.debug("len(segmentation_points) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
    else:
        try:
            # The stored trips are labelled with the name of the first
            # filter found, even when several filters were combined
            create_places_and_trips(user_id, segmentation_points,
                                    filter_method_names[filters_in_df[0]])
            epq.mark_segmentation_done(user_id,
                                       get_last_ts_processed(filter_methods))
        except:
            # NOTE(review): the bare except also traps KeyboardInterrupt and
            # SystemExit. Kept as-is so that every failure is still recorded
            # as a failed run - consider narrowing to Exception.
            logging.exception("Trip generation failed for user %s" % user_id)
            epq.mark_segmentation_failed(user_id)
def segment_current_trips(user_id):
    """Run trip segmentation over the user's not-yet-processed locations.

    The filtered location points in the pending segmentation time range are
    loaded, the distinct values of their ``filter`` column decide which dwell
    segmentation method is applied (a single method when only one filter is
    present, the combined path otherwise), and the resulting segmentation
    points are turned into stored places and trips.

    The stage result is reported through ``epq``: done (with a ``None``
    timestamp) when there is nothing to segment, done with the last
    processed timestamp after trips are created, and failed when the
    segmentation returns ``None`` or trip creation raises.

    :param user_id: the user whose time series is segmented
    :return: None
    """
    ts = esta.TimeSeries.get_time_series(user_id)
    time_query = epq.get_time_range_for_segmentation(user_id)

    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_time_filter as dstf
    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_dist_filter as dsdf

    dstfsm = dstf.DwellSegmentationTimeFilter(time_threshold=5 * 60,  # 5 mins
                                              point_threshold=9,
                                              distance_threshold=100)  # 100 m

    dsdfsm = dsdf.DwellSegmentationDistFilter(time_threshold=10 * 60,  # 10 mins
                                              point_threshold=9,
                                              distance_threshold=50)  # 50 m

    # Lookup tables keyed by the value stored in the "filter" column
    filter_methods = {"time": dstfsm, "distance": dsdfsm}
    filter_method_names = {"time": "DwellSegmentationTimeFilter",
                           "distance": "DwellSegmentationDistFilter"}

    # We need to use the appropriate filter based on the incoming data
    # So let's read in the location points for the specified query
    loc_df = ts.get_data_df("background/filtered_location", time_query)
    if len(loc_df) == 0:
        # no new segments, no need to keep looking at these again
        logging.debug("len(loc_df) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
        return

    # Ignore rows with no filter value when listing the filters in use.
    # Without the dropna(), NaN would be reported as one more "filter",
    # which has no entry in filter_methods and would push data with a
    # single real filter onto the combined path.
    filters_in_df = loc_df["filter"].dropna().unique()
    logging.debug("Filters in the dataframe = %s" % filters_in_df)
    if len(filters_in_df) == 1:
        # Common case - let's make it easy
        segmentation_points = filter_methods[filters_in_df[0]].segment_into_trips(ts, time_query)
    else:
        segmentation_points = get_combined_segmentation_points(ts, loc_df, time_query,
                                                               filters_in_df,
                                                               filter_methods)

    # Create and store trips and places based on the segmentation points
    if segmentation_points is None:
        epq.mark_segmentation_failed(user_id)
    elif len(segmentation_points) == 0:
        # no new segments, no need to keep looking at these again
        logging.debug("len(segmentation_points) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
    else:
        try:
            # Only the first filter's method name is recorded as the source
            create_places_and_trips(user_id, segmentation_points,
                                    filter_method_names[filters_in_df[0]])
            epq.mark_segmentation_done(user_id, get_last_ts_processed(filter_methods))
        except:
            # NOTE(review): bare except also catches KeyboardInterrupt and
            # SystemExit; left unchanged so any failure is still marked as a
            # failed segmentation - consider narrowing to Exception.
            logging.exception("Trip generation failed for user %s" % user_id)
            epq.mark_segmentation_failed(user_id)
def segment_current_trips(user_id):
    """Segment a user's pending filtered locations into trips and places.

    Steps, in order:

    1. Load the ``background/filtered_location`` points for the time range
       returned by ``epq.get_time_range_for_segmentation``.
    2. Remove points whose ``ts`` is smaller than the preceding row's, both
       from the dataframe and from the timeseries database.
    3. Segment with the dwell method matching the single ``filter`` value in
       the data, or with the combined path when the number of distinct
       non-null filters is not exactly one.
    4. Store places and trips and record the outcome through ``epq``.

    :param user_id: the user whose time series is segmented
    :return: None
    """
    ts = esta.TimeSeries.get_time_series(user_id)
    time_query = epq.get_time_range_for_segmentation(user_id)

    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_time_filter as dstf
    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_dist_filter as dsdf

    # Segmentation method per "filter" value; built in time, distance order
    filter_methods = {
        "time": dstf.DwellSegmentationTimeFilter(
            time_threshold=5 * 60,  # 5 mins
            point_threshold=9,
            distance_threshold=100),  # 100 m
        "distance": dsdf.DwellSegmentationDistFilter(
            time_threshold=10 * 60,  # 10 mins
            point_threshold=9,
            distance_threshold=50),  # 50 m
    }
    # Name passed along when the places and trips are created
    filter_method_names = {
        "time": "DwellSegmentationTimeFilter",
        "distance": "DwellSegmentationDistFilter",
    }

    # The incoming data decides which filter applies, so fetch the location
    # points for the query first
    loc_df = ts.get_data_df("background/filtered_location", time_query)
    if len(loc_df) == 0:
        # Nothing new to segment; no point in revisiting this range
        logging.debug("len(loc_df) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
        return

    # A negative difference to the previous row means the point went back
    # in time
    went_backwards = loc_df.ts.diff() < 0
    out_of_order_points = loc_df[went_backwards]
    if len(out_of_order_points) > 0:
        logging.info("Found out of order points!")
        logging.info("%s" % out_of_order_points)
        # Take them out of the in-memory table ...
        loc_df = loc_df.drop(out_of_order_points.index.tolist())
        # ... and out of the database as well. Deleting stored data should
        # be generally discouraged, hence the discreet local import.
        import emission.core.get_database as edb
        out_of_order_id_list = out_of_order_points["_id"].tolist()
        logging.debug("out_of_order_id_list = %s" % out_of_order_id_list)
        id_query = {"_id": {"$in": out_of_order_id_list}}
        edb.get_timeseries_db().remove(id_query)

    filters_in_df = loc_df["filter"].dropna().unique()
    logging.debug("Filters in the dataframe = %s" % filters_in_df)
    if len(filters_in_df) != 1:
        segmentation_points = get_combined_segmentation_points(
            ts, loc_df, time_query, filters_in_df, filter_methods)
    else:
        # Common case: a single filter, so use its method directly
        only_method = filter_methods[filters_in_df[0]]
        segmentation_points = only_method.segment_into_trips(ts, time_query)

    # Turn the segmentation points into stored places and trips
    if segmentation_points is None:
        epq.mark_segmentation_failed(user_id)
        return

    if len(segmentation_points) == 0:
        # Nothing new to segment; no point in revisiting this range
        logging.debug("len(segmentation_points) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
        return

    try:
        # The name lookup stays inside the try so that a failure here is
        # reported like any other trip generation failure
        method_name = filter_method_names[filters_in_df[0]]
        create_places_and_trips(user_id, segmentation_points, method_name)
        last_ts_processed = get_last_ts_processed(filter_methods)
        epq.mark_segmentation_done(user_id, last_ts_processed)
    except:
        logging.exception("Trip generation failed for user %s" % user_id)
        epq.mark_segmentation_failed(user_id)
def segment_current_trips(user_id):
    """Run trip segmentation over the user's not-yet-processed locations.

    Filtered location points in the pending segmentation time range are
    loaded and cleaned of out-of-order entries (rows whose ``ts`` is lower
    than the previous row's are dropped from the dataframe and deleted from
    the timeseries database). The distinct non-null ``filter`` values then
    select the segmentation method, and the resulting segmentation points
    are stored as places and trips.

    The stage result is reported through ``epq``: done with a ``None``
    timestamp when there is nothing to segment, done with the last processed
    timestamp once trips are created, and failed when segmentation returns
    ``None`` or trip creation raises.

    :param user_id: the user whose time series is segmented
    :return: None
    """
    ts = esta.TimeSeries.get_time_series(user_id)
    time_query = epq.get_time_range_for_segmentation(user_id)

    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_time_filter as dstf
    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_dist_filter as dsdf

    time_filter_method = dstf.DwellSegmentationTimeFilter(
        time_threshold=5 * 60,  # 5 mins
        point_threshold=9,
        distance_threshold=100)  # 100 m
    dist_filter_method = dsdf.DwellSegmentationDistFilter(
        time_threshold=10 * 60,  # 10 mins
        point_threshold=9,
        distance_threshold=50)  # 50 m

    # Both tables are indexed by the value of the "filter" column
    filter_methods = {"time": time_filter_method,
                      "distance": dist_filter_method}
    filter_method_names = {"time": "DwellSegmentationTimeFilter",
                           "distance": "DwellSegmentationDistFilter"}

    # Which filter to use depends on the data, so read the location points
    # for the requested range before anything else
    loc_df = ts.get_data_df("background/filtered_location", time_query)
    if len(loc_df) == 0:
        # Empty range: mark it done so it is not looked at again
        logging.debug("len(loc_df) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
        return

    # Rows whose timestamp is lower than the row just before them
    out_of_order_points = loc_df[loc_df.ts.diff() < 0]
    if len(out_of_order_points) > 0:
        logging.info("Found out of order points!")
        logging.info("%s" % out_of_order_points)
        # Remove them from the table first
        stale_index = out_of_order_points.index.tolist()
        loc_df = loc_df.drop(stale_index)
        # Then remove them from the database too. This is generally
        # discouraged, which is why the import is tucked away in here.
        import emission.core.get_database as edb
        out_of_order_id_list = out_of_order_points["_id"].tolist()
        logging.debug("out_of_order_id_list = %s" % out_of_order_id_list)
        timeseries_db = edb.get_timeseries_db()
        timeseries_db.remove({"_id": {"$in": out_of_order_id_list}})

    filters_in_df = loc_df["filter"].dropna().unique()
    logging.debug("Filters in the dataframe = %s" % filters_in_df)
    single_filter = len(filters_in_df) == 1
    if single_filter:
        # Common case - one filter, one method
        segmentation_points = filter_methods[filters_in_df[0]].segment_into_trips(
            ts, time_query)
    else:
        segmentation_points = get_combined_segmentation_points(
            ts, loc_df, time_query, filters_in_df, filter_methods)

    # Persist places and trips derived from the segmentation points
    if segmentation_points is None:
        epq.mark_segmentation_failed(user_id)
    elif len(segmentation_points) == 0:
        # No segments found: mark it done so it is not looked at again
        logging.debug("len(segmentation_points) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
    else:
        try:
            # Looked up inside the try so a bad lookup is logged and marked
            # as a failure as well
            first_filter_name = filter_method_names[filters_in_df[0]]
            create_places_and_trips(user_id, segmentation_points,
                                    first_filter_name)
            epq.mark_segmentation_done(user_id,
                                       get_last_ts_processed(filter_methods))
        except:
            logging.exception("Trip generation failed for user %s" % user_id)
            epq.mark_segmentation_failed(user_id)