コード例 #1
0
def segment_current_trips(user_id):
    """Segment the user's unprocessed location points into trips and places.

    Reads the ``background/filtered_location`` entries that fall in the time
    range returned by ``epq.get_time_range_for_segmentation``, chooses the
    dwell segmentation method that matches the ``filter`` value(s) found in
    that data, and hands the resulting segmentation points to
    ``create_places_and_trips``. The outcome of the run is recorded through
    ``epq.mark_segmentation_done`` or ``epq.mark_segmentation_failed``.

    :param user_id: the user whose data should be segmented; passed through
        to the timeseries and pipeline-query helpers.
    :return: None
    """
    ts = esta.TimeSeries.get_time_series(user_id)
    time_query = epq.get_time_range_for_segmentation(user_id)

    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_time_filter as dstf
    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_dist_filter as dsdf
    dstfsm = dstf.DwellSegmentationTimeFilter(
        time_threshold=5 * 60,  # 5 mins
        point_threshold=9,
        distance_threshold=100)  # 100 m

    dsdfsm = dsdf.DwellSegmentationDistFilter(
        time_threshold=10 * 60,  # 10 mins
        point_threshold=9,
        distance_threshold=50)  # 50 m

    filter_methods = {"time": dstfsm, "distance": dsdfsm}
    filter_method_names = {
        "time": "DwellSegmentationTimeFilter",
        "distance": "DwellSegmentationDistFilter"
    }
    # We need to use the appropriate filter based on the incoming data
    # So let's read in the location points for the specified query
    loc_df = ts.get_data_df("background/filtered_location", time_query)
    if len(loc_df) == 0:
        # no new segments, no need to keep looking at these again
        logging.debug("len(loc_df) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
        return

    # Entries without a filter value (NaN) have no matching segmentation
    # method in filter_methods, so ignore them when picking the method.
    filters_in_df = loc_df["filter"].dropna().unique()
    logging.debug("Filters in the dataframe = %s", filters_in_df)
    if len(filters_in_df) == 1:
        # Common case - let's make it easy
        segmentation_points = filter_methods[
            filters_in_df[0]].segment_into_trips(ts, time_query)
    else:
        segmentation_points = get_combined_segmentation_points(
            ts, loc_df, time_query, filters_in_df, filter_methods)

    # Create and store trips and places based on the segmentation points
    if segmentation_points is None:
        epq.mark_segmentation_failed(user_id)
    elif len(segmentation_points) == 0:
        # no new segments, no need to keep looking at these again
        logging.debug("len(segmentation_points) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
    else:
        try:
            # NOTE(review): when several filters are present, the trips are
            # still labelled with the first filter's method name - confirm
            # that this is intended.
            create_places_and_trips(user_id, segmentation_points,
                                    filter_method_names[filters_in_df[0]])
            epq.mark_segmentation_done(user_id,
                                       get_last_ts_processed(filter_methods))
        except Exception:
            logging.exception("Trip generation failed for user %s", user_id)
            epq.mark_segmentation_failed(user_id)
        except BaseException:
            # KeyboardInterrupt / SystemExit: record the failure exactly as
            # for ordinary errors, but let the interrupt propagate instead of
            # silently swallowing it.
            logging.exception("Trip generation failed for user %s", user_id)
            epq.mark_segmentation_failed(user_id)
            raise
コード例 #2
0
def segment_current_trips(user_id):
    """Run trip segmentation over the user's pending location points.

    The location points are the ``background/filtered_location`` entries in
    the range given by ``epq.get_time_range_for_segmentation``. The ``filter``
    column of that data selects between the time-based and distance-based
    dwell segmentation methods; the segmentation points they return are
    stored through ``create_places_and_trips``. The run is finally marked
    done or failed via the ``epq`` helpers.

    :param user_id: the user whose data should be segmented.
    :return: None
    """
    ts = esta.TimeSeries.get_time_series(user_id)
    time_query = epq.get_time_range_for_segmentation(user_id)

    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_time_filter as dstf
    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_dist_filter as dsdf
    dstfsm = dstf.DwellSegmentationTimeFilter(time_threshold=5 * 60,  # 5 mins
                                              point_threshold=9,
                                              distance_threshold=100)  # 100 m

    dsdfsm = dsdf.DwellSegmentationDistFilter(time_threshold=10 * 60,  # 10 mins
                                              point_threshold=9,
                                              distance_threshold=50)  # 50 m

    filter_methods = {"time": dstfsm, "distance": dsdfsm}
    filter_method_names = {"time": "DwellSegmentationTimeFilter",
                           "distance": "DwellSegmentationDistFilter"}
    # We need to use the appropriate filter based on the incoming data
    # So let's read in the location points for the specified query
    loc_df = ts.get_data_df("background/filtered_location", time_query)
    if len(loc_df) == 0:
        # no new segments, no need to keep looking at these again
        logging.debug("len(loc_df) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
        return

    # NaN filter values cannot be looked up in filter_methods, so leave them
    # out when working out which segmentation method(s) apply.
    filters_in_df = loc_df["filter"].dropna().unique()
    logging.debug("Filters in the dataframe = %s", filters_in_df)
    if len(filters_in_df) == 1:
        # Common case - let's make it easy
        segmentation_points = filter_methods[filters_in_df[0]].segment_into_trips(
            ts, time_query)
    else:
        segmentation_points = get_combined_segmentation_points(ts, loc_df, time_query,
                                                               filters_in_df,
                                                               filter_methods)

    # Create and store trips and places based on the segmentation points
    if segmentation_points is None:
        epq.mark_segmentation_failed(user_id)
    elif len(segmentation_points) == 0:
        # no new segments, no need to keep looking at these again
        logging.debug("len(segmentation_points) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
    else:
        try:
            # NOTE(review): with more than one filter in the data, the first
            # filter's method name is still used as the label - confirm.
            create_places_and_trips(user_id, segmentation_points,
                                    filter_method_names[filters_in_df[0]])
            epq.mark_segmentation_done(user_id, get_last_ts_processed(filter_methods))
        except Exception:
            logging.exception("Trip generation failed for user %s", user_id)
            epq.mark_segmentation_failed(user_id)
        except BaseException:
            # Interrupts and interpreter exit are still recorded as a failed
            # run, but must not be swallowed: re-raise after bookkeeping.
            logging.exception("Trip generation failed for user %s", user_id)
            epq.mark_segmentation_failed(user_id)
            raise
コード例 #3
0
def segment_current_trips(user_id):
    """Segment the user's unprocessed location points into trips and places.

    Steps, in order:

    1. Read the ``background/filtered_location`` entries in the time range
       returned by ``epq.get_time_range_for_segmentation``.
    2. Drop points whose ``ts`` is lower than the preceding point's, both
       from the in-memory dataframe and from the timeseries database.
    3. Pick the dwell segmentation method matching the ``filter`` value(s)
       in the data and compute the segmentation points.
    4. Store trips and places via ``create_places_and_trips`` and record the
       outcome with ``epq.mark_segmentation_done`` /
       ``epq.mark_segmentation_failed``.

    :param user_id: the user whose data should be segmented; passed through
        to the timeseries and pipeline-query helpers.
    :return: None
    """
    ts = esta.TimeSeries.get_time_series(user_id)
    time_query = epq.get_time_range_for_segmentation(user_id)

    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_time_filter as dstf
    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_dist_filter as dsdf
    dstfsm = dstf.DwellSegmentationTimeFilter(
        time_threshold=5 * 60,  # 5 mins
        point_threshold=9,
        distance_threshold=100)  # 100 m

    dsdfsm = dsdf.DwellSegmentationDistFilter(
        time_threshold=10 * 60,  # 10 mins
        point_threshold=9,
        distance_threshold=50)  # 50 m

    filter_methods = {"time": dstfsm, "distance": dsdfsm}
    filter_method_names = {
        "time": "DwellSegmentationTimeFilter",
        "distance": "DwellSegmentationDistFilter"
    }
    # We need to use the appropriate filter based on the incoming data
    # So let's read in the location points for the specified query
    loc_df = ts.get_data_df("background/filtered_location", time_query)
    if len(loc_df) == 0:
        # no new segments, no need to keep looking at these again
        logging.debug("len(loc_df) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
        return

    # A negative ts difference means the point is older than its predecessor
    out_of_order_points = loc_df[loc_df.ts.diff() < 0]
    if len(out_of_order_points) > 0:
        logging.info("Found out of order points!")
        logging.info("%s", out_of_order_points)
        # drop from the table
        loc_df = loc_df.drop(out_of_order_points.index.tolist())
        # delete from the database. Should be generally discouraged, so we
        # are kindof putting it in here secretively
        import emission.core.get_database as edb

        out_of_order_id_list = out_of_order_points["_id"].tolist()
        logging.debug("out_of_order_id_list = %s", out_of_order_id_list)
        # NOTE(review): if this is a PyMongo collection, `remove` is
        # deprecated in PyMongo 3 in favour of `delete_many` - confirm the
        # driver version before switching.
        edb.get_timeseries_db().remove({"_id": {"$in": out_of_order_id_list}})

    # Entries without a filter value cannot select a segmentation method
    filters_in_df = loc_df["filter"].dropna().unique()
    logging.debug("Filters in the dataframe = %s", filters_in_df)
    if len(filters_in_df) == 1:
        # Common case - let's make it easy
        segmentation_points = filter_methods[
            filters_in_df[0]].segment_into_trips(ts, time_query)
    else:
        segmentation_points = get_combined_segmentation_points(
            ts, loc_df, time_query, filters_in_df, filter_methods)

    # Create and store trips and places based on the segmentation points
    if segmentation_points is None:
        epq.mark_segmentation_failed(user_id)
    elif len(segmentation_points) == 0:
        # no new segments, no need to keep looking at these again
        logging.debug("len(segmentation_points) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
    else:
        try:
            # NOTE(review): when several filters are present, the trips are
            # still labelled with the first filter's method name - confirm
            # that this is intended.
            create_places_and_trips(user_id, segmentation_points,
                                    filter_method_names[filters_in_df[0]])
            epq.mark_segmentation_done(user_id,
                                       get_last_ts_processed(filter_methods))
        except Exception:
            logging.exception("Trip generation failed for user %s", user_id)
            epq.mark_segmentation_failed(user_id)
        except BaseException:
            # KeyboardInterrupt / SystemExit: record the failure exactly as
            # for ordinary errors, but let the interrupt propagate instead of
            # silently swallowing it.
            logging.exception("Trip generation failed for user %s", user_id)
            epq.mark_segmentation_failed(user_id)
            raise
コード例 #4
0
def segment_current_trips(user_id):
    """Run trip segmentation over the user's pending location points.

    Loads the ``background/filtered_location`` entries in the range given by
    ``epq.get_time_range_for_segmentation``, removes points whose ``ts`` goes
    backwards (from the dataframe and from the timeseries database), and then
    uses the ``filter`` column to choose between the time-based and
    distance-based dwell segmentation methods. The resulting segmentation
    points are stored through ``create_places_and_trips`` and the run is
    marked done or failed via the ``epq`` helpers.

    :param user_id: the user whose data should be segmented.
    :return: None
    """
    ts = esta.TimeSeries.get_time_series(user_id)
    time_query = epq.get_time_range_for_segmentation(user_id)

    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_time_filter as dstf
    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_dist_filter as dsdf
    dstfsm = dstf.DwellSegmentationTimeFilter(time_threshold=5 * 60,  # 5 mins
                                              point_threshold=9,
                                              distance_threshold=100)  # 100 m

    dsdfsm = dsdf.DwellSegmentationDistFilter(time_threshold=10 * 60,  # 10 mins
                                              point_threshold=9,
                                              distance_threshold=50)  # 50 m

    filter_methods = {"time": dstfsm, "distance": dsdfsm}
    filter_method_names = {"time": "DwellSegmentationTimeFilter",
                           "distance": "DwellSegmentationDistFilter"}
    # We need to use the appropriate filter based on the incoming data
    # So let's read in the location points for the specified query
    loc_df = ts.get_data_df("background/filtered_location", time_query)
    if len(loc_df) == 0:
        # no new segments, no need to keep looking at these again
        logging.debug("len(loc_df) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
        return

    # Points whose ts is lower than the previous row's ts
    out_of_order_points = loc_df[loc_df.ts.diff() < 0]
    if len(out_of_order_points) > 0:
        logging.info("Found out of order points!")
        logging.info("%s", out_of_order_points)
        # drop from the table
        loc_df = loc_df.drop(out_of_order_points.index.tolist())
        # delete from the database. Should be generally discouraged, so we
        # are kindof putting it in here secretively
        import emission.core.get_database as edb

        out_of_order_id_list = out_of_order_points["_id"].tolist()
        logging.debug("out_of_order_id_list = %s", out_of_order_id_list)
        # NOTE(review): `remove` is deprecated in PyMongo 3 (replacement:
        # `delete_many`), assuming this is a PyMongo collection - confirm
        # the driver version before changing the call.
        edb.get_timeseries_db().remove({"_id": {"$in": out_of_order_id_list}})

    # Rows with no filter value are ignored when choosing the method
    filters_in_df = loc_df["filter"].dropna().unique()
    logging.debug("Filters in the dataframe = %s", filters_in_df)
    if len(filters_in_df) == 1:
        # Common case - let's make it easy
        segmentation_points = filter_methods[filters_in_df[0]].segment_into_trips(
            ts, time_query)
    else:
        segmentation_points = get_combined_segmentation_points(ts, loc_df, time_query,
                                                               filters_in_df,
                                                               filter_methods)

    # Create and store trips and places based on the segmentation points
    if segmentation_points is None:
        epq.mark_segmentation_failed(user_id)
    elif len(segmentation_points) == 0:
        # no new segments, no need to keep looking at these again
        logging.debug("len(segmentation_points) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
    else:
        try:
            # NOTE(review): with more than one filter in the data, the first
            # filter's method name is still used as the label - confirm.
            create_places_and_trips(user_id, segmentation_points,
                                    filter_method_names[filters_in_df[0]])
            epq.mark_segmentation_done(user_id, get_last_ts_processed(filter_methods))
        except Exception:
            logging.exception("Trip generation failed for user %s", user_id)
            epq.mark_segmentation_failed(user_id)
        except BaseException:
            # Interrupts and interpreter exit are still recorded as a failed
            # run, but must not be swallowed: re-raise after bookkeeping.
            logging.exception("Trip generation failed for user %s", user_id)
            epq.mark_segmentation_failed(user_id)
            raise