Ejemplo n.º 1
0
def test_great_circle_dist():
    """
    Check great_circle_dist on a zero-length pair and on a known pair of places.
    """
    # a point is at distance zero from itself
    origin = (1.3, 103.0)
    assert_almost_equal(0.0, great_circle_dist(origin, origin))

    # distance between CREATE and SUTD, compared after rounding to one
    # decimal place; the result must not depend on the argument order
    create_latlon = (1.303826, 103.773890)
    sutd_latlon = (1.341221, 103.963234)
    for start, end in ((create_latlon, sutd_latlon), (sutd_latlon, create_latlon)):
        measured = great_circle_dist(start, end)
        assert_almost_equal(21.5, round(measured, ndigits=1))
Ejemplo n.º 2
0
def find_nearest_station(lat, lon, rtree, threshold=100):
    """Find the nearest station(s) to a lat/lon position.

    Parameters:
        lat, lon: the query position.
        rtree: Rtree spatial index of the stations; each entry's ``object``
            is the string station id.
        threshold: maximum distance in meters; entries at this distance or
            further away are dropped.

    Returns:
        A list of ``(station id, distance in meters)`` tuples, one for every
        entry returned by ``rtree.nearest(..., num_results=1)`` that is
        closer than ``threshold``. The list is empty when nothing is close
        enough and holds more than one tuple when the index returns several
        equidistant nearest stations.
    """
    # NOTE: bounds coordinates are 'interleaved' by default:
    # inserted as [x, y, x, y] and returned as [xmin, xmax, ymin, ymax],
    # so bounds[0] / bounds[2] are the station's lat / lon.
    # See http://toblerity.org/rtree/tutorial.html
    position = (lat, lon)
    stations = []
    for rtree_entry in rtree.nearest((lat, lon, lat, lon), num_results=1,
                                     objects=True):
        station_position = (rtree_entry.bounds[0], rtree_entry.bounds[2])
        # compute the distance once and use it for both the filter and the result
        dist = great_circle_dist(position, station_position, unit="meters")
        if dist < threshold:
            stations.append((rtree_entry.object, dist))
    return stations
Ejemplo n.º 3
0
def notWalkSegProcess(hw_mode_refined, ave_vel, delta_t, timestamp, lat, lon, dist, num_pt_total, NOT_STOP_V, IS_STOP_V, TIME_SET_STOPPED, VC_MIN_DIST):
    """Refine every non-walking segment of a mode vector.

    Finds each run of consecutive points whose mode is neither MODE_WALK_IN
    nor MODE_WALK_OUT, passes the run to notWalkSegRefine() and writes the
    refined modes back into hw_mode_refined (the array is updated in place
    and also returned).

    Input:
        - hw_mode_refined: mode vector of the whole device/trip
        - ave_vel, delta_t, timestamp, lat, lon, dist: per-point vectors
          aligned with hw_mode_refined
        - num_pt_total: total number of points
        - NOT_STOP_V, IS_STOP_V, TIME_SET_STOPPED, VC_MIN_DIST: thresholds
          forwarded unchanged to notWalkSegRefine()
    Output:
        - the refined mode vector
    Raises:
        - Exception if the point next to a non-walking segment is not a
          walking point
    """
    # get the start and end idx of non walking segments
    idx_nonwalking = np.where((hw_mode_refined!=MODE_WALK_IN) & (hw_mode_refined!=MODE_WALK_OUT))[0]
    if len(idx_nonwalking)==0:
        # nothing to refine
        return hw_mode_refined
    start_idx_nonwalking,end_idx_nonwalking,num_nonwalking_seg = getStartEndIdx(idx_nonwalking)

    # go through each not walking segment
    for i_seg in range(0,num_nonwalking_seg):
        seg_start = start_idx_nonwalking[i_seg]
        seg_end = end_idx_nonwalking[i_seg]
        seg = slice(seg_start, seg_end+1) # end index is inclusive

        # check whether it's indoor or outdoor
        if seg_start>0: # set indoor/outdoor as previous state
            neighbour_mode = hw_mode_refined[seg_start-1]
            if neighbour_mode==MODE_WALK_IN:
                indoor_seg = 1
            elif neighbour_mode==MODE_WALK_OUT:
                indoor_seg = 0
            else:
                raise Exception("The segment before this none walking segment is not walking segment!")
        elif seg_end<num_pt_total-1: # set indoor/outdoor as next state
            neighbour_mode = hw_mode_refined[seg_end+1]
            if neighbour_mode==MODE_WALK_IN:
                indoor_seg = 1
            elif neighbour_mode==MODE_WALK_OUT:
                indoor_seg = 0
            else:
                # this branch inspects the point AFTER the segment
                raise Exception("The segment after this none walking segment is not walking segment!")
        else:
            # the segment spans the whole vector, no neighbour to inherit from
            indoor_seg = 1 # assign default value

        # collect the per-point data of the none-walking period
        timestamp_seg = timestamp[seg]
        hw_mode_refined_seg = hw_mode_refined[seg]
        v_pt_filtered_seg = ave_vel[seg]
        delta_t_seg = delta_t[seg]
        # distance summed along the path vs. straight first-to-last distance
        travel_dist = np.nansum(dist[seg])
        jump_dist = great_circle_dist([lat[seg_start],lon[seg_start]],[lat[seg_end],lon[seg_end]],'meters')

        # call the function to refine the non-walking modes
        hw_mode_refined_seg = notWalkSegRefine(timestamp_seg,hw_mode_refined_seg,v_pt_filtered_seg,delta_t_seg,travel_dist,jump_dist,NOT_STOP_V,IS_STOP_V,indoor_seg,TIME_SET_STOPPED,VC_MIN_DIST)
        # update the modes into the entire trip
        hw_mode_refined[seg] = hw_mode_refined_seg

    return hw_mode_refined
Ejemplo n.º 4
0
    def distanceFcn(df,lat,lon,tripStart=True,homeIdx=True):
        """Find the row of df that best matches the point (lat, lon).

        Input:
            - df: data frame with 'WLATITUDE' and 'WLONGITUDE' columns
            - lat, lon: the reference location
            - tripStart: if True pick the LAST matching row, otherwise the
              FIRST matching row
            - homeIdx: if True use the home search radius, otherwise the
              school search radius
        Output:
            - dict with 'dist' (distance in meters of the chosen row to the
              point) and 'idx' (position of that row in df)

        If at least one row lies within the search radius, the first/last
        such row is chosen. Otherwise the row with the minimum (nan-ignoring)
        distance is chosen, again taking the last occurrence when tripStart
        is True and the first one otherwise.
        """
        search_range_school = 50 # m, the radius of searching for the start/end point of a trip (school)
        search_range_home = 0 # m, the radius of searching for the start/end point of a trip (home)
        # the home/school cases differ only in the search radius
        search_range = search_range_home if homeIdx else search_range_school

        latlon = df[['WLATITUDE','WLONGITUDE']].values.tolist()
        # build a real list (not a lazy map object) because .index() is used below
        dist_list = [great_circle_dist(x, [lat,lon], unit="meters") for x in latlon]
        dist_array = np.array(dist_list)

        idx_in_range = np.where(dist_array<=search_range)[0]
        if len(idx_in_range)==0:
            # nothing inside the radius: fall back to the closest point
            dist = np.nanmin(dist_array)
            if tripStart:
                idx = max(np.where(dist_array==dist)[0])
            else:
                idx = dist_list.index(dist)
        else:
            idx = idx_in_range[-1] if tripStart else idx_in_range[0]
            dist = dist_array[idx]

        return {'dist':dist,'idx': idx}
Ejemplo n.º 5
0
def trip_segment(data_frame, stopped_thresh=0.5, stopped_dwell=480):
    """Find POIs (stops) in the data frame.

    A stop is a run of consecutive rows whose 'AVE_VELOCITY' is below
    stopped_thresh. It becomes a POI when the summed 'TIME_DELTA' of the run
    is larger than stopped_dwell. The POI location is the most frequent
    lat/lon of the run after rounding to 4 decimals. Consecutive POIs closer
    than 30 m to the current representative location are merged into one.

    Parameters:
        data_frame: pandas data frame with 'AVE_VELOCITY', 'TIME_DELTA',
            'WLATITUDE' and 'WLONGITUDE' columns.
        stopped_thresh: velocity below which a row counts as stopped.
        stopped_dwell: minimum total stop time for a stop to become a POI.

    Returns:
        Two parallel lists: the POI locations as [lat, lon] and, for each
        POI, the list of data frame indices that belong to it.
    """

    def store_poi(data_frame, idx_buffer, stop_time):
        """Record the stop described by idx_buffer as a POI if it qualifies.

        idx_buffer has the indices in the data frame for this stop and
        stop_time is its total duration. Nothing is stored when the stop is
        not longer than stopped_dwell or when all its latitudes are nan.
        """
        # if the total stop time is larger than stopped_dwell,
        # then a poi is detected, otherwise return
        if stop_time <= stopped_dwell:
            return
        # select lat/lon locations for this POI
        df_stop_all = data_frame.loc[idx_buffer][['WLATITUDE', 'WLONGITUDE']]
        # check if all locations are nan
        lat_stop = df_stop_all['WLATITUDE'].values
        if np.all(np.isnan(lat_stop)):
            return
        df_stop_all = df_stop_all.apply(round_values)
        # get the most frequent location of this stop segment
        df_poi_cnt = df_stop_all.groupby(['WLATITUDE', 'WLONGITUDE']).size()
        poi_lat, poi_lon = df_poi_cnt.idxmax()
        # record poi lat/lon, the indices that correspond to it and its duration
        pois['poi_latlon'].append([poi_lat,poi_lon])
        pois['poi_idx'].append(idx_buffer)
        pois['last_time'].append(stop_time)

    # start of trip_segment()
    pois = defaultdict(list)
    round_decimal = 4
    dist_2comb = 30 # m, distance below which two pois are combined
    round_values = partial(pd.Series.round, decimals=round_decimal)
    stop_time = 0
    idx_buffer = []
    for index, row in data_frame.iterrows():
        if row['AVE_VELOCITY'] < stopped_thresh:
            # if stop time is zero we are at the beginning of a new stop:
            # start a fresh buffer (a stored POI keeps the old list)
            if stop_time == 0:
                idx_buffer = []
            # remember that we are stopped at this index
            idx_buffer.append(index)
            # add the time delta of this row to the time we are stopped here
            stop_time += row['TIME_DELTA']
        else:
            # we are moving, check if we have just concluded a stop
            # and store it
            if stop_time > 0:
                store_poi(data_frame, idx_buffer, stop_time)
            stop_time = 0

    # process the last stop after exiting loop
    if stop_time > 0:
        store_poi(data_frame, idx_buffer, stop_time)

    pois_latlon = pois['poi_latlon']
    num_pois = len(pois_latlon)
    if num_pois <= 1:
        # nothing to combine
        return pois['poi_latlon'], pois['poi_idx']

    # combine the close pois
    cur_poi_latlon = pois_latlon[0]
    unique_latlon = [pois['poi_latlon'][0]]
    unique_idx = [list(pois['poi_idx'][0])]
    unique_time = [pois['last_time'][0]]
    # duration of the poi whose location currently represents the merged poi;
    # it has to start with the first poi's duration so that a shorter
    # follow-up poi does not take over the location
    last_time_prev = pois['last_time'][0]
    for i_poi in range(1,num_pois):
        dist = great_circle_dist(cur_poi_latlon,pois_latlon[i_poi],'meters')
        if dist<dist_2comb:
            # the two pois are close: merge indices and durations, keep the
            # location of whichever single poi lasted longest
            unique_idx[-1].extend(pois['poi_idx'][i_poi])
            unique_time[-1] = unique_time[-1]+pois['last_time'][i_poi]
            if pois['last_time'][i_poi]>last_time_prev:
                unique_latlon[-1] = pois['poi_latlon'][i_poi]
                cur_poi_latlon = pois_latlon[i_poi]
                last_time_prev = pois['last_time'][i_poi]
        else:
            # the two pois are far apart: start a new unique poi
            cur_poi_latlon = pois_latlon[i_poi]
            unique_latlon.append(pois['poi_latlon'][i_poi])
            unique_idx.append(list(pois['poi_idx'][i_poi]))
            unique_time.append(pois['last_time'][i_poi])
            last_time_prev = pois['last_time'][i_poi]

    return unique_latlon, unique_idx
Ejemplo n.º 6
0
def identify_home_school(pois, idx_of_pois, data_frame, school_start=9,
                         school_end=13, home_start=22, home_end=5,
                         max_school_thresh=100, round_decimals=4,
                         poi_cover_range = 30):
    """Identify home and school locations among the POIs.

    For every POI the time spent at it is accumulated separately for the
    school hours and for the home (night) hours, using the rows that belong
    to the POI plus all rows within poi_cover_range meters of it. The POI
    with the most school-hour time becomes the school, the POI with the most
    home-hour time becomes the home. If the same POI wins both, it is
    assigned to whichever time is larger (and to neither on a tie).

    pois is a list of all the unique pois ([lat, lon]) generated by trip_segment
    idx_of_pois is a list with, for each poi, the list of data frame indices
    that belong to it (they are combined with positional indices, so the
    data frame is presumably indexed 0..n-1 - confirm against the caller)
    data_frame is the pandas data frame with the original data; the columns
    'TIMESTAMP', 'TIME_DELTA', 'WLATITUDE' and 'WLONGITUDE' are used.
    school_start is the hour of the day when school starts. Default 9am.
    school_end is the hour of the day when school ends. Default 1pm.
    home_start is the first hour of the day when students are assumed to be
    home at night. Default 10pm.
    home_end is the last hour of the day when students are assumed to be
    home at night. Default 5am.
    max_school_thresh is the home/school distance in meters below which
    the home/school pair is rejected - to negate the possibility
    of creating home/school links for sensors left at school by mistake
    round_decimals is not used by the current implementation.
    poi_cover_range is the radius in meters around a poi within which data
    points are counted as being at that poi.

    Hours are taken from get_hour_SGT(timestamp).

    Return two tuples representing home (lat, lon) and school (lat, lon).
    A location that could not be identified is returned as (None, None).
    """
    no_location = (None, None)

    # check the length of pois
    if len(pois)<1:
        logging.info("No poi is passed in")
        return no_location, no_location

    # get out time and location data once, they do not change per poi
    all_timestamp = data_frame[['TIMESTAMP']].values
    all_delta_time = data_frame[['TIME_DELTA']].values
    latlon = data_frame[['WLATITUDE', 'WLONGITUDE']].values

    # initialization
    time_at_school = []
    time_at_home = []
    lat_pois = []
    lon_pois = []

    # go through each poi
    for i_poi in range(0,len(pois)):
        poi = pois[i_poi]
        idx_of_poi = idx_of_pois[i_poi]

        # lat/lon values to return
        lat_pois.append(poi[0])
        lon_pois.append(poi[1])

        # calculate the distance between each point and the poi
        dist_to_poi = [great_circle_dist(x, poi, unit="meters") for x in latlon]

        # find the indices of points that are near to this poi
        # (nan distances compare False and are left out)
        idx_near_poi = [i for i, d in enumerate(dist_to_poi) if d<=poi_cover_range]

        # combine the indices found with the earlier poi indices
        idx_of_pois_new = np.unique(idx_of_poi+idx_near_poi).tolist()

        poi_timestamp = all_timestamp[idx_of_pois_new]
        poi_delta_time = all_delta_time[idx_of_pois_new]
        poi_hourtime = np.array([get_hour_SGT(x) for x in poi_timestamp])

        # time spent at this poi during the school time range
        idx_at_school = np.where((poi_hourtime >= school_start) & (poi_hourtime < school_end))[0]
        time_at_school.append(np.nansum(poi_delta_time[idx_at_school]))

        # time spent at this poi during the home time range (wraps midnight)
        idx_at_home = np.where((poi_hourtime < home_end) | (poi_hourtime >= home_start))[0]
        time_at_home.append(np.nansum(poi_delta_time[idx_at_home]))

    # get school
    max_sch_cnt = max(time_at_school)
    idx_max_sch_cnt = np.argmax(time_at_school)
    # get home
    max_home_cnt = max(time_at_home)
    idx_max_home_cnt = np.argmax(time_at_home)

    # nan marks "not identified" until the final conversion to None
    school_lat = np.nan
    school_lon = np.nan
    home_lat = np.nan
    home_lon = np.nan

    # if max_sch_cnt and max_home_cnt are all zero, get all loc as None
    if max_sch_cnt==0 and max_home_cnt==0:
        return no_location, no_location
    # if the same poi is detected as home and school
    # decide by the school/home time
    elif idx_max_sch_cnt==idx_max_home_cnt:
        if max_sch_cnt > max_home_cnt:
            school_lat = lat_pois[idx_max_sch_cnt]
            school_lon = lon_pois[idx_max_sch_cnt]
        elif max_sch_cnt < max_home_cnt:
            home_lat = lat_pois[idx_max_home_cnt]
            home_lon = lon_pois[idx_max_home_cnt]
        else:
            return no_location, no_location
    else:
        # only if there are hits for school time, assign school
        if max_sch_cnt>0:
            school_lat = lat_pois[idx_max_sch_cnt]
            school_lon = lon_pois[idx_max_sch_cnt]
        # only if there are hits for home time, assign home
        if max_home_cnt>0:
            home_lat = lat_pois[idx_max_home_cnt]
            home_lon = lon_pois[idx_max_home_cnt]

    # sort out home/school pairings that are too close - these are anomalies
    school_home_dist = great_circle_dist([school_lat,school_lon],[home_lat,home_lon],unit="meters")
    if school_home_dist < max_school_thresh:
        logging.info("home school distance:" + str(school_home_dist))
        return no_location, no_location

    home = no_location if np.isnan(home_lat) else (home_lat, home_lon)
    school = no_location if np.isnan(school_lat) else (school_lat, school_lon)
    return home, school
Ejemplo n.º 7
0
    def segFind(df, trip_return, mode_thresh=120, isAM = True, dist_lim = 45.3, max_mode=6, max_walk=4.0):
        """Append the mode segments of one trip and their distances to trip_return.

        All stop/walk modes are first merged into MODE_WALK_IN, then the mode
        vector is split into segments with chunks(). Segments shorter than
        mode_thresh are ignored and once max_mode segments have been
        accepted the remaining ones are ignored too. For every accepted
        segment with a non-zero, non-nan distance the mode and the distance
        (meters / 1000, passed through checkDist with dist_lim) are appended
        to trip_return; a segment with the same mode as the previously
        stored one is added to that one's distance instead.

        Input:
            - df: data frame with 'PRED_MODE', 'TIME_DELTA', 'WLATITUDE',
              'WLONGITUDE' and 'DISTANCE_DELTA' columns
            - trip_return: dict of lists, extended in place under the keys
              'am_mode'/'am_distance' or 'pm_mode'/'pm_distance'
            - mode_thresh: minimum total TIME_DELTA of a segment
            - isAM: selects the am_* (True) or pm_* (False) keys
            - dist_lim: limit handed to checkDist for stored distances
            - max_mode: maximum number of accepted segments
            - max_walk: limit (multiplied by 1000) handed to checkDist for
              long walking segments
        Output:
            - number of accepted segments (this also counts accepted
              segments that were later dropped for a zero/nan distance)
        """
        # definition of mode code
        MODE_WALK_IN = 3
        MODE_WALK_OUT = 2
        MODE_STOP_IN = 1
        MODE_STOP_OUT = 0

        # thresholds for calculating distance of mode segs
        ALL_WALK_TIME = 15*60   # time shorter than which the distance of walk mode segment is calculated using all points
        real_to_jump_dist = 2   # for short walking seg, limit the distance by 2 times the jump distance

        # the output keys do not depend on the segment
        if isAM:
            mode_key = 'am_mode'
            dist_key = 'am_distance'
        else:
            mode_key = 'pm_mode'
            dist_key = 'pm_distance'

        # take out the predicted modes; work on a private copy so that the
        # masking below can never write back into df
        pred_modes = df[['PRED_MODE']].values[:,0].copy()
        # change all STOP_IN, STOP_OUT, WALK_OUT to WALK_IN
        pred_modes[(pred_modes==MODE_STOP_IN) | (pred_modes==MODE_STOP_OUT) | (pred_modes==MODE_WALK_OUT)]=MODE_WALK_IN
        # each chunk is used as (start idx, end idx (exclusive), mode)
        mode_segs = list(chunks(pred_modes,True))
        num_valid_mode_seg = 0
        logging.debug("Mode Segs: " + str(mode_segs))
        prev_mode = 0

        # per-point columns used inside the loop
        raw_modes = df['PRED_MODE'].values
        time_delta = df['TIME_DELTA'].values
        dist_delta = df['DISTANCE_DELTA'].values
        lat = df['WLATITUDE'].values
        lon = df['WLONGITUDE'].values

        # go through each mode chunk
        for mode_seg in mode_segs:
            seg = slice(mode_seg[0], mode_seg[1])
            time_span = np.sum(time_delta[seg])

            # abandon if the total segment time is less than threshold,
            # and stop accepting segments once max_mode have been accepted
            if time_span < mode_thresh or num_valid_mode_seg > max_mode-1:
                continue

            latlon_start = [lat[mode_seg[0]],lon[mode_seg[0]]]
            latlon_end = [lat[mode_seg[1]-1],lon[mode_seg[1]-1]]
            jump_dist = great_circle_dist(latlon_start,latlon_end,'meters')
            num_valid_mode_seg += 1

            # calculate the distance of this mode segment
            if int(mode_seg[2]) == MODE_WALK_IN:
                dist_cur_seg = dist_delta[seg]
                if time_span<ALL_WALK_TIME:
                    # if the time span is too small, consider all 0-3 modes as walking
                    dist_seg = checkDist(np.nansum(dist_cur_seg),jump_dist*real_to_jump_dist)
                else:
                    # else only count the points originally predicted as modes 2 and 3
                    modes_cur_seg = raw_modes[seg]
                    is_walk = (modes_cur_seg==MODE_WALK_IN) | (modes_cur_seg==MODE_WALK_OUT)
                    dist_seg = checkDist(np.nansum(dist_cur_seg[is_walk]),max_walk*1000)
            else:
                dist_seg = np.nansum(dist_delta[seg])

            if dist_seg==0 or np.isnan(dist_seg):
                # filter out the zero or nan values of dist_seg
                continue
            if mode_seg[2]==prev_mode:
                # if the current mode is same as the previous one, combine the two distances
                prev_dist = trip_return[dist_key][-1]
                trip_return[dist_key][-1] = checkDist((prev_dist*1000+dist_seg) / 1000,dist_lim)
                continue
            trip_return[mode_key].append(int(mode_seg[2])) # append the mode
            trip_return[dist_key].append(checkDist(dist_seg / 1000,dist_lim))
            prev_mode = mode_seg[2]

        return num_valid_mode_seg
Ejemplo n.º 8
0
 def pointDist(row, lat,lon):
     """Return great_circle_dist between the row's WLATITUDE/WLONGITUDE and (lat, lon)."""
     row_latlon = [row['WLATITUDE'], row['WLONGITUDE']]
     target_latlon = [lat, lon]
     return great_circle_dist(row_latlon, target_latlon)
Ejemplo n.º 9
0
def modeSmooth(hw_mode,timestamp,delta_t,lat,lon,vel,ave_vel,delta_steps,dist):
    """Smooth a vector of hw mode codes.

    Output:
       - hw_mode_refined: smoothed copy of hw_mode (hw_mode itself is copied
         first and not written to)
    Input:
       - hw_mode: a vector of hw_code
       - timestamp: a vector of timestamp
       - delta_t: vector of time differences between points, compared
         against thresholds given in seconds
       - lat,lon: vectors of lat and lon representing location
       - vel: vector of geographical velocity, m/s
       - ave_vel: 5-window moving average of geographical velocity, m/s
       - delta_steps: vector of step counts per point
       - dist: vector of per-point distances, forwarded to notWalkSegProcess

    NOTE: vel and ave_vel are modified in place - entries next to long
    "sleeping" points are set to 0.

    Steps, in order:
      1. re-label points with a long delta_t ("sleeping" points)
      2. mark walking points that are too fast as MODE_TBD_VC
      3. mark fast single-point walking segments as MODE_TBD_VC and turn
         walking segments with a large jump distance into MODE_WALK_OUT
      4. refine the non-walking segments (notWalkSegProcess)
      5. mark short stop/walk segments between two other segments as
         MODE_TBD_VC and refine the non-walking segments again
    """

    NUM_AFT_WALKING = 3 # num of points after walking segment to be set as invalid hw mode (only used by the disabled block below)
    TIME_NOT_HIDE = 60*5 # sec, time longer than which the several points after each walking segment won't be set as TBD (only used by the disabled block below)
    TIME_SET_STOPPED = 60*1 # sec, time shorter than which the not walking seg is set as stopped
    NOT_STOP_V = 5.0 # m/s, mean velocity above which the not walking seg is considered as not stopped
    IS_STOP_V = 1.0 # m/s, mean velocity below which the not walking seg is considered as stopped
    WALK_MAX_V_AVE = 7.0  # m/s, moving average velocity above which it's considered as TBD
    WALK_MAX_V_PT = 7.0  # m/s, single point velocity above which it's considered as TBD
    SINGLE_WALK_MAX_V = 1.5 # m/s, single point velocity above which it's considered as TBD for single walking point
    SLEEPING_TIME = 60  # s, time larger than which the mode is checked and reset
    SLEEP_MAX_V = 2.0 # m/s, vel_ave or vel above which the sleeping point will be assigned as TBD_VC
    SLEEP_TO_WALK_STEPS = 1 # steps to time ratio below which the sleeping point will be assigned as stopped
    TBD_VC_TIME = 200 # s, time above which the point will be considered as vehicle mode
    TBD_VC_DIST = 300 # m, distance above which the point will be considered as vehicle mode
    SHORT_WALK = 200 # s, time below which the walking segment between two vehicle seg will be considered as invalid
    FEW_STEPS = 50 # steps below which the walking segment between two vehicle seg will be considered as invalid
    SHORT_WALK_MAX_V = 1.5    # m/s, mean velocity above which a short walking segment between two vehicle seg is considered as invalid
    VC_MIN_DIST = 100 #m, distance smaller than which the vehicle mode segment is needed to reprocess
    WALK_IN_MAX_DIST = 150  #m, jump distance larger than which the walking mode segment is considered as outdoor

    # mode representation: the MODE_* names used below are not defined in
    # this function; the values listed here are kept for reference only

#    MODE_WALK_IN = 3
#    MODE_WALK_OUT = 2
#    MODE_STOP_OUT = 0
#    MODE_STOP_IN = 1
#    MODE_TBD = 10
#    MODE_TBD_VC = 11

    # initialization
    num_pt_total = len(hw_mode) # total number of points in this trip
    hw_mode_refined = hw_mode.copy() # initialize the refined mode vector


    # check the long delta timestamp points
    # fast ones become MODE_TBD_VC, ones with few steps become stopped
    idx_sleep = np.where(delta_t>SLEEPING_TIME)[0].tolist()
    prev_i_sp = 0
    for i_sp in idx_sleep:
        if ave_vel[i_sp] > SLEEP_MAX_V or vel[i_sp] > SLEEP_MAX_V:
            # too fast for a stop: mark this point and the next one as TBD_VC
            hw_mode_refined[i_sp] = MODE_TBD_VC
            if i_sp<num_pt_total-1:
                hw_mode_refined[i_sp+1] = hw_mode_refined[i_sp]
        elif delta_t[i_sp]>TBD_VC_TIME and delta_t[i_sp]*vel[i_sp]>TBD_VC_DIST:
            # long gap that also covers a long distance: same treatment
            hw_mode_refined[i_sp] = MODE_TBD_VC
            if i_sp<num_pt_total-1:
                hw_mode_refined[i_sp+1] = hw_mode_refined[i_sp]
        elif (delta_steps[i_sp]/delta_t[i_sp]) < SLEEP_TO_WALK_STEPS:
            # few steps per unit time: stopped, outdoor or indoor depending
            # on the current mode of the point
            if hw_mode_refined[i_sp]==MODE_WALK_OUT:
                hw_mode_refined[i_sp] = MODE_STOP_OUT
                if delta_t[i_sp]>500:
                    # very long gap: force the neighbouring points to walking
                    # with zero velocity (the previous point is skipped when
                    # it was itself the preceding sleeping point)
                    if i_sp>0 and i_sp-1!=prev_i_sp:
                        hw_mode_refined[i_sp-1] = MODE_WALK_OUT
                        vel[i_sp-1] = 0
                        ave_vel[i_sp-1] = 0
                    if i_sp<num_pt_total-1:
                        if delta_t[i_sp+1]<SLEEPING_TIME:
                            hw_mode_refined[i_sp+1] = MODE_WALK_OUT
                            vel[i_sp+1] = 0
                            ave_vel[i_sp+1] = 0
            else:
                hw_mode_refined[i_sp] = MODE_STOP_IN
                if delta_t[i_sp]>500:
                    # same neighbour handling as above, with the indoor mode
                    if i_sp>0 and i_sp-1!=prev_i_sp:
                        hw_mode_refined[i_sp-1] = MODE_WALK_IN
                        vel[i_sp-1] = 0
                        ave_vel[i_sp-1] = 0
                    if i_sp<num_pt_total-1:
                        if delta_t[i_sp+1]<SLEEPING_TIME:
                            hw_mode_refined[i_sp+1] = MODE_WALK_IN
                            vel[i_sp+1] = 0
                            ave_vel[i_sp+1] = 0

        prev_i_sp = i_sp

    # refine the walking mode points by checking the moving average of
    # velocity and the single point velocity
    idx_walking = np.where((hw_mode_refined == MODE_WALK_IN) | (hw_mode_refined == MODE_WALK_OUT))[0].tolist()
    for i_walk in idx_walking:
        if ave_vel[i_walk] > WALK_MAX_V_AVE or vel[i_walk] > WALK_MAX_V_PT:
            hw_mode_refined[i_walk] = MODE_TBD_VC

    # recompute, since some walking points were just relabelled
    idx_walking = np.where((hw_mode_refined == MODE_WALK_IN) | (hw_mode_refined == MODE_WALK_OUT))[0].tolist()

    # get the start and end idx of each walking segment
    if(len(idx_walking)==0):
        num_walking_seg = 0
    else:
        start_idx_walking,end_idx_walking,num_walking_seg = getStartEndIdx(idx_walking)
        # check the single walking points (index is both a segment start and
        # a segment end); if faster than SINGLE_WALK_MAX_V, set as TBD_VC and
        # drop the segment from the start/end lists
        idx_single_walking = list(set(start_idx_walking).intersection(end_idx_walking))
        for i_sw in idx_single_walking:
            if ave_vel[i_sw] > SINGLE_WALK_MAX_V or vel[i_sw] > SINGLE_WALK_MAX_V:
                hw_mode_refined[i_sw] = MODE_TBD_VC
                start_idx_walking.remove(i_sw)
                end_idx_walking.remove(i_sw)
                num_walking_seg = num_walking_seg-1

        # go through each walking segment and change indoor to outdoor if
        # the first-to-last point distance is larger than a threshold
        for i_walk_seg in xrange(0,num_walking_seg):
            jump_dist = great_circle_dist([lat[start_idx_walking[i_walk_seg]],lon[start_idx_walking[i_walk_seg]]],[lat[end_idx_walking[i_walk_seg]],lon[end_idx_walking[i_walk_seg]]],'meters')
            if jump_dist>WALK_IN_MAX_DIST:
                walk_seg_length = end_idx_walking[i_walk_seg]+1-start_idx_walking[i_walk_seg]
                hw_mode_refined[start_idx_walking[i_walk_seg]:end_idx_walking[i_walk_seg]+1] = np.ones(walk_seg_length)*MODE_WALK_OUT

#    (disabled step, kept for reference)
#    # go through each walking segment
#    # modify modes of the several pts before and after the walking segment
#    # updated in hw_mode_trip_refined and idx_walking_trip
#    #print "walking_seg:",num_walking_seg
#    for i_seg in xrange(0,num_walking_seg):
#        if(i_seg<num_walking_seg-1):
#            start_next_seg = start_idx_walking[i_seg+1]
#        else:
#            start_next_seg = num_pt_total
#
#        #print start_idx_walking[i_seg],end_idx_walking[i_seg]
#
#        if (end_idx_walking[i_seg]+NUM_AFT_WALKING < start_next_seg) and (timestamp[end_idx_walking[i_seg]+NUM_AFT_WALKING]-timestamp[end_idx_walking[i_seg]] < TIME_NOT_HIDE):
#            # make several pts after walking seg as MODE_TBD
#             hw_mode_refined[end_idx_walking[i_seg]+1:end_idx_walking[i_seg]+NUM_AFT_WALKING+1] = MODE_TBD
#        elif(timestamp[start_next_seg-1]-timestamp[end_idx_walking[i_seg]] < TIME_NOT_HIDE):
#            hw_mode_refined[end_idx_walking[i_seg]+1:start_next_seg] = MODE_TBD

    # first refinement pass over the non-walking segments
    hw_mode_refined = notWalkSegProcess(hw_mode_refined, ave_vel, delta_t, timestamp, lat, lon, dist, num_pt_total, NOT_STOP_V, IS_STOP_V, TIME_SET_STOPPED, VC_MIN_DIST)

    # try to combine mode segments like: vehicle + stop/walk + vehicle
    # (on a temporary copy where all stop/walk modes are merged into WALK_IN)
    temp_modes = np.array(hw_mode_refined.copy())
    temp_modes[(temp_modes==MODE_STOP_IN) | (temp_modes==MODE_STOP_OUT) | (temp_modes==MODE_WALK_OUT)]=MODE_WALK_IN
    mode_segs = list(chunks(temp_modes,True)) # take the mode chunk, used as (start idx, end idx (exclusive), mode)
    num_mode_segs = len(mode_segs)

    # go through each mode chunk that has both a previous and a next chunk
    for i_seg in xrange(1,num_mode_segs-1):
        mode_seg = mode_segs[i_seg]
        mode_seg_prev = mode_segs[i_seg-1]
        mode_seg_aft = mode_segs[i_seg+1]
        # check the steps and average velocity of walking seg between two vehicle seg
        if mode_seg[2]==MODE_WALK_IN:
            if mode_seg_prev[2]!=MODE_WALK_IN and mode_seg_aft[2]!=MODE_WALK_IN:
                time_span = np.sum(delta_t[mode_seg[0]:mode_seg[1]])
                tot_steps = np.nansum(delta_steps[mode_seg[0]:mode_seg[1]])
                v_mean_mode_seg = aveVelCalc(ave_vel[mode_seg[0]:mode_seg[1]], delta_t[mode_seg[0]:mode_seg[1]])
                # short segment with few steps or a high mean velocity:
                # relabel it as TBD_VC
                if time_span<SHORT_WALK and (tot_steps<FEW_STEPS or v_mean_mode_seg>SHORT_WALK_MAX_V):
                    hw_mode_refined[mode_seg[0]:mode_seg[1]] = MODE_TBD_VC


    # second refinement pass, now including the segments relabelled above
    hw_mode_refined = notWalkSegProcess(hw_mode_refined, ave_vel, delta_t, timestamp, lat, lon, dist, num_pt_total, NOT_STOP_V, IS_STOP_V, TIME_SET_STOPPED, VC_MIN_DIST)

    return hw_mode_refined
Ejemplo n.º 10
0
    def process(nid, analysis_date):
        """Run the analysis pipeline for device nid on the given date
        (%Y-%m-%d).

        The device data is fetched with getData(), cleaned, extended with
        derived features, labelled with predicted travel modes and parsed
        into trips. The device id and the detected AM/PM modes are
        appended to nids_record, am_modes_record and pm_modes_record;
        when both a home and a school location were found, the device is
        also appended to valid_loc_nid / valid_loc_info together with the
        home-school distance.

        Return None when getData() yields None or fewer than 10 rows.
        Otherwise return the tuple (data_frame, predicted_modes, trips,
        home_loc, school_loc).

        """
        # The "already processed" status check and the saving of modes,
        # trips and analysis status to the backend are currently disabled.

        # NOTE(review): analysis_date_tuple is not a parameter of this
        # function; it is resolved from the enclosing scope, while
        # analysis_date itself is only used in the log message below.
        day_unix_utc = calendar.timegm(analysis_date_tuple.timetuple())

        # query window: starts 12 hours after the value returned by
        # getFirstSecondOfDay() and spans 24 hours (inclusive last second)
        window_start = int(getFirstSecondOfDay(day_unix_utc)) + 12*3600
        window_end = int(window_start + 24*3600 - 1)

        # retrieve the device data from the backend
        logging.info("Get data for device %d on the day %s" % (nid, analysis_date))
        data_frame = getData(url, nid, window_start, window_end)

        # guard clauses: nothing to analyse
        if data_frame is None:
            logging.info("No data returned for device %d, skip." % nid)
            return
        if len(data_frame)<10:
            # too few rows to be worth analysing, abandon the data
            logging.warning("Too little data returned for device %d, skip." % nid)
            return

        # clean data to reduce noise (the return value is not used)
        logging.info("Clean data for device %d" % nid)
        cleaning_options = dict(
            valid_lat_low=valid_lat_low,
            valid_lat_up=valid_lat_up,
            valid_lon_low=valid_lon_low,
            valid_lon_up=valid_lon_up,
            location_accuracy_thresh=location_accuracy_thresh,
        )
        clean_data(data_frame, **cleaning_options)

        # derive additional feature columns
        logging.info("Calculate features for device %d" % nid)
        calculate_features(data_frame, high_velocity_thresh=high_velocity_thresh)

        # predict the travel mode for each measurement:
        # hardware modes -> smoothing -> train heuristic -> bus heuristic
        logging.info("Predict modes for device %d" % nid)
        raw_modes = data_frame['MODE'].values
        smoothed_modes = smooth_heuristic.predict(data_frame, raw_modes)
        train_modes = train_heuristic.predict(data_frame, smoothed_modes)
        predicted_modes = bus_heuristic.predict(data_frame, train_modes)

        # identify trips and the home / school locations from the data
        trip_options = dict(
            stopped_thresh=stopped_thresh,
            poi_dwell_time=poi_dwell_time,
            school_start=school_start,
            school_end=school_end,
            home_start=home_start,
            home_end=home_end,
            max_school_thresh=max_school_thresh,
            home_school_round_decimals=home_school_round_decimals,
            mode_thresh=mode_thresh,
            poi_cover_range=poi_cover_range,
        )
        trips, home_loc, school_loc = tripParse.process(predicted_modes, data_frame, **trip_options)

        nid_prefix = "NID: " + str(nid)
        logging.warning(nid_prefix + "; HOME: " + str(home_loc))
        logging.warning(nid_prefix + "; SCHOOL: " + str(school_loc))
        logging.warning(nid_prefix + "; TRIPS: " + str(trips))
        logging.info("Save modes for device %d" % nid)

        # record the home-school distance only when both locations are known
        if home_loc!=(None,None) and school_loc!=(None,None):
            school_point = [school_loc[0], school_loc[1]]
            home_point = [home_loc[0], home_loc[1]]
            school_home_dist = great_circle_dist(school_point, home_point, unit="meters")
            valid_loc_nid.append(nid)
            valid_loc_info.append({'home loc':home_loc,'school loc':school_loc,'distance':school_home_dist})

        # per-device bookkeeping
        nids_record.append(nid)
        am_modes_record.append(trips['am_mode'])
        pm_modes_record.append(trips['pm_mode'])

        return data_frame, predicted_modes, trips, home_loc, school_loc
Ejemplo n.º 11
0
def calculate_features(data_frame, high_velocity_thresh=40):
    """Calculate additional features and attributes from the raw hardware
    data. New attributes are added as new columns in the data frame in
    place.

    Columns read: TIMESTAMP, STEPS, WLATITUDE, WLONGITUDE.

    Columns written: TIME_DELTA, STEPS_DELTA, DISTANCE_DELTA,
    DISTANCE_DELTA2, VELOCITY, VELOCITY2, AVE_VELOCITY, AVE_VELOCITY2,
    AVE_STEPS. In addition WLATITUDE, WLONGITUDE, DISTANCE_DELTA and
    VELOCITY are overwritten with NaN on rows whose VELOCITY exceeds
    high_velocity_thresh and on the rows labelled one less than those.

    high_velocity_thresh : maximum threshold for velocities in m/s,
                           higher values are rejected. Default 40m/s
                           (= 144 km/h)

    NOTE(review): the code uses izip / xrange and concatenates a list
    with the result of map(), i.e. it relies on Python 2 semantics.
    """

    # calculate time delta since the last measurement, in seconds
    # (double brackets select a one-column frame, so .values is 2-D and each
    # element is a length-1 row; hence the [0] in the lambda)
    consec_timestamps = izip(data_frame[['TIMESTAMP']].values[:-1], data_frame[['TIMESTAMP']].values[1:])
    delta_timestamps = map(lambda x: x[1][0]-x[0][0], consec_timestamps)
    # add a zero value for the first measurement where no delta is available
    # (list concatenation: requires map() to have returned a list)
    delta_timestamps = [0] + delta_timestamps
    # NOTE(review): pd.Series without an explicit index gets labels 0..n-1 and
    # the column assignment aligns on labels; this presumably relies on
    # data_frame carrying the same default index - confirm against the caller
    data_frame['TIME_DELTA'] = pd.Series(delta_timestamps)

    # calculate steps delta since the last measurement
    consec_steps = izip(data_frame[['STEPS']].values[:-1], data_frame[['STEPS']].values[1:])
    delta_steps = map(lambda x: x[1][0]-x[0][0], consec_steps)
    # add a zero value for the first measurement where no delta is available
    data_frame['STEPS_DELTA'] = pd.Series([0] + delta_steps)

    # select rows in data frame that have valid locations
    # (both WLATITUDE and WLONGITUDE are not NaN)
    df_validloc = data_frame.loc[~np.isnan(data_frame['WLATITUDE']) & ~np.isnan(data_frame['WLONGITUDE'])]
    # calculate distance delta from pairs of valid lat/lon locations that follow each other
    valid_latlon = df_validloc[['WLATITUDE', 'WLONGITUDE']].values
    # earlier variant that floored the coordinates to 4 decimals instead of rounding them:
#    dist_delta = map(lambda loc_pair: great_circle_dist(np.floor(loc_pair[0]*10000)/10000, np.floor(loc_pair[1]*10000)/10000, unit="meters"), izip(valid_latlon[:-1], valid_latlon[1:]))
    # dist_delta: distance between consecutive valid locations rounded to 4 decimals
    dist_delta = map(lambda loc_pair: great_circle_dist(np.round(loc_pair[0],4), np.round(loc_pair[1],4), unit="meters"), izip(valid_latlon[:-1], valid_latlon[1:]))
    # dist_delta2: same distance computed from the unrounded coordinates
    dist_delta2 = map(lambda loc_pair: great_circle_dist(loc_pair[0], loc_pair[1], unit="meters"), izip(valid_latlon[:-1], valid_latlon[1:]))

    # calculate time delta from pairs of valid timestamps
    # (time between consecutive valid locations, not between consecutive rows)
    valid_times = df_validloc['TIMESTAMP'].values
    time_delta = valid_times[1:] - valid_times[:-1]
    # calculate velocity, m/s
    # (dist_delta is a plain list under Python 2; dividing it by the ndarray
    # time_delta is carried out element-wise by numpy)
    velocity = dist_delta / time_delta
    velocity2 = dist_delta2 / time_delta

    # create new columns for delta distance, time delta and velocity, initialized with NaN
    # the series are labelled with the valid-location rows except the first
    # one, so every other row of the data frame receives NaN through alignment
    data_frame['DISTANCE_DELTA'] = pd.Series(dist_delta, df_validloc.index[1:])  # distance in m
    data_frame['DISTANCE_DELTA2'] = pd.Series(dist_delta2, df_validloc.index[1:])  # distance in m
    data_frame['VELOCITY'] = pd.Series(velocity, df_validloc.index[1:]) # velocity in m/s
    data_frame['VELOCITY2'] = pd.Series(velocity2, df_validloc.index[1:]) # velocity in m/s

    # replace very high velocity values which are due to wifi
    # localizations errors with NaN in VELOCITY column
    # (only VELOCITY is tested; DISTANCE_DELTA2 and VELOCITY2 are left untouched)
    label_too_high_vel = data_frame['VELOCITY'] > high_velocity_thresh
    idx_too_high = label_too_high_vel[label_too_high_vel==True].index.tolist()
    # NOTE(review): "label - 1" is the preceding row label, which is not
    # necessarily the preceding valid location - confirm this is intended
    idx_bef_too_high = (np.array(idx_too_high)-1).tolist()
    # blank out location, distance and velocity on the offending rows and on the rows before them
    data_frame.loc[idx_too_high,['WLATITUDE', 'WLONGITUDE','DISTANCE_DELTA','VELOCITY']] = np.nan
    data_frame.loc[idx_bef_too_high,['WLATITUDE', 'WLONGITUDE','DISTANCE_DELTA','VELOCITY']] = np.nan

    # calculate the moving average of velocity, m/s
    LARGE_TIME_JUMP = 60  # compared against the TIME_DELTA values below
    window_size = 5
    velocity_all = data_frame['VELOCITY'].values
    ave_velocity_all = []
    # NOTE(review): both slices end at idx (exclusive), so the current point is
    # not part of its own average: the first window_size points average all
    # earlier points (an empty slice for idx 0), later points average the
    # window_size-1 preceding points - confirm this is intended
    for idx in xrange(0,len(velocity_all)):
        if idx<window_size:
            ave_velocity_all.append(aveWithNan(velocity_all[0:idx]))
        else:
            ave_velocity_all.append(aveWithNan(velocity_all[idx-window_size+1:idx]))
    ave_velocity_all = np.array(ave_velocity_all)
    # set moving average velocity of large time jump points as point velocity
    # (positions where the time delta to the previous row exceeds LARGE_TIME_JUMP)
    idx_large_jump = np.where(np.array(delta_timestamps)>LARGE_TIME_JUMP)[0].tolist()
    ave_velocity_all[idx_large_jump] = velocity_all[idx_large_jump]
    data_frame['AVE_VELOCITY'] = pd.Series(ave_velocity_all.tolist()) # velocity in m/s

    # calculate the moving average of velocity, m/s
    # (same procedure as above, applied to VELOCITY2, i.e. the velocity from
    # unrounded coordinates without the high-velocity rejection)
    window_size = 5
    velocity_all2 = data_frame['VELOCITY2'].values
    ave_velocity_all2 = []
    for idx in xrange(0,len(velocity_all2)):
        if idx<window_size:
            ave_velocity_all2.append(aveWithNan(velocity_all2[0:idx]))
        else:
            ave_velocity_all2.append(aveWithNan(velocity_all2[idx-window_size+1:idx]))
    ave_velocity_all2 = np.array(ave_velocity_all2)
    # large time jumps fall back to the point velocity here as well
    idx_large_jump = np.where(np.array(delta_timestamps)>LARGE_TIME_JUMP)[0].tolist()
    ave_velocity_all2[idx_large_jump] = velocity_all2[idx_large_jump]
    data_frame['AVE_VELOCITY2'] = pd.Series(ave_velocity_all2.tolist()) # velocity in m/s

    # calculate the moving average of steps
    # (same windowing as for the velocities; no large-time-jump override is applied)
    window_size = 5
    delta_steps_all = data_frame['STEPS_DELTA'].values
    ave_delta_steps_all = []
    for idx in xrange(0,len(delta_steps_all)):
        if idx<window_size:
            ave_delta_steps_all.append(aveWithNan(delta_steps_all[0:idx]))
        else:
            ave_delta_steps_all.append(aveWithNan(delta_steps_all[idx-window_size+1:idx]))

    data_frame['AVE_STEPS'] = pd.Series(ave_delta_steps_all) # moving average of steps