def spatial_selection(rs, re, ds, de):
    """Decide whether the pair (ds, de) passes the spatial filter for (rs, re).

    The pair is rejected (``False``) when either

    * ``same_direction(rs, re, ds, de, 30)`` is falsy, or
    * both ``ds`` and ``de`` lie strictly closer to the rs/re midpoint than
      the selection radius, which is half the rs-re haversine distance
      scaled by 1.3.

    Otherwise ``True`` is returned.

    Args:
        rs, re: Two points exposing ``.lat`` and ``.lon``.
        ds, de: Two points handed to ``same_direction`` and
            ``haversine_distance``.

    NOTE(review): the meaning/units of the ``30`` passed to
    ``same_direction`` are not visible here (presumably an angle
    tolerance) - confirm against that helper.
    """
    if not same_direction(rs, re, ds, de, 30):
        return False

    # Selection radius: half the rs-re distance, enlarged by a factor of 1.3.
    radius = haversine_distance(rs, re) / 2 * 1.3

    # Arithmetic midpoint of the two coordinates (not a geodesic midpoint).
    centre = Point((rs.lat + re.lat) / 2, (rs.lon + re.lon) / 2, None)

    # `and` short-circuits, so the distance to `de` is only computed when
    # `ds` is already inside the radius.
    both_inside = (radius > haversine_distance(centre, ds)
                   and radius > haversine_distance(centre, de))
    return not both_inside
def staypoints_geolife(traj, time_thresh=30 * 60, dist_thresh=250):
    """Detect stay points in a trajectory.

    Starting from an anchor point ``traj[i]``, the scan walks forward to the
    first point ``traj[j]`` whose haversine distance from the anchor exceeds
    ``dist_thresh``. If the time elapsed between the anchor and that point
    exceeds ``time_thresh``, a stay point is recorded and the scan resumes
    from ``j``; otherwise the anchor advances by one point.

    Args:
        traj: Indexable, sliceable sequence of points exposing ``.lat``,
            ``.lon`` and ``.datetime`` (subtracting two ``.datetime`` values
            must yield an object with ``total_seconds()``).
        time_thresh: Minimum stay duration in seconds. Defaults to 30 minutes.
        dist_thresh: Stay radius, in whatever unit ``haversine_distance``
            returns (presumably metres - confirm). Defaults to 250.

    Returns:
        A list with one entry per stay point, each of the form
        ``[mean_point, arrival_time, leave_time, i, j]`` where ``mean_point``
        is the ``[lat, lon]`` mean over ``traj[i:j + 1]``, ``arrival_time`` is
        ``traj[i].datetime``, ``leave_time`` is ``traj[j].datetime`` and
        ``i``/``j`` are the indices delimiting the stay.
    """
    staypoints = []
    i, n_points = 0, len(traj)
    while i < n_points:
        anchor = traj[i]
        found_stay = False
        for j in range(i + 1, n_points):
            if haversine_distance(anchor, traj[j]) > dist_thresh:
                # traj[j] is the first point that left the anchor's radius.
                dwell = traj[j].datetime - anchor.datetime
                if dwell.total_seconds() > time_thresh:
                    # The mean includes traj[j] itself (slice end is j + 1).
                    mean_point = np.mean(
                        [[p.lat, p.lon] for p in traj[i:j + 1]], axis=0)
                    staypoints.append(
                        [mean_point, anchor.datetime, traj[j].datetime, i, j])
                    # Resume scanning from the point that left the radius.
                    i = j
                    found_stay = True
                # Only the first point outside the radius is ever examined.
                break
        if not found_stay:
            i += 1
    return staypoints
def make_df(trajs):
    """Build a DataFrame of trip legs from a collection of trajectories.

    Each trajectory is split at the stay points returned by
    ``staypoints_geolife`` (entries of the form
    ``[mean_point, arrival_time, leave_time, i, j]``):

    * first point            -> first stay point (ends at its arrival time)
    * stay point k           -> stay point k + 1 (leave time -> arrival time)
    * last stay point        -> last point (starts at its leave time)

    A trajectory with fewer than two stay points contributes a single leg
    from its first to its last point. Trajectories that are empty, or whose
    first and last points are closer than 100 (in the unit returned by
    ``haversine_distance``), are skipped.

    Args:
        trajs: Iterable of trajectories; each trajectory is an indexable
            sequence of points exposing ``.lat``, ``.lon`` and ``.datetime``.

    Returns:
        A ``pandas.DataFrame`` with columns ``start_lat``, ``start_lon``,
        ``start_date``, ``end_lat``, ``end_lon``, ``end_date``, indexed by a
        ``DatetimeIndex`` built from ``start_date`` and sorted by that index.
    """
    columns = [
        'start_lat', 'start_lon', 'start_date', 'end_lat', 'end_lon',
        'end_date'
    ]
    data = []
    for traj in trajs:
        # len() rather than truthiness so array-like trajectories work too;
        # an empty trajectory has no endpoints to build a leg from.
        if len(traj) == 0:
            continue
        fp, lp = traj[0], traj[-1]
        if haversine_distance(fp, lp) < 100:
            continue

        sps = staypoints_geolife(traj)
        # NOTE(review): a trajectory with exactly one stay point is treated
        # as a single first->last leg (the stay point is ignored). Kept as
        # in the original; confirm this is intended.
        if len(sps) > 1:
            first_sp, last_sp = sps[0], sps[-1]
            data.append([
                fp.lat, fp.lon, fp.datetime,
                first_sp[0][0], first_sp[0][1], first_sp[1]
            ])
            # Every consecutive pair of stay points, including sps[0]->sps[1].
            # A leg departs at the origin's leave time (index 2) and ends at
            # the destination's arrival time (index 1), matching the first
            # and last legs.
            for origin, dest in zip(sps, sps[1:]):
                data.append([
                    origin[0][0], origin[0][1], origin[2],
                    dest[0][0], dest[0][1], dest[1]
                ])
            data.append([
                last_sp[0][0], last_sp[0][1], last_sp[2],
                lp.lat, lp.lon, lp.datetime
            ])
        else:
            data.append(
                [fp.lat, fp.lon, fp.datetime, lp.lat, lp.lon, lp.datetime])

    df = pd.DataFrame(data, columns=columns)
    df = df.set_index(pd.DatetimeIndex(df['start_date'])).sort_index()
    return df
def resolve_endcluster(train, p, max_dist=400):
    """Return the end-cluster label of the training end point nearest to ``p``.

    Only end points strictly closer than ``max_dist`` are considered; when
    several qualify, the one with the smallest distance wins.

    Args:
        train: Mapping/DataFrame providing equally long ``'end_lat'``,
            ``'end_lon'`` and ``'end_cluster'`` columns.
        p: Query point, passed to ``haversine_distance`` together with each
            candidate end point.
        max_dist: Exclusive distance cutoff, in the unit returned by
            ``haversine_distance`` (presumably metres - confirm).
            Defaults to 400.

    Returns:
        The ``end_cluster`` value of the nearest qualifying end point, or
        ``None`` when no end point lies within ``max_dist``.
    """
    # Keyed by Point as before. If Point compares/hashes by value, rows with
    # identical coordinates collapse and the last row's label is kept
    # (depends on Point's definition - not visible here).
    end_clusters = {
        Point(lat, lon, None): label
        for lat, lon, label in zip(train['end_lat'], train['end_lon'],
                                   train['end_cluster'])
    }

    end_cluster = None
    min_dist = max_dist
    for ec_point, label in end_clusters.items():
        # haversine_distance takes two point objects everywhere else in this
        # file; the previous four-scalar call did not match that signature.
        dist = haversine_distance(p, ec_point)
        if dist < min_dist:
            min_dist = dist
            end_cluster = label
    return end_cluster