def drawACRBox(DBPath):
    """
    Draw a box plot of the ACR feature for every transportation mode.

    Reads the ``ACR`` and ``true_class`` columns of table ``GPS_segments``,
    keeps the rows with ``ACR < 0.8`` and shows one box per mode, in the
    order car, bus, bike, walk, train, plane.

    Args:
        DBPath: database location handed to ``DB.get_conn``.
    """
    modes = ['car', 'bus', 'bike', 'walk', 'train', 'plane']
    fetSql = 'select ACR,true_class from GPS_segments'

    conn = DB.get_conn(DBPath)
    try:
        data = pd.read_sql(fetSql, conn)
    finally:
        # release the connection even when the query fails
        DB.closeDB(conn)

    # One column per mode. The selections keep their original row index, so
    # each column is NaN on the rows that belong to another mode.
    keep = data['ACR'] < 0.8
    ACRpd = pd.DataFrame({
        mode: data.loc[keep & (data['true_class'] == mode), 'ACR']
        for mode in modes
    })

    ax = sns.boxplot(data=ACRpd, order=modes)
    # Axis labels are Chinese: "travel mode" and
    # "acceleration change rate per unit distance (ACR)".
    ax.set_xlabel('出行方式', fontproperties=font)
    ax.set_ylabel('单位距离内加速度变化率 (ACR)', fontproperties=font)
    # pyplot.show() only takes the keyword argument ``block``; handing it the
    # Axes positionally raises TypeError on current matplotlib releases.
    plt.show()
def getTransitionMatrix(DBPath):
    """
    Count transitions between the transportation modes of consecutive segments.

    Rows of ``GPS_segments`` are read as (id, trip_id, segment_id, true_class).
    A row whose ``segment_id`` equals 1 is treated as the first segment of a
    trip and is not counted; every other row increments the cell
    [mode of the previous row][mode of the current row].

    Matrix indices: car=0, bus=1, bike=2, walk=3, train=4, plane=5.

    Args:
        DBPath: database location handed to ``DB.get_conn``.

    Returns:
        numpy.ndarray: the 6x6 int64 transition matrix (it is also printed).
        The matrix is all zeros when the query yields no rows.

    Raises:
        KeyError: if a counted row, or the row before it, carries a mode
            outside the six known ones.
    """
    modeIndex = {
        'car': 0,
        'bus': 1,
        'bike': 2,
        'walk': 3,
        'train': 4,
        'plane': 5,
    }
    fetchSql = 'select id,trip_id,segment_id,true_class from GPS_segments'

    conn = DB.get_conn(DBPath)
    try:
        data = DB.fetchAll(conn, fetchSql)
    finally:
        # release the connection even when the query fails
        DB.closeDB(conn)
    print(data)

    modeMatrix = np.zeros((6, 6), dtype=np.int64)
    # An empty (or None) result simply leaves the matrix at zero instead of
    # failing on data[0].
    rows = data or []
    # NOTE(review): the query has no ORDER BY; the pairing below assumes rows
    # come back in trip/segment order - confirm.
    for pre, cur in zip(rows, rows[1:]):
        if cur[2] == 1:
            # first segment of a trip: nothing precedes it inside that trip
            continue
        modeMatrix[modeIndex[pre[3]], modeIndex[cur[3]]] += 1

    print(modeMatrix)
    return modeMatrix
def getSegFea(DBPath):
    """
    Load every row of table ``GPS_segments`` into a pandas DataFrame.

    Columns, as listed in the original notes for this function:
        id, user_id, trip_id, segment_id, start_point_id, end_point_id,
        point_num, distance, _85thV, MaxV1, MaxV2, MedianV, MinV, MeanV,
        Ev, Dv, HVR, MVR, LVR, _85thA, MaxA1, MaxA2, MedianA, MinA, MeanA,
        Ea, Da, HAR, MAR, LAR, TS, ACR, BSR, ACP, HCR, SR, VCR, is_deleted,
        predicted_class, true_class

    Args:
        DBPath: database location handed to ``DB.get_conn``.

    Returns:
        pandas.DataFrame: the result of ``select * from GPS_segments``.
    """
    fetAllSegFea = 'select * from GPS_segments'
    conn = DB.get_conn(DBPath)
    try:
        return pd.read_sql(fetAllSegFea, conn)
    finally:
        # runs on success and on failure, so the connection never leaks
        DB.closeDB(conn)
def getVAFigure(DBPath):
    """
    Draw the CDF of point accelerations, grouped by transportation mode.

    Reads (velocity, accelerometer, mode) from the tables ``GPS_points_1`` to
    ``GPS_points_32``, groups the values by mode and passes the six
    acceleration lists (order: car, bus, bike, walk, train, plane) to
    ``drawCDF``. Points whose mode is not one of the six are ignored.

    Args:
        DBPath: database location handed to ``DB.get_conn``.
    """
    modes = ['car', 'bus', 'bike', 'walk', 'train', 'plane']
    velocities = {mode: [] for mode in modes}
    accelerations = {mode: [] for mode in modes}

    conn = DB.get_conn(DBPath)
    try:
        for i in range(1, 33):
            fetSql = 'select velocity,accelerometer,mode from GPS_points_' + str(i)
            allRecords = DB.fetchAll(conn, fetSql)
            if allRecords is None:
                # getSegment treats a None result as a failed fetch; skip the
                # table instead of crashing on the iteration below
                print('fetch point set Fail!')
                continue
            for velocity, acceleration, mode in allRecords:
                if mode in accelerations:
                    velocities[mode].append(velocity)
                    accelerations[mode].append(acceleration)
    finally:
        # release the connection even when a query fails
        DB.closeDB(conn)

    # Velocities are collected as well, so the velocity CDF can be drawn with:
    #   drawCDF([velocities[mode] for mode in modes], 'Velocity (m/s)')
    aBox = [accelerations[mode] for mode in modes]
    drawCDF(aBox, 'Acceleration (m/s$^2$)')
def getSegment(DBPath, i, busInfoPath):
    """
    Split the points of table ``GPS_points_<i>`` into trips and pass every
    trip to ``insertSeg``, which computes and stores the segment features.

    Record layout (index: field), e.g.
    (1, 2, 40.29, 116.150, '2009/07/29 18:12:14', 0.60, 0.0016, 53.67, 'walk'):
        0: id            1: user_id    2: lat
        3: lon           4: time_stamp 5: velocity
        6: accelerometer 7: distance   8: mode

    A record whose accelerometer value is -1 acts as a trip separator: a
    trip starts at the first non-separator record after a separator and is
    flushed when the next separator, or the last record, is reached.

    Args:
        DBPath: database location handed to ``DB.get_conn`` / ``insertSeg``.
        i: numeric suffix of the points table; also forwarded to ``insertSeg``.
        busInfoPath: forwarded unchanged to ``insertSeg``.
    """
    fetchAllSql = 'select id,user_id,lat,lon,time_stamp,velocity,' + \
        'accelerometer,distance,mode from GPS_points_' + str(i)

    conn = DB.get_conn(DBPath)
    try:
        allRecords = DB.fetchAll(conn, fetchAllSql)
        if allRecords is None:
            print('fetch point set Fail!')
            return
        if len(allRecords) == 0:
            # nothing to split; indexing allRecords[0] below would fail
            return

        trip = []
        tripId = 0
        lastIndex = len(allRecords) - 1
        pre = allRecords[0]
        for index in range(1, len(allRecords)):
            cur = allRecords[index]
            preInTrip = pre[6] != -1
            curInTrip = cur[6] != -1
            isLast = index == lastIndex

            if not preInTrip and curInTrip:
                # first record after a separator: a new trip begins
                tripId += 1
                trip.append(cur)

            if (preInTrip and not curInTrip) or isLast:
                # NOTE(review): on the last record ``cur`` is appended
                # unconditionally, even when it is a separator or was
                # already appended by the branch above - confirm intent.
                if isLast:
                    trip.append(cur)
                print(tripId)
                insertSeg(DBPath, i, trip, tripId, busInfoPath)
                trip.clear()

            if preInTrip and curInTrip:
                # ordinary record inside the current trip
                trip.append(cur)

            pre = cur
    finally:
        # also runs on the early returns above, which used to leak the
        # connection
        DB.closeDB(conn)
def getSegFea1(DBPath, testSize=2315):
    """
    Load every row of table ``GPS_segments`` and split it into a training
    part and a test part; the test part is the tail of the table.

    Columns, as listed in the original notes for this function:
        id, user_id, trip_id, segment_id, start_point_id, end_point_id,
        point_num, distance, _85thV, MaxV1, MaxV2, MedianV, MinV, MeanV,
        Ev, Dv, HVR, MVR, LVR, _85thA, MaxA1, MaxA2, MedianA, MinA, MeanA,
        Ea, Da, HAR, MAR, LAR, TS, ACR, BSR, ACP, HCR, SR, VCR, is_deleted,
        predicted_class, true_class

    Args:
        DBPath: database location handed to ``DB.get_conn``.
        testSize: number of trailing rows that form the test set. The
            default, 2315, is the value that used to be hard-coded.

    Returns:
        tuple: ``(train, test)`` DataFrames. When the table has fewer than
        ``testSize`` rows, ``train`` is empty and ``test`` holds every row.

    Raises:
        ValueError: if ``testSize`` is negative.
    """
    if testSize < 0:
        raise ValueError('testSize must not be negative')

    fetAllSegFea = 'select * from GPS_segments'
    conn = DB.get_conn(DBPath)
    try:
        data = pd.read_sql(fetAllSegFea, conn)
    finally:
        # release the connection even when the query fails
        DB.closeDB(conn)

    # An explicit split position keeps testSize=0 meaningful: data[:-0]
    # would be empty and data[-0:] would be the whole frame.
    splitAt = max(len(data) - testSize, 0)
    train = data.iloc[:splitAt]
    test = data.iloc[splitAt:]
    return train, test
def getVA(DBPath):
    """
    Draw velocity and acceleration histograms for the points labelled 'car'.

    Reads (velocity, accelerometer, mode) from the tables ``GPS_points_1`` to
    ``GPS_points_32`` and collects (velocity, accelerometer) pairs per
    transportation mode. Only the car pairs are plotted, via ``drawHist``.

    Args:
        DBPath: database location handed to ``DB.get_conn``.
    """
    modes = ['car', 'bus', 'bike', 'walk', 'train', 'plane']
    samples = {mode: [] for mode in modes}

    conn = DB.get_conn(DBPath)
    try:
        for i in range(1, 33):
            fetSql = 'select velocity,accelerometer,mode from GPS_points_' + str(i)
            allRecords = DB.fetchAll(conn, fetSql)
            if allRecords is None:
                # getSegment treats a None result as a failed fetch; skip the
                # table instead of crashing on the iteration below
                print('fetch point set Fail!')
                continue
            for velocity, acceleration, mode in allRecords:
                if mode in samples:
                    samples[mode].append((velocity, acceleration))
    finally:
        # release the connection even when a query fails
        DB.closeDB(conn)

    # Only the car samples are plotted, so only they are turned into an
    # array; the other modes stay available in ``samples``.
    carArr = np.array(samples['car'])
    # Original note here: "user 1~10" - presumably the axis limits below were
    # tuned on that subset of users; TODO confirm.
    drawHist(carArr, 'Car Velocity (m/s)', 'Frequency',
             'Car Acceleration (m/s$^2$)', 'Frequency',
             0, 50, 0, 2.5, 10000, 70000)
def drawVABox(DBPath):
    """
    Draw a box plot of the maximum acceleration (``MaxA1``) per
    transportation mode.

    Reads ``_85thV``, ``MaxV1``, ``_85thA``, ``MaxA1`` and ``true_class``
    from table ``GPS_segments``, keeps the rows with ``MaxA1 < 20`` and shows
    one box per mode, in the order car, bus, bike, walk, train, plane.

    Args:
        DBPath: database location handed to ``DB.get_conn``.
    """
    fetSql = 'select _85thV,MaxV1,_85thA,MaxA1,true_class from GPS_segments'
    conn = DB.get_conn(DBPath)
    try:
        data = pd.read_sql(fetSql, conn)
    finally:
        # release the connection even when the query fails
        DB.closeDB(conn)

    # View drawn by this function. Other views this function was used for
    # (change the three values below and the y label to reproduce them):
    #   _85thV < 300, all six modes      - 85th percentile velocity (m/s)
    #   _85thV < 40,  without 'plane'    - 85th percentile velocity (m/s)
    #   MaxV1  < 60,  without 'plane'    - maximum velocity (m/s)
    #   _85thA < 3,   all six modes      - 85th percentile acceleration (m/s^2)
    column = 'MaxA1'
    upperBound = 20
    modes = ['car', 'bus', 'bike', 'walk', 'train', 'plane']

    # One column per mode. The selections keep their original row index, so
    # each column is NaN on the rows that belong to another mode.
    keep = data[column] < upperBound
    plotData = pd.DataFrame({
        mode: data.loc[keep & (data['true_class'] == mode), column]
        for mode in modes
    })

    ax = sns.boxplot(data=plotData, order=modes)
    # Axis labels are Chinese: "travel mode" and "maximum acceleration (m/s^2)".
    ax.set_xlabel('出行方式', fontproperties=font)
    ax.set_ylabel('最大加速度 (m/s$^2$)', fontproperties=font)
    # pyplot.show() only takes the keyword argument ``block``; handing it the
    # Axes positionally raises TypeError on current matplotlib releases.
    plt.show()
def insertSeg(DBPath, i, trip, tripId, busInfoPath):
    """
    Cut one trip into single-mode segments and insert every sufficiently
    long segment, with its features, into table ``GPS_segments``.

    A segment ends where the mode (last field of a point) changes or where
    the trip ends. Trips and segments with fewer than ``numThd`` points are
    dropped; dropped segments do not consume a segment id.

    Args:
        DBPath: database location handed to ``DB.get_conn``.
        i: stored as ``user_id`` of every inserted segment.
        trip: list of point tuples
            (id, user_id, lat, lon, time_stamp, velocity, accelerometer,
            distance, mode), i.e. id at index 0 and mode at index 8 / -1.
        tripId: the id of this trip.
        busInfoPath: forwarded unchanged to ``getFeatures``.
    """
    tripLen = len(trip)
    if tripLen == 0:
        # An empty trip has no first/last point to report; building the
        # message below for it used to raise IndexError.
        print('empty trip is abandon!')
        return
    if tripLen < numThd:
        print('trip from ' + str(trip[0][0]) + ' to ' +
              str(trip[-1][0]) + ' is abandon!')
        return

    # 37 inserted columns out of 40; id, is_deleted and predicted_class are
    # not set here. Built once because it does not depend on the segment.
    insertSql = ('insert into GPS_segments (user_id,trip_id,'
                 'segment_id,start_point_id,end_point_id,point_num,'
                 'distance,_85thV,MaxV1,MaxV2,MedianV,MinV,MeanV,'
                 'Ev,Dv,HVR,MVR,LVR,_85thA,MaxA1,MaxA2,MedianA,MinA'
                 ',MeanA,Ea,Da,HAR,MAR,LAR,TS,ACR,BSR,ACP,HCR,SR,'
                 'VCR,true_class) values (?,?,?,?,?,?,?,?,?,?,?,?,'
                 '?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)')

    conn = DB.get_conn(DBPath)
    try:
        segmentId = 1
        segment = []
        pre = trip[0]
        lastIndex = tripLen - 1
        for index in range(1, tripLen):
            cur = trip[index]
            # ``pre`` always belongs to the segment being collected
            segment.append(pre)

            isLast = index == lastIndex
            if cur[-1] != pre[-1] or isLast:
                # NOTE(review): when the mode changes exactly on the last
                # point, that point is still added here and its mode becomes
                # the label of the whole segment - confirm this is intended.
                if isLast:
                    segment.append(cur)
                print('segmentId: ' + str(segmentId) + ': from ' +
                      str(segment[0][0]) + ' to ' + str(segment[-1][0]) +
                      ' :' + str(segment[-1][-1]))

                pointNum = len(segment)
                if pointNum >= numThd:
                    distSum = featureFunc.getSegDist(segment)
                    preFea = (i, tripId, segmentId, segment[0][0],
                              segment[-1][0], pointNum, distSum)
                    fea = getFeatures(segment, busInfoPath)
                    trueClass = segment[-1][-1]
                    totalColumn = preFea + fea + (trueClass, )
                    print(totalColumn)
                    DB.insert(conn, insertSql, [totalColumn])
                    segmentId += 1
                # start collecting the next segment, stored or not
                segment.clear()

            pre = cur
    finally:
        # release the connection even when feature extraction or the insert
        # fails
        DB.closeDB(conn)
def drawALine(DBPath, mode='plane'):
    """
    Plot the acceleration of one hand-picked sample segment as a line chart.

    Every transportation mode has one sample: a points table plus an id
    range, together with the axis limits and the y-axis label used for it.

    Args:
        DBPath: database location handed to ``DB.get_conn``.
        mode: one of 'car', 'bus', 'bike', 'walk', 'train', 'plane'.
            Defaults to 'plane', the sample this function always drew
            before the parameter existed.

    Raises:
        ValueError: if ``mode`` is not one of the six known modes.
    """
    # mode: (points table number, lower id bound, upper id bound - both
    #        exclusive, x-axis upper limit, y-axis upper limit, y-axis label).
    # The labels are Chinese for "<mode> acceleration (m/s^2)".
    samples = {
        'car': (22, 204322, 206107, 1000, 2, '汽车加速度 (m/s$^2$)'),
        'bus': (11, 273685, 275728, 1000, 2, '公交车加速度 (m/s$^2$)'),
        'bike': (32, 284378, 285602, 1000, 2, '自行车加速度 (m/s$^2$)'),
        'walk': (27, 785915, 787725, 1000, 2, '步行加速度 (m/s$^2$)'),
        'train': (12, 801867, 802982, 2000, 10, '火车加速度 (m/s$^2$)'),
        'plane': (1, 306601, 309414, 1000, 2, '飞机加速度 (m/s$^2$)'),
    }
    if mode not in samples:
        raise ValueError('unknown transportation mode: ' + repr(mode))
    tableNo, lowId, highId, xMax, yMax, yLabel = samples[mode]

    # All values come from the constant table above, never from user input.
    fetSql = ('select accelerometer from GPS_points_' + str(tableNo) +
              ' where id>' + str(lowId) + ' and id<' + str(highId))
    conn = DB.get_conn(DBPath)
    try:
        yData = pd.read_sql(fetSql, conn)
    finally:
        # The data is fully loaded here, so the connection is not kept open
        # while the plot window blocks, and it is released on errors too.
        DB.closeDB(conn)

    pointNum = len(yData)
    # x values 0, 2, 4, ... - presumably one sample every 2 seconds, since
    # the axis is labelled in seconds; TODO confirm.
    xData = np.linspace(0, 2 * pointNum - 2, pointNum)
    plt.plot(xData, yData.accelerometer, c='b')
    plt.xlim(0, xMax)
    plt.ylim(0, yMax)
    # x label is Chinese for "time (seconds)"
    plt.xlabel('时间 (秒)', fontproperties=font)
    plt.ylabel(yLabel, fontproperties=font)
    plt.grid()
    plt.show()