def why_fuel(ddata, datareinx):
    """Count fuel-burning trips and classify why fuel was probably needed.

    Args:
        ddata: 2-D event array.  Column 1 is the event type (1 is treated as
            a trip), columns 2 and 3 are the start/end row labels of the
            event inside ``datareinx``.  Column 6 must be <= 20 for a trip to
            be counted (presumably the end-of-trip SOC in percent -- confirm
            against the producer of ``ddata``).
        datareinx: DataFrame with columns ``quqantity_electricity_percent``,
            ``fuel_consumption``, ``time_collect`` and ``statusn2``.

    Returns:
        ``(categary1, categary2, categary3, categary4)`` where

        * categary1 -- previous event ended in status 102 (presumably
          charging) at most 15 minutes before departure, yet SOC was < 90 %;
        * categary2 -- the trip started with SOC >= 90 %;
        * categary3 -- more than 45 minutes passed since the previous event
          ended (and neither rule above matched);
        * categary4 -- total number of fuel-burning trips considered.

    The first row of ``ddata`` is never classified because every rule needs
    the preceding event.
    """
    import getTimeDiff

    qep = datareinx['quqantity_electricity_percent']
    fc = datareinx['fuel_consumption']
    tc = datareinx['time_collect']

    categary1 = 0  # charging stopped too early (charged <= 15 min ago, SOC < 90 %)
    categary2 = 0  # trip longer than the electric range (started at SOC >= 90 %)
    categary3 = 0  # long idle period without any charging
    categary4 = 0  # all fuel-burning trips

    for i in range(1, ddata.shape[0]):
        a = int(ddata[i, 2])
        b = int(ddata[i, 3])

        # A fuel-consumption reading above 500 marks the engine as driving.
        if not (any(fc.loc[a + 1:b] > 500) and ddata[i, 1] == 1
                and ddata[i, 6] <= 20):
            continue
        categary4 += 1

        # Cast once so every lookup uses the same integer label.
        prev_end = int(ddata[i - 1, 3])
        start_soc = qep.loc[a]

        if (datareinx['statusn2'].loc[prev_end] == 102
                and getTimeDiff.GetTimeDiff(tc.loc[prev_end], tc.loc[a]) / 60 <= 15
                and start_soc < 90):
            categary1 += 1
        elif start_soc >= 90:
            categary2 += 1
        elif getTimeDiff.GetTimeDiff(tc.loc[prev_end], tc.loc[a]) / 60 > 45:
            categary3 += 1

    return categary1, categary2, categary3, categary4
def ct3(ddata, datareinx):
    """Collect per-trip energy figures for every fuel-burning trip.

    A trip is a row of ``ddata`` whose column 1 equals 1; columns 2 and 3 are
    its first/last row labels in ``datareinx`` and column 11 its distance.
    Only trips with at least one ``fuel_consumption`` reading above 500 after
    the first row are reported.

    Returns six parallel lists:
        cf  -- drop in ``quqantity_electricity`` over the trip,
        ce  -- sum(distance step * fuel_consumption / 100) * 8.9
               (the 8.9 factor is not explained in the code; presumably a
               fuel-to-energy conversion -- confirm),
        cd  -- trip distance (``ddata`` column 11),
        cs  -- drop in ``quqantity_electricity_percent``,
        tcc -- trip duration in hours,
        soc -- mean of the start and end SOC.
    """
    import getTimeDiff

    energy = datareinx['quqantity_electricity']
    soc_pct = datareinx['quqantity_electricity_percent']
    odometer = datareinx['distance_accumulative']
    fuel_rate = datareinx['fuel_consumption']
    stamps = datareinx['time_collect']

    cf, ce, cd, cs, tcc, soc = [], [], [], [], [], []

    for trip_row in ddata[ddata[:, 1] == 1]:
        first = int(trip_row[2])
        last = int(trip_row[3])

        # Skip trips in which the engine never drove the car.
        if not any(fuel_rate.loc[first + 1:last] > 500):
            continue

        energy_drop = energy.loc[first] - energy.loc[last]
        soc_drop = soc_pct.loc[first] - soc_pct.loc[last]
        hours = getTimeDiff.GetTimeDiff(stamps.loc[first], stamps.loc[last]) / 3600

        # Pair each row with its predecessor to get per-step distances.
        later = odometer.loc[first + 1:last].reset_index(drop=True)
        earlier = odometer.loc[first:last - 1].reset_index(drop=True)
        rate = fuel_rate.loc[first + 1:last].reset_index(drop=True)
        fuel_used = sum((later - earlier) * rate / 100) * 8.9

        cf.append(energy_drop)
        ce.append(fuel_used)
        cd.append(trip_row[11])
        cs.append(soc_drop)
        tcc.append(hours)
        soc.append((soc_pct.loc[first] + soc_pct.loc[last]) / 2)

    return cf, ce, cd, cs, tcc, soc
def datacleaning(data):
    """Drop implausible telemetry rows, repair missing odometer values, add speed.

    Rows are kept only when ``longitude`` and ``latitude`` are positive,
    ``current_status_vehicle`` is not -1 and ``distance_accumulative`` is at
    most 1,000,000.

    An odometer reading of 0 is treated as missing and rebuilt from the
    previous row: previous reading + ``latlon.haversine(...) / 1000``
    (presumably metres converted to km -- confirm against ``latlon``).  A
    missing reading in the very first row has no predecessor and is left at 0;
    the earlier code silently wrapped around to the *last* row.

    Args:
        data: DataFrame with the columns named above plus ``time_collect``.

    Returns:
        A filtered copy of ``data`` with an integer odometer column and a new
        ``newspd`` column: (next odometer - odometer) / seconds to next row
        * 1e3.  When the time gap exceeds 50 minutes the divisor is set to
        1e20 so the speed collapses to ~0 and the jump stays recognisable.
        The last row always gets 0.
    """
    keep = ((data['longitude'] > 0)
            & (data['latitude'] > 0)
            & (data['current_status_vehicle'] != -1)
            & (data['distance_accumulative'] <= 1000000))
    # Explicit copy: the columns assigned below must not target a view.
    data = data[keep].copy()

    odo = np.array(data['distance_accumulative'])
    lon = np.array(data['longitude'])
    lat = np.array(data['latitude'])
    # Start at 1: row 0 has no predecessor to extrapolate from.
    for i in range(1, odo.shape[0]):
        if odo[i] == 0:
            odo[i] = odo[i - 1] + latlon.haversine(
                lon[i - 1], lat[i - 1], lon[i], lat[i]) / 1000
    data['distance_accumulative'] = odo.astype(int)

    labels = list(data.index)
    t_c = data['time_collect']
    d_c = data['distance_accumulative']
    t_next = t_c.shift(-1)
    d_next = d_c.shift(-1)

    timed = pd.Series(np.ones((len(labels), )), index=labels)
    dd = pd.Series(np.zeros((len(labels), )), index=labels)
    for i in labels[:-1]:
        gap = getTimeDiff.GetTimeDiff(t_c[i], t_next[i])
        # A gap above 50 minutes is a recording jump, not real driving time.
        timed[i] = 1e20 if gap > 50 * 60 else gap
        dd[i] = d_next[i] - d_c[i]

    data['newspd'] = dd / timed * 1e3
    return data
def ct(ddata, datareinx, min_km=10, max_km=100):
    """Per-trip energy and fuel figures for trips within a distance window.

    Args:
        ddata: 2-D event array; column 1 == 1 marks a trip, columns 2 and 3
            are its first/last row labels in ``datareinx`` and column 11 its
            distance.
        datareinx: DataFrame with ``quqantity_electricity``,
            ``quqantity_electricity_percent``, ``distance_accumulative``,
            ``fuel_consumption`` and ``time_collect``.
        min_km: exclusive lower bound on the trip distance (default 10).
        max_km: exclusive upper bound on the trip distance (default 100; the
            old comment mentioned 300 km but the code always used 100).

    Returns:
        ``(cf, ce, cd, cs, tcc)``:
        cf  -- list, drop in ``quqantity_electricity`` per trip,
        ce  -- list, sum(distance step * fuel_consumption / 100) per trip,
        cd  -- ndarray of the selected trip distances (empty when no trip
               qualifies),
        cs  -- list, drop in ``quqantity_electricity_percent`` per trip,
        tcc -- list, trip duration in hours.
    """
    import getTimeDiff

    qe = datareinx['quqantity_electricity']
    qep = datareinx['quqantity_electricity_percent']
    da = datareinx['distance_accumulative']
    fc = datareinx['fuel_consumption']
    tc = datareinx['time_collect']

    trips = ddata[ddata[:, 1] == 1]
    trips = trips[(trips[:, 11] > min_km) & (trips[:, 11] < max_km)]

    cf = []
    ce = []
    cs = []
    tcc = []
    for row in trips:
        a = int(row[2])
        b = int(row[3])

        # Pair each row with its predecessor to get per-step distances.
        daa = da.loc[a + 1:b].reset_index(drop=True)
        dau = da.loc[a:b - 1].reset_index(drop=True)
        fcb = fc.loc[a + 1:b].reset_index(drop=True)

        tcc.append(getTimeDiff.GetTimeDiff(tc.loc[a], tc.loc[b]) / 3600)
        cf.append(qe.loc[a] - qe.loc[b])
        ce.append(sum((daa - dau) * fcb / 100))
        cs.append(qep.loc[a] - qep.loc[b])

    # Taken from the filtered array so it is defined even with zero trips.
    cd = trips[:, 11]
    return cf, ce, cd, cs, tcc
cn[:, 11] = 0 ###代表非通勤 cn[:, 12] = i #代表用户编号 cn = cn[cn[:, 1] < 200] cn = cn[cn[:, 2] < 200] #cn=cn[cn[:,6]<0]##充电电流不可能大于0 cn = cn[cn[:, 1] > 0] cn = cn[cn[:, 2] > 0] g = ddata2[ddata2[:, 1] == 2] #所有充电事件 g1 = g[g[:, -1] == 123] tvmt[i, 8] = ddata2[ddata2[:, 1] == 2].shape[0] ##所有充电事件的个数8 tvmt[i, 9] = g1.shape[0] #家充事件总数9 tvmt[i, 10] = cn.shape[0] #途中充电总数10 tvmt[i, 12] = int( getTimeDiff.GetTimeDiff(datareinx['time_collect'].iloc[0], datareinx['time_collect'].iloc[-1]) / (60 * 60 * 24)) #记录时间12 else: tvmt[i, 7] = dvmt.commutea(ddata2, datareinx) ##计算通勤距离 cirinx = ChargeInroute.cin(ddata2, datareinx) cn = np.zeros((cirinx.shape[0], 13)) cn[:, 0:11] = cirinx cn[:, 11] = 1 ###代表通勤 cn[:, 12] = i #代表用户编号 cn = cn[cn[:, 1] < 200] cn = cn[cn[:, 2] < 200] #cn=cn[cn[:,6]<0]##充电电流不可能大于0 cn = cn[cn[:, 1] > 0] cn = cn[cn[:, 2] > 0] g = ddata2[ddata2[:, 1] == 2] #所有充电事件
da = datareinx['distance_accumulative'] fc = datareinx['fuel_consumption'] tc = datareinx['time_collect'] ac = datareinx['acc'] for j in range(0, len(start) - 1): a = datareinx['index'].loc[start[j]] #索引 b = datareinx['index'].loc[stop[j]] #索引 data_olas = np.zeros((1, 5)) fcc = qe.loc[a] - qe.loc[ b] #电池剩余能量 #.loc, 行或列只能是标签名。 只加一个参数时,只能进行 行 选择 fce = qep.loc[a] - qep.loc[b] #soc vmt = da.loc[b] - da.loc[a] #距离 tcc = getTimeDiff.GetTimeDiff(tc.loc[a], tc.loc[b]) # 时间 daa = da.loc[a + 1:b].reset_index( drop=True) #distance_accumulative dau = da.loc[a:b - 1].reset_index(drop=True) fcb = fc.loc[a + 1:b].reset_index(drop=True) #fuel_consumption fca = sum((daa - dau) * fcb / 100) # 行驶的距离 * 每百公里燃油消耗量 =燃油消耗量 # ============================================================================= k = a # for k in range((index_start,index_end)): while k < b: time_start = datareinx['time_collect'].loc[k] da_start = datareinx['distance_accumulative'].loc[k] qe_start = datareinx['quqantity_electricity'].loc[k]
def cin(ddata, datarinx):
    """Extract in-route charging events and describe each one.

    The event list is scanned for triples ``(i, j, k)`` of charging events
    (``ddata[:, 1] == 2``): ``i`` is any charging event, ``j`` the next
    charging event after it and ``k`` the first charging event after ``j``.
    ``j`` is accepted as an in-route charge only when its column 12 is none
    of 100, 30, 123, its column 11 is 0 and its column 4 (duration) is
    positive; otherwise the search for this ``i`` stops.  The last two events
    are never used as ``j`` or ``k``, matching the original loop bounds.

    Args:
        ddata: 2-D event array with at least 13 columns.
        datarinx: DataFrame with ``time_collect``, ``current``,
            ``distance_accumulative``, ``quqantity_electricity`` and
            ``quqantity_electricity_percent``, labelled by the row labels
            stored in columns 2/3 of ``ddata``.

    Returns:
        Array with 11 columns per accepted event: SOC at charge start,
        odometer difference event ``i`` end -> charge start, odometer
        difference charge end -> event ``k`` start, ``ddata[j, 4]``, minutes
        from charge start to the start of event ``j + 1``, ``ddata[j, 6]``,
        most frequent charging current (10000 when fewer than two samples),
        weekday (Monday = 0), hour of day as a float, energy per distance
        since event ``i`` ended, and energy difference over the charge.
        When nothing is found a single all-zero row is returned.
    """
    n_events = ddata.shape[0]
    records = []

    i = 0
    while i < n_events - 1:
        if ddata[i, 1] == 2:
            j = i + 1
            while j < n_events - 2:
                if ddata[j, 1] == 2:
                    # The original wrote "(... or ddata[j, 11]) != 0"; the
                    # parenthesis was misplaced but evaluates the same as
                    # this explicit chain.
                    if (ddata[j, 12] == 100 or ddata[j, 12] == 30
                            or ddata[j, 12] == 123 or ddata[j, 4] < 0
                            or ddata[j, 4] == 0 or ddata[j, 11] != 0):
                        break
                    if ddata[j, 4] > 0:
                        k = j + 1
                        while k < n_events - 2:
                            if ddata[k, 1] == 2:
                                dwell = getTimeDiff.GetTimeDiff(
                                    datarinx['time_collect'].loc[int(ddata[j, 2])],
                                    datarinx['time_collect'].loc[int(ddata[j + 1, 2])]) / 60
                                # Positional slice, end-exclusive, as before.
                                clevel = np.array(datarinx['current'].iloc[
                                    int(ddata[j, 2]):int(ddata[j, 3])])
                                if len(clevel) < 2:
                                    current = 10000
                                else:
                                    clevel = np.around(clevel, decimals=3)
                                    # atleast_1d copes with SciPy versions
                                    # returning an array or a scalar.
                                    current = np.atleast_1d(
                                        stats.mode(clevel)[0])[0]
                                records.append((
                                    int(ddata[i, 2]), int(ddata[i, 3]),
                                    int(ddata[j, 2]), int(ddata[j, 3]),
                                    int(ddata[k, 2]),
                                    ddata[j, 4], dwell, ddata[j, 6], current))
                                break
                            k = k + 1
                        break
                j = j + 1
        i = i + 1

    if not records:
        return np.zeros((1, 11))

    soc = datarinx['quqantity_electricity_percent']
    odo = datarinx['distance_accumulative']
    energy = datarinx['quqantity_electricity']
    stamps = datarinx['time_collect']

    reg = np.zeros((len(records), 11))
    for row, rec in enumerate(records):
        (_, prev_end, chg_start, chg_end, next_start,
         duration, dwell, charged, current) = rec
        when = pd.Timestamp(stamps.loc[chg_start])

        reg[row, 0] = soc.loc[chg_start]
        reg[row, 1] = odo.loc[chg_start] - odo.loc[prev_end]
        reg[row, 2] = odo.loc[next_start] - odo.loc[chg_end]
        reg[row, 3] = duration
        reg[row, 4] = dwell
        reg[row, 5] = charged
        reg[row, 6] = current
        reg[row, 7] = when.weekday()
        reg[row, 8] = when.hour + when.minute / 60
        # Dividing by the stored float keeps NumPy semantics (inf/nan rather
        # than an exception) when the preceding distance is 0.
        reg[row, 9] = (energy.loc[prev_end] - energy.loc[chg_start]) / reg[row, 1]
        reg[row, 10] = energy.loc[chg_end] - energy.loc[chg_start]
    return reg
def modified_ct(ddata, datareinx):
    """Split the record at zero-fuel rows, build a rolling fuel total and bin by time.

    For every span between two non-adjacent rows whose ``fuel_consumption``
    is 0, the function
      1. cuts the span at the first fuel jump larger than 20,
      2. fills a ``fuel_accumulate`` column (see the inline notes),
      3. appends ``modified_trip.trip(span)`` to ``data``,
      4. appends one row per ``T``-minute window to ``data_divert_T``:
         seconds elapsed, distance, SOC drop, ``fuel_accumulate`` change.

    ``ddata`` is not used by the visible code.  ``datareinx`` is modified in
    place (an ``index`` column is added).

    Returns ``(data, data_divert_T)`` after removing the seed rows and
    filtering: ``data[:, 15] >= 0``; windows no longer than ``T + 1`` minutes
    (column 0 converted to minutes), with positive distance and non-negative
    fuel change.

    NOTE(review): the nesting below was reconstructed from a collapsed
    listing -- confirm against the original file before relying on it.
    """
    # Make the row label available as a regular column.
    # datareinx=datareinx[datareinx['index']<datareinx.shape[0]]
    datareinx['index'] = datareinx.index
    data = np.zeros((1, 16))  # seed row, removed before returning
    data_divert_T = np.zeros((1, 4))
    T = 5  # window length in minutes
    datereinx_temp = datareinx[datareinx['fuel_consumption'] == 0]
    # datereinx_temp=datereinx_temp[datereinx_temp['status_basic']==1]
    if datereinx_temp.shape[0] != 0:
        index = np.array(datereinx_temp['index'])
        index_2 = []  # (start, end) pairs of consecutive zero-fuel rows that are not adjacent
        # NOTE(review): the loop starts at 1 and stops at len - 2, so the
        # first zero-fuel row never opens a span -- confirm this is intended.
        for i in range(1, len(index) - 1):
            if datereinx_temp['index'].loc[
                    index[i]] + 1 < datereinx_temp['index'].loc[index[i + 1]]:
                index_2.append((datereinx_temp['index'].loc[index[i]],
                                datereinx_temp['index'].loc[index[i + 1]]))
        # index_2 holds tuples, so this integer membership test is always
        # true and the trailing span is always appended.
        if (datareinx.shape[0] - 1) not in index_2:
            index_2.append((index[-1], datareinx.shape[0] - 1))
        # (A commented-out block that extracted pure-electric trips via
        # consumptionana.ct used to live here.)
        # Fuel-burning part: start where fuel consumption is 0 and build a
        # rolling fuel figure; beyond the first 100 km the value from
        # 100 km earlier is combined with the current one.
        da = np.array(datareinx['distance_accumulative'])
        fc = np.array(datareinx['fuel_consumption'])
        for i in range(len(index_2)):
            index_start = index_2[i][0]
            index_end = index_2[i][1]
            # First 100 km.  iloc slicing is end-exclusive, so the row at
            # index_end itself is NOT part of the span (the old comment
            # claimed both ends were included).
            datareinx_temp = datareinx.iloc[
                index_start:index_end]
            # Look for jumps in fuel_consumption.  From here on ``fc`` is a
            # Series restricted to the span and no longer the full array.
            fc = datareinx_temp['fuel_consumption']
            fca = fc.loc[index_start + 1:index_end].reset_index(drop=True)
            fcu = fc.loc[index_start:index_end - 1].reset_index(drop=True)
            fcminus = fca - fcu
            # Keep only steps above 20; the first one ends the span.
            fcminus = fcminus[fcminus > 20]
            if len(fcminus) > 0:
                index_end = fcminus[fcminus == fcminus.iloc[0]].index.tolist()
                index_end = index_end[0] + index_start
                datareinx_temp = datareinx.iloc[
                    index_start:index_end]
            # Add the rolling fuel column.
            # NOTE(review): datareinx_temp is a slice of datareinx; pandas
            # may warn here and the chained .loc assignments below may not
            # write through -- verify.
            datareinx_temp['fuel_accumulate'] = 0
            distance_accumulate_start = np.array(
                datareinx['distance_accumulative'].loc[index_start])
            distance_accumulate_100 = distance_accumulate_start + 100  # odometer 100 km after the start
            distance_accumulate_start = distance_accumulate_100 - 100  # back to the start odometer
            # Row label whose odometer equals start + 100.
            index_100 = datareinx[
                datareinx.distance_accumulative ==
                distance_accumulate_100].index.tolist()
            if len(index_100) > 0:  # exact match found
                index_100 = index_100[0]
            else:  # otherwise look for the nearest odometer value
                x = distance_accumulate_100
                a = abs(da - x)
                b = a.min()
                # (original author wondered whether interpolation would be better)
                if b > 10:  # nearest reading is more than 10 away: give up
                    index_100 = index_end + 1  # forces the "beyond the span" branch below
                else:
                    c = abs(x - b)  # candidate odometer value below the target
                    index_100 = datareinx[datareinx.distance_accumulative ==
                                          c].index.tolist()
                    if len(index_100) == 0:
                        c = abs(x + b)  # candidate odometer value above the target
                        index_100 = datareinx[datareinx.distance_accumulative
                                              == c].index.tolist()
                    index_100 = index_100[0]  # row label of the 100 km mark
            # Copy the raw fuel readings for the first 100 km (or the whole
            # span when the 100 km mark lies beyond it).
            if index_100 > index_end:
                datareinx_temp['fuel_accumulate'].loc[
                    index_start:index_end] = fc[index_start -
                                                index_start:index_end + 1 -
                                                index_start]
            else:
                datareinx_temp['fuel_accumulate'].loc[
                    index_start:index_100] = fc[index_start -
                                                index_start:index_100 + 1 -
                                                index_start]
            # Beyond the first 100 km.
            j = index_100
            while j < index_end:
                index_self = np.where(da == da[j])  # last row with the same odometer value
                index_self = index_self[0][-1]
                index_temp = np.where(da == da[j] - 100)  # rows exactly 100 km earlier
                #if fc[j]!=0:
                if len(index_temp[0]) != 0:
                    k = -1
                    index_temp_1 = index_temp[0][k]
                    # Guard against irregular (decreasing) odometer values.
                    while index_temp_1 > index_end:
                        index_temp_1 = index_temp[0][k]
                        k = k - 1
                    if index_temp_1 > index_start and index_temp_1 <= index_end:
                        datareinx_temp['fuel_accumulate'].loc[
                            j:index_self] = fc[index_temp_1] + fc[j]
                else:  # no exact match: use the nearest odometer value
                    x = da[j] - 100
                    a = abs(da - x)
                    b = a.min()
                    if b <= 10:
                        c = abs(x - b)
                        index_temp_2 = np.where(da == c)
                        if index_temp_2[0].size == 0:
                            c = abs(x + b)
                            index_temp_2 = np.where(da == c)
                        index_temp_2 = index_temp_2[0][-1]  # row 100 km earlier
                        if index_temp_2 > index_start:
                            datareinx_temp['fuel_accumulate'].loc[
                                j:index_self] = fc[index_temp_2] + fc[j]
                    else:
                        break
                j = index_self + 1
            data = np.vstack((data, modified_trip.trip(datareinx_temp)))
            # Per-window figures (the original author intended to relate
            # speed to fuel/electric consumption): walk the span in
            # T-minute windows.
            # datareinx_temp=datareinx_temp[datareinx_temp['statusn2']==1]
            k = index_start
            while k < index_end:
                time_start = datareinx['time_collect'].loc[k]
                da_start = datareinx['distance_accumulative'].loc[k]
                qep_start = datareinx['quqantity_electricity_percent'].loc[k]
                fa_start = datareinx_temp['fuel_accumulate'].loc[k]
                # Advance k until the window is full or the span ends.
                while getTimeDiff.GetTimeDiff(
                        time_start, datareinx['time_collect'].loc[k]
                ) < T * 60 and k < index_end:
                    k = k + 1
                # Columns: duration (s), distance, SOC drop, fuel change.
                data_divert_T_temp=np.hstack((getTimeDiff.GetTimeDiff(time_start,datareinx['time_collect'].loc[k-1]), \
                                     datareinx['distance_accumulative'].loc[k-1]-da_start, \
                                     qep_start-datareinx['quqantity_electricity_percent'].loc[k-1], \
                                     datareinx_temp['fuel_accumulate'].loc[k-1]-fa_start))
                data_divert_T = np.vstack((data_divert_T, data_divert_T_temp))
    data = data[data[:, 15] >= 0]
    data = np.delete(data, 0, axis=0)
    data_divert_T = data_divert_T[data_divert_T[:, 0] <= (T + 1) * 60]  # at most T + 1 minutes
    data_divert_T[:, 0] = data_divert_T[:, 0] / 60  # seconds -> minutes
    data_divert_T = data_divert_T[data_divert_T[:, 1] > 0]  # positive distance only
    data_divert_T = data_divert_T[data_divert_T[:, 3] >= 0]
    return data, data_divert_T
def trip(dataframe): # dataframe=datareinx_temp ### 分日提取充电段 status2=dataframe['statusn2'] status=dataframe['current_status_vehicle'] q_e_p=dataframe['quqantity_electricity_percent'] time_collect=dataframe['time_collect'] str0=pd.Series('2000-01-01 01:01:01') time0=str0.append(time_collect, ignore_index=True) #重新赋值index time0=time0.append(str0, ignore_index=True) ##头尾都加上str0 distance_acc=dataframe['distance_accumulative'] # fuel_acc=dataframe['fuel_accumulate'] # dis0=pd.Series([0]) # distance_acc0=dis0.append(distance_acc,ignore_index=True) #重新赋值index # distance_acc0=distance_acc0.append(dis0,ignore_index=True) ##头尾都加上dis0 # fuel0=pd.Series([0]) # fuel_acc0=fuel0.append(fuel_acc,ignore_index=True) # fuel_acc0=fuel_acc0.append(fuel0,ignore_index=True) ##头尾都加上fuel0 status2=list(status2) status=list(status) lens=len(status) #记录数 start=[] stop=[] status20=[0]+status2+[0] status0=[0]+status+[0] k=0 for k in range(lens): w1=(status20[k+1]==102 and status20[k]!=102) #如果前是move,后是stop,放入start w2=(status20[k+1]==102 and status20[k+2]!=102) ##如果前是stop,后是move,放入stop if w1: start.append(k) tt=getTimeDiff.GetTimeDiff(time0[k],time0[k+1]) if tt>3600 and (status20[k+1]==102) and (status20[k]==102): ##接下来的状态都是停止, start.append(k) stop.append(k-1) if w2: stop.append(k) if len(start)>len(stop): start.remove(start[-1]) at=[] bt=[] for i in range(len(start)-1):###针对充电段落间隔时间过小的拼接#######################假设:如果充电段间没有里程差异,认为是一个充电段############################################################################################################ interv=getTimeDiff.GetTimeDiff(time_collect[stop[i]+dataframe.iat[0,0]],time_collect[start[i+1]+dataframe.iat[0,0]]) ##############################################################################################################12-25增加充电段拼接的附加条件:里程没有太大变化################################################################################################ l=distance_acc[start[i+1]+dataframe.iat[0,0]]-distance_acc[stop[i]+dataframe.iat[0,0]] 
m=q_e_p[start[i+1]+dataframe.iat[0,0]]-q_e_p[stop[i]+dataframe.iat[0,0]] #if interv<15*60 and l<1: if l<2 and l>-1 and m>-1 and q_e_p[start[i+1]+dataframe.iat[0,0]]<100: at.append(i+1) #如果前后两个充电段距离短,电量没有减少,电未充满,则认为是同一个充电段 bt.append(i) start=np.array(start) stop=np.array(stop) start=np.delete(start,at,axis=0) stop=np.delete(stop,bt,axis=0) start=np.array(start) stop=np.array(stop) ### travel period starttrip=[] stoptrip=[] for k in range(lens): w1=(status0[k+1]==1 and status0[k]!=1) w2=(status0[k+1]==1 and status0[k+2]!=1) if w1: starttrip.append(k) tt=getTimeDiff.GetTimeDiff(time0[k],time0[k+1]) if tt>3600 and (status0[k+1]==1) and (status0[k]==1): #and abs(distance_acc0[k+1]-distance_acc0[k])<10: #针对间断的时间段进行处理 starttrip.append(k) stoptrip.append(k-1) if w2: stoptrip.append(k) numtripdur=len(starttrip) startdetind=[] #需要删除的 stopdetind=[] #需要删除的 for i in range(numtripdur-1): #x=相邻后一个行程段的开头减去前一个行程段的结尾 interv=getTimeDiff.GetTimeDiff(time_collect[stoptrip[i]+dataframe.iat[0,0]],time_collect[starttrip[i+1]+dataframe.iat[0,0]]) if interv<15*60 and q_e_p[starttrip[i+1]+dataframe.iat[0,0]]-q_e_p[stoptrip[i]+dataframe.iat[0,0]]<=0: ##############如果停留时间少于15分钟,且中间不是充电段,删掉停留记录 startdetind.append(i+1) stopdetind.append(i) starttrip=np.array(starttrip) stoptrip=np.array(stoptrip) starttrip=np.delete(starttrip,startdetind,axis=0) stoptrip=np.delete(stoptrip,stopdetind,axis=0) starttrip=starttrip.reshape(starttrip.shape[0],1) onz=np.ones((starttrip.shape[0],1)) starttrip=np.append(onz,starttrip,axis=1) start=start.reshape(start.shape[0],1) onz2=np.ones((start.shape[0],1))*2 start=np.append(onz2,start,axis=1) ##充电段和行程段串联 b=np.append(starttrip,start,axis=0) e=np.append(stoptrip,stop,axis=0) re=np.zeros((b.shape[0],4)) re[:,1:3]=b #天数,状态,starttrip re[:,3]=e #stoptrip rg=np.lexsort(re.T) re=re[rg] re=re.astype(int) if len(re)>0: quqantity_electricity_percent=dataframe['quqantity_electricity_percent'] q_e_p=quqantity_electricity_percent.as_matrix() q_e_p_begin=q_e_p[re[:,2]] 
q_e_p_begin=q_e_p_begin.reshape((len(q_e_p_begin),1)) q_e_p_end=q_e_p[re[:,3]] q_e_p_end=q_e_p_end.reshape((len(q_e_p_end),1)) longitude=dataframe['longitude'] longitude=longitude.as_matrix() latitude=dataframe['latitude'] latitude=latitude.as_matrix() long_begin=longitude[re[:,2]] long_begin=long_begin.reshape((len(long_begin),1)) la_begin=latitude[re[:,2]] la_begin=la_begin.reshape((len(la_begin),1)) long_end=longitude[re[:,3]] long_end=long_end.reshape((len(long_end),1)) la_end=latitude[re[:,3]] la_end=la_end.reshape((len(la_end),1)) fuel_accumulative=dataframe['fuel_accumulate'] dist_cha=np.zeros((len(re),1)) fuel_cha=np.zeros((len(re),1)) whether_weekday=np.zeros((len(re),1)) #是否工作日 time_start=np.zeros((len(re),1)) time_end=np.zeros((len(re),1)) duration=np.zeros((len(re),1)) for i in range(len(re)): re[i,2]=dataframe.iat[re[i,2],0] re[i,3]=dataframe.iat[re[i,3],0] duration[i]=getTimeDiff.GetTimeDiff(time_collect[re[i,2]],time_collect[re[i,3]])/60 #dist_gap=np.zeros((len(re),1)) #计算下次段落的开始经纬度与上次段落结束的经纬度之间的距离 #timediff=(time_endarr-time_startarr)/60 #min #aa=a.reshape(1,1) #time_startarr2=np.append(time_startarr,aa,axis=0) #time_startarr3=np.delete(time_startarr2,0,axis=0) #time_periodgap=(time_startarr3-time_endarr)/60 #min 这段结束与下段开始的差值 a=long_begin[long_begin.shape[0]-1].reshape(1,1) long_begin2=np.append(long_begin,a,axis=0) long_begin2=np.delete(long_begin2,0,axis=0) a=la_begin[la_begin.shape[0]-1].reshape(1,1) la_begin2=np.append(la_begin,a,axis=0) la_begin2=np.delete(la_begin2,0,axis=0) for i in range(len(re)): dist_cha[i]=distance_acc[re[i,3]]-distance_acc[re[i,2]] fuel_cha[i]=fuel_accumulative[re[i,3]]-fuel_accumulative[re[i,2]] whether_weekday[i]=datetime.datetime.strptime(time_collect[re[i,2]],"%Y-%m-%d %H:%M:%S").weekday()+1 #星期 time_start[i]=float(time_collect[re[i,2]][11:13])+float(time_collect[re[i,2]][14:16])/60 time_end[i]=float(time_collect[re[i,3]][11:13])+float(time_collect[re[i,3]][14:16])/60 
#dist_gap[i]=latlon.haversine(long_end[i,0]/1000000,la_end[i,0]/1000000,long_begin2[i,0]/1000000,la_begin2[i,0]/1000000) dddata=np.hstack((re,duration,q_e_p_begin,q_e_p_end,long_begin,la_begin,long_end,la_end,dist_cha,whether_weekday,time_start,time_end,fuel_cha)) # aa=data_analysis.re_idx(dataframe,ddata).reshape(ddata.shape[0],) # ddata[:,0]=aa # ddata=ddata[ddata[:,4]>0] #把持续时间为0的事件删除 else: dddata=np.zeros((1,16))-1 return dddata
cn[:,11]=0###代表飞通勤 cn[:,12]=i#代表用户编号 cn=cn[cn[:,1]<200] cn=cn[cn[:,2]<200] #cn=cn[cn[:,6]<0]##充电电流不可能大于0 cn=cn[cn[:,1]>0] cn=cn[cn[:,2]>0] g=ddata2[ddata[:,1]==2] #所有充电事件 g1=g[g[:,-1]==123] tvmt[i,8]=ddata2[ddata[:,1]==2].shape[0]##所有充电事件的个数8 tvmt[i,9]=g1.shape[0]#家充事件总数9 tvmt[i,10]=cn.shape[0]#途中充电总数10 tvmt[i,12]=int(getTimeDiff.GetTimeDiff(datareinx['time_collect'].iloc[0],datareinx['time_collect'].iloc[-1])/(60*60*24))#记录时间12 else: tvmt[i,7]=dvmt.commutea(ddata2,datareinx)##计算通勤距离 cirinx=ChargeInroute.cin(ddata2,datareinx) cn=np.zeros((cirinx.shape[0],13)) cn[:,0:11]=cirinx cn[:,11]=1###代表通勤 cn[:,12]=i#代表用户编号 cn=cn[cn[:,1]<200] cn=cn[cn[:,2]<200] #cn=cn[cn[:,6]<0]##充电电流不可能大于0 cn=cn[cn[:,1]>0] cn=cn[cn[:,2]>0] g=ddata2[ddata[:,1]==2] #所有充电事件 g1=g[g[:,12]==100]
def ct(ddata, datareinx):
    """Fit the fuel/electric trade-off from 8-minute windows of fuel-burning trips.

    Every trip (``ddata[:, 1] == 1``) with at least one ``fuel_consumption``
    reading above 500 is cut into windows of roughly ``T = 8`` minutes.  One
    row is produced per window: duration in seconds, distance, drop in
    ``quqantity_electricity``, mean SOC of the window ends, and fuel used.
    Windows with zero distance are discarded and the remaining rows are
    passed to ``olsan.olsa_5min``.

    Returns:
        ``(data_divert_T, para2 / para1)`` where ``para1``/``para2`` are the
        first two values returned by ``olsan.olsa_5min``.

    Earlier revisions also ran ``olsan.olsa_5min`` per trip and averaged the
    coefficients, but none of that reached the return value; it was removed
    together with unreachable statements after ``return``.

    NOTE(review): the fuel column is integrated from the START OF THE TRIP
    (label ``a``) up to the end of the window, whereas the other columns
    cover only the window.  Kept as found -- confirm whether per-window fuel
    was intended.
    """
    import numpy as np
    import getTimeDiff
    import olsan

    qe = datareinx['quqantity_electricity']
    qep = datareinx['quqantity_electricity_percent']
    da = datareinx['distance_accumulative']
    fc = datareinx['fuel_consumption']
    tc = datareinx['time_collect']

    T = 8  # window length in minutes
    rows = []

    for trip_row in ddata[ddata[:, 1] == 1]:
        a = int(trip_row[2])
        b = int(trip_row[3])

        # A reading above 500 marks the engine as driving the car.
        if not any(fc.loc[a + 1:b] > 500):
            continue

        k = a
        while k < b:
            time_start = tc.loc[k]
            da_start = da.loc[k]
            qe_start = qe.loc[k]
            qep_start = qep.loc[k]

            # Advance until the window is full or the trip ends; the first
            # comparison is always true, so k moves at least one row.
            while getTimeDiff.GetTimeDiff(time_start,
                                          tc.loc[k]) < T * 60 and k < b:
                k = k + 1
            last = k - 1

            step_km = (da.loc[a + 1:last].reset_index(drop=True)
                       - da.loc[a:last - 1].reset_index(drop=True))
            rate = fc.loc[a + 1:last].reset_index(drop=True)

            rows.append([
                getTimeDiff.GetTimeDiff(time_start, tc.loc[last]),
                da.loc[last] - da_start,
                qe_start - qe.loc[last],
                (qep_start + qep.loc[last]) / 2,
                sum(step_km * rate / 100),
            ])

    data_divert_T = np.array(rows, dtype=float).reshape(-1, 5)
    # Drop windows in which the car did not move.
    data_divert_T = data_divert_T[data_divert_T[:, 1] != 0]

    para1, para2, _ev, _pv, _t, _vmt, _r2 = olsan.olsa_5min(data_divert_T)
    return data_divert_T, para2 / para1
def ct2(ddata, datareinx):
    """Per-trip energy figures, split into fuel-burning and electric-only trips.

    A trip is a row of ``ddata`` with column 1 == 1; columns 2 and 3 are its
    first/last row labels in ``datareinx`` and column 11 its distance.  A
    trip counts as fuel-burning when any ``fuel_consumption`` reading after
    its first row exceeds 500.

    Returns:
        Twelve lists ``(cf, ce, cd, cs, tcc, soc, cf2, ce2, cd2, cs2, tcc2,
        soc2)``.  The first six describe fuel-burning trips, the last six the
        remaining trips, each in the order: drop in
        ``quqantity_electricity``, fuel used (sum of distance step *
        fuel_consumption / 100), distance, drop in SOC percent, duration in
        hours, mean of start and end SOC.
    """
    import getTimeDiff

    qe = datareinx['quqantity_electricity']
    qep = datareinx['quqantity_electricity_percent']
    da = datareinx['distance_accumulative']
    fc = datareinx['fuel_consumption']
    tc = datareinx['time_collect']

    # One group of six lists per category; both are filled the same way.
    with_fuel = ([], [], [], [], [], [])
    electric_only = ([], [], [], [], [], [])

    for trip_row in ddata[ddata[:, 1] == 1]:
        a = int(trip_row[2])
        b = int(trip_row[3])

        target = with_fuel if any(fc.loc[a + 1:b] > 500) else electric_only

        # Pair each row with its predecessor to get per-step distances.
        daa = da.loc[a + 1:b].reset_index(drop=True)
        dau = da.loc[a:b - 1].reset_index(drop=True)
        fcb = fc.loc[a + 1:b].reset_index(drop=True)

        figures = (
            qe.loc[a] - qe.loc[b],
            sum((daa - dau) * fcb / 100),
            trip_row[11],
            qep.loc[a] - qep.loc[b],
            getTimeDiff.GetTimeDiff(tc.loc[a], tc.loc[b]) / 3600,
            (qep.loc[a] + qep.loc[b]) / 2,
        )
        for bucket, value in zip(target, figures):
            bucket.append(value)

    return with_fuel + electric_only
def pp2(data):
    """Locate long daytime (work) and overnight (home) parking positions.

    The record is scanned for parking periods; each period is then
    classified by its start/end time:

    * workplace -- longer than 2 h and shorter than 12 h, starting between
      05:00 and 16:59, ending before 22:00 at a later hour than it started,
      with both start and end on Monday-Friday;
    * home -- longer than 2 h and starting at 17:00 or later, or before
      05:00.

    Two defects of the earlier revision are fixed: the end weekday was read
    from the START of the period, and the workday test ``0 < weekday < 6``
    excluded Monday and admitted Saturday (pandas numbers Monday as 0).

    Args:
        data: DataFrame labelled 0..n-1 (rows are addressed by integer
            label) with ``current_status_vehicle``,
            ``distance_accumulative``, ``time_collect``, ``longitude`` and
            ``latitude``.

    Returns:
        ``(lonlatds, lonlaths)``: two DataFrames with the longitude and
        latitude at the start of each workplace / home parking period.
    """
    import pandas as pd
    import getTimeDiff

    statusn = data['current_status_vehicle']
    odom = data['distance_accumulative']
    t_c = data['time_collect']
    lon = data['longitude']
    lat = data['latitude']

    n_rows = len(statusn)
    start = []
    stop = []

    # The three checks below are deliberately sequential (not elif): each may
    # move ``i`` forward and the next check then looks at the new position.
    i = 0
    while i < n_rows - 2:
        # Record starts in status 0: parked until the odometer changes.
        if i == 0 and statusn[0] == 0:
            start.append(i)
            j = i + 1
            while odom[j] == odom[i] and j < n_rows - 1:
                j = j + 1
            stop.append(j - 1)
            i = j - 1
        # Two consecutive status-0 rows shortly (<= 20 min) after the
        # previous row.
        if (i > 0 and statusn[i] == 0 and statusn[i + 1] == 0
                and getTimeDiff.GetTimeDiff(t_c[i - 1], t_c[i]) <= 20 * 60):
            start.append(i)
            j = i + 1
            while odom[j] == odom[i + 1] and j < n_rows - 1:
                j = j + 1
            stop.append(j - 1)
            i = j - 1
        # Status is not 0 but the car barely moved across a gap of more than
        # 20 minutes: the recording paused while parked.
        if (statusn[i] != 0 and odom[i + 1] - odom[i] < 2
                and getTimeDiff.GetTimeDiff(t_c[i], t_c[i + 1]) > 20 * 60):
            start.append(i)
            j = i + 1
            while odom[i] == odom[j] and j < n_rows - 1:
                j = j + 1
            stop.append(j)
            i = j
        i = i + 1

    # Keep only complete (start, stop) pairs.
    paired = min(len(start), len(stop))
    del start[paired:]
    del stop[paired:]

    ps = []  # starts of workplace parking
    hs = []  # starts of home parking
    for first, last in zip(start, stop):
        began = pd.Timestamp(t_c[first])
        ended = pd.Timestamp(t_c[last])
        t1hour = began.hour
        t2hour = ended.hour
        parked = getTimeDiff.GetTimeDiff(t_c[first], t_c[last])

        # Timestamp.weekday(): Monday = 0 ... Sunday = 6.
        on_workdays = began.weekday() < 5 and ended.weekday() < 5
        if (parked > 2 * 60 * 60 and parked < 12 * 60 * 60
                and t1hour > 4 and t1hour < 17 and t2hour < 22
                and t1hour < t2hour and on_workdays):
            ps.append(first)
        if parked > 2 * 60 * 60 and (t1hour > 16 or t1hour < 5):
            hs.append(first)

    lonlatds = pd.concat([lon[ps], lat[ps]], axis=1)
    lonlaths = pd.concat([lon[hs], lat[hs]], axis=1)
    return lonlatds, lonlaths
def mode(ddata, datareinx):
    """Return the mean pure-electric consumption and the hybrid mileage share.

    Only rows of ``ddata`` whose column 1 equals 1 (trip segments) are used.
    A trip counts as fuel-burning when any ``fuel_consumption`` sample after
    its first record exceeds 500.

    * Fuel-burning trips whose state of charge dropped (hybrid driving)
      contribute their odometer distance to the hybrid mileage.
    * Trips without fuel use (pure electric) contribute an energy rate
      ``(battery energy used) / distance * 100``; negative rates are
      discarded before averaging.

    The remaining fuel-burning cases (state of charge unchanged or rising)
    influence neither return value and are therefore not tracked.

    Parameters
    ----------
    ddata : numpy.ndarray
        2-D segment table: column 1 is the segment type, columns 2 and 3 are
        the first/last record labels, column 11 is the segment distance.
    datareinx : pandas.DataFrame
        Record table indexed by the labels used in ``ddata``; needs the
        columns ``quqantity_electricity``, ``quqantity_electricity_percent``,
        ``distance_accumulative`` and ``fuel_consumption``.

    Returns
    -------
    (epure, hybrid_share) : tuple of float
        Mean pure-electric energy rate per 100 distance units, and hybrid
        mileage divided by the summed column-11 distance of all trips.
        Either value is NaN when there is nothing to average / divide by.
    """
    qe = datareinx['quqantity_electricity']
    qep = datareinx['quqantity_electricity_percent']
    da = datareinx['distance_accumulative']
    fc = datareinx['fuel_consumption']

    trips = ddata[ddata[:, 1] == 1]
    total_vmt = sum(trips[:, 11])

    pure_rates = []
    vmt_hybrid = 0
    for row in trips:
        a = int(row[2])
        b = int(row[3])
        vmt = da.loc[b] - da.loc[a]

        if any(fc.loc[a + 1:b] > 500):
            # Engine was driving; it is hybrid operation only if the battery
            # was drained as well.
            if qep.loc[a] - qep.loc[b] > 0:
                vmt_hybrid += vmt
        elif vmt != 0:
            # A zero-distance trip has no defined per-distance rate; letting
            # it through would put NaN/inf into the mean.
            pure_rates.append((qe.loc[a] - qe.loc[b]) / vmt * 100)

    pure_rates = np.array(pure_rates, dtype=float)
    pure_rates = pure_rates[~(pure_rates < 0)]  # drop negative rates only

    epure = np.mean(pure_rates) if pure_rates.size else np.nan
    hybrid_share = vmt_hybrid / total_vmt if total_vmt != 0 else np.nan
    return epure, hybrid_share
def _find_status_runs(padded_status, padded_time, code, max_gap=3600):
    """Find runs of consecutive records whose status equals ``code``.

    ``padded_status`` and ``padded_time`` are the per-record sequences with
    one sentinel element added at each end, so record ``k`` sits at padded
    position ``k + 1``.  A run is additionally split wherever two adjacent
    records inside it are more than ``max_gap`` apart according to
    ``getTimeDiff.GetTimeDiff``.

    Returns two lists with the first and the last record index of each run.
    """
    starts = []
    stops = []
    for k in range(len(padded_status) - 2):
        prev_match = padded_status[k] == code
        cur_match = padded_status[k + 1] == code
        next_match = padded_status[k + 2] == code
        if cur_match and not prev_match:
            starts.append(k)
        gap = getTimeDiff.GetTimeDiff(padded_time[k], padded_time[k + 1])
        if gap > max_gap and cur_match and prev_match:
            # Long reporting gap inside a run: close it at the previous
            # record and open a new one here.
            starts.append(k)
            stops.append(k - 1)
        if cur_match and not next_match:
            stops.append(k)
    return starts, stops


def trip(dataframe):
    """Extract trip and charging segments from a vehicle record table.

    Charging segments are runs of ``statusn2 == 102``; trip segments are runs
    of ``current_status_vehicle == 1``.  Neighbouring charging segments are
    merged when the odometer barely moved between them, the state of charge
    did not drop and the battery was not yet full.  Neighbouring trips are
    merged when the stop between them is shorter than 15 minutes and the
    state of charge did not rise.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Needs the columns ``statusn2``, ``current_status_vehicle``,
        ``quqantity_electricity_percent``, ``time_collect`` (strings formatted
        ``%Y-%m-%d %H:%M:%S``), ``longitude``, ``latitude``,
        ``distance_accumulative`` and ``high_temperature``.  Label-based and
        positional access are mixed below, so a default RangeIndex is assumed
        (NOTE(review): confirm against callers).

    Returns
    -------
    numpy.ndarray
        One row per segment with a positive duration, sorted by stop index.
        Columns: 0 value returned by ``re_idx``; 1 segment type (1 trip,
        2 charging); 2 start index; 3 stop index; 4 duration in minutes;
        5/6 state of charge at start/stop; 7/8 longitude/latitude at start;
        9/10 longitude/latitude at stop; 11 odometer difference; 12 weekday of
        the start (1 = Monday ... 7 = Sunday); 13/14 start/stop time of day in
        fractional hours; 15 ``high_temperature`` at the stop record.
    """
    charge_status = list(dataframe['statusn2'])
    drive_status = list(dataframe['current_status_vehicle'])
    soc = dataframe['quqantity_electricity_percent']
    time_collect = dataframe['time_collect']
    odometer = dataframe['distance_accumulative']
    temp = dataframe['high_temperature']

    # Sentinel timestamp before the first and after the last record.
    # Series.append was removed in pandas 2.0; concat is the replacement.
    pad_time = pd.Series(['2000-01-01 01:01:01'])
    time0 = pd.concat([pad_time, time_collect, pad_time], ignore_index=True)

    # --- charging segments ---------------------------------------------
    start, stop = _find_status_runs([0] + charge_status + [0], time0, 102)
    if len(start) > len(stop):
        start.pop()

    drop_start = []
    drop_stop = []
    for i in range(len(start) - 1):
        odo_delta = odometer[start[i + 1]] - odometer[stop[i]]
        soc_delta = soc[start[i + 1]] - soc[stop[i]]
        # Short distance in between, charge not reduced and battery not yet
        # full: both pieces belong to the same charging event.
        if -1 < odo_delta < 2 and soc_delta > -1 and soc[start[i + 1]] < 100:
            drop_start.append(i + 1)
            drop_stop.append(i)
    start = np.delete(np.array(start), drop_start, axis=0)
    stop = np.delete(np.array(stop), drop_stop, axis=0)

    # --- trip segments ---------------------------------------------------
    starttrip, stoptrip = _find_status_runs([0] + drive_status + [0], time0, 1)

    drop_start = []
    drop_stop = []
    for i in range(len(starttrip) - 1):
        interv = getTimeDiff.GetTimeDiff(time_collect[stoptrip[i]],
                                         time_collect[starttrip[i + 1]])
        # Stop shorter than 15 minutes without any charging in between:
        # discard the stop, i.e. join the two trips.
        if interv < 15 * 60 and soc[starttrip[i + 1]] - soc[stoptrip[i]] <= 0:
            drop_start.append(i + 1)
            drop_stop.append(i)
    starttrip = np.delete(np.array(starttrip), drop_start, axis=0)
    stoptrip = np.delete(np.array(stoptrip), drop_stop, axis=0)

    # --- combine trips (type 1) and charging events (type 2) -------------
    starttrip = starttrip.reshape(starttrip.shape[0], 1)
    trip_rows = np.append(np.ones((starttrip.shape[0], 1)), starttrip, axis=1)
    start = start.reshape(start.shape[0], 1)
    charge_rows = np.append(np.ones((start.shape[0], 1)) * 2, start, axis=1)

    begins = np.append(trip_rows, charge_rows, axis=0)
    ends = np.append(stoptrip, stop, axis=0)
    seg = np.zeros((begins.shape[0], 4))
    seg[:, 1:3] = begins
    seg[:, 3] = ends
    # lexsort uses the last key as primary key, i.e. the stop index.
    seg = seg[np.lexsort(seg.T)].astype(int)

    count = len(seg)
    first = seg[:, 2]
    last = seg[:, 3]

    # .as_matrix() was removed in pandas 1.0; .values is the same array.
    soc_values = soc.values
    longitude = dataframe['longitude'].values
    latitude = dataframe['latitude'].values
    q_e_p_begin = soc_values[first].reshape((count, 1))
    q_e_p_end = soc_values[last].reshape((count, 1))
    long_begin = longitude[first].reshape((count, 1))
    la_begin = latitude[first].reshape((count, 1))
    long_end = longitude[last].reshape((count, 1))
    la_end = latitude[last].reshape((count, 1))

    duration = np.zeros((count, 1))
    dist_cha = np.zeros((count, 1))
    whether_weekday = np.zeros((count, 1))
    time_start = np.zeros((count, 1))
    time_end = np.zeros((count, 1))
    temperature = np.zeros((count, 1))
    for i in range(count):
        begin_stamp = time_collect[seg[i, 2]]
        end_stamp = time_collect[seg[i, 3]]
        duration[i] = getTimeDiff.GetTimeDiff(begin_stamp, end_stamp) / 60
        temperature[i] = temp[seg[i, 3]]
        dist_cha[i] = odometer[seg[i, 3]] - odometer[seg[i, 2]]
        whether_weekday[i] = datetime.datetime.strptime(
            begin_stamp, "%Y-%m-%d %H:%M:%S").weekday() + 1
        # Characters 11-12 are the hour, 14-15 the minute.
        time_start[i] = float(begin_stamp[11:13]) + float(
            begin_stamp[14:16]) / 60
        time_end[i] = float(end_stamp[11:13]) + float(end_stamp[14:16]) / 60

    ddata = np.hstack(
        (seg, duration, q_e_p_begin, q_e_p_end, long_begin, la_begin,
         long_end, la_end, dist_cha, whether_weekday, time_start, time_end,
         temperature))
    aa = re_idx(dataframe, ddata).reshape(ddata.shape[0], )
    ddata[:, 0] = aa
    ddata = ddata[ddata[:, 4] > 0]  # drop segments with zero duration
    return ddata