def deal_trip(filepath, outpath="../../data/train_trip.csv"):
    """Write labelled training samples built from truncated trip polylines.

    Each row read from ``filepath`` has its ``POLYLINE`` column parsed as a
    JSON list of points.  Trips with fewer than 10 points are skipped.  For
    the remaining trips one output line is written per sample:

    * fewer than 34 points: the first 5 points plus the last 5 points;
    * more than 34 points: the first 5 points plus the 5 points ending at
      30% of the trip;
    * more than 70 points: additionally the 5 points ending at 60%;
    * more than 100 points: additionally the 5 points ending at 80%.

    Every output line is ``json.dumps(trip_unit(row) + [points, label])``
    where ``label`` is read from ``../../data/label_des.csv`` through
    ``get_label``.  The label file is consumed forward only, so it is
    presumably ordered like the input rows -- TODO confirm.

    :param filepath: path handed to ``read_csvfile``.
    :param outpath: file that receives one JSON list per line.
    """
    rowlist = ["TRIP_ID", "CALL_TYPE", "TAXI_ID", "TIMESTAMP", "DAY_TYPE", "POLYLINE"]
    datas = read_csvfile(filepath, rowlist)
    # (exclusive minimum trip length, fraction of the trip where the sample ends)
    partial_cuts = ((34, 0.3), (70, 0.6), (100, 0.8))

    # ``with`` closes both files even when a row raises unexpectedly.
    with open("../../data/label_des.csv", "r") as label_file, \
            open(outpath, "w") as out:
        i = 0
        trip_id, label = get_label(label_file.readline())
        trip_id = int(trip_id)
        for idx, row in datas.iterrows():
            line = json.loads(row['POLYLINE'])
            try:
                lenght = len(line)
                if lenght < 10:
                    continue

                samples = []
                # NOTE(review): a trip of exactly 34 points matches neither
                # ``< 34`` nor ``34 <`` and yields no sample; kept as in the
                # original -- confirm whether that gap is intended.
                if lenght < 34:
                    samples.append(line[0:5] + line[-5:])
                for min_len, fraction in partial_cuts:
                    if min_len < lenght:
                        end = int(lenght * fraction)
                        samples.append(line[0:5] + line[end - 5:end])

                if samples:
                    # Advance the label cursor until it reaches this trip.
                    while row['TRIP_ID'] != trip_id:
                        trip_id, label = get_label(label_file.readline())
                        trip_id = int(trip_id)
                    for points in samples:
                        data = trip_unit(row)
                        data.append(points)
                        data.append(label)
                        out.write(json.dumps(data) + "\n")

                # Periodic flush so partial output survives an interrupted run.
                if i % 1000 == 999:
                    out.flush()
                    i = 0
                i += 1
            except Exception:
                # Best effort: report the offending row and keep going, but
                # let KeyboardInterrupt/SystemExit propagate.
                print("error line: %s %s" % (line, trip_id))
def deal_time2we(filepath, outpath="../../data/time2we.csv"):
    """Derive calendar features from each trip's ``TIMESTAMP``.

    For every row with a truthy timestamp the value is converted with
    ``time.localtime`` (so the result depends on the machine's time zone)
    and one line ``"<trip_id> <month> <day> <weekday> <slot>"`` is written
    to ``outpath``.  ``month`` and ``day`` are the zero-padded ``%m``/``%d``
    strings, ``weekday`` is ``%w`` (0 = Sunday) and ``slot`` is the index of
    the 10-minute interval within the day (``hour * 6 + minute // 10``).

    :param filepath: path handed to ``read_csvfile``.
    :param outpath: file that receives the space-separated features.
    :returns: list of ``[[trip_id], [month, day, weekday, str(slot)]]``
        entries, one per row that was converted successfully.
    """
    rowlist = ['TRIP_ID', 'TIMESTAMP']
    datas = read_csvfile(filepath, rowlist)
    we = []
    with open(outpath, "w") as out:
        # Walk both columns in lockstep instead of keeping a manual index.
        for trip_id, line in zip(datas['TRIP_ID'], datas['TIMESTAMP']):
            try:
                if line:
                    stamp = time.strftime("%m:%d:%H:%M:%U:%w",
                                          time.localtime(int(line)))
                    # %U (week of the year) is parsed but not used.
                    month, day, hour, minute, _weeks, week = stamp.split(":")
                    # Floor division keeps the slot an integer on any interpreter.
                    hm = int(hour) * 6 + int(minute) // 10
                    # str(): the trip id may be numeric and cannot be
                    # concatenated to a string directly.
                    out.write(str(trip_id) + " " + month + " " + day + " "
                              + week + " " + str(hm) + "\n")
                    we.append([[trip_id], [month, day, week, str(hm)]])
            except Exception:
                # Best effort: report the bad timestamp and continue.
                print("error line: %s" % (line,))
    print(we)
    return we
def deal_des(filepath, outpath="../../data/des.csv"):
    """Extract the last point of every trip polyline.

    Each ``POLYLINE`` value is parsed as JSON.  For a non-empty polyline the
    last point's first two components are written to ``outpath`` as
    ``"<trip_id> <x> <y>"``.  Empty polylines are counted as blank, rows
    that fail to parse or process are counted as errors, and a summary of
    the three counts is printed at the end.

    :param filepath: path handed to ``read_csvfile``.
    :param outpath: file that receives one space-separated line per trip.
    :returns: list of ``[[trip_id], last_point]`` entries for the trips
        that had at least one point.
    """
    rowlist = ['TRIP_ID', 'POLYLINE']
    datas = read_csvfile(filepath, rowlist)
    trip = []
    blank = 0
    errornum = 0
    with open(outpath, "w") as out:
        # Walk both columns in lockstep instead of keeping a manual index.
        for trip_id, line in zip(datas['TRIP_ID'], datas['POLYLINE']):
            # Shown in the error report if the JSON itself cannot be parsed.
            tmplist = line
            try:
                # Parsing happens inside the try so that a malformed row is
                # counted as an error instead of aborting the whole run.
                tmplist = json.loads(line)
                if tmplist:
                    x, y = tmplist[-1][0], tmplist[-1][1]
                    out.write(str(trip_id) + " " + str(x) + " " + str(y) + "\n")
                    trip.append([[trip_id], tmplist[-1]])
                else:
                    blank += 1
            except Exception:
                print(tmplist)
                errornum += 1
    print("blank: %s valid: %s error: %s" % (blank, len(trip), errornum))
    return trip
def latlon2grid(filepath, outpath="../../data/grid_trip.csv"):
    """Convert each trip polyline into a sequence of grid-cell codes.

    Every point of a trip is encoded with ``gh.encode(point[1], point[0],
    precision=5)``; consecutive duplicates are collapsed so the trip becomes
    the ordered list of cells it passes through.  Trips that touch fewer
    than two cells are dropped.  Each remaining trip is written to
    ``outpath`` as ``"<trip_id> | <json cells>|<json time features>"`` where
    the time features are ``[month, day, weekday, slot]`` derived from
    ``TIMESTAMP`` via ``time.localtime`` (time-zone dependent), or ``[]``
    when the timestamp is falsy.  The number of distinct codes seen over
    all points is printed at the end.

    :param filepath: path handed to ``read_csvfile``.
    :param outpath: file that receives one record per kept trip.
    """
    rowlist = ["TRIP_ID", "POLYLINE", "TIMESTAMP"]
    datas = read_csvfile(filepath, rowlist)
    # Distinct codes over all points, including those of dropped trips.
    codes = set()
    with open(outpath, "w") as output:
        for idx, row in datas.iterrows():
            line = json.loads(row["POLYLINE"])
            trip_time = row["TIMESTAMP"]

            trip = []
            pre = ""
            for point in line:
                code = gh.encode(point[1], point[0], precision=5)
                codes.add(code)
                # Only record a cell when the trip moves into a new one.
                if pre != code:
                    pre = code
                    trip.append(code)
            if len(trip) < 2:
                continue

            data = []
            if trip_time:
                stamp = time.strftime("%m:%d:%H:%M:%U:%w",
                                      time.localtime(int(trip_time)))
                # %U (week of the year) is parsed but not used.
                month, day, hour, minute, _weeks, week = stamp.split(":")
                # 10-minute slot of the day; floor division keeps it an int.
                hm = int(hour) * 6 + int(minute) // 10
                data = [month, day, week, str(hm)]

            # Separators are intentionally asymmetric (" | " then "|") to
            # keep the record format unchanged.
            output.write(str(row["TRIP_ID"]) + " | " + json.dumps(trip)
                         + "|" + json.dumps(data) + "\n")
    print("code has  " + str(len(codes)))