def od_merge_mergeable(child_trip, father_trip):
    '''Test whether child_trip can be od-merged into father_trip.

    Both arguments are dicts with a 'start_point' and an 'end_point' entry,
    each of which holds 'lat', 'lng' and 'time'. child_trip must also
    provide 'tt'.

    Returns a 3-tuple (mergeable, admissible, delay):
      * (False, False, -1) when the child does not start strictly before the
        father, or when the child's start time plus the walking time to the
        father's start point is later than the father's start time;
      * (True, False, delay) when delay is greater than Constants.DELTA;
      * (True, True, delay) otherwise.
    '''
    child_origin = child_trip['start_point']
    father_origin = father_trip['start_point']

    # The child has to set off strictly before the father does.
    if child_origin['time'] >= father_origin['time']:
        return False, False, -1

    # Walking time between the two origins: distance divided by the pace.
    origin_gap = geo.distBetween(child_origin['lat'], child_origin['lng'],
                                 father_origin['lat'], father_origin['lng'])
    t_walk_origin = origin_gap / Constants.PACE
    if child_origin['time'] + t_walk_origin > father_origin['time']:
        # The child cannot reach the father's pick-up point in time.
        return False, False, -1

    child_dest = child_trip['end_point']
    father_dest = father_trip['end_point']

    # Walking time between the two destinations.
    dest_gap = geo.distBetween(child_dest['lat'], child_dest['lng'],
                               father_dest['lat'], father_dest['lng'])
    t_walk_dest = dest_gap / Constants.PACE

    # NOTE(review): this subtracts both the child's end time and the child's
    # 'tt' from the merged arrival time - confirm that is the intended
    # definition of the delay.
    delay = (father_dest['time'] + t_walk_dest - child_dest['time'] -
             child_trip['tt'])

    # Mergeable in any case; admissible only while the delay stays within
    # Constants.DELTA.
    admissible = not (delay > Constants.DELTA)
    return True, admissible, delay
def is_valid_trip(trip_file_name, debug=False):
    """Decide heuristically whether an extracted trip file is a usable trip.

    Every line of the file must consist of six comma-separated fields. The
    second and third fields are read as longitude and latitude, the sixth as
    an integer time; the remaining fields are ignored.

    The trip is rejected when
      * the file contains no lines at all,
      * five samples have been closer to the first sample than the largest
        distance reached so far (the counter is cumulative and is only
        checked while processing a later line), or
      * the time of the last line minus the time of the first line is
        below 30 (half a minute).

    Args:
        trip_file_name: path of the trip trajectory file to inspect.
        debug: when true, print every computed distance from the start point.

    Returns:
        True if the trip passes all heuristics, False otherwise.

    Raises:
        ValueError: if a line does not have exactly six fields or one of the
            numeric fields cannot be parsed.
    """
    # Read everything up front so the handle is closed even on early return.
    with open(trip_file_name, 'r') as trip_file:
        lines = trip_file.readlines()

    if not lines:
        # No GPS samples means there is no trip to validate.
        return False

    # NOTE(review): extract_trip_from_one_file writes rows as
    # "taxi_id,lat,lng,...", whereas the 2nd field is read as lon and the
    # 3rd as lat here - confirm which order is correct.
    regressions = 0
    max_dist = 0
    start_time = None
    start_point = None
    timestamp = None
    for idx, line in enumerate(lines):
        _, lon, lat, _, _, timestamp = line.split(',')
        lon = float(lon)
        lat = float(lat)
        timestamp = int(timestamp)

        if regressions == 5:
            # The taxi fell back towards its start point five times
            # (cumulatively, the counter is never reset): disregard the trip.
            return False

        if idx == 0:
            start_time = timestamp
            start_point = (lat, lon)
            continue

        dist = geo.distBetween(lat, lon, start_point[0], start_point[1])
        if debug:
            # Single-argument form prints the same under the print statement.
            print(dist)
        if dist < max_dist:
            regressions += 1
        else:
            max_dist = dist

    # A travel time below half a minute disqualifies the trip.
    return timestamp - start_time >= 30
def is_valid_trip(trip_file_name, debug=False):
    """Decide heuristically whether an extracted trip file is a usable trip.

    Every line of the file must consist of six comma-separated fields. The
    second and third fields are read as longitude and latitude, the sixth as
    an integer time; the remaining fields are ignored.

    The trip is rejected when
      * the file contains no lines at all,
      * five samples have been closer to the first sample than the largest
        distance reached so far (the counter is cumulative and is only
        checked while processing a later line), or
      * the time of the last line minus the time of the first line is
        below 30 (half a minute).

    Args:
        trip_file_name: path of the trip trajectory file to inspect.
        debug: when true, print every computed distance from the start point.

    Returns:
        True if the trip passes all heuristics, False otherwise.

    Raises:
        ValueError: if a line does not have exactly six fields or one of the
            numeric fields cannot be parsed.
    """
    # Read everything up front so the handle is closed even on early return.
    with open(trip_file_name, 'r') as trip_file:
        lines = trip_file.readlines()

    if not lines:
        # No GPS samples means there is no trip to validate.
        return False

    # NOTE(review): extract_trip_from_one_file writes rows as
    # "taxi_id,lat,lng,...", whereas the 2nd field is read as lon and the
    # 3rd as lat here - confirm which order is correct.
    regressions = 0
    max_dist = 0
    start_time = None
    start_point = None
    timestamp = None
    for idx, line in enumerate(lines):
        _, lon, lat, _, _, timestamp = line.split(',')
        lon = float(lon)
        lat = float(lat)
        timestamp = int(timestamp)

        if regressions == 5:
            # The taxi fell back towards its start point five times
            # (cumulatively, the counter is never reset): disregard the trip.
            return False

        if idx == 0:
            start_time = timestamp
            start_point = (lat, lon)
            continue

        dist = geo.distBetween(lat, lon, start_point[0], start_point[1])
        if debug:
            # Single-argument form prints the same under the print statement.
            print(dist)
        if dist < max_dist:
            regressions += 1
        else:
            max_dist = dist

    # A travel time below half a minute disqualifies the trip.
    return timestamp - start_time >= 30
def od_merge_mergeable(child_trip, father_trip):
    '''Test whether child_trip can be od-merged into father_trip.

    Both arguments are dicts with a 'start_point' and an 'end_point' entry,
    each of which holds 'lat', 'lng' and 'time'. child_trip must also
    provide 'tt'.

    Returns a 3-tuple (mergeable, admissible, delay):
      * (False, False, -1) when the child does not start strictly before the
        father, or when the child's start time plus the walking time to the
        father's start point is later than the father's start time;
      * (True, False, delay) when delay is greater than Constants.DELTA;
      * (True, True, delay) otherwise.
    '''
    child_origin = child_trip['start_point']
    father_origin = father_trip['start_point']

    # The child has to set off strictly before the father does.
    if child_origin['time'] >= father_origin['time']:
        return False, False, -1

    # Walking time between the two origins: distance divided by the pace.
    origin_gap = geo.distBetween(child_origin['lat'], child_origin['lng'],
                                 father_origin['lat'], father_origin['lng'])
    t_walk_origin = origin_gap / Constants.PACE
    if child_origin['time'] + t_walk_origin > father_origin['time']:
        # The child cannot reach the father's pick-up point in time.
        return False, False, -1

    child_dest = child_trip['end_point']
    father_dest = father_trip['end_point']

    # Walking time between the two destinations.
    dest_gap = geo.distBetween(child_dest['lat'], child_dest['lng'],
                               father_dest['lat'], father_dest['lng'])
    t_walk_dest = dest_gap / Constants.PACE

    # NOTE(review): this subtracts both the child's end time and the child's
    # 'tt' from the merged arrival time - confirm that is the intended
    # definition of the delay.
    delay = (father_dest['time'] + t_walk_dest - child_dest['time'] -
             child_trip['tt'])

    # Mergeable in any case; admissible only while the delay stays within
    # Constants.DELTA.
    admissible = not (delay > Constants.DELTA)
    return True, admissible, delay
def extract_trip_from_one_file(file_name, trip_dir, trip_meta):
    """Split one taxi GPS log into occupied trips and write them to disk.

    The input is read as CSV with the columns taxi_id, timestamp, lat, lng,
    unknown1, unknown2, occupied. Consecutive rows whose 'occupied' field is
    '1' form one trip; the first row with any other value closes it. A trip
    that is still open when the file ends is discarded.

    A closed trip is kept only if both its travel time and its accumulated
    travel distance are greater than zero. For every kept trip
      * the file trip_dir + str(trip_id) + '.txt' is written with one line
        per sample: taxi_id, lat, lng, day of week, hour of day, second of
        day;
      * one line is appended to trip_meta: trip_id, taxi_id, start lat,
        start lng, start time, end lat, end lng, end time, travel time,
        travel distance;
      * the running bounding box is widened by the trip's coordinates.

    Trip ids continue from the function attribute
    extract_trip_from_one_file.count, which the caller has to set before the
    first call; it is updated before returning.

    Args:
        file_name: path of the raw GPS log.
        trip_dir: prefix for the per-trip files. It is concatenated as a
            plain string, so it needs its own trailing path separator.
        trip_meta: path of the trip summary file, opened in append mode.

    Returns:
        A tuple (trips, max_lat, min_lat, max_lng, min_lng). trips is a list
        of (samples, lats, lngs, times, travel_distance, travel_time) tuples.
        The bounds start at 0.0 (max) and 180 (min) and are returned
        unchanged when no trip is kept.

    Raises:
        AttributeError: if extract_trip_from_one_file.count was never set.
        ValueError: if a timestamp or a coordinate of an occupied row cannot
            be parsed.
    """
    time_format = "%Y-%m-%d %H:%M:%S"
    columns = ['taxi_id', 'timestamp', 'lat', 'lng', 'unknown1', 'unknown2',
               'occupied']

    trips = []
    # NOTE(review): these seeds only give a correct bounding box when all
    # coordinates lie between 0 and 180 - confirm for the data set in use.
    max_lat = 0.0
    min_lat = 180
    max_lng = 0.0
    min_lng = 180

    # State of the trip that is currently being collected.
    lat = []
    lng = []
    timestamp = []
    trip = []
    travel_distance = 0.0

    trip_id = extract_trip_from_one_file.count

    # Context managers close both files even if a row fails to parse.
    with open(file_name) as source, open(trip_meta, 'a+') as trip_meta_file:
        for row in csv.DictReader(source, columns):
            if row['occupied'] == '1':
                # Parse the timestamp once instead of once per component.
                parsed = time.strptime(row['timestamp'], time_format)
                # "%w" numbers the days from Sunday = 0, unlike tm_wday.
                day_of_week = int(time.strftime("%w", parsed))
                hour_of_day = parsed.tm_hour
                sec_of_day = (hour_of_day * 3600 + parsed.tm_min * 60 +
                              parsed.tm_sec)
                point_lat = float(row['lat'])
                point_lng = float(row['lng'])

                trip.append((row['taxi_id'], point_lat, point_lng,
                             day_of_week, hour_of_day, sec_of_day))
                lat.append(point_lat)
                lng.append(point_lng)
                timestamp.append(sec_of_day)
                if len(lat) > 1:
                    travel_distance += geo.distBetween(lat[-1], lng[-1],
                                                       lat[-2], lng[-2])
                continue

            if not trip:
                # Unoccupied row with no trip in progress: nothing to close.
                continue

            travel_time = timestamp[-1] - timestamp[0]
            if travel_time < 0:
                # The trip crossed midnight.
                travel_time += 60 * 60 * 24

            # Filter trips with zero travel time or zero travel distance.
            if travel_time > 0 and travel_distance > 0:
                trips.append((trip, lat, lng, timestamp, travel_distance,
                              travel_time))

                # Generate the trip file.
                with open(trip_dir + str(trip_id) + '.txt', 'w') as trip_file:
                    for sample in trip:
                        trip_file.write(
                            ','.join([str(field) for field in sample]) + '\n')

                # Update the trip meta file.
                trip_meta_file.write(','.join([
                    str(trip_id), str(trip[0][0]),
                    str(lat[0]), str(lng[0]), str(timestamp[0]),
                    str(lat[-1]), str(lng[-1]), str(timestamp[-1]),
                    str(travel_time), str(travel_distance)
                ]) + '\n')
                trip_id += 1

                max_lat = max(max(lat), max_lat)
                min_lat = min(min(lat), min_lat)
                max_lng = max(max(lng), max_lng)
                min_lng = min(min(lng), min_lng)

            # Rebind to fresh lists (no in-place clearing): the old lists may
            # now be referenced from `trips`.
            lat = []
            lng = []
            timestamp = []
            trip = []
            travel_distance = 0.0

    extract_trip_from_one_file.count = trip_id
    return trips, max_lat, min_lat, max_lng, min_lng
def extract_trip_from_one_file(file_name, trip_dir, trip_meta):
    """Split one taxi GPS log into occupied trips and write them to disk.

    The input is read as CSV with the columns taxi_id, timestamp, lat, lng,
    unknown1, unknown2, occupied. Consecutive rows whose 'occupied' field is
    '1' form one trip; the first row with any other value closes it. A trip
    that is still open when the file ends is discarded.

    A closed trip is kept only if both its travel time and its accumulated
    travel distance are greater than zero. For every kept trip
      * the file trip_dir + str(trip_id) + '.txt' is written with one line
        per sample: taxi_id, lat, lng, day of week, hour of day, second of
        day;
      * one line is appended to trip_meta: trip_id, taxi_id, start lat,
        start lng, start time, end lat, end lng, end time, travel time,
        travel distance;
      * the running bounding box is widened by the trip's coordinates.

    Trip ids continue from the function attribute
    extract_trip_from_one_file.count, which the caller has to set before the
    first call; it is updated before returning.

    Args:
        file_name: path of the raw GPS log.
        trip_dir: prefix for the per-trip files. It is concatenated as a
            plain string, so it needs its own trailing path separator.
        trip_meta: path of the trip summary file, opened in append mode.

    Returns:
        A tuple (trips, max_lat, min_lat, max_lng, min_lng). trips is a list
        of (samples, lats, lngs, times, travel_distance, travel_time) tuples.
        The bounds start at 0.0 (max) and 180 (min) and are returned
        unchanged when no trip is kept.

    Raises:
        AttributeError: if extract_trip_from_one_file.count was never set.
        ValueError: if a timestamp or a coordinate of an occupied row cannot
            be parsed.
    """
    time_format = "%Y-%m-%d %H:%M:%S"
    columns = ['taxi_id', 'timestamp', 'lat', 'lng', 'unknown1', 'unknown2',
               'occupied']

    trips = []
    # NOTE(review): these seeds only give a correct bounding box when all
    # coordinates lie between 0 and 180 - confirm for the data set in use.
    max_lat = 0.0
    min_lat = 180
    max_lng = 0.0
    min_lng = 180

    # State of the trip that is currently being collected.
    lat = []
    lng = []
    timestamp = []
    trip = []
    travel_distance = 0.0

    trip_id = extract_trip_from_one_file.count

    # Context managers close both files even if a row fails to parse.
    with open(file_name) as source, open(trip_meta, 'a+') as trip_meta_file:
        for row in csv.DictReader(source, columns):
            if row['occupied'] == '1':
                # Parse the timestamp once instead of once per component.
                parsed = time.strptime(row['timestamp'], time_format)
                # "%w" numbers the days from Sunday = 0, unlike tm_wday.
                day_of_week = int(time.strftime("%w", parsed))
                hour_of_day = parsed.tm_hour
                sec_of_day = (hour_of_day * 3600 + parsed.tm_min * 60 +
                              parsed.tm_sec)
                point_lat = float(row['lat'])
                point_lng = float(row['lng'])

                trip.append((row['taxi_id'], point_lat, point_lng,
                             day_of_week, hour_of_day, sec_of_day))
                lat.append(point_lat)
                lng.append(point_lng)
                timestamp.append(sec_of_day)
                if len(lat) > 1:
                    travel_distance += geo.distBetween(lat[-1], lng[-1],
                                                       lat[-2], lng[-2])
                continue

            if not trip:
                # Unoccupied row with no trip in progress: nothing to close.
                continue

            travel_time = timestamp[-1] - timestamp[0]
            if travel_time < 0:
                # The trip crossed midnight.
                travel_time += 60 * 60 * 24

            # Filter trips with zero travel time or zero travel distance.
            if travel_time > 0 and travel_distance > 0:
                trips.append((trip, lat, lng, timestamp, travel_distance,
                              travel_time))

                # Generate the trip file.
                with open(trip_dir + str(trip_id) + '.txt', 'w') as trip_file:
                    for sample in trip:
                        trip_file.write(
                            ','.join([str(field) for field in sample]) + '\n')

                # Update the trip meta file.
                trip_meta_file.write(','.join([
                    str(trip_id), str(trip[0][0]),
                    str(lat[0]), str(lng[0]), str(timestamp[0]),
                    str(lat[-1]), str(lng[-1]), str(timestamp[-1]),
                    str(travel_time), str(travel_distance)
                ]) + '\n')
                trip_id += 1

                max_lat = max(max(lat), max_lat)
                min_lat = min(min(lat), min_lat)
                max_lng = max(max(lng), max_lng)
                min_lng = min(min(lng), min_lng)

            # Rebind to fresh lists (no in-place clearing): the old lists may
            # now be referenced from `trips`.
            lat = []
            lng = []
            timestamp = []
            trip = []
            travel_distance = 0.0

    extract_trip_from_one_file.count = trip_id
    return trips, max_lat, min_lat, max_lng, min_lng