def process_file(fn):
    """Write the trips of selected drivers from one merged trip CSV.

    Reads ``<merged_trip_dir>/<fn>`` and writes
    ``<for_full_driver_dir>/full-drivers-trips-<yymm>.csv``. Only rows whose
    driver id is flagged truthy in ``driver_full_or_not`` are kept. Each
    output row carries the trip itself plus the end time and end location of
    the previous trip seen in this file for the same vehicle id.

    Args:
        fn: file name of the form ``<prefix>-<yymm>.csv``; it must contain
            exactly one '-'.

    Raises:
        ValueError: if ``fn`` does not split into exactly two '-'-separated
            parts, if a required column is missing from the header, or if a
            driver id cell is not an integer.
    """
    _, yymm = fn[:-len('.csv')].split('-')
    start_msg = 'handle the file; %s' % yymm
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(start_msg)
    logging_msg(start_msg)
    with open('%s/%s' % (merged_trip_dir, fn), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        # Resolve column positions once from the header row.
        id_did = headers.index('driver-id')
        id_vid = headers.index('vehicle-id')
        id_st, id_et = headers.index('start-time'), headers.index('end-time')
        id_dur, id_fare = headers.index('duration'), headers.index('fare')
        id_s_long, id_s_lat = headers.index('start-long'), headers.index('start-lat')
        id_e_long, id_e_lat = headers.index('end-long'), headers.index('end-lat')
        # vehicle id -> (end location, end time) of the last trip written.
        vehicle_prev_trip_position_time = {}
        out_path = '%s/full-drivers-trips-%s.csv' % (for_full_driver_dir, yymm)
        with open(out_path, 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            new_headers = ['did',
                           'prev-trip-end-time', 'prev-trip-end-location',
                           'start-time', 'start-location',
                           'end-time', 'end-location',
                           'duration', 'fare']
            writer.writerow(new_headers)
            count = 0
            for row in reader:
                did = int(row[id_did])
                # Skip ids outside the lookup table. ">=" (not ">") so that
                # did == len(...) cannot reach the index below.
                # NOTE(review): assumes driver_full_or_not is a 0-indexed
                # sequence -- confirm against where it is built.
                if did < 0 or did >= len(driver_full_or_not):
                    continue
                if not driver_full_or_not[did]:
                    continue
                vid = row[id_vid]
                # NOTE(security): eval() executes whatever expression is in
                # the CSV cell. Acceptable only for trusted input; consider
                # float() or ast.literal_eval() if the files can come from
                # elsewhere.
                start_time, end_time = eval(row[id_st]), eval(row[id_et])
                s_long, s_lat = eval(row[id_s_long]), eval(row[id_s_lat])
                e_long, e_lat = eval(row[id_e_long]), eval(row[id_e_lat])
                s_location = is_in_airport(s_long, s_lat)
                e_location = is_in_airport(e_long, e_lat)
                # ASSUMPTION
                # If this is the first trip seen for this vehicle in the file,
                # assume the previous trip ended outside the airport and that
                # its end time equals the current trip's start time.
                prev_trip_end_location, prev_trip_end_time = \
                    vehicle_prev_trip_position_time.get(vid, (OUT_AP, start_time))
                writer.writerow([did,
                                 prev_trip_end_time, prev_trip_end_location,
                                 start_time, s_location,
                                 end_time, e_location,
                                 row[id_dur], row[id_fare]])
                # Remember where/when this trip ended for the vehicle's next row.
                vehicle_prev_trip_position_time[vid] = (e_location, end_time)
                count += 1
                if count % check_progress == 0:
                    progress_msg = '%s-----%d' % (yymm, count)
                    print(progress_msg)
                    logging_msg(progress_msg)
    end_msg = 'end the file; %s' % yymm
    print(end_msg)
    logging_msg(end_msg)
def process_file(path_to_csv_file):
    """Annotate every record of a log CSV with two location flags.

    Reads ``path_to_csv_file`` and writes ``<logs_dir>/log-<yymm>.csv`` with
    the columns ``time, vid, did, ap-or-not, np-or-not``. The last two columns
    are the results of ``is_in_airport`` and ``is_in_night_safari`` applied
    to the record's longitude/latitude.

    Args:
        path_to_csv_file: '/'-separated path whose final component has the
            form ``<a>-<yymm>-<b>`` (exactly two '-').

    Raises:
        ValueError: if the file name does not split into exactly three
            '-'-separated parts or a required column is missing from the
            header.
    """
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(path_to_csv_file)
    ori_log_fn = path_to_csv_file.split('/')[-1]
    _, yymm, _ = ori_log_fn.split('-')
    start_msg = 'handle the file; %s' % yymm
    print(start_msg)
    logging_msg(start_msg)
    with open(path_to_csv_file, 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        # Resolve column positions once from the header row.
        id_time = headers.index('time')
        id_vid = headers.index('vehicle-id')
        id_did = headers.index('driver-id')
        index_long = headers.index('longitude')
        index_lat = headers.index('latitude')
        with open('%s/log-%s.csv' % (logs_dir, yymm), 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            new_headers = ['time', 'vid', 'did', 'ap-or-not', 'np-or-not']
            writer.writerow(new_headers)
            for row in reader:
                # Parse the coordinates once and reuse them for both checks.
                # NOTE(security): eval() executes whatever expression is in
                # the CSV cell. Acceptable only for trusted input; consider
                # float() or ast.literal_eval() otherwise.
                longitude = eval(row[index_long])
                latitude = eval(row[index_lat])
                ap_or_not = is_in_airport(longitude, latitude)
                np_or_not = is_in_night_safari(longitude, latitude)
                writer.writerow([row[id_time], row[id_vid], row[id_did],
                                 ap_or_not, np_or_not])
    end_msg = 'end the file; %s' % yymm
    print(end_msg)
    logging_msg(end_msg)
def process_file(path_to_csv_file):
    """Annotate every record of a log CSV with two location flags.

    Reads ``path_to_csv_file`` and writes ``<logs_dir>/log-<yymm>.csv`` with
    the columns ``time, vid, did, ap-or-not, np-or-not``. The last two columns
    are the results of ``is_in_airport`` and ``is_in_night_safari`` applied
    to the record's longitude/latitude.

    Args:
        path_to_csv_file: '/'-separated path whose final component has the
            form ``<a>-<yymm>-<b>`` (exactly two '-').

    Raises:
        ValueError: if the file name does not split into exactly three
            '-'-separated parts or a required column is missing from the
            header.
    """
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(path_to_csv_file)
    ori_log_fn = path_to_csv_file.split('/')[-1]
    _, yymm, _ = ori_log_fn.split('-')
    start_msg = 'handle the file; %s' % yymm
    print(start_msg)
    logging_msg(start_msg)
    with open(path_to_csv_file, 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        # Resolve column positions once from the header row.
        id_time = headers.index('time')
        id_vid = headers.index('vehicle-id')
        id_did = headers.index('driver-id')
        index_long = headers.index('longitude')
        index_lat = headers.index('latitude')
        with open('%s/log-%s.csv' % (logs_dir, yymm), 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            new_headers = ['time', 'vid', 'did', 'ap-or-not', 'np-or-not']
            writer.writerow(new_headers)
            for row in reader:
                # Parse the coordinates once and reuse them for both checks.
                # NOTE(security): eval() executes whatever expression is in
                # the CSV cell. Acceptable only for trusted input; consider
                # float() or ast.literal_eval() otherwise.
                longitude = eval(row[index_long])
                latitude = eval(row[index_lat])
                ap_or_not = is_in_airport(longitude, latitude)
                np_or_not = is_in_night_safari(longitude, latitude)
                writer.writerow([row[id_time], row[id_vid], row[id_did],
                                 ap_or_not, np_or_not])
    end_msg = 'end the file; %s' % yymm
    print(end_msg)
    logging_msg(end_msg)