def process_file(fn):
    """Copy the log rows recorded after the start of the month's last day.

    ``fn`` must look like ``<prefix>-<yymm>.csv`` and is read from
    ``logs_dir``. The header row plus every row whose ``time`` value is
    greater than the local-time timestamp of 00:00 on the last day of month
    ``yymm`` are written to ``log-last-day-<yymm><dd>.csv`` inside
    ``log_last_day_dir``.
    """
    stem = fn[:-len('.csv')]
    _, yymm = stem.split('-')
    print('handle the file; %s' % yymm)
    logging_msg('handle the file; %s' % yymm)
    year, month = int('20' + yymm[:2]), int(yymm[2:])
    # The last day of this month is one day before the first of the next one.
    following = (year + 1, 1) if month == 12 else (year, month + 1)
    first_of_following = datetime.datetime(following[0], following[1], 1, 0)
    last_day = first_of_following - datetime.timedelta(days=1)
    day_tag = '%2d' % last_day.day
    threshold = time.mktime(last_day.timetuple())
    with open('%s/%s' % (logs_dir, fn), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        time_col = headers.index('time')
        out_path = '%s/log-last-day-%s%s.csv' % (log_last_day_dir, yymm, day_tag)
        with open(out_path, 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            writer.writerow(headers)
            for row in reader:
                # NOTE(review): eval() executes whatever text the column
                # holds; only safe while the log files are trusted.
                if eval(row[time_col]) > threshold:
                    writer.writerow(row)
    print('end the file; %s' % yymm)
    logging_msg('end the file; %s' % yymm)
Esempio n. 2
0
def process_file(fn):
    """Keep the shift rows of vehicles that map to at most one pickle entry.

    ``fn`` must look like ``<a>-<b>-<yymm>.csv`` and is read from
    ``shifts_dir``. A row is skipped when its vehicle id maps to more than
    one entry in ``driver-vehicle-<yymm>.pkl`` (presumably the drivers seen
    on that vehicle -- confirm against the code that writes the pickle).
    Kept rows go to ``shift-full-time-<yymm>.csv`` under renamed headers, and
    the set of their driver ids is pickled to ``full-time-drivers-<yymm>.pkl``;
    both outputs live in ``full_shift_dir``.
    """
    _, _, yymm = fn[:-len('.csv')].split('-')
    print('handle the file; %s' % yymm)
    logging_msg('handle the file; %s' % yymm)
    #
    is_driver_vehicle = load_picle_file('%s/driver-vehicle-%s.pkl' % (shifts_dir, yymm))
    full_drivers = set()
    source_columns = ('yy', 'mm', 'dd', 'hh', 'vid', 'did', 'pro-dur', 'x-pro-dur')
    with open('%s/%s' % (shifts_dir, fn), 'rt') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        # Positions of the copied columns, in output order.
        positions = [headers.index(name) for name in source_columns]
        vid_pos, did_pos = positions[4], positions[5]
        with open('%s/shift-full-time-%s.csv' % (full_shift_dir, yymm), 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            writer.writerow(['year', 'month', 'day', 'hour', 'vehicle-id', 'driver-id',
                             'productive-duration', 'x-productive-duration'])
            for row in reader:
                if len(is_driver_vehicle[row[vid_pos]]) <= 1:
                    writer.writerow([row[pos] for pos in positions])
                    full_drivers.add(row[did_pos])
    save_pickle_file('%s/full-time-drivers-%s.pkl' % (full_shift_dir, yymm), full_drivers)
    print('end the file; %s' % yymm)
    logging_msg('end the file; %s' % yymm)
def process_file(path_to_csv_file):
    """Reduce a raw log CSV to time, ids and two location flags.

    The file name (last ``/``-separated component of ``path_to_csv_file``)
    must split on ``-`` into exactly three parts, the middle one being
    ``yymm``. For every input row the result of ``is_in_airport`` and
    ``is_in_night_safari`` for the row's longitude/latitude is written next to
    the time, vehicle id and driver id into ``log-<yymm>.csv`` in ``logs_dir``.
    """
    print(path_to_csv_file)
    _, yymm, _ = path_to_csv_file.split('/')[-1].split('-')
    print('handle the file; %s' % yymm)
    logging_msg('handle the file; %s' % yymm)
    with open(path_to_csv_file, 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        time_col = headers.index('time')
        vid_col = headers.index('vehicle-id')
        did_col = headers.index('driver-id')
        long_col = headers.index('longitude')
        lat_col = headers.index('latitude')
        with open('%s/log-%s.csv' % (logs_dir, yymm), 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            writer.writerow(['time', 'vid', 'did', 'ap-or-not', 'np-or-not'])
            #
            for row in reader:
                # NOTE(review): eval() runs arbitrary expressions found in
                # the file; acceptable only for trusted log data.
                longitude, latitude = eval(row[long_col]), eval(row[lat_col])
                in_airport = is_in_airport(longitude, latitude)
                in_night_safari = is_in_night_safari(longitude, latitude)
                writer.writerow([row[time_col], row[vid_col], row[did_col],
                                 in_airport, in_night_safari])
    print('end the file; %s' % yymm)
    logging_msg('end the file; %s' % yymm)
def process_file(fn):
    """Write the rows logged after midnight of the month's final day.

    ``fn`` is ``<prefix>-<yymm>.csv`` inside ``logs_dir``. The output file
    ``log-last-day-<yymm><dd>.csv`` (in ``log_last_day_dir``) receives the
    original header and each row whose ``time`` column exceeds
    ``time.mktime`` of 00:00 on the last day of that month.
    """
    _, yymm = fn[:-len('.csv')].split('-')
    begin_msg = 'handle the file; %s' % yymm
    print(begin_msg)
    logging_msg(begin_msg)
    y, m = int('20' + yymm[:2]), int(yymm[2:])
    # Step to the first day of the following month, then back one day.
    next_y = y + 1 if m == 12 else y
    next_m = 1 if m == 12 else m + 1
    final_day = datetime.datetime(next_y, next_m, 1, 0) - datetime.timedelta(days=1)
    dd = '%2d' % final_day.day
    cutoff = time.mktime(final_day.timetuple())
    with open('%s/%s' % (logs_dir, fn), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        id_time = headers.index('time')
        with open('%s/log-last-day-%s%s.csv' % (log_last_day_dir, yymm, dd), 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            writer.writerow(headers)
            for row in reader:
                # NOTE(review): eval() on file content -- trusted input only.
                logged_at = eval(row[id_time])
                if logged_at > cutoff:
                    writer.writerow(row)
    finish_msg = 'end the file; %s' % yymm
    print(finish_msg)
    logging_msg(finish_msg)
def process_file(fn):
    """Record airport / Night Safari crossing times for one monthly log file.

    ``fn`` must look like ``<prefix>-<yymm>.csv`` and lives in ``logs_dir``.
    Unless ``yymm`` is one of the hard-coded exceptions, the crossing state is
    first seeded from the previous month's ``log-last-day-<prev yymm>*.csv``
    file in ``log_last_day_dir``; the monthly log is then processed with
    ``record_crossing_time`` and the two resulting crossing-time mappings are
    pickled into ``logs_dir``.

    Raises:
        AssertionError: if no last-day file exists for the previous month.
    """
    _, yymm = fn[:-len('.csv')].split('-')
    #
    print('handle the file; %s' % yymm)
    logging_msg('handle the file; %s' % yymm)
    vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not = {}, {}
    vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not = {}, {}
    # These months are processed without seeding from the previous month
    # (presumably because the preceding month's log is unavailable -- confirm).
    if yymm not in ['0901', '1001', '1011']:
        y, m = int(yymm[:2]), int(yymm[2:])
        # The month before January is December of the previous year; a plain
        # ``m - 1`` would produce the non-existent month "00".
        if m == 1:
            prev_y, prev_m = (y - 1) % 100, 12
        else:
            prev_y, prev_m = y, m - 1
        prev_prefix = 'log-last-day-%02d%02d' % (prev_y, prev_m)
        prev_fn = None
        for temp_fn in get_all_files(log_last_day_dir, '', '.csv'):
            if temp_fn.startswith(prev_prefix):
                prev_fn = temp_fn
                break
        assert prev_fn, yymm
        path_to_last_day_csv_file = '%s/%s' % (log_last_day_dir, prev_fn)
        vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not, vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not = \
                        record_crossing_time(path_to_last_day_csv_file, vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not,
                                             vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not)
    path_to_csv_file = '%s/%s' % (logs_dir, fn)
    vehicle_ap_crossing_time_from_out_to_in, _, vehicle_ns_crossing_time_from_out_to_in, _ = \
            record_crossing_time(path_to_csv_file, vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not,
                                 vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not)
    #
    save_pickle_file('%s/ap-crossing-time-%s.pkl' % (logs_dir, yymm),
                     vehicle_ap_crossing_time_from_out_to_in)
    save_pickle_file('%s/ns-crossing-time-%s.pkl' % (logs_dir, yymm),
                     vehicle_ns_crossing_time_from_out_to_in)
    print('end the file; %s' % yymm)
    logging_msg('end the file; %s' % yymm)
def _sum_trips_by_mode(trips, mode_label, trip_modes, dur_label, fare_label):
    """Return ``[mode, trip count, total duration, total fare]`` per trip mode.

    Every mode in ``trip_modes`` gets a row (all zeros when ``trips`` holds no
    trip of that mode) and rows come back in ``trip_modes`` order. Rows are
    looked up by mode value rather than by list position, so group keys that
    arrive as floats or that do not equal their list index still land in the
    right row. A mode present in ``trips`` but absent from ``trip_modes``
    raises ``KeyError``.
    """
    totals = dict((tm, [tm, 0, 0, 0]) for tm in trip_modes)
    grouped = trips.groupby(mode_label)
    per_mode = (grouped[fare_label].count(),
                grouped[dur_label].sum(),
                grouped[fare_label].sum())
    for slot, series in enumerate(per_mode, 1):
        for tm, value in zip(series.index, series.values):
            totals[tm][slot] += value
    return [totals[tm] for tm in trip_modes]


def process_files(yymm):
    """Write hourly per-trip-mode totals for month ``yymm``.

    Reads ``<trip_prefix><yymm>.csv`` from ``trips_dir`` and, for every hour
    of the month, selects the trips whose ``start-time`` falls inside that
    hour (bounds come from ``time.mktime``, i.e. local time). For both the
    airport and the Night Safari trip-mode columns it passes the per-mode
    trip count, total duration and total fare to ``save_as_csv``.
    """
    print('handle the file; %s' % yymm)
    logging_msg('handle the file; %s' % yymm)
    trip_df = pd.read_csv('%s/%s%s.csv' % (trips_dir, trip_prefix, yymm))
    #
    yyyy, mm = 2000 + int(yymm[:2]), int(yymm[2:])
    cur_day_time = datetime.datetime(yyyy, mm, 1, 0)
    if mm == 12:
        next_yyyy, next_mm = yyyy + 1, 1
    else:
        next_yyyy, next_mm = yyyy, mm + 1
    last_day_time = datetime.datetime(next_yyyy, next_mm, 1, 0)
    #
    st_label = 'start-time'
    ap_tm_label, ns_tm_label = 'ap-trip-mode', 'ns-trip-mode'
    dur_label, fare_label = 'duration', 'fare'
    #
    ap_tm = [DInAP_PInAP, DInAP_POutAP, DOutAP_PInAP, DOutAP_POutAP]
    ns_tm = [DInNS_PInNS, DInNS_POutNS, DOutNS_PInNS, DOutNS_POutNS]
    #
    one_hour = datetime.timedelta(hours=1)
    while cur_day_time < last_day_time:
        next_day_time = cur_day_time + one_hour
        st_timestamp = time.mktime(cur_day_time.timetuple())
        et_timestamp = time.mktime(next_day_time.timetuple())
        dd, hh = cur_day_time.day, cur_day_time.hour
        #
        filtered_trip = trip_df[(st_timestamp <= trip_df[st_label]) & (trip_df[st_label] < et_timestamp)]
        #
        save_as_csv(ap_fn, yymm, dd, hh,
                    _sum_trips_by_mode(filtered_trip, ap_tm_label, ap_tm, dur_label, fare_label))
        save_as_csv(ns_fn, yymm, dd, hh,
                    _sum_trips_by_mode(filtered_trip, ns_tm_label, ns_tm, dur_label, fare_label))
        #
        cur_day_time = next_day_time
    # The original logged 'handle the file' again here; report completion.
    print('end the file; %s' % yymm)
    logging_msg('end the file; %s' % yymm)
Esempio n. 7
0
def process_file(fn):
    """Label every merged trip with its airport and Night Safari trip modes.

    ``fn`` must look like ``<prefix>-<yymm>.csv`` and is read from
    ``merged_trip_dir``; the result is ``whole-trip-<yymm>.csv`` in
    ``trips_dir``. A trip's modes combine where the same vehicle's previous
    trip ended with where this trip starts. For a vehicle's first trip in the
    file the previous trip is assumed to have ended outside both zones at the
    current trip's start time.

    Raises:
        AssertionError: if a Night Safari location is neither ``IN_NS`` nor
            ``OUT_NS``.
    """
    _, yymm = fn[:-len('.csv')].split('-')
    print('handle the file; %s' % yymm)
    logging_msg('handle the file; %s' % yymm)
    # Keyed by (previous trip ended inside the airport, this trip starts
    # inside the airport); a terminal number of -1 means outside.
    ap_mode_of = {
        (True, True): DInAP_PInAP,
        (True, False): DInAP_POutAP,
        (False, True): DOutAP_PInAP,
        (False, False): DOutAP_POutAP,
    }
    with open('%s/%s' % (merged_trip_dir, fn), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        #
        col = {}
        for name in ('trip-id', 'vehicle-id', 'driver-id', 'start-time', 'end-time',
                     'duration', 'fare', 'start-long', 'start-lat', 'end-long', 'end-lat'):
            col[name] = headers.index(name)
        #
        last_trip_of = {}
        with open('%s/whole-trip-%s.csv' % (trips_dir, yymm), 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            writer.writerow(['tid', 'vid', 'did', 'start-time', 'end-time', 'duration', 'fare',
                             'ap-trip-mode', 'ns-trip-mode', 'prev-trip-end-time'])
            for row in reader:
                vid = row[col['vehicle-id']]
                # NOTE(review): eval() on file content -- trusted input only.
                start_time = eval(row[col['start-time']])
                end_time = eval(row[col['end-time']])
                s_long, s_lat = eval(row[col['start-long']]), eval(row[col['start-lat']])
                e_long, e_lat = eval(row[col['end-long']]), eval(row[col['end-lat']])
                #
                start_terminal = check_terminal_num(s_long, s_lat)
                end_terminal = check_terminal_num(e_long, e_lat)
                start_ns = is_in_night_safari(s_long, s_lat)
                end_ns = is_in_night_safari(e_long, e_lat)
                # ASSUMPTION
                # A vehicle's first trip of the month is treated as if its
                # previous trip ended outside the airport (-1) and outside
                # the Night Safari, at this trip's start time.
                prev_terminal, prev_ns, prev_end_time = last_trip_of.setdefault(
                    vid, (-1, OUT_NS, start_time))
                ap_trip_mode = ap_mode_of[(prev_terminal != -1, start_terminal != -1)]
                #
                if prev_ns == IN_NS:
                    if start_ns == IN_NS:
                        ns_trip_mode = DInNS_PInNS
                    elif start_ns == OUT_NS:
                        ns_trip_mode = DInNS_POutNS
                    else:
                        assert False
                elif prev_ns == OUT_NS:
                    if start_ns == IN_NS:
                        ns_trip_mode = DOutNS_PInNS
                    elif start_ns == OUT_NS:
                        ns_trip_mode = DOutNS_POutNS
                    else:
                        assert False
                else:
                    assert False

                writer.writerow([row[col['trip-id']], vid, row[col['driver-id']],
                                 start_time, end_time,
                                 row[col['duration']], row[col['fare']],
                                 ap_trip_mode, ns_trip_mode, prev_end_time])
                #
                last_trip_of[vid] = (end_terminal, end_ns, end_time)
    print('end the file; %s' % yymm)
    logging_msg('end the file; %s' % yymm)
def run():
    """Submit ``process_file`` for each ``airport-trip-*.csv`` file.

    File names come from ``get_all_files`` on ``airport_trips_dir``. A failure
    while submitting is logged with its traceback and re-raised; the number of
    submitted tasks is handed to ``end_multiprocessor``.
    """
    csv_files = get_all_files(airport_trips_dir, 'airport-trip-', '.csv')
    init_multiprocessor()
    submitted = 0
    for fn in csv_files:
        try:
            put_task(process_file, [fn])
        except Exception:
            logging_msg('Algorithm runtime exception (%s)\n' % (fn) + format_exc())
            raise
        else:
            submitted += 1
    end_multiprocessor(submitted)
def process_files(yymm):
    """Append per-driver productivity rows for month ``yymm``.

    For every driver id in the month's full-time-driver pickle:

    * general file: productive duration (scaled by ``SEC``), total fare and
      fare per unit of productive duration, written only when the duration is
      positive and the fare is non-zero;
    * airport files: for trips whose ``trip-mode`` is ``DInAP_PInAP`` and
      ``DOutAP_PInAP`` respectively, the summed queue time, duration, fare,
      operating cost and economic profit plus fare per (queue + duration),
      written only when queue + duration is positive and the fare is non-zero.

    All rows are appended to CSV files inside ``individual_detail_dir``.
    """
    print("handle the file; %s" % yymm)
    logging_msg("handle the file; %s" % yymm)
    #
    init_csv_files(yymm)
    #
    # NOTE(review): eval() on the pickled ids -- trusted input only.
    full_dids = [eval(x) for x in load_picle_file("%s/%s%s.pkl" % (full_shift_dir, monthly_full_did_prefix, yymm))]
    full_dids.sort()
    s_df = pd.read_csv("%s/%s%s.csv" % (full_shift_dir, sh_full_prefix, yymm))
    trip_df = pd.read_csv("%s/%s%s.csv" % (trips_dir, trip_prefix, yymm))
    ap_trip_df = pd.read_csv("%s/%s%s.csv" % (airport_trips_dir, ap_trip_op_ep_prefix, yymm))
    #
    yy, mm = int(yymm[:2]), int(yymm[2:])
    for did in full_dids:
        # General
        driver_shifts = s_df[s_df["driver-id"] == did]
        pro_dur = sum(driver_shifts["productive-duration"]) * SEC
        driver_trips = trip_df[trip_df["did"] == did]
        total_fare = sum(driver_trips["fare"])
        if pro_dur > 0 and total_fare != 0:
            general_row = [yy, mm, did, pro_dur, total_fare, total_fare / pro_dur]
            with open("%s/%s%s.csv" % (individual_detail_dir, general_prefix, yymm), "a") as w_csvfile:
                csv.writer(w_csvfile).writerow(general_row)
        #
        did_ap = ap_trip_df[ap_trip_df["did"] == did]
        prev_in_ap_trip = did_ap[did_ap["trip-mode"] == DInAP_PInAP]
        prev_out_ap_trip = did_ap[did_ap["trip-mode"] == DOutAP_PInAP]
        #
        if len(did_ap) == 0:
            continue
        # Same summary for both subsets; only the destination file differs.
        for ap_trips, prefix in ((prev_in_ap_trip, prev_in_ap_prefix),
                                 (prev_out_ap_trip, prev_out_ap_prefix)):
            ap_qu = sum(ap_trips["queue-time"])
            ap_dur = sum(ap_trips["duration"])
            ap_fare = sum(ap_trips["fare"])
            ap_op_cost = sum(ap_trips["op-cost"])
            ap_eco_profit = sum(ap_trips["economic"])
            if ap_qu + ap_dur > 0 and ap_fare != 0:
                ap_prod = ap_fare / (ap_qu + ap_dur)
                with open("%s/%s%s.csv" % (individual_detail_dir, prefix, yymm), "a") as w_csvfile:
                    csv.writer(w_csvfile).writerow(
                        [yy, mm, did, ap_qu, ap_dur, ap_fare, ap_prod, ap_op_cost, ap_eco_profit])
    print("End the file; %s" % yymm)
    logging_msg("End the file; %s" % yymm)
Esempio n. 10
0
def run():
    """Reset ``full_shift_dir`` and submit ``process_file`` per shift CSV.

    The directory is passed to ``remove_creat_dir`` first; every ``.csv`` file
    reported by ``get_all_files`` for ``shifts_dir`` is then queued with
    ``put_task``. Submission errors are logged with a traceback and re-raised.
    """
    remove_creat_dir(full_shift_dir)
    shift_files = get_all_files(shifts_dir, '', '.csv')
    init_multiprocessor()
    queued = 0
    for fn in shift_files:
        try:
            put_task(process_file, [fn])
        except Exception:
            logging_msg('Algorithm runtime exception (%s)\n' % (fn) + format_exc())
            raise
        queued = queued + 1
    end_multiprocessor(queued)
Esempio n. 11
0
def run():
    """Queue one ``process_file`` task per airport-trip CSV file.

    Files are those ``get_all_files`` returns for ``airport_trips_dir`` with
    the ``airport-trip-`` prefix. If queuing a task raises, the error and its
    traceback are logged before the exception propagates. The count of queued
    tasks is passed to ``end_multiprocessor``.
    """
    trip_files = get_all_files(airport_trips_dir, 'airport-trip-', '.csv')
    init_multiprocessor()
    num_jobs = 0
    for fn in trip_files:
        try:
            put_task(process_file, [fn])
        except Exception:
            failure = 'Algorithm runtime exception (%s)\n' % (fn) + format_exc()
            logging_msg(failure)
            raise
        num_jobs += 1
    end_multiprocessor(num_jobs)
def run():
    """Reset ``individual_detail_dir`` and submit ``process_files`` per month.

    Months run from ``0901`` to ``1012``; ``0912`` and ``1010`` are skipped.
    Submission errors are logged with a traceback and re-raised, and the
    number of submitted tasks is passed to ``end_multiprocessor``.
    """
    remove_creat_dir(individual_detail_dir)
    init_multiprocessor()
    skipped_months = ["0912", "1010"]
    all_months = ["%02d%02d" % (y, m) for y in range(9, 11) for m in range(1, 13)]
    submitted = 0
    for yymm in all_months:
        if yymm in skipped_months:
            continue
        try:
            put_task(process_files, [yymm])
        except Exception:
            logging_msg("Algorithm runtime exception (%s)\n" % (yymm) + format_exc())
            raise
        submitted += 1
    end_multiprocessor(submitted)
Esempio n. 13
0
def process_file(fn):
    """Write hourly trip counts and fare totals per trip mode for one month.

    ``fn`` must look like ``<a>-<b>-<yymm>.csv`` and is read from
    ``trips_dir``. For each hour of the month (bounds come from
    ``time.mktime``, i.e. local time) the trips whose ``start-time`` falls in
    that hour are grouped by ``trip-mode``; one row per mode present in that
    hour is written to ``hourly-summary-<yymm>.csv`` in ``hourly_summary``.
    Modes with no trip in an hour produce no row.
    """
    _, _, yymm = fn[:-len('.csv')].split('-')
    print('handle the file; %s' % yymm)
    logging_msg('handle the file; %s' % yymm)
    new_fn = '%s/hourly-summary-%s.csv' % (hourly_summary, yymm)
    #
    trip_df = pd.read_csv('%s/%s' % (trips_dir, fn))

    yyyy, mm = 2000 + int(yymm[:2]), int(yymm[2:])
    cur_datetime = datetime.datetime(yyyy, mm, 1, 0)
    if mm == 12:
        next_yyyy, next_mm = yyyy + 1, 1
    else:
        next_yyyy, next_mm = yyyy, mm + 1
    last_day_time = datetime.datetime(next_yyyy, next_mm, 1, 0)
    #
    st_label = 'start-time'
    fare_label = 'fare'
    tms = [DInAP_PInAP, DInAP_POutAP, DOutAP_PInAP, DOutAP_POutAP]
    yy_text, mm_text = yymm[:2], yymm[2:]
    one_hour = datetime.timedelta(hours=1)
    # The file is opened once for the whole month instead of once per hour.
    with open(new_fn, 'wt') as w_csvfile:
        writer = csv.writer(w_csvfile)
        writer.writerow(['yy', 'mm', 'dow', 'hh', 'trip-mode', 'total-num', 'total-fare'])
        while cur_datetime < last_day_time:
            next_datetime = cur_datetime + one_hour
            cur_timestamp = time.mktime(cur_datetime.timetuple())
            next_timestamp = time.mktime(next_datetime.timetuple())
            filtered_trip = trip_df[(cur_timestamp <= trip_df[st_label])
                                    & (trip_df[st_label] < next_timestamp)]
            #
            fares_by_mode = filtered_trip.groupby('trip-mode', sort=True)[fare_label]
            total_nums, total_fares = fares_by_mode.count(), fares_by_mode.sum()
            dow, hh = cur_datetime.strftime("%a"), cur_datetime.hour
            # Look every mode up by its group key: zipping ``tms`` with the
            # grouped values positionally mislabels rows whenever a mode has
            # no trip in this hour.
            for tm in tms:
                if tm in total_nums.index:
                    writer.writerow([yy_text, mm_text, dow, hh, tm,
                                     total_nums.loc[tm], total_fares.loc[tm]])
            cur_datetime = next_datetime
    print('end the file; %s' % yymm)
    logging_msg('end the file; %s' % yymm)
Esempio n. 14
0
def process_files(yymm):
    """Compare economic profit of all trips with that of greedy-action trips.

    Loads ``(Qsa_value, state_action_fare_dur)`` from the month's q-value
    pickle in ``for_learning_dir`` and derives, for every
    (day of week, time slot, location) state, the action with the larger
    Q-value (``IN_AP`` on ties). Each trip in
    ``diff-pin-eco-extreme-drivers-trip-<yymm>.csv`` gets an economic profit
    of fare minus an opportunity cost based on the alternative action's
    fare / duration ratio. Profits are summed over all trips and over the
    trips whose start location equals the greedy action; the list
    ``[whole_rev, whole_count, sub_rev, sub_count]`` is pickled to
    ``comparision-<yymm>.pkl`` in ``for_full_driver_dir``.
    """
    Qsa_value, state_action_fare_dur = load_picle_file('%s/ALPHA-0.10-GAMMA-0.50/ALPHA-0.10-GAMMA-0.50-q-value-fare-dur-%s.pkl'%(for_learning_dir,yymm))
    argmax_as = {}
    for s1 in DAY_OF_WEEK:
        for s2 in TIME_SLOTS:
            for s3 in [IN_AP, OUT_AP]:
                if Qsa_value[(s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3, OUT_AP)]:
                    argmax_as[(s1, s2, s3)] = IN_AP
                else:
                    argmax_as[(s1, s2, s3)] = OUT_AP
    #
    whole_rev, sub_rev = 0, 0
    whole_count, sub_count = 0, 0
    with open('%s/diff-pin-eco-extreme-drivers-trip-%s.csv' % (for_full_driver_dir, yymm), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        prev_end_time_col = headers.index('prev-trip-end-time')
        prev_end_loc_col = headers.index('prev-trip-end-location')
        start_time_col = headers.index('start-time')
        start_loc_col = headers.index('start-location')
        dur_col = headers.index('duration')
        fare_col = headers.index('fare')
        for row in reader:
            # NOTE(review): eval() on file content -- trusted input only.
            prev_tetime = eval(row[prev_end_time_col])
            stime = eval(row[start_time_col])
            setup_time = stime - prev_tetime
            #
            prev_end_dt = datetime.datetime.fromtimestamp(prev_tetime)
            s1, s2 = prev_end_dt.strftime("%a"), prev_end_dt.hour
            s3 = row[prev_end_loc_col]
            #
            a = row[start_loc_col]
            #
            dur, fare = eval(row[dur_col]), eval(row[fare_col])
            alter_a = OUT_AP if a == IN_AP else IN_AP
            # (fare, duration) record of the action that was not taken.
            alter_fare_dur = state_action_fare_dur[(s1, s2, s3, alter_a)]
            if alter_fare_dur[1] == 0:
                op_cost = 0
            else:
                op_cost = (setup_time + dur) * alter_fare_dur[0] / alter_fare_dur[1]
            economic_profit = fare - op_cost
            #
            whole_rev += economic_profit
            whole_count += 1
            if argmax_as[(s1, s2, s3)] == a:
                sub_rev += economic_profit
                sub_count += 1
            # whole_count doubles as the progress counter.
            if whole_count % MOD_STAN == 0:
                print('%s, %d' % (yymm, whole_count))
                logging_msg('%s, %d' % (yymm, whole_count))
    save_pickle_file('%s/comparision-%s.pkl'%(for_full_driver_dir, yymm), [whole_rev, whole_count, sub_rev, sub_count])
Esempio n. 15
0
def run():
    """Recreate ``individual_detail_dir`` and queue ``process_files`` monthly.

    One task is queued for every month from ``0901`` through ``1012`` except
    ``0912`` and ``1010``. An exception raised while queuing is logged together
    with its traceback and then propagated; ``end_multiprocessor`` receives the
    number of queued tasks.
    """
    remove_creat_dir(individual_detail_dir)
    init_multiprocessor()
    num_queued = 0
    for year in range(9, 11):
        for month in range(1, 13):
            yymm = '%02d%02d' % (year, month)
            if yymm == '0912' or yymm == '1010':
                continue
            try:
                put_task(process_files, [yymm])
            except Exception:
                logging_msg('Algorithm runtime exception (%s)\n' % (yymm) +
                            format_exc())
                raise
            num_queued += 1
    end_multiprocessor(num_queued)
Esempio n. 16
0
def process_file(fn):
    """Pair each trip with where and when the vehicle's previous trip ended.

    ``fn`` must look like ``<prefix>-<yymm>.csv`` and is read from
    ``merged_trip_dir``; the result is ``whole-trip-<yymm>.csv`` in
    ``for_learning_dir``. Start and end locations are whatever
    ``is_in_airport`` returns for the trip's coordinates. For a vehicle's
    first trip in the file the previous trip is assumed to have ended outside
    the airport (``OUT_AP``) at the current trip's start time.
    """
    _, yymm = fn[:-len('.csv')].split('-')
    print('handle the file; %s' % yymm)
    logging_msg('handle the file; %s' % yymm)
    with open('%s/%s' % (merged_trip_dir, fn), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        #
        col = {}
        for name in ('vehicle-id', 'start-time', 'end-time', 'duration', 'fare',
                     'start-long', 'start-lat', 'end-long', 'end-lat'):
            col[name] = headers.index(name)
        #
        last_trip_of = {}
        with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm), 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            writer.writerow(['prev-trip-end-time', 'prev-trip-end-location',
                             'start-time', 'start-location',
                             'end-time', 'end-location',
                             'duration', 'fare'])
            for row in reader:
                vid = row[col['vehicle-id']]
                # NOTE(review): eval() on file content -- trusted input only.
                start_time = eval(row[col['start-time']])
                end_time = eval(row[col['end-time']])
                s_long, s_lat = eval(row[col['start-long']]), eval(row[col['start-lat']])
                e_long, e_lat = eval(row[col['end-long']]), eval(row[col['end-lat']])
                s_location = is_in_airport(s_long, s_lat)
                e_location = is_in_airport(e_long, e_lat)
                # ASSUMPTION
                # A vehicle's first trip of the month is treated as if its
                # previous trip ended outside the airport at this trip's
                # start time.
                prev_end_location, prev_end_time = last_trip_of.setdefault(
                    vid, (OUT_AP, start_time))
                #
                writer.writerow([prev_end_time, prev_end_location,
                                 start_time, s_location,
                                 end_time, e_location,
                                 row[col['duration']], row[col['fare']]])
                #
                last_trip_of[vid] = (e_location, end_time)
    print('end the file; %s' % yymm)
    logging_msg('end the file; %s' % yymm)
def process_file(fn):
    """Summarise one month of trips per hour and trip mode.

    ``fn`` (``<a>-<b>-<yymm>.csv`` inside ``trips_dir``) is loaded with
    pandas. For each hour of the month, trips whose ``start-time`` lies in
    ``[hour start, next hour start)`` (timestamps from ``time.mktime``, i.e.
    local time) are grouped by ``trip-mode``. For every mode that occurs in
    that hour, the number of trips and the summed fare are written to
    ``hourly-summary-<yymm>.csv`` in ``hourly_summary``; a mode with no trip
    in an hour gets no row.
    """
    _, _, yymm = fn[:-len('.csv')].split('-')
    print('handle the file; %s' % yymm)
    logging_msg('handle the file; %s' % yymm)
    new_fn = '%s/hourly-summary-%s.csv' % (hourly_summary, yymm)
    #
    trip_df = pd.read_csv('%s/%s' % (trips_dir, fn))

    yyyy, mm = 2000 + int(yymm[:2]), int(yymm[2:])
    if mm == 12:
        next_yyyy, next_mm = yyyy + 1, 1
    else:
        next_yyyy, next_mm = yyyy, mm + 1
    cur_datetime = datetime.datetime(yyyy, mm, 1, 0)
    last_day_time = datetime.datetime(next_yyyy, next_mm, 1, 0)
    #
    st_label = 'start-time'
    fare_label = 'fare'
    tms = [DInAP_PInAP, DInAP_POutAP, DOutAP_PInAP, DOutAP_POutAP]
    yy_part, mm_part = yymm[:2], yymm[2:]
    # Header and data rows go through a single handle; the original reopened
    # the file in append mode for every hour.
    with open(new_fn, 'wt') as w_csvfile:
        writer = csv.writer(w_csvfile)
        headers = ['yy', 'mm', 'dow', 'hh', 'trip-mode', 'total-num', 'total-fare']
        writer.writerow(headers)
        while cur_datetime < last_day_time:
            next_datetime = cur_datetime + datetime.timedelta(hours=1)
            cur_timestamp, next_timestamp = time.mktime(cur_datetime.timetuple()), time.mktime(next_datetime.timetuple())
            in_hour = (cur_timestamp <= trip_df[st_label]) & (trip_df[st_label] < next_timestamp)
            hourly_fares = trip_df[in_hour].groupby('trip-mode', sort=True)[fare_label]
            num_by_mode = hourly_fares.count()
            fare_by_mode = hourly_fares.sum()
            dow, hh = cur_datetime.strftime("%a"), cur_datetime.hour
            # Select values by group key. A positional zip of ``tms`` with
            # the grouped results shifts values onto the wrong trip mode as
            # soon as one mode is absent from the hour.
            present_modes = [tm for tm in tms if tm in num_by_mode.index]
            for tm in present_modes:
                writer.writerow([yy_part, mm_part, dow, hh, tm,
                                 num_by_mode.loc[tm], fare_by_mode.loc[tm]])
            cur_datetime = next_datetime
    print('end the file; %s' % yymm)
    logging_msg('end the file; %s' % yymm)
def process_file(path_to_csv_file):
    """Convert a raw log CSV into a compact log with two zone flags.

    The base name of ``path_to_csv_file`` must consist of three ``-``-separated
    parts with ``yymm`` in the middle. Each output row of ``log-<yymm>.csv``
    (written to ``logs_dir``) holds the original time, vehicle id and driver
    id followed by the values ``is_in_airport`` and ``is_in_night_safari``
    return for the row's longitude and latitude.
    """
    print(path_to_csv_file)
    ori_log_fn = path_to_csv_file.split('/')[-1]
    _, yymm, _ = ori_log_fn.split('-')
    print('handle the file; %s' % yymm)
    logging_msg('handle the file; %s' % yymm)
    with open(path_to_csv_file, 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        wanted = ('time', 'vehicle-id', 'driver-id', 'longitude', 'latitude')
        id_time, id_vid, id_did, index_long, index_lat = [headers.index(name) for name in wanted]
        with open('%s/log-%s.csv' % (logs_dir, yymm), 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            writer.writerow(['time', 'vid', 'did', 'ap-or-not', 'np-or-not'])
            #
            for row in reader:
                # NOTE(review): eval() executes the cell text; fine only for
                # trusted log files.
                position = (eval(row[index_long]), eval(row[index_lat]))
                ap_or_not = is_in_airport(*position)
                np_or_not = is_in_night_safari(*position)
                writer.writerow([row[id_time], row[id_vid], row[id_did], ap_or_not, np_or_not])
    print('end the file; %s' % yymm)
    logging_msg('end the file; %s' % yymm)
Esempio n. 19
0
def process_file(fn):
    """Compute the productive duration of every shift row of one month.

    ``fn`` must look like ``<a>-<b>-<c>-<yymm>.csv`` and is read from
    ``shifts_dir``. The productive duration of a row is the integer sum of
    its ``dur0`` and ``dur3`` .. ``dur10`` columns. Results go to
    ``shift-pro-dur-<yymm>.csv`` in ``shift_pro_dur_dir``; in addition a
    mapping from vehicle id to the set of driver ids seen with it is pickled
    to ``driver-vehicle-<yymm>.pkl`` in ``shifts_dir``.
    """
    _, _, _, yymm = fn[:-len('.csv')].split('-')
    print('handle the file; %s' % yymm)
    logging_msg('handle the file; %s' % yymm)
    #
    driver_vehicle = {}
    productive_state = ['dur%d' % code for code in (0, 3, 4, 5, 6, 7, 8, 9, 10)]
    with open('%s/%s' % (shifts_dir, fn), 'rt') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        # Column name -> position in the input row.
        hid = dict((name, pos) for pos, name in enumerate(headers))
        with open('%s/shift-pro-dur-%s.csv' % (shift_pro_dur_dir, yymm), 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            writer.writerow(['yy', 'mm', 'dd', 'hh', 'vid', 'did', 'pro-dur'])
            for row in reader:
                vid = row[hid['vehicle-id']]
                did = row[hid['driver-id']]
                productive_duration = 0
                for state in productive_state:
                    productive_duration += int(row[hid[state]])
                # Only the last two digits of the year are kept.
                out_row = [row[hid['year']][-2:], row[hid['month']],
                           row[hid['day']], row[hid['hour']],
                           vid, did, productive_duration]
                writer.writerow(out_row)
                if vid not in driver_vehicle:
                    driver_vehicle[vid] = set()
                driver_vehicle[vid].add(did)
    save_pickle_file('%s/driver-vehicle-%s.pkl' % (shifts_dir, yymm), driver_vehicle)
    print('end the file; %s' % yymm)
    logging_msg('end the file; %s' % yymm)
def _ns_hour_slots(begin_ts, end_ts):
    """Split the interval [begin_ts, end_ts] along local clock-hour boundaries.

    Both arguments are POSIX timestamps.  Returns a chronological list of
    ``((year, month, day, hour), seconds)`` pairs, one per clock hour the
    interval touches.  When ``end_ts`` does not fall in a later clock hour
    than ``begin_ts`` the list has exactly one entry, keyed by the hour of
    ``begin_ts``.
    """
    begin_dt = datetime.datetime.fromtimestamp(begin_ts)
    end_dt = datetime.datetime.fromtimestamp(end_ts)
    slot = datetime.datetime(begin_dt.year, begin_dt.month,
                             begin_dt.day, begin_dt.hour)
    final_slot = datetime.datetime(end_dt.year, end_dt.month,
                                   end_dt.day, end_dt.hour)
    pieces, cursor_ts = [], begin_ts
    # Compare whole slot starts (not just the hour-of-day) so that the walk
    # stops at the right day, and advance the cursor with every boundary so
    # the final remainder is measured from the last boundary crossed.
    while slot < final_slot:
        boundary = slot + datetime.timedelta(hours=1)
        boundary_ts = time.mktime(boundary.timetuple())
        pieces.append(((slot.year, slot.month, slot.day, slot.hour),
                       boundary_ts - cursor_ts))
        slot, cursor_ts = boundary, boundary_ts
    pieces.append(((slot.year, slot.month, slot.day, slot.hour),
                   end_ts - cursor_ts))
    return pieces


def process_files(yymm):
    """Aggregate Night Safari trip duration, fare and queue time per hour.

    Reads ``nightsafari_trips_dir/<ns_trip_prefix><yymm>.csv`` and writes
    ``ns_dur_fare_q_time_dir/<ns_dur_fare_q_time_prefix><yymm>.csv`` with
    one row for every clock hour from 2009-01-01 00:00 up to (excluding)
    2011-02-01 00:00.

    For each trip:
      * a trip that starts and ends in the same clock hour adds its
        ``duration`` and ``fare`` columns to that hour;
      * a trip crossing hour boundaries adds to every touched hour the
        seconds spent in it, plus the share ``fare * seconds / duration``;
      * the queue time (raised to ``Q_LIMIT_MIN`` when smaller) is laid out
        backwards from the trip start and split over the hours it touches.

    Raises:
        KeyError: when a trip or its queue touches an hour outside the
            pre-built range.
        ZeroDivisionError: when a trip crossing an hour boundary has a
            ``duration`` of 0.
    """
    old_time = time.time()
    print('handle the file; %s' % yymm)
    logging_msg('handle the file; %s' % yymm)
    # One zeroed accumulator per clock hour of the reporting window.
    begin_timestamp = datetime.datetime(2009, 1, 1, 0)
    last_timestamp = datetime.datetime(2011, 2, 1, 0)
    hourly_total, time_period_order = {}, []
    while begin_timestamp < last_timestamp:
        k = (begin_timestamp.year, begin_timestamp.month,
             begin_timestamp.day, begin_timestamp.hour)
        hourly_total[k] = [0 for _ in range(len([NS_DUR, NS_FARE, NS_QUEUE]))]
        time_period_order.append(k)
        begin_timestamp += datetime.timedelta(hours=1)
    #
    st_label, et_label, dur_label, fare_label = 'start-time', 'end-time', 'duration', 'fare'
    ns_qt_label = 'ns-queue-time'
    # Night Safari fare, duration and queue time
    with open('%s/%s%s.csv' % (nightsafari_trips_dir, ns_trip_prefix, yymm),
              'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        hid = {h: i for i, h in enumerate(headers)}
        for row in reader:
            # NOTE(review): eval() executes whatever text the CSV cell holds;
            # only safe while these files are produced by trusted code.
            st_ts, et_ts = eval(row[hid[st_label]]), eval(row[hid[et_label]])
            dur, fare = eval(row[hid[dur_label]]), eval(row[hid[fare_label]])
            ns_qt = eval(row[hid[ns_qt_label]])
            # Duration and fare
            trip_slots = _ns_hour_slots(st_ts, et_ts)
            if len(trip_slots) == 1:
                totals = hourly_total[trip_slots[0][0]]
                totals[NS_DUR] += dur
                totals[NS_FARE] += fare
            else:
                for slot_key, seconds in trip_slots:
                    totals = hourly_total[slot_key]
                    totals[NS_DUR] += seconds
                    # float() keeps the share from truncating to 0 under
                    # Python 2 integer division.
                    totals[NS_FARE] += fare * (seconds / float(dur))
            # Queue time
            if ns_qt < Q_LIMIT_MIN:
                ns_qt = Q_LIMIT_MIN
            queue_slots = _ns_hour_slots(st_ts - ns_qt, st_ts)
            if len(queue_slots) == 1:
                hourly_total[queue_slots[0][0]][NS_QUEUE] += ns_qt
            else:
                for slot_key, seconds in queue_slots:
                    hourly_total[slot_key][NS_QUEUE] += seconds
            # Progress message at most once every TIME_ALARM seconds.
            if (time.time() - old_time) > TIME_ALARM:
                old_time = time.time()
                print('handling; %s' % yymm)
                logging_msg('handling; %s' % yymm)
    with open('%s/%s%s.csv' % (ns_dur_fare_q_time_dir,
                               ns_dur_fare_q_time_prefix, yymm),
              'wt') as w_csvfile:
        writer = csv.writer(w_csvfile)
        header = [
            'yy', 'mm', 'dd', 'hh', 'ns-duration', 'ns-fare', 'ns-queue-time'
        ]
        writer.writerow(header)
        for yyyy, mm, dd, hh in time_period_order:
            ns_dur, ns_fare, ns_qt = hourly_total[(yyyy, mm, dd, hh)]
            writer.writerow([yyyy - 2000, mm, dd, hh, ns_dur, ns_fare, ns_qt])
    print('end the file; %s' % yymm)
    logging_msg('end the file; %s' % yymm)
def _ap_op_cost_per_sec(dt):
    """Return the ``op_costs`` entry for the clock hour containing ``dt``.

    When that hour has no entry, the entry of the preceding hour is used
    instead; a KeyError raised by that second lookup propagates.
    """
    try:
        return op_costs[(dt.year, dt.month, dt.day, dt.hour)]
    except KeyError:
        alternative_datetime = dt - datetime.timedelta(hours=1)
        return op_costs[(alternative_datetime.year, alternative_datetime.month,
                         alternative_datetime.day, alternative_datetime.hour)]


def process_file(fn):
    """Add opportunity cost and economic profit to one airport-trip CSV.

    ``yymm`` is the third '-'-separated field of ``fn`` (without '.csv').
    Reads ``airport_trips_dir/fn`` and writes
    ``airport_trips_dir/ap-trip-op-ep-<yymm>.csv``.

    Per row:
      * the queue time ``start-time - join-queue-time`` is clamped into
        ``[Q_LIMIT_MIN, Q_LIMIT_MAX]`` and the join-queue time is rewritten
        as ``start-time - queue time``;
      * ``op-cost`` is the queue seconds plus the trip ``duration`` seconds,
        each priced with the per-second rate that ``op_costs`` holds for the
        clock hour they fall in (split proportionally when the queue or the
        trip straddles an hour boundary);
      * ``economic`` is ``fare - op-cost``.

    The output row also carries the year, month, day and hour of the trip
    start.
    """
    _, _, yymm = fn[:-len('.csv')].split('-')
    print('handle the file; %s' % yymm)
    logging_msg('handle the file; %s' % yymm)
    with open('%s/%s' % (airport_trips_dir, fn), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)

        id_tid, id_vid, id_did = headers.index('tid'), headers.index('vid'), headers.index('did')
        id_st, id_et, id_dur = headers.index('start-time'), headers.index('end-time'), headers.index('duration')
        id_fare = headers.index('fare')
        id_tm, id_pt_et = headers.index('trip-mode'), headers.index('prev-trip-end-time')
        id_jqt = headers.index('join-queue-time')

        with open('%s/ap-trip-op-ep-%s.csv' % (airport_trips_dir, yymm), 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            new_headers = ['tid', 'vid', 'did', 'start-time', 'end-time', 'duration', 'fare', 'trip-mode', 'prev-trip-end-time', 'join-queue-time', 'queue-time',
                           'op-cost', 'economic', 'yy', 'mm', 'dd', 'hh']
            writer.writerow(new_headers)
            for row in reader:
                # NOTE(review): eval() executes whatever text the CSV cell
                # holds; only safe while these files come from trusted code.
                jqt, st, et = eval(row[id_jqt]), eval(row[id_st]), eval(row[id_et])
                dur, fare = eval(row[id_dur]), eval(row[id_fare])
                # Clamp the queue time into [Q_LIMIT_MIN, Q_LIMIT_MAX].
                if st - jqt < Q_LIMIT_MIN:
                    qt = Q_LIMIT_MIN
                elif Q_LIMIT_MAX < st - jqt:
                    qt = Q_LIMIT_MAX
                else:
                    qt = st - jqt
                modi_jqt = st - qt
                jqt_datetime = datetime.datetime.fromtimestamp(modi_jqt)
                st_datetime = datetime.datetime.fromtimestamp(st)
                et_datetime = datetime.datetime.fromtimestamp(et)
                op_cost = 0
                st_yyyy, st_mm, st_dd, st_hh = st_datetime.year, st_datetime.month, st_datetime.day, st_datetime.hour
                # --- cost of the time spent queueing ---
                if jqt_datetime.hour == st_datetime.hour:
                    op_cost += qt * _ap_op_cost_per_sec(st_datetime)
                else:
                    # tp is the start of the hour in which the trip begins;
                    # p_jqt_st is the share of the queue spent before it.
                    tp = datetime.datetime(st_datetime.year, st_datetime.month, st_datetime.day, st_datetime.hour)
                    tp_timestamp = time.mktime(tp.timetuple())
                    p_jqt_st = (tp_timestamp - modi_jqt) / qt
                    prev_dt = st_datetime - datetime.timedelta(hours=1)
                    op_cost += _ap_op_cost_per_sec(prev_dt) * qt * p_jqt_st
                    op_cost += _ap_op_cost_per_sec(st_datetime) * qt * (1 - p_jqt_st)
                # --- cost of the time spent on the trip itself ---
                if st_datetime.hour == et_datetime.hour:
                    op_cost += dur * _ap_op_cost_per_sec(st_datetime)
                else:
                    # This part does not handle trips lasting more than an hour
                    tp = datetime.datetime(et_datetime.year, et_datetime.month, et_datetime.day, et_datetime.hour)
                    tp_timestamp = time.mktime(tp.timetuple())
                    p_st_et = (tp_timestamp - st) / dur
                    next_dt = st_datetime + datetime.timedelta(hours=1)
                    # The trip seconds (dur), not the queue seconds, are
                    # what is being split across the two hours here.
                    op_cost += _ap_op_cost_per_sec(st_datetime) * dur * p_st_et
                    op_cost += _ap_op_cost_per_sec(next_dt) * dur * (1 - p_st_et)
                economic_profit = fare - op_cost
                #
                writer.writerow([row[id_tid], row[id_vid], row[id_did],
                                st, et, dur, fare,
                                row[id_tm], row[id_pt_et],
                                modi_jqt, qt,
                                op_cost, economic_profit,
                                st_yyyy, st_mm, st_dd, st_hh])
    print('end the file; %s' % yymm)
    logging_msg('end the file; %s' % yymm)
Esempio n. 22
0
def process_file(ALPHA, GAMMA, ALPHA_GAMMA_dir, yymm):
    """Run one month of Q-value updates over the whole-trip CSV.

    The Q table and the per-(state, action) ``[fare sum, duration sum]``
    table are loaded from the previous month's pickle in
    ``ALPHA_GAMMA_dir`` (or zero-initialised for '0901'), updated with every
    usable row of ``for_learning_dir/whole-trip-<yymm>.csv`` and pickled
    under this month's name, both every ``MOD_STAN`` processed rows and at
    the end.

    State is (weekday abbreviation, hour, location) taken at the previous
    trip's end; the action is the row's start location; the next state is
    taken at this trip's end.  Rows whose set-up time
    (``start-time - prev-trip-end-time``) is negative or longer than
    ``2 * HOUR`` are skipped.  The update is

        Q(s, a) = (1 - ALPHA) * Q(s, a)
                  + ALPHA * (fare - op_cost + GAMMA * max_a' Q(s', a'))

    where ``op_cost`` prices the row's set-up plus trip time at the average
    fare rate accumulated so far for the alternative action in the same
    state (0 while nothing has been accumulated for it).

    Args:
        ALPHA: learning rate.
        GAMMA: discount factor.
        ALPHA_GAMMA_dir: directory holding the pickles for this
            (ALPHA, GAMMA) pair.
        yymm: four-character year/month tag of the month to process.
    """
    print('handle the file; %s' % yymm)
    logging_msg('handle the file; %s' % yymm)
    #
    print(yymm)
    pkl_fmt = '%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl'
    if yymm == '0901':
        prev_yymm = None
    elif yymm == '1001':
        # presumably 0912 is absent from the data set - TODO confirm
        prev_yymm = '0911'
    elif yymm == '1011':
        # presumably 1010 is absent from the data set - TODO confirm
        prev_yymm = '1009'
    else:
        yy, mm = int(yymm[:2]), int(yymm[2:])
        if mm == 1:
            # Any other January follows the previous year's December;
            # plain ``mm - 1`` would yield the non-existent month 00.
            prev_yymm = '%02d%02d' % (yy - 1, 12)
        else:
            prev_yymm = '%02d%02d' % (yy, mm - 1)
    #
    if not prev_yymm:
        # First month: start every (state, action) pair from zero.
        Qsa_value, state_action_fare_dur = {}, {}
        locations = [IN_AP, OUT_AP]
        actions = [IN_AP, OUT_AP]
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in locations:
                    for a in actions:
                        Qsa_value[(s1, s2, s3, a)] = 0
                        state_action_fare_dur[(s1, s2, s3, a)] = [0, 0]
    else:
        Qsa_value, state_action_fare_dur = load_picle_file(
            pkl_fmt % (ALPHA_GAMMA_dir, ALPHA, GAMMA, prev_yymm))
    #
    pkl_path = pkl_fmt % (ALPHA_GAMMA_dir, ALPHA, GAMMA, yymm)
    with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        id_prev_tetime, id_prev_teloc = headers.index('prev-trip-end-time'), headers.index('prev-trip-end-location')
        id_stime, id_sloc = headers.index('start-time'), headers.index('start-location')
        id_etime, id_eloc = headers.index('end-time'), headers.index('end-location')
        id_dur, id_fare = headers.index('duration'), headers.index('fare')
        #
        count = 0
        for row in reader:
            # NOTE(review): eval() executes whatever text the CSV cell holds;
            # only safe while these files come from trusted code.
            prev_tetime, stime, etime = eval(row[id_prev_tetime]), eval(row[id_stime]), eval(row[id_etime])
            setup_time = stime - prev_tetime
            #
            if setup_time < 0 or HOUR * 2 < setup_time:
                continue
            # Current state: where/when the previous trip ended.
            prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime)
            s1, s2 = prev_tetime_datetime.strftime("%a"), prev_tetime_datetime.hour
            s3 = row[id_prev_teloc]
            # Next state: where/when this trip ends.
            etime_datetime = datetime.datetime.fromtimestamp(etime)
            new_s1, new_s2 = etime_datetime.strftime("%a"), etime_datetime.hour
            new_s3 = row[id_eloc]
            #
            a = row[id_sloc]
            dur, fare = eval(row[id_dur]), eval(row[id_fare])
            #
            state_action_fare_dur[(s1, s2, s3, a)][0] += fare
            state_action_fare_dur[(s1, s2, s3, a)][1] += setup_time + dur
            # Best Q value reachable from the next state.
            future_max_q_value = max(Qsa_value[(new_s1, new_s2, new_s3, OUT_AP)],
                                     Qsa_value[(new_s1, new_s2, new_s3, IN_AP)])
            #
            alter_a = OUT_AP if a == IN_AP else IN_AP
            alter_fare, alter_dur = state_action_fare_dur[(s1, s2, s3, alter_a)]
            if alter_dur == 0:
                op_cost = 0
            else:
                op_cost = (setup_time + dur) * alter_fare / alter_dur
            qrs = fare - op_cost + GAMMA * future_max_q_value
            Qsa_value[(s1, s2, s3, a)] = \
                        (1 - ALPHA) * Qsa_value[(s1, s2, s3, a)] + ALPHA * qrs
            count += 1
            if count % MOD_STAN == 0:
                print('%s, %d' % (yymm, count))
                logging_msg('%s, %d' % (yymm, count))
                # Checkpoint so an interrupted run keeps partial results.
                save_pickle_file(pkl_path, [Qsa_value, state_action_fare_dur])
        save_pickle_file(pkl_path, [Qsa_value, state_action_fare_dur])
    print('end the file; %s' % yymm)
    logging_msg('end the file; %s' % yymm)
Esempio n. 23
0
def _gen_hour_slots(begin_ts, end_ts):
    """Split the interval [begin_ts, end_ts] along local clock-hour boundaries.

    Both arguments are POSIX timestamps.  Returns a chronological list of
    ``((year, month, day, hour), seconds)`` pairs, one per clock hour the
    interval touches.  When ``end_ts`` does not fall in a later clock hour
    than ``begin_ts`` the list has exactly one entry, keyed by the hour of
    ``begin_ts``.
    """
    begin_dt = datetime.datetime.fromtimestamp(begin_ts)
    end_dt = datetime.datetime.fromtimestamp(end_ts)
    slot = datetime.datetime(begin_dt.year, begin_dt.month,
                             begin_dt.day, begin_dt.hour)
    final_slot = datetime.datetime(end_dt.year, end_dt.month,
                                   end_dt.day, end_dt.hour)
    pieces, cursor_ts = [], begin_ts
    # Compare whole slot starts (not just the hour-of-day) and advance the
    # cursor with every boundary, so the final remainder is measured from
    # the last boundary crossed.
    while slot < final_slot:
        boundary = slot + datetime.timedelta(hours=1)
        boundary_ts = time.mktime(boundary.timetuple())
        pieces.append(((slot.year, slot.month, slot.day, slot.hour),
                       boundary_ts - cursor_ts))
        slot, cursor_ts = boundary, boundary_ts
    pieces.append(((slot.year, slot.month, slot.day, slot.hour),
                   end_ts - cursor_ts))
    return pieces


def process_files(yymm):
    """Aggregate productive duration and total fare per clock hour.

    Writes ``general_dur_fare_dir/<general_dur_fare_prefix><yymm>.csv`` with
    one row for every clock hour from 2009-01-01 00:00 up to (excluding)
    2011-02-01 00:00.

    * Duration comes from ``shift_pro_dur_dir/<shift_pro_dur_prefix><yymm>.csv``:
      each row's ``pro-dur`` is multiplied by 60 and added to the hour given
      by the row's ``dd``/``hh`` within the month ``yymm``.
    * Fare comes from ``trips_dir/<trip_prefix><yymm>.csv``: a trip that
      starts and ends in the same clock hour adds its whole fare to that
      hour; otherwise each touched hour receives
      ``fare * seconds_in_hour / duration``.

    Raises:
        KeyError: when a row refers to an hour outside the pre-built range.
        ZeroDivisionError: when a trip crossing an hour boundary has a
            ``duration`` of 0.
    """
    old_time = time.time()
    print('handle the file; %s' % yymm)
    logging_msg('handle the file; %s' % yymm)
    # One zeroed accumulator per clock hour of the reporting window.
    begin_timestamp = datetime.datetime(2009, 1, 1, 0)
    last_timestamp = datetime.datetime(2011, 2, 1, 0)
    hourly_total, time_period_order = {}, []
    while begin_timestamp < last_timestamp:
        k = (begin_timestamp.year, begin_timestamp.month,
             begin_timestamp.day, begin_timestamp.hour)
        hourly_total[k] = [0 for _ in range(len([GEN_DUR, GEN_FARE]))]
        time_period_order.append(k)
        begin_timestamp += datetime.timedelta(hours=1)
    #
    st_label, et_label, dur_label, fare_label = 'start-time', 'end-time', 'duration', 'fare'
    # Productive duration
    yyyy, mm = 2000 + int(yymm[:2]), int(yymm[2:])
    with open('%s/%s%s.csv' % (shift_pro_dur_dir, shift_pro_dur_prefix, yymm), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        hid = {h: i for i, h in enumerate(headers)}
        for row in reader:
            # NOTE(review): eval() executes whatever text the CSV cell holds;
            # only safe while these files come from trusted code.
            dd, hh = eval(row[hid['dd']]), eval(row[hid['hh']])
            hourly_total[(yyyy, mm, dd, hh)][GEN_DUR] += eval(row[hid['pro-dur']]) * 60  # unit change; Minute -> Second
            # Progress message at most once every TIME_ALARM seconds.
            if (time.time() - old_time) > TIME_ALARM:
                old_time = time.time()
                print('handling; %s' % yymm)
                logging_msg('handling; %s' % yymm)
    # Total fare
    with open('%s/%s%s.csv' % (trips_dir, trip_prefix, yymm), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        hid = {h: i for i, h in enumerate(headers)}
        for row in reader:
            st_ts, et_ts = eval(row[hid[st_label]]), eval(row[hid[et_label]])
            dur, fare = eval(row[hid[dur_label]]), eval(row[hid[fare_label]])
            #
            trip_slots = _gen_hour_slots(st_ts, et_ts)
            if len(trip_slots) == 1:
                hourly_total[trip_slots[0][0]][GEN_FARE] += fare
            else:
                for slot_key, seconds in trip_slots:
                    # float() keeps the share from truncating to 0 under
                    # Python 2 integer division.
                    hourly_total[slot_key][GEN_FARE] += fare * (seconds / float(dur))
            if (time.time() - old_time) > TIME_ALARM:
                old_time = time.time()
                print('handling; %s' % yymm)
                logging_msg('handling; %s' % yymm)
    with open('%s/%s%s.csv' % (general_dur_fare_dir, general_dur_fare_prefix, yymm), 'wt') as w_csvfile:
        writer = csv.writer(w_csvfile)
        header = ['yy', 'mm', 'dd', 'hh', 'gen-duration', 'gen-fare']
        writer.writerow(header)
        for yyyy, mm, dd, hh in time_period_order:
            gen_dur, gen_fare = hourly_total[(yyyy, mm, dd, hh)]
            writer.writerow([yyyy - 2000, mm, dd, hh, gen_dur, gen_fare])
    print('end the file; %s' % yymm)
    logging_msg('end the file; %s' % yymm)
def _ap_hour_slots(begin_ts, end_ts):
    """Split the interval [begin_ts, end_ts] along local clock-hour boundaries.

    Both arguments are POSIX timestamps.  Returns a chronological list of
    ``((year, month, day, hour), seconds)`` pairs, one per clock hour the
    interval touches.  When ``end_ts`` does not fall in a later clock hour
    than ``begin_ts`` the list has exactly one entry, keyed by the hour of
    ``begin_ts``.
    """
    begin_dt = datetime.datetime.fromtimestamp(begin_ts)
    end_dt = datetime.datetime.fromtimestamp(end_ts)
    slot = datetime.datetime(begin_dt.year, begin_dt.month,
                             begin_dt.day, begin_dt.hour)
    final_slot = datetime.datetime(end_dt.year, end_dt.month,
                                   end_dt.day, end_dt.hour)
    pieces, cursor_ts = [], begin_ts
    # Compare whole slot starts (not just the hour-of-day) and advance the
    # cursor with every boundary, so the final remainder is measured from
    # the last boundary crossed.
    while slot < final_slot:
        boundary = slot + datetime.timedelta(hours=1)
        boundary_ts = time.mktime(boundary.timetuple())
        pieces.append(((slot.year, slot.month, slot.day, slot.hour),
                       boundary_ts - cursor_ts))
        slot, cursor_ts = boundary, boundary_ts
    pieces.append(((slot.year, slot.month, slot.day, slot.hour),
                   end_ts - cursor_ts))
    return pieces


def process_files(yymm):
    """Aggregate airport trip duration, fare and queue time per clock hour.

    Reads ``airport_trips_dir/<ap_trip_prefix><yymm>.csv`` and writes
    ``ap_dur_fare_q_time_dir/<ap_dur_fare_q_time_prefix><yymm>.csv`` with
    one row for every clock hour from 2009-01-01 00:00 up to (excluding)
    2011-02-01 00:00.

    For each trip:
      * a trip that starts and ends in the same clock hour adds its
        ``duration`` and ``fare`` columns to that hour;
      * a trip crossing hour boundaries adds to every touched hour the
        seconds spent in it, plus the share ``fare * seconds / duration``;
      * the queue time (raised to ``Q_LIMIT_MIN`` when smaller) is laid out
        backwards from the trip start and split over the hours it touches.

    Raises:
        KeyError: when a trip or its queue touches an hour outside the
            pre-built range.
        ZeroDivisionError: when a trip crossing an hour boundary has a
            ``duration`` of 0.
    """
    old_time = time.time()
    print('handle the file; %s' % yymm)
    logging_msg('handle the file; %s' % yymm)
    # One zeroed accumulator per clock hour of the reporting window.
    begin_timestamp = datetime.datetime(2009, 1, 1, 0)
    last_timestamp = datetime.datetime(2011, 2, 1, 0)
    hourly_total, time_period_order = {}, []
    while begin_timestamp < last_timestamp:
        k = (begin_timestamp.year, begin_timestamp.month,
             begin_timestamp.day, begin_timestamp.hour)
        hourly_total[k] = [0 for _ in range(len([AP_DUR, AP_FARE, AP_QUEUE]))]
        time_period_order.append(k)
        begin_timestamp += datetime.timedelta(hours=1)
    #
    st_label, et_label, dur_label, fare_label = 'start-time', 'end-time', 'duration', 'fare'
    ap_qt_label = 'ap-queue-time'
    # Airport fare, duration and queue time
    with open('%s/%s%s.csv' % (airport_trips_dir, ap_trip_prefix, yymm), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        hid = {h: i for i, h in enumerate(headers)}
        for row in reader:
            # NOTE(review): eval() executes whatever text the CSV cell holds;
            # only safe while these files come from trusted code.
            st_ts, et_ts = eval(row[hid[st_label]]), eval(row[hid[et_label]])
            dur, fare = eval(row[hid[dur_label]]), eval(row[hid[fare_label]])
            ap_qt = eval(row[hid[ap_qt_label]])
            # Duration and fare
            trip_slots = _ap_hour_slots(st_ts, et_ts)
            if len(trip_slots) == 1:
                totals = hourly_total[trip_slots[0][0]]
                totals[AP_DUR] += dur
                totals[AP_FARE] += fare
            else:
                for slot_key, seconds in trip_slots:
                    totals = hourly_total[slot_key]
                    totals[AP_DUR] += seconds
                    # float() keeps the share from truncating to 0 under
                    # Python 2 integer division.
                    totals[AP_FARE] += fare * (seconds / float(dur))
            # Queue time
            if ap_qt < Q_LIMIT_MIN:
                ap_qt = Q_LIMIT_MIN
            queue_slots = _ap_hour_slots(st_ts - ap_qt, st_ts)
            if len(queue_slots) == 1:
                hourly_total[queue_slots[0][0]][AP_QUEUE] += ap_qt
            else:
                for slot_key, seconds in queue_slots:
                    hourly_total[slot_key][AP_QUEUE] += seconds
            # Progress message at most once every TIME_ALARM seconds.
            if (time.time() - old_time) > TIME_ALARM:
                old_time = time.time()
                print('handling; %s' % yymm)
                logging_msg('handling; %s' % yymm)
    with open('%s/%s%s.csv' % (ap_dur_fare_q_time_dir, ap_dur_fare_q_time_prefix, yymm), 'wt') as w_csvfile:
        writer = csv.writer(w_csvfile)
        header = ['yy', 'mm', 'dd', 'hh', 'ap-duration', 'ap-fare', 'ap-queue-time']
        writer.writerow(header)
        for yyyy, mm, dd, hh in time_period_order:
            ap_dur, ap_fare, ap_qt = hourly_total[(yyyy, mm, dd, hh)]
            writer.writerow([yyyy - 2000, mm, dd, hh, ap_dur, ap_fare, ap_qt])
    print('end the file; %s' % yymm)
    logging_msg('end the file; %s' % yymm)
def process_files(yymm, q_lerning_ended_dir):
    """Score learned policies against one month of trips.

    For every directory name ``dn`` in ``q_lerning_ended_dir`` that does not
    yet contain ``for_learning_dir/dn/results-<yymm>.pkl``, the Q table in
    ``for_learning_dir/dn/<dn>-q-value-fare-dur-<yymm>.pkl`` is loaded and
    reduced to a policy: per (weekday, time slot, location) state the action
    with the larger Q value, ``IN_AP`` winning ties.

    Every row of ``for_learning_dir/whole-trip-<yymm>.csv`` then contributes
    its economic profit (``fare`` minus the set-up plus trip time priced at
    the alternative action's accumulated fare rate) to the overall total
    and, for each policy whose choice equals the row's start location, to
    that policy's subtotal.  ``[whole_rev, whole_count, sub_rev, sub_count]``
    is pickled to each directory's results file every ``MOD_STAN`` rows and
    once more at the end.

    Returns immediately, without reading the trip file, when every
    directory already has its results file.

    Args:
        yymm: four-character year/month tag of the month to evaluate.
        q_lerning_ended_dir: iterable of directory names under
            ``for_learning_dir``.
    """
    candi_pkl_files = []
    for dn in q_lerning_ended_dir:
        if os.path.exists('%s/%s/results-%s.pkl' %
                          (for_learning_dir, dn, yymm)):
            continue
        candi_pkl_files.append('%s/%s/%s-q-value-fare-dur-%s.pkl' %
                               (for_learning_dir, dn, dn, yymm))
    if not candi_pkl_files:
        # Nothing left to evaluate; without any loaded pickle there is no
        # fare/duration table to price trips with, so stop here instead of
        # failing on the first trip row.
        return
    result_pkls = [
        os.path.dirname(pkl_path) + '/results-%s.pkl' % yymm
        for pkl_path in candi_pkl_files
    ]
    #
    list_argmax_as = []
    state_action_fare_dur = None
    for pkl_file_path in candi_pkl_files:
        # The fare/duration table of the last pickle loaded is the one used
        # for pricing below.
        Qsa_value, state_action_fare_dur = load_picle_file(pkl_file_path)
        argmax_as = {}
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in [IN_AP, OUT_AP]:
                    if Qsa_value[(s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3, OUT_AP)]:
                        argmax_as[(s1, s2, s3)] = IN_AP
                    else:
                        argmax_as[(s1, s2, s3)] = OUT_AP
        list_argmax_as.append(argmax_as)
    #
    whole_rev, whole_count = 0, 0
    list_sub_rev = [0] * len(candi_pkl_files)
    list_sub_count = [0] * len(candi_pkl_files)

    count = 0
    with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm),
              'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        id_prev_tetime, id_prev_teloc = headers.index(
            'prev-trip-end-time'), headers.index('prev-trip-end-location')
        id_stime, id_sloc = headers.index('start-time'), headers.index(
            'start-location')
        id_dur, id_fare = headers.index('duration'), headers.index('fare')
        for row in reader:
            # NOTE(review): eval() executes whatever text the CSV cell holds;
            # only safe while these files come from trusted code.
            prev_tetime, stime = eval(row[id_prev_tetime]), eval(row[id_stime])
            setup_time = stime - prev_tetime
            # State at the end of the previous trip.
            prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime)
            s1, s2 = prev_tetime_datetime.strftime(
                "%a"), prev_tetime_datetime.hour
            s3 = row[id_prev_teloc]
            # Action actually taken: where this trip started.
            a = row[id_sloc]
            #
            dur, fare = eval(row[id_dur]), eval(row[id_fare])
            alter_a = OUT_AP if a == IN_AP else IN_AP
            alter_fare, alter_dur = state_action_fare_dur[(s1, s2, s3, alter_a)]
            if alter_dur == 0:
                op_cost = 0
            else:
                op_cost = (setup_time + dur) * alter_fare / alter_dur
            economic_profit = fare - op_cost
            #
            whole_rev += economic_profit
            whole_count += 1

            for i, argmax_as in enumerate(list_argmax_as):
                if argmax_as[(s1, s2, s3)] == a:
                    list_sub_rev[i] += economic_profit
                    list_sub_count[i] += 1
            count += 1
            if count % MOD_STAN == 0:
                print('%s, %d' % (yymm, count))
                logging_msg('%s, %d' % (yymm, count))
                # Checkpoint the running totals for every policy.
                for i, result_fn in enumerate(result_pkls):
                    save_pickle_file(result_fn, [
                        whole_rev, whole_count, list_sub_rev[i],
                        list_sub_count[i]
                    ])
    for i, result_fn in enumerate(result_pkls):
        save_pickle_file(
            result_fn,
            [whole_rev, whole_count, list_sub_rev[i], list_sub_count[i]])
Esempio n. 26
0
def process_file(fn):
    _, _, yymm = fn[:-len('.csv')].split('-')
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    with open('%s/%s' % (airport_trips_dir, fn), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()

        id_tid, id_vid, id_did = headers.index('tid'), headers.index(
            'vid'), headers.index('did')
        id_st, id_et, id_dur = headers.index('start-time'), headers.index(
            'end-time'), headers.index('duration')
        id_fare = headers.index('fare')
        id_tm, id_pt_et = headers.index('trip-mode'), headers.index(
            'prev-trip-end-time')
        id_jqt, id_qt = headers.index('join-queue-time'), headers.index(
            'queue-time')

        with open('%s/ap-trip-op-ep-%s.csv' % (airport_trips_dir, yymm),
                  'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            new_headers = [
                'tid', 'vid', 'did', 'start-time', 'end-time', 'duration',
                'fare', 'trip-mode', 'prev-trip-end-time', 'join-queue-time',
                'queue-time', 'op-cost', 'economic', 'yy', 'mm', 'dd', 'hh'
            ]
            writer.writerow(new_headers)
            for row in reader:
                jqt, st, et = eval(row[id_jqt]), eval(row[id_st]), eval(
                    row[id_et])
                dur, fare = eval(row[id_dur]), eval(row[id_fare])
                if st - jqt < Q_LIMIT_MIN:
                    qt = Q_LIMIT_MIN
                elif Q_LIMIT_MAX < st - jqt:
                    qt = Q_LIMIT_MAX
                else:
                    qt = st - jqt
                modi_jqt = st - qt
                jqt_datetime = datetime.datetime.fromtimestamp(modi_jqt)
                st_datetime = datetime.datetime.fromtimestamp(st)
                et_datetime = datetime.datetime.fromtimestamp(et)
                op_cost = 0
                st_yyyy, st_mm, st_dd, st_hh = st_datetime.year, st_datetime.month, st_datetime.day, st_datetime.hour
                if jqt_datetime.hour == st_datetime.hour:
                    try:
                        op_cost_per_sec = op_costs[(st_yyyy, st_mm, st_dd,
                                                    st_hh)]
                    except KeyError:
                        alternative_datetime = st_datetime - datetime.timedelta(
                            hours=1)
                        a_yyyy, a_mm, a_dd, a_hh = \
                        alternative_datetime.year, alternative_datetime.month, alternative_datetime.day, alternative_datetime.hour
                        op_cost_per_sec = op_costs[(a_yyyy, a_mm, a_dd, a_hh)]
                    op_cost += qt * op_cost_per_sec
                else:
                    tp = datetime.datetime(st_datetime.year, st_datetime.month,
                                           st_datetime.day, st_datetime.hour)
                    tp_timestamp = time.mktime(tp.timetuple())
                    p_jqt_st = (tp_timestamp - modi_jqt) / qt
                    prev_dt = st_datetime - datetime.timedelta(hours=1)
                    try:
                        op_cost_per_sec = op_costs[(prev_dt.year,
                                                    prev_dt.month, prev_dt.day,
                                                    prev_dt.hour)]
                    except KeyError:
                        alternative_datetime = prev_dt - datetime.timedelta(
                            hours=1)
                        a_yyyy, a_mm, a_dd, a_hh = \
                        alternative_datetime.year, alternative_datetime.month, alternative_datetime.day, alternative_datetime.hour
                        op_cost_per_sec = op_costs[(a_yyyy, a_mm, a_dd, a_hh)]
                    op_cost += op_cost_per_sec * qt * p_jqt_st
                    #
                    try:
                        op_cost_per_sec = op_costs[(st_yyyy, st_mm, st_dd,
                                                    st_hh)]
                    except KeyError:
                        alternative_datetime = st_datetime - datetime.timedelta(
                            hours=1)
                        a_yyyy, a_mm, a_dd, a_hh = \
                        alternative_datetime.year, alternative_datetime.month, alternative_datetime.day, alternative_datetime.hour
                        op_cost_per_sec = op_costs[(a_yyyy, a_mm, a_dd, a_hh)]
                    op_cost += op_cost_per_sec * qt * (1 - p_jqt_st)
                if st_datetime.hour == et_datetime.hour:
                    try:
                        op_cost_per_sec = op_costs[(st_yyyy, st_mm, st_dd,
                                                    st_hh)]
                    except KeyError:
                        alternative_datetime = st_datetime - datetime.timedelta(
                            hours=1)
                        a_yyyy, a_mm, a_dd, a_hh = \
                        alternative_datetime.year, alternative_datetime.month, alternative_datetime.day, alternative_datetime.hour
                        op_cost_per_sec = op_costs[(a_yyyy, a_mm, a_dd, a_hh)]
                    op_cost += dur * op_cost_per_sec
                else:
                    # This part don't regards cases when duration is more than a hour
                    tp = datetime.datetime(et_datetime.year, et_datetime.month,
                                           et_datetime.day, et_datetime.hour)
                    tp_timestamp = time.mktime(tp.timetuple())
                    p_st_et = (tp_timestamp - st) / dur
                    next_dt = st_datetime + datetime.timedelta(hours=1)
                    try:
                        op_cost_per_sec = op_costs[(st_yyyy, st_mm, st_dd,
                                                    st_hh)]
                    except KeyError:
                        alternative_datetime = st_datetime - datetime.timedelta(
                            hours=1)
                        a_yyyy, a_mm, a_dd, a_hh = \
                        alternative_datetime.year, alternative_datetime.month, alternative_datetime.day, alternative_datetime.hour
                        op_cost_per_sec = op_costs[(a_yyyy, a_mm, a_dd, a_hh)]
                    op_cost += op_cost_per_sec * qt * p_st_et
                    #
                    try:
                        op_cost_per_sec = op_costs[(next_dt.year,
                                                    next_dt.month, next_dt.day,
                                                    next_dt.hour)]
                    except KeyError:
                        alternative_datetime = next_dt - datetime.timedelta(
                            hours=1)
                        a_yyyy, a_mm, a_dd, a_hh = \
                        alternative_datetime.year, alternative_datetime.month, alternative_datetime.day, alternative_datetime.hour
                        op_cost_per_sec = op_costs[(a_yyyy, a_mm, a_dd, a_hh)]
                    op_cost += op_cost_per_sec * qt * (1 - p_st_et)
                economic_profit = fare - op_cost
                #
                writer.writerow([
                    row[id_tid], row[id_vid], row[id_did], st, et, dur, fare,
                    row[id_tm], row[id_pt_et], modi_jqt, qt, op_cost,
                    economic_profit, st_yyyy, st_mm, st_dd, st_hh
                ])
    print 'end the file; %s' % yymm
    logging_msg('end the file; %s' % yymm)
def process_files(yymm, q_lerning_ended_dir):
    candi_pkl_files = []
    for dn in q_lerning_ended_dir:
        if os.path.exists('%s/%s/results-%s.pkl' % (for_learning_dir, dn, yymm)):
            continue
        candi_pkl_files.append('%s/%s/%s-q-value-fare-dur-%s.pkl' % (for_learning_dir, dn, dn, yymm))
    result_pkls = [os.path.dirname(pkl_path) + '/results-%s.pkl'% yymm for pkl_path in candi_pkl_files]
    #
    list_argmax_as = []
    state_action_fare_dur = None
    for pkl_file_path in candi_pkl_files:
        Qsa_value, state_action_fare_dur = load_picle_file(pkl_file_path)
        argmax_as = {}
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in [IN_AP, OUT_AP]:
                    argmax_as[(s1, s2, s3)] = IN_AP if Qsa_value[(s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3, OUT_AP)] else OUT_AP
        list_argmax_as.append(argmax_as)
    #
    whole_rev, whole_count = 0, 0
    list_sub_rev, list_sub_count = [0 for _ in xrange(len(candi_pkl_files))], [0 for _ in xrange(len(candi_pkl_files))]
    
    count = 0        
    with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        id_prev_tetime, id_prev_teloc = headers.index('prev-trip-end-time'), headers.index('prev-trip-end-location')
        id_stime, id_sloc = headers.index('start-time'), headers.index('start-location'),
        id_dur, id_fare = headers.index('duration'), headers.index('fare')
        for row in reader:
            prev_tetime, stime = eval(row[id_prev_tetime]), eval(row[id_stime]) 
            setup_time = stime - prev_tetime
            #
            prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime)
            s1, s2 = prev_tetime_datetime.strftime("%a"), prev_tetime_datetime.hour
            s3 = row[id_prev_teloc]
            #
            a = row[id_sloc]
            #
            dur, fare = eval(row[id_dur]), eval(row[id_fare])
            alter_a = OUT_AP if a == IN_AP else IN_AP
            if state_action_fare_dur[(s1, s2, s3, alter_a)][1] == 0:
                op_cost = 0
            else:
                op_cost = (setup_time + dur) * state_action_fare_dur[(s1, s2, s3, alter_a)][0] / state_action_fare_dur[(s1, s2, s3, alter_a)][1] 
            economic_profit = fare - op_cost
            #
            whole_rev += economic_profit
            whole_count += 1
            
            for i, argmax_as in enumerate(list_argmax_as):
                if argmax_as[(s1, s2, s3)] == a:
                    list_sub_rev[i] += economic_profit
                    list_sub_count[i] += 1
            count += 1
            if count % MOD_STAN == 0:
                print '%s, %d' % (yymm, count)
                logging_msg('%s, %d' % (yymm, count))
                for i in xrange(len(result_pkls)):
                    result_fn = result_pkls[i]
                    save_pickle_file(result_fn, [whole_rev, whole_count, list_sub_rev[i], list_sub_count[i]])
    for i in xrange(len(result_pkls)):
        result_fn = result_pkls[i]
        save_pickle_file(result_fn, [whole_rev, whole_count, list_sub_rev[i], list_sub_count[i]])
Esempio n. 28
0
def process_files(yymm):
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    #
    init_csv_files(yymm)
    #
    full_dids = sorted([
        eval(x) for x in load_picle_file('%s/%s%s.pkl' %
                                         (full_shift_dir,
                                          monthly_full_did_prefix, yymm))
    ])
    s_df = pd.read_csv('%s/%s%s.csv' % (full_shift_dir, sh_full_prefix, yymm))
    trip_df = pd.read_csv('%s/%s%s.csv' % (trips_dir, trip_prefix, yymm))
    ap_trip_df = pd.read_csv('%s/%s%s.csv' %
                             (airport_trips_dir, ap_trip_op_ep_prefix, yymm))
    #
    yy, mm = int(yymm[:2]), int(yymm[2:])
    for did in full_dids:
        # General
        did_sh = s_df[(s_df['driver-id'] == did)]
        pro_dur = sum(did_sh['productive-duration']) * SEC
        did_wt = trip_df[(trip_df['did'] == did)]
        total_fare = sum(did_wt['fare'])
        if pro_dur > 0 and total_fare != 0:
            total_prod = total_fare / pro_dur
            with open(
                    '%s/%s%s.csv' %
                (individual_detail_dir, general_prefix, yymm),
                    'a') as w_csvfile:
                writer = csv.writer(w_csvfile)
                writer.writerow([yy, mm, did, pro_dur, total_fare, total_prod])
        #
        did_ap = ap_trip_df[(ap_trip_df['did'] == did)]
        prev_in_ap_trip = did_ap[(did_ap['trip-mode'] == DInAP_PInAP)]
        prev_out_ap_trip = did_ap[(did_ap['trip-mode'] == DOutAP_PInAP)]
        #
        if len(did_ap) != 0:
            # prev in ap trip
            ap_qu, ap_dur = sum(prev_in_ap_trip['queue-time']), sum(
                prev_in_ap_trip['duration'])
            ap_fare = sum(prev_in_ap_trip['fare'])
            ap_op_cost, ap_eco_profit = sum(prev_in_ap_trip['op-cost']), sum(
                prev_in_ap_trip['economic'])
            if ap_qu + ap_dur > 0 and ap_fare != 0:
                ap_prod = ap_fare / (ap_qu + ap_dur)
                with open(
                        '%s/%s%s.csv' %
                    (individual_detail_dir, prev_in_ap_prefix, yymm),
                        'a') as w_csvfile:
                    writer = csv.writer(w_csvfile)
                    writer.writerow([
                        yy, mm, did, ap_qu, ap_dur, ap_fare, ap_prod,
                        ap_op_cost, ap_eco_profit
                    ])
            #
            # prev out ap trip
            ap_qu, ap_dur = sum(prev_out_ap_trip['queue-time']), sum(
                prev_out_ap_trip['duration'])
            ap_fare = sum(prev_out_ap_trip['fare'])
            ap_op_cost, ap_eco_profit = sum(prev_out_ap_trip['op-cost']), sum(
                prev_out_ap_trip['economic'])
            if ap_qu + ap_dur > 0 and ap_fare != 0:
                ap_prod = ap_fare / (ap_qu + ap_dur)
                with open(
                        '%s/%s%s.csv' %
                    (individual_detail_dir, prev_out_ap_prefix, yymm),
                        'a') as w_csvfile:
                    writer = csv.writer(w_csvfile)
                    writer.writerow([
                        yy, mm, did, ap_qu, ap_dur, ap_fare, ap_prod,
                        ap_op_cost, ap_eco_profit
                    ])
    print 'End the file; %s' % yymm
    logging_msg('End the file; %s' % yymm)
Esempio n. 29
0
def process_file(fn):
    _, _, yymm = fn[:-len('.csv')].split('-')
    print 'handle the file; %s' % yymm 
    logging_msg('handle the file; %s' % yymm)
    #
    ap_pkl_files = get_all_files(logs_dir, 'ap-crossing-time-', '.pkl')
    ap_pkl_file_path = None
    for pkl_fn in ap_pkl_files:
        _, _, _, pkl_yymm = pkl_fn[:-len('.pkl')].split('-')
        if pkl_yymm == yymm:
            ap_pkl_file_path = '%s/%s' % (logs_dir, pkl_fn)
            break
    else:
        assert False, yymm
    ap_crossing_times = load_picle_file(ap_pkl_file_path)
    #
    ns_pkl_files = get_all_files(logs_dir, 'ns-crossing-time-', '.pkl')
    ns_pkl_file_path = None
    for pkl_fn in ns_pkl_files:
        _, _, _, pkl_yymm = pkl_fn[:-len('.pkl')].split('-')
        if pkl_yymm == yymm:
            ns_pkl_file_path = '%s/%s' % (logs_dir, pkl_fn)
            break
    else:
        assert False, yymm
    ns_crossing_times = load_picle_file(ns_pkl_file_path)
    #
    init_csv_files(yymm)
    
    with open('%s/%s' % (trips_dir, fn), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        header_id = {h : i for i, h in enumerate(headers)}
        for row in reader:
            tid, did = row[header_id['tid']], row[header_id['did']]
            et, duration = row[header_id['end-time']], row[header_id['duration']]
            fare = row[header_id['fare']]
            #
            ap_tm, ns_tm = int(row[header_id['ap-trip-mode']]), int(row[header_id['ns-trip-mode']]) 
            vid, st, prev_tet = row[header_id['vid']], eval(row[header_id['start-time']]), eval(row[header_id['prev-trip-end-time']])
            #
            is_ap_trip, is_ns_trip = False, False 
            #
            if ap_tm == DInAP_PInAP:
                is_ap_trip = True
                ap_join_queue_time = prev_tet
            elif ap_tm == DOutAP_PInAP:
                is_ap_trip = True
                try:
                    i = bisect(ap_crossing_times[vid], st)
                except KeyError:
                    logging_msg('%s-tid-%s' % (yymm, row[header_id['tid']]))
                    continue
                ap_join_queue_time = ap_crossing_times[vid][i - 1] if i != 0 else ap_crossing_times[vid][0]
            if is_ap_trip:
                with open('%s/airport-trip-%s.csv' % (airport_trips_dir, yymm), 'a') as w_csvfile:
                    writer = csv.writer(w_csvfile)
                    ap_queue_time = st - ap_join_queue_time
                    new_row = [tid, vid, did, st, et, duration, fare, prev_tet,
                                ap_tm, ap_join_queue_time, ap_queue_time]
                    writer.writerow(new_row)
            #
            if ns_tm == DInNS_PInNS:
                is_ns_trip = True
                ns_join_queue_time = prev_tet
            elif ns_tm == DOutNS_PInNS:
                is_ns_trip = True
                try:
                    i = bisect(ns_crossing_times[vid], st)
                except KeyError:
                    logging_msg('%s-tid-%s' % (yymm, row[header_id['tid']]))
                    continue
                ns_join_queue_time = ns_crossing_times[vid][i - 1] if i != 0 else ns_crossing_times[vid][0]
            if is_ns_trip:
                with open('%s/nightsafari-trip-%s.csv' % (nightsafari_trips_dir, yymm), 'a') as w_csvfile:
                    writer = csv.writer(w_csvfile)
                    ns_queue_time = st - ns_join_queue_time
                    new_row = [tid, vid, did, st, et, duration, fare, prev_tet,
                                ns_tm, ns_join_queue_time, ns_queue_time]
                    writer.writerow(new_row)        
    print 'end the file; %s' % yymm 
    logging_msg('end the file; %s' % yymm)
def process_files(yymm):
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    trip_df = pd.read_csv('%s/%s%s.csv' % (trips_dir, trip_prefix, yymm))
    #
    yyyy, mm = 2000 + int(yymm[:2]), int(yymm[2:])
    dd, hh = 1, 0
    cur_day_time = datetime.datetime(yyyy, mm, dd, hh)
    if mm == 12:
        next_yyyy, next_mm = yyyy + 1, 1
    else:
        next_yyyy, next_mm = yyyy, mm + 1
    last_day_time = datetime.datetime(next_yyyy, next_mm, dd, hh)
    #
    st_label = 'start-time'
    ap_tm_lable, ns_tm_lable = 'ap-trip-mode', 'ns-trip-mode'
    dur_lable, fare_label = 'duration', 'fare'
    #
    ap_tm = [DInAP_PInAP, DInAP_POutAP, DOutAP_PInAP, DOutAP_POutAP]
    ns_tm = [DInNS_PInNS, DInNS_POutNS, DOutNS_PInNS, DOutNS_POutNS]
    #
    while cur_day_time != last_day_time:
        next_day_time = cur_day_time + datetime.timedelta(hours=1)
        st_timestamp, et_timestamp = time.mktime(
            cur_day_time.timetuple()), time.mktime(next_day_time.timetuple())
        #
        yyyy, mm, dd, hh = cur_day_time.year, cur_day_time.month, cur_day_time.day, cur_day_time.hour
        #
        filtered_trip = trip_df[(st_timestamp <= trip_df[st_label])
                                & (trip_df[st_label] < et_timestamp)]
        #
        gp_f_trip = filtered_trip.groupby([ap_tm_lable])
        tm_num_totalDuration_totalFare = [[tm, 0, 0, 0] for tm in ap_tm]
        tm_num_df = gp_f_trip.count()[fare_label].to_frame(
            'total_tm_num').reset_index()
        for tm, num in tm_num_df.values:
            tm_num_totalDuration_totalFare[tm][1] += num
        tm_dur_df = gp_f_trip.sum()[dur_lable].to_frame(
            'total_tm_dur').reset_index()
        for tm, dur in tm_dur_df.values:
            tm_num_totalDuration_totalFare[tm][2] += dur
        tm_fare_df = gp_f_trip.sum()[fare_label].to_frame(
            'total_tm_fare').reset_index()
        for tm, fare in tm_fare_df.values:
            tm_num_totalDuration_totalFare[tm][3] += fare
        save_as_csv(ap_fn, yymm, dd, hh, tm_num_totalDuration_totalFare)
        #
        gp_f_trip = filtered_trip.groupby([ns_tm_lable])
        tm_num_totalDuration_totalFare = [[tm, 0, 0, 0] for tm in ns_tm]
        tm_num_df = gp_f_trip.count()[fare_label].to_frame(
            'total_tm_num').reset_index()
        for tm, num in tm_num_df.values:
            tm_num_totalDuration_totalFare[tm][1] += num
        tm_dur_df = gp_f_trip.sum()[dur_lable].to_frame(
            'total_tm_dur').reset_index()
        for tm, dur in tm_dur_df.values:
            tm_num_totalDuration_totalFare[tm][2] += dur
        tm_fare_df = gp_f_trip.sum()[fare_label].to_frame(
            'total_tm_fare').reset_index()
        for tm, fare in tm_fare_df.values:
            tm_num_totalDuration_totalFare[tm][3] += fare
        save_as_csv(ns_fn, yymm, dd, hh, tm_num_totalDuration_totalFare)
        #
        cur_day_time = next_day_time
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
Esempio n. 31
0
def process_file(ALPHA, GAMMA, ALPHA_GAMMA_dir, yymm):
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    #
    print yymm
    if yymm == '0901':
        prev_yymm = None
    elif yymm == '1001':
        prev_yymm = '0911'
    elif yymm == '1011':
        prev_yymm = '1009'
    else:
        yy, mm = int(yymm[:2]), int(yymm[2:])
        prev_yymm = '%02d%02d' % (yy, mm - 1)
    #
    if not prev_yymm:
        Qsa_value, state_action_fare_dur = {}, {}
        locations = [IN_AP, OUT_AP]
        actions = [IN_AP, OUT_AP]
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in locations:
                    for a in actions:
                        Qsa_value[(s1, s2, s3, a)] = 0
                        state_action_fare_dur[(s1, s2, s3, a)] = [0, 0]
    else:
        Qsa_value, state_action_fare_dur = load_picle_file(
            '%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' %
            (ALPHA_GAMMA_dir, ALPHA, GAMMA, prev_yymm))
    #

    with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm),
              'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        id_prev_tetime, id_prev_teloc = headers.index(
            'prev-trip-end-time'), headers.index('prev-trip-end-location')
        id_stime, id_sloc = headers.index('start-time'), headers.index(
            'start-location'),
        id_etime, id_eloc = headers.index('end-time'), headers.index(
            'end-location'),
        id_dur, id_fare = headers.index('duration'), headers.index('fare')
        #
        count = 0
        for row in reader:
            prev_tetime, stime, etime = eval(row[id_prev_tetime]), eval(
                row[id_stime]), eval(row[id_etime])
            setup_time = stime - prev_tetime
            #
            if setup_time < 0 or HOUR * 2 < setup_time:
                continue
            #
            prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime)
            s1, s2 = prev_tetime_datetime.strftime(
                "%a"), prev_tetime_datetime.hour
            s3 = row[id_prev_teloc]
            #
            etime_datetime = datetime.datetime.fromtimestamp(etime)
            new_s1, new_s2 = etime_datetime.strftime("%a"), etime_datetime.hour
            new_s3 = row[id_eloc]
            #
            a = row[id_sloc]
            dur, fare = eval(row[id_dur]), eval(row[id_fare])
            #
            state_action_fare_dur[(s1, s2, s3, a)][0] += fare
            state_action_fare_dur[(s1, s2, s3, a)][1] += setup_time + dur
            #
            if Qsa_value[(new_s1, new_s2, new_s3, IN_AP)] > Qsa_value[(
                    new_s1, new_s2, new_s3, OUT_AP)]:
                future_max_q_value = Qsa_value[(new_s1, new_s2, new_s3, IN_AP)]
            else:
                future_max_q_value = Qsa_value[(new_s1, new_s2, new_s3,
                                                OUT_AP)]
            #
            alter_a = OUT_AP if a == IN_AP else IN_AP
            if state_action_fare_dur[(s1, s2, s3, alter_a)][1] == 0:
                op_cost = 0
            else:
                op_cost = (setup_time + dur) * state_action_fare_dur[
                    (s1, s2, s3, alter_a)][0] / state_action_fare_dur[
                        (s1, s2, s3, alter_a)][1]
            qrs = fare - op_cost + GAMMA * future_max_q_value
            Qsa_value[(s1, s2, s3, a)] = \
                        (1 - ALPHA) * Qsa_value[(s1, s2, s3, a)] + ALPHA * qrs
            count += 1
            if count % MOD_STAN == 0:
                print '%s, %d' % (yymm, count)
                logging_msg('%s, %d' % (yymm, count))
                save_pickle_file(
                    '%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' %
                    (ALPHA_GAMMA_dir, ALPHA, GAMMA, yymm),
                    [Qsa_value, state_action_fare_dur])
        save_pickle_file(
            '%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' %
            (ALPHA_GAMMA_dir, ALPHA, GAMMA, yymm),
            [Qsa_value, state_action_fare_dur])
    print 'end the file; %s' % yymm
    logging_msg('end the file; %s' % yymm)
Esempio n. 32
0
def process_file(fn):
    _, _, yymm = fn[:-len('.csv')].split('-')
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    #
    ap_pkl_files = get_all_files(logs_dir, 'ap-crossing-time-', '.pkl')
    ap_pkl_file_path = None
    for pkl_fn in ap_pkl_files:
        _, _, _, pkl_yymm = pkl_fn[:-len('.pkl')].split('-')
        if pkl_yymm == yymm:
            ap_pkl_file_path = '%s/%s' % (logs_dir, pkl_fn)
            break
    else:
        assert False, yymm
    ap_crossing_times = load_picle_file(ap_pkl_file_path)
    #
    ns_pkl_files = get_all_files(logs_dir, 'ns-crossing-time-', '.pkl')
    ns_pkl_file_path = None
    for pkl_fn in ns_pkl_files:
        _, _, _, pkl_yymm = pkl_fn[:-len('.pkl')].split('-')
        if pkl_yymm == yymm:
            ns_pkl_file_path = '%s/%s' % (logs_dir, pkl_fn)
            break
    else:
        assert False, yymm
    ns_crossing_times = load_picle_file(ns_pkl_file_path)
    #
    init_csv_files(yymm)

    with open('%s/%s' % (trips_dir, fn), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        header_id = {h: i for i, h in enumerate(headers)}
        for row in reader:
            tid, did = row[header_id['tid']], row[header_id['did']]
            et, duration = row[header_id['end-time']], row[
                header_id['duration']]
            fare = row[header_id['fare']]
            #
            ap_tm, ns_tm = int(row[header_id['ap-trip-mode']]), int(
                row[header_id['ns-trip-mode']])
            vid, st, prev_tet = row[header_id['vid']], eval(
                row[header_id['start-time']]), eval(
                    row[header_id['prev-trip-end-time']])
            #
            is_ap_trip, is_ns_trip = False, False
            #
            if ap_tm == DInAP_PInAP:
                is_ap_trip = True
                ap_join_queue_time = prev_tet
            elif ap_tm == DOutAP_PInAP:
                is_ap_trip = True
                try:
                    i = bisect(ap_crossing_times[vid], st)
                except KeyError:
                    logging_msg('%s-tid-%s' % (yymm, row[header_id['tid']]))
                    continue
                ap_join_queue_time = ap_crossing_times[vid][
                    i - 1] if i != 0 else ap_crossing_times[vid][0]
            if is_ap_trip:
                with open('%s/airport-trip-%s.csv' % (airport_trips_dir, yymm),
                          'a') as w_csvfile:
                    writer = csv.writer(w_csvfile)
                    ap_queue_time = st - ap_join_queue_time
                    new_row = [
                        tid, vid, did, st, et, duration, fare, prev_tet, ap_tm,
                        ap_join_queue_time, ap_queue_time
                    ]
                    writer.writerow(new_row)
            #
            if ns_tm == DInNS_PInNS:
                is_ns_trip = True
                ns_join_queue_time = prev_tet
            elif ns_tm == DOutNS_PInNS:
                is_ns_trip = True
                try:
                    i = bisect(ns_crossing_times[vid], st)
                except KeyError:
                    logging_msg('%s-tid-%s' % (yymm, row[header_id['tid']]))
                    continue
                ns_join_queue_time = ns_crossing_times[vid][
                    i - 1] if i != 0 else ns_crossing_times[vid][0]
            if is_ns_trip:
                with open(
                        '%s/nightsafari-trip-%s.csv' %
                    (nightsafari_trips_dir, yymm), 'a') as w_csvfile:
                    writer = csv.writer(w_csvfile)
                    ns_queue_time = st - ns_join_queue_time
                    new_row = [
                        tid, vid, did, st, et, duration, fare, prev_tet, ns_tm,
                        ns_join_queue_time, ns_queue_time
                    ]
                    writer.writerow(new_row)
    print 'end the file; %s' % yymm
    logging_msg('end the file; %s' % yymm)
Esempio n. 33
0
def process_files(yymm):
    """Aggregate hourly productive duration and fare for the month ``yymm``.

    Reads two per-month CSV files:

    * ``shift_pro_dur_dir/shift_pro_dur_prefix + yymm + '.csv'`` -- columns
      ``dd``, ``hh`` and ``pro-dur`` (minutes) are summed, converted to
      seconds, into the matching hourly bucket.
    * ``trips_dir/trip_prefix + yymm + '.csv'`` -- each trip's ``fare`` is
      distributed over the hours the trip covers, proportionally to the
      time spent in each hour (``start-time``/``end-time`` are epoch
      timestamps interpreted in local time, ``duration`` is in the same
      unit as the timestamps).

    The result is written to
    ``general_dur_fare_dir/general_dur_fare_prefix + yymm + '.csv'`` with the
    header ``yy, mm, dd, hh, gen-duration, gen-fare`` and one row for every
    hour in the fixed range 2009-01-01 00:00 (inclusive) to 2011-02-01 00:00
    (exclusive).

    :param yymm: two-digit year followed by two-digit month, e.g. ``'0901'``.
    :raises KeyError: if a row refers to an hour outside the fixed range.
    :raises ZeroDivisionError: if a trip spanning several hours has a
        ``duration`` of zero.
    """
    def hour_key(dt):
        # Bucket key used by hourly_total: (year, month, day, hour).
        return (dt.year, dt.month, dt.day, dt.hour)

    one_hour = datetime.timedelta(hours=1)
    old_time = time.time()
    print('handle the file; %s' % yymm)
    logging_msg('handle the file; %s' % yymm)

    # Pre-create a zeroed bucket for every hour of the covered period and
    # remember the chronological order for the output file.
    begin_timestamp = datetime.datetime(2009, 1, 1, 0)
    last_timestamp = datetime.datetime(2011, 2, 1, 0)
    hourly_total, time_period_order = {}, []
    while begin_timestamp < last_timestamp:
        k = hour_key(begin_timestamp)
        # Two slots, addressed through GEN_DUR and GEN_FARE.
        hourly_total[k] = [0, 0]
        time_period_order.append(k)
        begin_timestamp += one_hour
    #
    st_label, et_label, dur_label, fare_label = 'start-time', 'end-time', 'duration', 'fare'
    # NOTE(review): eval() is applied to raw CSV cells below. It executes
    # arbitrary expressions, so these files must come from a trusted source;
    # consider int()/float() once the exact column formats are confirmed.
    # Productive duration
    yyyy, mm = 2000 + int(yymm[:2]), int(yymm[2:])
    with open('%s/%s%s.csv' % (shift_pro_dur_dir, shift_pro_dur_prefix, yymm),
              'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        hid = {h: i for i, h in enumerate(headers)}
        for row in reader:
            dd, hh = eval(row[hid['dd']]), eval(row[hid['hh']])
            hourly_total[(yyyy, mm, dd, hh)][GEN_DUR] += eval(
                row[hid['pro-dur']]) * 60  # unit change; Minute -> Second
            # Periodic progress report.  The previous condition
            # ``elapsed > TIME_ALARM == 0`` was a chained comparison that
            # could only be true when TIME_ALARM was 0.
            if time.time() - old_time > TIME_ALARM:
                old_time = time.time()
                print('handling; %s' % yymm)
                logging_msg('handling; %s' % yymm)
    # Total fare
    with open('%s/%s%s.csv' % (trips_dir, trip_prefix, yymm),
              'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        hid = {h: i for i, h in enumerate(headers)}
        for row in reader:
            st_ts, et_ts = eval(row[hid[st_label]]), eval(row[hid[et_label]])
            dur, fare = eval(row[hid[dur_label]]), eval(row[hid[fare_label]])
            #
            st_dt = datetime.datetime.fromtimestamp(st_ts)
            et_dt = datetime.datetime.fromtimestamp(et_ts)
            #
            # Only the hour-of-day is compared, so this assumes every trip
            # is shorter than 24 hours.
            if st_dt.hour == et_dt.hour:
                hourly_total[hour_key(st_dt)][GEN_FARE] += fare
            else:
                # Force float arithmetic: under Python 2, HOUR / dur with two
                # ints would truncate to 0 for every intermediate hour.
                dur = float(dur)
                # tg_dt / tg_ts: the hour boundary currently being processed,
                # starting with the first boundary after the trip start.
                tg_dt = datetime.datetime(
                    st_dt.year, st_dt.month, st_dt.day, st_dt.hour) + one_hour
                tg_ts = time.mktime(tg_dt.timetuple())
                # Share of the fare earned in the (partial) starting hour.
                prop = (tg_ts - st_ts) / dur
                hourly_total[hour_key(st_dt)][GEN_FARE] += fare * prop
                # Full hours strictly between the start and the end hour.
                while tg_dt.hour != et_dt.hour:
                    prop = float(HOUR) / dur
                    hourly_total[hour_key(tg_dt)][GEN_FARE] += fare * prop
                    tg_dt += one_hour
                    # Keep the timestamp in step with tg_dt; otherwise the
                    # final share below is measured from the first boundary
                    # and over-counts the last hour.
                    tg_ts = time.mktime(tg_dt.timetuple())
                # Share of the fare earned in the (partial) ending hour.
                prop = (et_ts - tg_ts) / dur
                hourly_total[hour_key(et_dt)][GEN_FARE] += fare * prop
            if time.time() - old_time > TIME_ALARM:
                old_time = time.time()
                print('handling; %s' % yymm)
                logging_msg('handling; %s' % yymm)
    # Dump every hourly bucket in chronological order.
    with open(
            '%s/%s%s.csv' %
            (general_dur_fare_dir, general_dur_fare_prefix, yymm),
            'wt') as w_csvfile:
        writer = csv.writer(w_csvfile)
        header = ['yy', 'mm', 'dd', 'hh', 'gen-duration', 'gen-fare']
        writer.writerow(header)
        for yyyy, mm, dd, hh in time_period_order:
            gen_dur, gen_fare = hourly_total[(yyyy, mm, dd, hh)]
            # The year is stored as a two-digit offset from 2000.
            writer.writerow([yyyy - 2000, mm, dd, hh, gen_dur, gen_fare])
    print('end the file; %s' % yymm)
    logging_msg('end the file; %s' % yymm)