def run():
    """Queue one ``process_file`` task per driver group listed in the partition summary.

    For each (time measure, year) pair the group names are read from the
    ``groupName`` column of ``<gp_dpath>/<gp_prefix>summary.csv``.  For each
    group the graph stored at ``<gp_dpath>/<gp_prefix><group>.pkl`` is loaded
    with ``ig.Graph.Read_Pickle`` and the ``name`` attribute of both endpoints
    of every edge is gathered into a set.  That set is handed to
    ``process_file`` together with the time measure, year and group name.
    The number of queued tasks is passed to ``end_multiprocessor``.
    """
    init_multiprocessor(11)
    num_jobs = 0
    # Only 'spendingTime' is enabled; 'roamingTime' is switched off.
    for tm in ['spendingTime']:
        for year in ['2009', '2010', '2011', '2012']:
            gt_dpath = dpaths[tm, year, 'groupTrips']
            gt_prefix = prefixs[tm, year, 'groupTrips']  # looked up but not used below
            check_dir_create(gt_dpath)

            gp_dpath = dpaths[tm, year, 'groupPartition']
            gp_prefix = prefixs[tm, year, 'groupPartition']
            gp_summary_fpath = '%s/%ssummary.csv' % (gp_dpath, gp_prefix)

            gs_df = pd.read_csv(gp_summary_fpath)
            for gn in gs_df['groupName'].values:
                graph_fpath = '%s/%s%s.pkl' % (gp_dpath, gp_prefix, gn)
                igG = ig.Graph.Read_Pickle(graph_fpath)
                groupDrivers = set()
                for edge in igG.es:
                    # An edge tuple holds the vertex indices of its two endpoints.
                    src, dst = edge.tuple
                    groupDrivers.update((igG.vs[src]['name'], igG.vs[dst]['name']))
                # Direct (non-queued) alternative: process_file(tm, year, gn, groupDrivers)
                put_task(process_file, [tm, year, gn, groupDrivers])
                num_jobs += 1
    end_multiprocessor(num_jobs)
def run():
    """Queue ``process_month`` for the twelve month keys of year ``09``.

    ``check_dir_create`` is called on ``ss_trips_dpath`` first.  Month keys
    are ``yymm`` strings from '0901' to '0912'; any key found in the skip
    list ('0912', '1010') is not queued.  The number of queued tasks is
    passed to ``end_multiprocessor``.
    """
    check_dir_create(ss_trips_dpath)
    # 11 is forwarded to init_multiprocessor (presumably the worker count -- confirm).
    init_multiprocessor(11)
    yy = 9
    month_keys = ['%02d%02d' % (yy, mm) for mm in range(1, 13)]
    queued = 0
    for yymm in month_keys:
        if yymm in ('0912', '1010'):
            continue
        put_task(process_month, [yymm])
        queued += 1
    end_multiprocessor(queued)
def run():
    """Queue ``process_file`` for every month key from '0901' to '1012'.

    ``check_dir_create`` is called on ``shift_pro_dur_dir`` first.  One task
    is queued per ``yymm`` string, except '0912' and '1010', which are
    skipped.  The number of queued tasks is passed to ``end_multiprocessor``.
    """
    check_dir_create(shift_pro_dur_dir)
    # 11 is forwarded to init_multiprocessor (presumably the worker count -- confirm).
    init_multiprocessor(11)
    count_num_jobs = 0
    # range() iterates identically to xrange() here and also exists on Python 3.
    for y in range(9, 11):
        for m in range(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ('0912', '1010'):
                continue
            # Direct (non-queued) alternative: process_file(yymm)
            put_task(process_file, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    """Queue ``process_files`` for every month key from '0901' to '1012'.

    ``check_dir_create`` is called on ``productivity_dir`` first.  One task
    is queued per ``yymm`` string, except '0912' and '1010', which are
    skipped.  The number of queued tasks is passed to ``end_multiprocessor``.
    """
    check_dir_create(productivity_dir)
    # 11 is forwarded to init_multiprocessor (presumably the worker count -- confirm).
    init_multiprocessor(11)
    count_num_jobs = 0
    # range() iterates identically to xrange() here and also exists on Python 3.
    for y in range(9, 11):
        for m in range(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ('0912', '1010'):
                continue
            # Direct single-month alternative: process_files('1007')
            put_task(process_files, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    """Queue ``tripMode_prevTripTime`` for every month key from '0901' to '1012'.

    ``check_dir_create`` is called on ``trips_dpath`` first.  One task is
    queued per ``yymm`` string, except '0912' and '1010'.  The number of
    queued tasks is passed to ``end_multiprocessor``.
    """
    check_dir_create(trips_dpath)
    # 11 is forwarded to init_multiprocessor (presumably the worker count -- confirm).
    init_multiprocessor(11)
    count_num_jobs = 0
    # range() iterates identically to xrange() here and also exists on Python 3.
    for y in range(9, 11):
        for m in range(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ('0912', '1010'):
                # The data for both of these months are corrupted.
                continue
            put_task(tripMode_prevTripTime, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    """Queue ``process_files`` for every month key from '0901' to '1012'.

    ``remove_create_dir`` is called on ``ap_ep_dir`` and ``ns_ep_dir`` first
    (presumably wiping and recreating them -- confirm against the helper).
    One task is queued per ``yymm`` string, except '0912' and '1010', which
    are skipped.  The number of queued tasks is passed to
    ``end_multiprocessor``.
    """
    remove_create_dir(ap_ep_dir)
    remove_create_dir(ns_ep_dir)
    # 3 is forwarded to init_multiprocessor (presumably the worker count -- confirm).
    init_multiprocessor(3)
    count_num_jobs = 0
    # range() iterates identically to xrange() here and also exists on Python 3.
    for y in range(9, 11):
        for m in range(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ('0912', '1010'):
                continue
            # Direct (non-queued) alternative: process_files(yymm)
            put_task(process_files, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    """Queue ``process_file`` for every month key from '0901' to '1012'.

    ``check_dir_create`` is called on ``logs_last_day_dir`` first.  One task
    is queued per ``yymm`` string, except '0912' and '1010'.  The number of
    queued tasks is passed to ``end_multiprocessor``.
    """
    check_dir_create(logs_last_day_dir)
    # 8 is forwarded to init_multiprocessor (presumably the worker count -- confirm).
    init_multiprocessor(8)
    count_num_jobs = 0
    # range() iterates identically to xrange() here and also exists on Python 3.
    for y in range(9, 11):
        for m in range(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ('0912', '1010'):
                # The data_20160826 files for both of these months are corrupted.
                continue
            # Direct (non-queued) alternative: process_file(yymm)
            put_task(process_file, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    """Queue ``process_files`` for every month key from '0901' to '1012'.

    ``remove_create_dir`` is called on ``ap_ep_dir`` and then ``ns_ep_dir``
    (presumably wiping and recreating them -- confirm against the helper).
    One task is queued per ``yymm`` string, except '0912' and '1010', which
    are skipped.  The number of queued tasks is passed to
    ``end_multiprocessor``.
    """
    for ep_dir in (ap_ep_dir, ns_ep_dir):
        remove_create_dir(ep_dir)
    # 3 is forwarded to init_multiprocessor (presumably the worker count -- confirm).
    init_multiprocessor(3)
    count_num_jobs = 0
    # range() iterates identically to xrange() here and also exists on Python 3.
    for y in range(9, 11):
        for m in range(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ('0912', '1010'):
                continue
            # Direct (non-queued) alternative: process_files(yymm)
            put_task(process_files, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    """Queue ``process_file`` for every month key from '0901' to '1012'.

    ``check_dir_create`` is called on ``ap_crossing_dir`` and
    ``ns_crossing_dir`` first.  One task is queued per ``yymm`` string,
    except '0912' and '1010'.  The number of queued tasks is passed to
    ``end_multiprocessor``.
    """
    check_dir_create(ap_crossing_dir)
    check_dir_create(ns_crossing_dir)
    # 11 is forwarded to init_multiprocessor (presumably the worker count -- confirm).
    init_multiprocessor(11)
    count_num_jobs = 0
    # range() iterates identically to xrange() here and also exists on Python 3.
    for y in range(9, 11):
        for m in range(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ('0912', '1010'):
                # The data for both of these months are corrupted.
                continue
            # Direct (non-queued) alternative: process_file(yymm)
            put_task(process_file, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    """Queue ``process_files`` for every month key from '0901' to '1012'.

    ``check_dir_create`` is called on ``economicProfit_ap_dpath`` and
    ``economicProfit_ns_dpath`` first.  One task is queued per ``yymm``
    string, except '0912' and '1010', which are skipped.  The number of
    queued tasks is passed to ``end_multiprocessor``.
    """
    for dpath in [economicProfit_ap_dpath, economicProfit_ns_dpath]:
        check_dir_create(dpath)
    # 6 is forwarded to init_multiprocessor (presumably the worker count -- confirm).
    init_multiprocessor(6)
    count_num_jobs = 0
    # range() iterates identically to xrange() here and also exists on Python 3.
    for y in range(9, 11):
        for m in range(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ('0912', '1010'):
                continue
            # Direct (non-queued) alternative: process_files(yymm)
            put_task(process_files, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    """Queue ``process_files`` for the month keys '1001' to '1012'.

    ``check_dir_create`` is called on the four ``ssDriver*`` output
    directories first.  The year loop covers only ``y == 10``, so of the two
    skip-list entries only '1010' can match; '0912' is kept in the list but
    never occurs in this range.  The number of queued tasks is passed to
    ``end_multiprocessor``.
    """
    for dpath in [ssDriverTrip_dpath, ssDriverShiftProDur_dpath,
                  ssDriverEP_ap_dpath, ssDriverEP_ns_dpath]:
        check_dir_create(dpath)
    # 11 is forwarded to init_multiprocessor (presumably the worker count -- confirm).
    init_multiprocessor(11)
    count_num_jobs = 0
    # range() iterates identically to xrange() here and also exists on Python 3.
    for y in range(10, 11):
        for m in range(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ('0912', '1010'):
                continue
            # Direct (non-queued) alternative: process_files(yymm)
            put_task(process_files, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    """Queue ``process_file`` for every month key from '0901' to '1012'.

    ``check_dir_create`` is called on ``logs_dir`` first.  One task is queued
    per ``yymm`` string, except '0912' and '1010'.  The number of queued
    tasks is passed to ``end_multiprocessor``.
    """
    check_dir_create(logs_dir)
    # 8 is forwarded to init_multiprocessor (presumably the worker count -- confirm).
    init_multiprocessor(8)
    count_num_jobs = 0
    # range() iterates identically to xrange() here and also exists on Python 3.
    for y in range(9, 11):
        for m in range(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ('0912', '1010'):
                # The data_20160826 files for both of these months are corrupted.
                continue
            # Direct (non-queued) alternative: process_file(yymm)
            put_task(process_file, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    """Queue ``process_file`` for every month key from '0901' to '1012'.

    ``check_dir_create`` is called on ``queueingTime_ap_dpath`` and
    ``queueingTime_ns_dpath`` first.  One task is queued per ``yymm`` string,
    except '0912' and '1010'.  The number of queued tasks is passed to
    ``end_multiprocessor``.
    """
    for dpath in [queueingTime_ap_dpath, queueingTime_ns_dpath]:
        check_dir_create(dpath)
    # 6 is forwarded to init_multiprocessor (presumably the worker count -- confirm).
    init_multiprocessor(6)
    count_num_jobs = 0
    # range() iterates identically to xrange() here and also exists on Python 3.
    for y in range(9, 11):
        for m in range(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ('0912', '1010'):
                # The data for both of these months are corrupted.
                continue
            # Direct (non-queued) alternative: process_file(yymm)
            put_task(process_file, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    """Queue ``log_location_labeling`` for every month key from '0901' to '1012'.

    ``check_dir_create`` is called on the log, last-day-log and both
    crossing-time directories first.  One task is queued per ``yymm`` string,
    except '0912' and '1010'.  The number of queued tasks is passed to
    ``end_multiprocessor``.
    """
    for dpath in [log_dpath, log_last_day_dpath,
                  crossingTime_ap_dpath, crossingTime_ns_dpath]:
        check_dir_create(dpath)
    # 11 is forwarded to init_multiprocessor (presumably the worker count -- confirm).
    init_multiprocessor(11)
    count_num_jobs = 0
    # range() iterates identically to xrange() here and also exists on Python 3.
    for y in range(9, 11):
        for m in range(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ('0912', '1010'):
                # The data_20160826 files for both of these months are corrupted.
                continue
            # Only the labeling stage is queued.  Disabled alternatives:
            #   put_task(log_last_day, [yymm])
            #   put_task(process_file, [yymm])
            put_task(log_location_labeling, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    """Queue ``process_files`` for every month key from '0901' to '1012'.

    ``remove_create_dir`` is called on the five ``ftd_*_stat_dir`` directories
    first (presumably wiping and recreating them -- confirm against the
    helper).  One task is queued per ``yymm`` string, except '0912' and
    '1010', which are skipped.  The number of queued tasks is passed to
    ``end_multiprocessor``.
    """
    for dn in [ftd_gen_stat_dir,
               ftd_prev_in_ap_stat_dir, ftd_prev_out_ap_stat_dir,
               ftd_prev_in_ns_stat_dir, ftd_prev_out_ns_stat_dir]:
        remove_create_dir(dn)
    # Called without an argument, so the helper's own default applies.
    init_multiprocessor()
    count_num_jobs = 0
    # range() iterates identically to xrange() here and also exists on Python 3.
    for y in range(9, 11):
        for m in range(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ('0912', '1010'):
                continue
            # Direct (non-queued) alternative: process_files(yymm)
            put_task(process_files, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    """Queue one ``process_file`` task per group-marginal CSV file.

    For time measure 'spendingTime' and each enabled year, the
    ``groupDriverStats`` directory is passed to ``check_dir_create``.  Every
    file returned by ``get_all_files`` for the pattern ``<gm_prefix>*.csv`` in
    the ``groupMarginal`` directory then yields one task.  The group name is
    the fourth '-'-separated field of the file name without its '.csv'
    suffix; a name with any other number of fields raises ``ValueError``.
    The number of queued tasks is passed to ``end_multiprocessor``.
    """
    init_multiprocessor(6)
    num_jobs = 0
    tm = 'spendingTime'
    # Only 2009 is enabled; the full list was ['2009', '2010', '2011', '2012'].
    for year in ['2009']:
        gds_dpath = dpaths[tm, year, 'groupDriverStats']
        check_dir_create(gds_dpath)

        gm_dpath = dpaths[tm, year, 'groupMarginal']
        gm_prefix = prefixs[tm, year, 'groupMarginal']
        pattern = '%s*.csv' % gm_prefix
        for fn in get_all_files(gm_dpath, pattern):
            stem = fn[:-len('.csv')]
            _, _, _, gn = stem.split('-')
            # Direct (non-queued) alternative: process_file(tm, year, gn)
            put_task(process_file, [tm, year, gn])
            num_jobs += 1
    end_multiprocessor(num_jobs)
def run():
    """Queue ``process_files`` for every month key from '0901' to '1012'.

    ``remove_create_dir`` is called on ``ftd_trips_dir`` and ``ftd_shift_dir``
    first (presumably wiping and recreating them -- confirm against the
    helper).  One task is queued per ``yymm`` string, except '0912' and
    '1010', which are skipped.  The number of queued tasks is passed to
    ``end_multiprocessor``.
    """
    for path in [ftd_trips_dir, ftd_shift_dir]:
        remove_create_dir(path)
    # 3 is forwarded to init_multiprocessor (presumably the worker count -- confirm).
    init_multiprocessor(3)
    count_num_jobs = 0
    # range() iterates identically to xrange() here and also exists on Python 3.
    for y in range(9, 11):
        for m in range(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ('0912', '1010'):
                continue
            # Direct (non-queued) alternative: process_files(yymm)
            put_task(process_files, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    """Queue ``process_files`` for the month keys '1001' to '1012'.

    ``check_dir_create`` is called on the four ``ssDriver*`` output
    directories first.  The year loop covers only ``y == 10``, so of the two
    skip-list entries only '1010' can match; '0912' is kept in the list but
    never occurs in this range.  The number of queued tasks is passed to
    ``end_multiprocessor``.
    """
    for dpath in [
        ssDriverTrip_dpath,
        ssDriverShiftProDur_dpath,
        ssDriverEP_ap_dpath,
        ssDriverEP_ns_dpath,
    ]:
        check_dir_create(dpath)
    # 11 is forwarded to init_multiprocessor (presumably the worker count -- confirm).
    init_multiprocessor(11)
    count_num_jobs = 0
    # range() iterates identically to xrange() here and also exists on Python 3.
    for y in range(10, 11):
        for m in range(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ('0912', '1010'):
                continue
            # Direct (non-queued) alternative: process_files(yymm)
            put_task(process_files, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    """Queue one ``process_file`` task per group-partition pickle file.

    For each enabled (time measure, year) pair, the ``groupMarginal``
    directory is passed to ``check_dir_create``.  Every file returned by
    ``get_all_files`` for the pattern ``<gp_prefix>*.pkl`` in the
    ``groupPartition`` directory is then inspected.  The group name is the
    fourth '-'-separated field of the file name without its '.pkl' suffix;
    a name with any other number of fields raises ``ValueError``.  Files
    whose group name is 'drivers' or 'original' are skipped.  The number of
    queued tasks is passed to ``end_multiprocessor``.
    """
    init_multiprocessor(6)
    num_jobs = 0
    for tm in ['spendingTime']:
        # Only 2009 is enabled; the full list was ['2009', '2010', '2011', '2012'].
        for year in ['2009']:
            gm_dpath = dpaths[tm, year, 'groupMarginal']
            check_dir_create(gm_dpath)

            gp_dpath = dpaths[tm, year, 'groupPartition']
            gp_prefix = prefixs[tm, year, 'groupPartition']
            pattern = '%s*.pkl' % gp_prefix
            for fn in get_all_files(gp_dpath, pattern):
                stem = fn[:-len('.pkl')]
                _, _, _, gn = stem.split('-')
                if gn in ('drivers', 'original'):
                    continue
                # Direct (non-queued) alternative: process_file(tm, year, gn)
                put_task(process_file, [tm, year, gn])
                num_jobs += 1
    end_multiprocessor(num_jobs)
def run():
    """Split the 2012 drivers into subsets and queue ``process_files`` per subset.

    The ``driversRelations`` mapping for year '2012' is loaded with
    ``load_pickle_file``.  Its keys are dealt round-robin into
    ``numWorker * 10`` subsets.  For each subset, the union of the related
    entries ``driversRelations[did]`` over all of its members is built, and
    one task ``process_files(yyyy, i, driver_subset, pickUp_drivers)`` is
    queued.  A task is queued for every subset, including empty ones.  The
    number of queued tasks is passed to ``end_multiprocessor``.
    """
    check_dir_create(tfZ_TP_dpath)
    numWorker = 6
    init_multiprocessor(numWorker)
    count_num_jobs = 0
    numReducers = numWorker * 10

    yyyy = '20%02d' % 12
    logger.info('loading driversRelations %s' % yyyy)
    driversRelations = load_pickle_file(driversRelations_fpaths[yyyy])

    # Round-robin assignment: driver i goes to subset i % numReducers.
    driver_subsets = [[] for _ in range(numReducers)]
    for i, did in enumerate(driversRelations.keys()):
        driver_subsets[i % numReducers].append(did)

    for i, driver_subset in enumerate(driver_subsets):
        # Direct (non-queued) alternative:
        #   process_files(yyyy, i, driver_subset, driversRelations)
        pickUp_drivers = set()
        for did1 in driver_subset:
            # In-place update avoids re-copying the accumulated set on each
            # iteration, which rebinding to .union(...) did.
            pickUp_drivers.update(driversRelations[did1])
        put_task(process_files, [yyyy, i, driver_subset, pickUp_drivers])
        count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    """Queue one ``process_file`` task per group-trips file for 'baseline'.

    For each enabled year, the ``groupZones`` directory is passed to
    ``check_dir_create``.  Every file returned by ``get_all_files`` for the
    pattern ``<gt_prefix>*`` in the ``groupTrips`` directory is then
    inspected.  The last four characters of the file name are dropped (the
    length of '.csv') and the rest is split on '-'.  Files that do not give
    exactly four fields, or whose fourth field is 'X', are skipped.  The task
    receives the time measure, the year and the file's full path.  The
    number of queued tasks is passed to ``end_multiprocessor``.
    """
    init_multiprocessor(6)
    num_jobs = 0
    tm = 'baseline'
    # Only 2009 is enabled; the full list was ['2009', '2010', '2011', '2012'].
    for year in ['2009']:
        gz_dpath = dpaths[tm, year, 'groupZones']
        check_dir_create(gz_dpath)

        gt_dpath = dpaths[tm, year, 'groupTrips']
        gt_prefix = prefixs[tm, year, 'groupTrips']
        for fn in get_all_files(gt_dpath, '%s*' % gt_prefix):
            fields = fn[:-len('.csv')].split('-')
            if len(fields) != 4:
                continue
            gn = fields[3]
            if gn == 'X':
                continue
            gt_fpath = '%s/%s' % (gt_dpath, fn)
            # Direct (non-queued) alternative: process_file(tm, year, gt_fpath)
            put_task(process_file, [tm, year, gt_fpath])
            num_jobs += 1
    end_multiprocessor(num_jobs)
def run():
    """Queue one ``process_file`` task per group-trips file for 'spendingTime'.

    For each enabled year, the ``groupZones`` directory is passed to
    ``check_dir_create``.  Every file returned by ``get_all_files`` for the
    pattern ``<gt_prefix>*`` in the ``groupTrips`` directory is then
    inspected.  The last four characters of the file name are dropped (the
    length of '.csv') and the rest is split on '-'.  Files that do not give
    exactly four fields, or whose fourth field is 'X', are skipped.  The task
    receives the time measure, the year and the file's full path.  The
    number of queued tasks is passed to ``end_multiprocessor``.
    """
    init_multiprocessor(6)
    queued = 0
    tm = 'spendingTime'
    # Only 2009 is enabled; the full list was ['2009', '2010', '2011', '2012'].
    for year in ['2009']:
        gz_dpath = dpaths[tm, year, 'groupZones']
        check_dir_create(gz_dpath)

        gt_dpath = dpaths[tm, year, 'groupTrips']
        gt_prefix = prefixs[tm, year, 'groupTrips']
        for fn in get_all_files(gt_dpath, '%s*' % gt_prefix):
            name_parts = fn[:-len('.csv')].split('-')
            # Skip names without exactly four fields and the 'X' group.
            if len(name_parts) != 4 or name_parts[3] == 'X':
                continue
            gt_fpath = '%s/%s' % (gt_dpath, fn)
            # Direct (non-queued) alternative: process_file(tm, year, gt_fpath)
            put_task(process_file, [tm, year, gt_fpath])
            queued += 1
    end_multiprocessor(queued)