def get_month_small_all_df(th = 7):
    """Concatenate the per-user month frames of the "small" users.

    The user ids come from ``get_small_user_ids(th)``; every id is passed to
    ``get_month_by_id`` through a 64-worker pool and the returned frames are
    joined with ``pd.concat``.

    :param th: value forwarded unchanged to ``get_small_user_ids``.
    :return: the result of ``pd.concat`` over the frames the workers returned.
    """
    pool = m_Pool(64)
    small_user_ids = get_small_user_ids(th)
    frames = pool.map(get_month_by_id, small_user_ids)
    # Single-argument print(...) behaves the same as the Python 2 statement.
    print('Waiting for all subprocesses done...')
    pool.close()
    pool.join()
    return pd.concat(frames)
def mearge_holiday_day_df_all():
    """Submit ``mearge_holiday_day_df(day)`` for days 1 through 30 to a pool.

    The tasks are queued asynchronously on a 64-worker pool and their results
    are never collected; the function returns ``None`` once the pool has been
    closed and joined.
    """
    pool = m_Pool(64)
    for day_number in range(1, 31):
        pool.apply_async(mearge_holiday_day_df, (day_number,))
    # Single-argument print(...) behaves the same as the Python 2 statement.
    print('Waiting for all subprocesses done...')
    pool.close()
    pool.join()
def mearge_holiday_month_df_all():
    """Run ``mearge_holiday_month_df`` once per month-frame path, in parallel.

    ``get_holiday_df(1)`` and ``get_festday_df(1)`` are evaluated once, before
    the pool is created, and the same two objects are passed to every task
    together with the path's position in ``get_month_df_path()`` and the path
    itself. Task results are not collected; the function returns ``None``.
    """
    holidays = get_holiday_df(1)
    fest_days = get_festday_df(1)
    pool = m_Pool(64)
    for file_index, month_path in enumerate(get_month_df_path()):
        task_args = (holidays, fest_days, file_index, month_path)
        pool.apply_async(mearge_holiday_month_df, task_args)
    # Single-argument print(...) behaves the same as the Python 2 statement.
    print('Waiting for all subprocesses done...')
    pool.close()
    pool.join()
def mearge_prophet_holiday_month_df_all():
    """Apply ``mearge_prophet_holiday_month_df`` to every holiday-month path.

    The paths come from ``get_holiday_month_df_path()`` and are mapped over a
    64-worker pool. The mapped results are discarded and the function
    returns ``None``.
    """
    pool = m_Pool(64)
    holiday_month_paths = get_holiday_month_df_path()
    pool.map(mearge_prophet_holiday_month_df, holiday_month_paths)
    # Single-argument print(...) behaves the same as the Python 2 statement.
    print('Waiting for all subprocesses done...')
    pool.close()
    pool.join()

# Disabled code: the bare string literal below is never executed and is
# reproduced verbatim.
'''
def get_month_all_df(): p = m_Pool(64) path_list = get_prophet_holiday_month_df_path() all_df_list = p.map(get_month_by_path,path_list) print 'Waiting for all subprocesses done...' p.close() p.join() return pd.concat(all_df_list) '''
def make_history_month_features_all():
    """Build history-month features for every non-empty user, in parallel.

    For each id from ``get_user_id_list()`` the id is printed; ids for which
    ``check_empty`` is truthy are skipped. For the rest, the user's rows are
    resampled to daily means with missing values filled with 1. Once all
    frames are prepared, ``make_history_month_features(user_id, frame)`` is
    submitted to a 64-worker pool for each pair. Results are not collected;
    the function returns ``None``.
    """
    dataset = get_dataset()
    # No-op self-assignment kept exactly as in the original.
    dataset.power_consumption = dataset.power_consumption

    pending = []
    for user_id in get_user_id_list():
        print(user_id)
        if check_empty(user_id):
            continue
        user_rows = filter_user_id(dataset, user_id)
        daily_frame = user_rows.resample('1D').mean().fillna(1)
        pending.append((user_id, daily_frame))

    pool = m_Pool(64)
    for user_id, daily_frame in pending:
        # The worker receives the id and the frame as two positional args.
        pool.apply_async(make_history_month_features,
                         args=(user_id, daily_frame))
    print('Waiting for all subprocesses done...')
    pool.close()
    pool.join()
def predict_tf_all(path = None):
    """Predict days 1-31 of October 2016 in parallel and write the results.

    ``predict_tf_once(day)`` is mapped over days 1..31 on a 31-worker pool;
    each call is expected to return a ``(day, result)`` pair. The results are
    written as one row per date (``201610DD``) in a single
    ``predict_power_consumption`` column. Afterwards the per-day files
    ``./result/predict_part/<day>.csv`` (presumably produced by the workers;
    verify against ``predict_tf_once``) are concatenated into one
    timestamped file in the same directory.

    :param path: destination CSV for the predictions. When ``None`` a
        timestamped file under ``./result/`` is used.
    :return: ``None``.
    """
    pool = m_Pool(31)
    day_results = pool.map(predict_tf_once, range(1, 32))
    pool.close()
    pool.join()

    print('writing...')
    result_df = pd.DataFrame(index=range(1))
    for day, result in day_results:
        # Two-digit day so the column label reads as a yyyymmdd date.
        result_df['201610' + str(day).zfill(2)] = result
    result_df = result_df.T
    result_df.columns = ['predict_power_consumption']

    # Computed unconditionally: the merged parts file below needs the stamp
    # even when the caller supplies ``path`` (previously a NameError).
    stamp = str(pd.Timestamp(time.ctime())).replace(' ', '_').replace(':', '_')
    if path is None:
        path = './result/' + stamp + '.csv'
    result_df.to_csv(path, index_label='predict_date')

    # read_csv(index_col=0, parse_dates=True) is the documented equivalent of
    # the deprecated DataFrame.from_csv defaults.
    part_frames = [
        pd.read_csv('./result/predict_part/%d.csv' % day,
                    index_col=0, parse_dates=True)
        for day in range(1, 32)
    ]
    merged_parts = pd.concat(part_frames)
    merged_parts.to_csv('./result/predict_part/' + stamp + '.csv')
def predict_tf_one_shop_all(shop_id,start_date = '2016-10-1'):
    """Run ``predict_tf_one_shop`` for days 1-31 of one shop, in parallel.

    One task per day is submitted to a 30-worker pool as
    ``predict_tf_one_shop(day, shop_id, start_date)``. Task results are not
    collected; the function returns ``None`` after the pool is joined.

    :param shop_id: shop to predict for. It is now forwarded to every task;
        previously a hard-coded ``1416`` was passed and this argument was
        ignored.
    :param start_date: forwarded unchanged to ``predict_tf_one_shop``.
    """
    pool = m_Pool(30)
    for day in range(1, 32):
        pool.apply_async(predict_tf_one_shop, (day, shop_id, start_date))
    pool.close()
    pool.join()
print 'Waiting for all subprocesses done...' p.close() p.join() '''
# NOTE(review): the line above is the tail of a block that begins outside
# this view; it is left exactly as found.

def create_path():
    """Create each directory in ``_save_paths`` that does not exist yet."""
    for path in _save_paths:
        if not os.path.exists(path):
            os.mkdir(path)

# Executed at module level, before any pool below is created.
create_path()

def rebuild_predict_feature_all_mt(pos):
    """Rebuild the predict features for the feature set at index ``pos``.

    Pairs ``_create_feature_funcs`` with ``_save_paths`` position by
    position, selects the pair at ``pos``, prints the path followed by a
    colon, and calls ``rebuild_predict_feature_all`` with that function and
    path.
    """
    # Indexing the result of zip() requires a list, i.e. Python 2 semantics.
    func,path = zip(_create_feature_funcs,_save_paths)[pos]
    print path+':'
    rebuild_predict_feature_all(create_feature_func = func,save_path = path)

# Module-level script: one rebuild task per feature set on a 3-worker pool.
p = m_Pool(3)
for pos in range(_feature_length):
    print pos
    p.apply_async(rebuild_predict_feature_all_mt,args=(pos,))
    #time.sleep(10)
print 'Waiting for all subprocesses done...'
p.close()
p.join()
# Second stage: one train_tf_once_percent task per day 1..31, 7 workers.
# NOTE(review): no close()/join() for this pool is visible in this view -
# confirm that it follows.
p = m_Pool(7)
for day in range(1,32):
    print day
    p.apply_async(train_tf_once_percent,args=(day,))
    #time.sleep(10)
user_df = dataset[dataset.user_id == user_id] assert user_df.power_consumption.sum() == len(user_df) return False a = pd.DataFrame.from_csv('./features/%d.csv' % user_id) assert a.ds.iloc[-1] == '2016-09-30' return True #return a.ds.iloc[-1] == '2016-09-30'
# NOTE(review): the line above is the tail of a function whose header lies
# outside this view; it is left exactly as found.
if __name__ == '__main__':
    # Script entry point: run predict_using_prophet once per distinct
    # user_id, each call receiving only that user's rows of the dataset.
    dataset = get_dataset()
    # Disabled sequential variant, kept as a bare string literal.
    '''for user_id in set(dataset.user_id): predict_using_prophet(dataset[dataset.user_id == user_id]) '''
    p = m_Pool(64)
    for arg in set(dataset.user_id):
        # Slice the rows first so the task is handed the per-user frame.
        arg_df = dataset[dataset.user_id == arg]
        p.apply_async(predict_using_prophet, args=(arg_df, ))
        #p.apply_async(predict_using_prophet, args=(arg,))
    print 'Waiting for all subprocesses done...'
    p.close()
    p.join()
    # Disabled check pass (collects user ids for which check() is falsy),
    # kept as a bare string literal.
    ''' all_one_list = [] for user_id in set(dataset.user_id): if not check(dataset,user_id): all_one_list.append(user_id) '''