def do_train_test_split(work_dir, filetype, uid2, uid3, max_dt2, max_dt3):
    """Run ``train_test_split`` over every file listed for ``work_dir``.

    The output root is derived from ``work_dir`` by appending ``_trevte`` to
    its second ``/``-separated component (e.g. ``./data`` -> ``./data_trevte``).
    The output root is reset via ``rmt`` (presumably ``shutil.rmtree`` --
    confirm against the file's imports) and recreated with empty ``train``,
    ``eval`` and ``test`` sub-directories before any file is processed.

    Args:
        work_dir: Source directory path. Must contain at least one ``/``;
            otherwise indexing the second path component raises IndexError.
        filetype: Forwarded unchanged to ``train_test_split``.
        uid2, uid3, max_dt2, max_dt3: Running state threaded through the
            successive ``train_test_split`` calls; each call's return values
            are fed into the next call.

    Returns:
        The tuple ``(uid2, uid3, max_dt2, max_dt3)`` as returned by the last
        ``train_test_split`` call, or the inputs unchanged when no file is
        listed.
    """
    print("=== Train Test Split ===")
    parts = work_dir.split('/')
    parts[1] = parts[1] + "_trevte"
    to_path = "/".join(parts)
    paths, tags = get_file_list(work_dir)

    # Best-effort removal of a previous run's output. Only filesystem errors
    # (typically "does not exist") are ignored, so Ctrl-C and programming
    # errors are no longer silently swallowed.
    try:
        rmt(to_path)
    except OSError:
        pass
    os.mkdir(to_path)
    for subset in ("train", "eval", "test"):
        os.mkdir("{}/{}".format(to_path, subset))

    print("Working on dir: {}\nSave result to: {}".format(work_dir, to_path))
    # Wrap the sequence itself (not the enumerate object) so tqdm can see its
    # length and display a total.
    for i, p in enumerate(tqdm(paths)):
        uid2, uid3, max_dt2, max_dt3 = train_test_split(
            [p, tags[i], to_path], filetype, uid2, uid3, max_dt2, max_dt3
        )
    return uid2, uid3, max_dt2, max_dt3
def rmtree(path):
    """Remove ``path`` if it exists, ignoring filesystem errors.

    Delegates to the file-level ``rmt`` helper (presumably an alias of
    ``shutil.rmtree`` -- confirm against the file's imports).

    Args:
        path: Path of the tree to remove.

    Returns:
        None. A missing path is a no-op, and an ``OSError`` raised during
        removal is deliberately suppressed (best-effort cleanup).
    """
    if not osp.exists(path):
        return
    try:
        rmt(path)
    except OSError:
        # Best-effort: a failed cleanup must not abort the caller, but
        # KeyboardInterrupt / SystemExit / programming errors now propagate
        # instead of being swallowed by a bare ``except``.
        pass
def do_dt_shift(df, uids):
    """Apply ``dt_shift`` to each UUID's rows and list the resulting files.

    The shared scratch folder ``./_tmp(all)`` is reset first. ``dt_shift`` is
    then called once per entry in ``uids`` with a copy of the rows of ``df``
    whose ``'UUID'`` column equals that entry. Finally the scratch folder is
    listed with ``get_file_list`` (presumably ``dt_shift`` writes its output
    there -- confirm against its definition).

    Args:
        df: Table indexable by a boolean mask and exposing a ``'UUID'`` column.
        uids: Iterable of UUID values to process.

    Returns:
        The first element (``paths``) of ``get_file_list("./_tmp(all)")``.
    """
    tmp_dir = "./_tmp(all)"

    # Best-effort removal of a stale scratch folder. Only filesystem errors
    # (typically "does not exist") are ignored.
    try:
        rmt(tmp_dir)
    except OSError:
        pass
    os.mkdir(tmp_dir)

    print("Start dt shift...")
    for uid in tqdm(uids):
        dt_shift(df[df['UUID'] == uid].copy())

    print("Merging Result...")
    paths, _ = get_file_list(tmp_dir)  # the tags are not needed here
    print("DT Shifted, store in merged")
    return paths
def rmtree(path):
    """Remove ``path`` through ``rmt`` when it exists; otherwise do nothing.

    Args:
        path: Path of the tree to remove.

    Returns:
        None. Errors raised by ``rmt`` propagate to the caller.
    """
    if not osp.exists(path):
        return
    rmt(path)
ecs_rank_list = "ECS_{}".format(rank[0])


# In[ ]:


### first merge each UUID data with day unit


# In[ ]:


paths, tags = get_file_list("./cs_shift_result/")
c_paths, c_tags = get_file_list("./csall_shift_result/")
e_paths, e_tags = get_file_list("./csecs_shift_result/")


# In[ ]:


# Reset every per-day output directory independently. With a single ``try``
# around all three removals, a failure on the first directory (e.g. it does
# not exist yet) skipped the remaining removals, and the following
# ``os.mkdir`` then raised FileExistsError on a stale directory.
for _days_dir in ("./_days_cs", "./_days_cs_all", "./_days_cs_ecs"):
    try:
        rmt(_days_dir)
    except OSError:
        pass
    os.mkdir(_days_dir)


# In[ ]:


# NOTE(review): only the first two statements of this function were visible
# when this section was reviewed; they are reproduced unchanged.
def do_date_join(date_uni, path, dataset):
    mer = pd.read_csv(path)
    domain = []
def removeFolder(path):
    """Delete the directory tree at ``path`` and report the outcome on stdout.

    ``shutil.rmtree`` returns ``None`` and signals failure by raising, so the
    outcome is decided by exception handling rather than by testing the
    return value.

    Args:
        path: Directory to remove recursively.

    Returns:
        None. Prints ``'path is removed successfully'`` on success and
        ``'unable to remove the path'`` when removal raises ``OSError``
        (missing path, permission problem, not a directory, ...).
    """
    try:
        shutil.rmtree(path)
    except OSError:
        print ('unable to remove the path')
    else:
        print ('path is removed successfully')
# In[9]: pq_CS_all = pq_CS_all[[ 'UUID', 'cate', 'date', 'datetime', 'domain', 'dt', 'type' ]] pq_history = pq_history[[ 'UUID', 'cate', 'date', 'datetime', 'domain', 'dt', 'type' ]] pq_CS_ECS = pq_CS_ECS[[ 'UUID', 'cate', 'date', 'datetime', 'domain', 'dt', 'type' ]] # In[11]: try: rmt("./csall_shift_result") rmt("./cs_shift_result") rmt("./csecs_shift_result") except: pass finally: os.mkdir("./csall_shift_result") os.mkdir("./cs_shift_result") os.mkdir("./csecs_shift_result") # In[26]: time_bucket = { "L00_5S": Interval(0, 5 * 1e3, upper_closed=False), "L01_20S": Interval(5 * 1e3, 20 * 1e3, upper_closed=False), "L02_2M": Interval(20 * 1e3, 2 * 60 * 1e3, upper_closed=False),