def var_sepe(data_):
    # Split roads into low/medium/high-variance groups and complete each group
    # separately with HaLRTC, then compare against completing the whole tensor.
    ds = data_.shape
    data, miss_data1, W1, ori_W1 = handle_data(data_)
    lou, K, conv_thre, fb = 3e-3, 100, 1e-5, 0.85

    var_list, mean_list = [], []
    for r in range(ds[0]):
        var_list.append(np.var(data[r]))
        mean_list.append(np.mean(data[r]))
    # Map each variance back to its road index (assumes variances are distinct).
    var_dict = {var_list[i]: i for i in range(ds[0])}

    # Bucket roads by variance: < 50, 50-150, >= 150.
    var_assign = [[], [], []]
    for var in var_dict:
        if var < 50:
            var_assign[0].append(var_dict[var])
        elif var < 150:
            var_assign[1].append(var_dict[var])
        else:
            var_assign[2].append(var_dict[var])

    # Complete each variance group independently.
    est_var = np.zeros_like(data)
    for va in var_assign:
        est_va = halrtc_cpt(miss_data1[va], lou, conv_thre, K, W1[va], fb)
        est_var[va] = est_va
    # W1 | (ori_W1 == False) marks entries that are observed or were missing in the
    # raw data; rmse_mape_rse presumably evaluates on the remaining (artificially
    # dropped) entries.
    eva_var = rmse_mape_rse(est_var, data, (W1 | (ori_W1 == False)))
    print('sepe', eva_var)

    # Baseline: complete the full tensor in one pass.
    est_ori = halrtc_cpt(miss_data1, lou, conv_thre, K, W1, fb)
    eva_ori = rmse_mape_rse(est_ori, data, (W1 | (ori_W1 == False)))
    print('ori', eva_ori)
    return
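
# Hedged usage sketch (not from the original code): var_sepe expects a raw 3-D
# traffic tensor shaped (roads, days, intervals); handle_data is assumed to drop
# entries and return (data, miss_data, W, ori_W). The random tensor below is
# purely illustrative.
def _demo_var_sepe():
    rng = np.random.default_rng(0)
    raw = rng.uniform(10.0, 120.0, size=(30, 14, 288))  # 30 roads, 14 days, 288 5-min slots
    var_sepe(raw)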
def week_sepe(handle_train, handle_test):
    # Complete weekday and weekend sub-tensors separately and compare against the
    # single full-tensor completion.
    data, miss_data1, W1, ori_W1 = handle_train
    # NOTE: the original list had only three values for four names; fb = 0.85 is
    # assumed here to match the value used in var_sepe.
    halrtc_para = [lou, K, conv_thre, fb] = [3e-3, 100, 1e-5, 0.85]
    data_test, miss_data_test, Wtest, ori_Wtest = handle_test
    ds_test = data_test.shape
    est_ori = halrtc_cpt(miss_data_test, lou, conv_thre, K, Wtest, fb)

    # Day index % 7 < 5 is treated as a weekday (assumes day 0 is a Monday).
    weekday2, weekend2 = [], []
    for i in range(ds_test[1]):
        if i % 7 < 5:
            weekday2.append(i)
        else:
            weekend2.append(i)

    est1 = halrtc_cpt(miss_data_test[:, weekday2, :], lou, conv_thre, K,
                      Wtest[:, weekday2, :], fb)
    eva1 = rmse_mape_rse(est1, data_test[:, weekday2, :],
                         (Wtest | (ori_Wtest == False))[:, weekday2, :])
    est2 = halrtc_cpt(miss_data_test[:, weekend2, :], lou, conv_thre, K,
                      Wtest[:, weekend2, :], fb)
    eva2 = rmse_mape_rse(est2, data_test[:, weekend2, :],
                         (Wtest | (ori_Wtest == False))[:, weekend2, :])
    print('weekday', eva1)
    print('ori_weekday',
          rmse_mape_rse(est_ori[:, weekday2, :], data_test[:, weekday2, :],
                        (Wtest | (ori_Wtest == False))[:, weekday2, :]))
    print('weekend', eva2)
    print('ori_weekend',
          rmse_mape_rse(est_ori[:, weekend2, :], data_test[:, weekend2, :],
                        (Wtest | (ori_Wtest == False))[:, weekend2, :]))
    return
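
# Small worked example (illustrative only) of the weekday/weekend split used in
# week_sepe: day index % 7 < 5 counts as a weekday, which implicitly assumes day 0
# of the tensor is a Monday.
def _demo_week_split(n_days=14):
    weekday2 = [i for i in range(n_days) if i % 7 < 5]   # -> [0, 1, 2, 3, 4, 7, 8, 9, 10, 11]
    weekend2 = [i for i in range(n_days) if i % 7 >= 5]  # -> [5, 6, 12, 13]
    return weekday2, weekend2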
def test_simM(data_, r, dis_matrix, k):
    # Repair a single road r using only its k nearest roads (by dis_matrix) and
    # compare against repairing it as part of the full tensor.
    r_dis = dis_matrix[r]
    # Indices of road r and its k nearest neighbours; np.argsort also handles tied
    # distances, which the original distance-to-index dict silently dropped.
    sim_road = list(np.argsort(r_dis)[:k + 1])
    data, miss_data1, W1, ori_W1 = handle_data(data_)
    lou, K, conv_thre, fb = 3e-3, 100, 1e-5, 0.85
    est_ori = halrtc_cpt(miss_data1, lou, conv_thre, K, W1, fb)
    eva_ori = rmse_mape_rse(est_ori[r], data[r], (W1 | (ori_W1 == False))[r])
    est_sim = halrtc_cpt(miss_data1[sim_road], lou, conv_thre, K, W1[sim_road], fb)
    # Road r has distance 0 to itself, so it sorts first and est_sim[0] is its estimate.
    eva_sim = rmse_mape_rse(est_sim[0], data[r], (W1 | (ori_W1 == False))[r])
    print('ori', eva_ori)
    print('sim', eva_sim)
    return
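
# Hedged usage sketch (not from the original code): dis_matrix is assumed to be a
# symmetric road-to-road distance matrix; here it is built from hypothetical 2-D
# road coordinates purely for illustration.
def _demo_test_simM():
    rng = np.random.default_rng(1)
    raw = rng.uniform(10.0, 120.0, size=(30, 14, 288))
    coords = rng.uniform(0.0, 10.0, size=(30, 2))
    dis_matrix = np.linalg.norm(coords[:, None, :] - coords[None, :, :], axis=-1)
    test_simM(raw, r=5, dis_matrix=dis_matrix, k=8)  # repair road 5 with its 8 nearest roads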
def test_labels(cn, labels, handle_info):
    # Evaluate a given road clustering: complete each label group separately and
    # compare with the single full-tensor completion.
    # NOTE: the original list had only three values for four names; fb = 0.85 is
    # assumed here to match the value used in var_sepe.
    halrtc_para = [lou, K, conv_thre, fb] = [3e-3, 100, 1e-5, 0.85]
    data_test, miss_data_test, Wtest, ori_Wtest = handle_info
    ds_test = data_test.shape
    est_ori = halrtc_cpt(miss_data_test, lou, conv_thre, K, Wtest, fb)
    for i in range(cn):
        print('label-' + str(i), (labels == i).sum())
        temp_data = halrtc_cpt(miss_data_test[labels == i], lou, conv_thre, K,
                               Wtest[labels == i], fb)
        #est_c = cluster_ha(labels, miss_data_test, Wtest, cn, halrtc_para, axis=0)
        eva_c = rmse_mape_rse(temp_data, data_test[labels == i],
                              (Wtest | (ori_Wtest == False))[labels == i])
        print('clu', eva_c)
        eva_test = rmse_mape_rse(est_ori[labels == i], data_test[labels == i],
                                 (Wtest | (ori_Wtest == False))[labels == i])
        print('ori', eva_test)
    return
def compare_3d_4d(handle_train, handle_test):
    # Compare plain 3-D HaLRTC completion with a variance-based 2-cluster split
    # (and, in the commented-out block, with a (road, week, weekday, interval)
    # 4-D reshaping of the same data).
    data, miss_data1, W1, ori_W1 = handle_train
    # NOTE: the original list had only three values for four names; fb = 0.85 is
    # assumed here to match the value used in var_sepe.
    halrtc_para = [lou, K, conv_thre, fb] = [1e-3, 100, 1e-5, 0.85]
    data_test, miss_data_test, Wtest, ori_Wtest = handle_test
    rW = Wtest | (ori_Wtest == False)
    data_size = data_test.shape
    weeks = data_size[1] // 7
    '''
    Nori_data = np.zeros((data_size[0], weeks, 7, data_size[2]))
    Nmiss_data = np.zeros_like(Nori_data)
    N_W = np.zeros_like(Nmiss_data)
    Nori_W = np.zeros_like(Nori_data)
    Nr_W = np.zeros_like(Nori_W)
    for i in range(data_size[1]):
        if i >= weeks * 7:
            break
        Nori_data[:, i // 7, i % 7, :] = data_test[:, i, :]
        Nori_W[:, i // 7, i % 7, :] = ori_Wtest[:, i, :]
        Nmiss_data[:, i // 7, i % 7, :] = miss_data_test[:, i, :]
        N_W[:, i // 7, i % 7, :] = Wtest[:, i, :]
        Nr_W[:, i // 7, i % 7, :] = rW[:, i, :]
    print(Nmiss_data.shape)
    time0 = time.time()
    est_4d = np.zeros_like(Nmiss_data)
    est_4d[L == 0] = halrtc_cpt(Nmiss_data[L == 0], 1e-3, 1e-4, 100, N_W[L == 0], 0)
    est_4d[L == 1] = halrtc_cpt(Nmiss_data[L == 1], 1e-3, 1e-4, 100, N_W[L == 1], 0)
    time1 = time.time()
    print('4d_halrtc:', rmse_mape_rse(est_4d, Nori_data, Nr_W))
    print('4d_time', str(time1 - time0) + 's')
    '''
    print('loss', (miss_data_test < 1).sum())
    time0 = time.time()
    est_ori = halrtc_cpt(miss_data_test, lou, conv_thre, K, Wtest, fb)
    time1 = time.time()
    eva_ori = rmse_mape_rse(est_ori, data_test, rW)
    print('ori:', eva_ori)
    time_ori = time1 - time0
    print('ori_time:', str(time_ori) + 's')
    # NOTE: this early return skips the 3-D clustering comparison below; remove it
    # to run the full comparison and reach the final return value.
    return

    L = np.zeros(data_size[0])
    var_mat, mean_mat = traffic_info(handle_train)
    for i in range(data_size[0]):  # partition roads by variance and mean
        t = np.var(data[i])
        if t < 50:
            s = round(t**0.5 / np.mean(data[i]), 2)
            L[i] = 1
    #L = var_cluster(handle_train)
    time0 = time.time()
    est_3d = cluster_ha(L, miss_data_test, Wtest, 2, halrtc_para)
    time1 = time.time()
    eva_3d = rmse_mape_rse(est_3d, data_test, rW)
    print('3d:', eva_3d)
    time_3d = time1 - time0
    print('3d_time:', str(time_3d) + 's')
    return eva_ori, time_ori, eva_3d, time_3d
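
# Hedged usage sketch (not from the original code): both arguments are assumed to
# be the (data, miss_data, W, ori_W) tuples produced by handle_data; the random
# train/test tensors below are purely illustrative.
def _demo_compare_3d_4d():
    rng = np.random.default_rng(2)
    train_raw = rng.uniform(10.0, 120.0, size=(30, 28, 288))
    test_raw = rng.uniform(10.0, 120.0, size=(30, 14, 288))
    compare_3d_4d(handle_data(train_raw), handle_data(test_raw))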
def test_week_cluster(handle_train, handle_test, choise='var'):
    # Cluster and complete each of the seven weekdays separately.
    data, miss_data1, W1, ori_W1 = handle_train
    ds = data.shape
    train = {}
    # NOTE: the original list had only three values for four names; fb = 0.85 is
    # assumed here to match the value used in var_sepe.
    halrtc_para = [lou, K, conv_thre, fb] = [3e-3, 100, 1e-5, 0.85]
    data_test, miss_data_test, Wtest, ori_Wtest = handle_test
    est_ori = halrtc_cpt(miss_data_test, lou, conv_thre, K, Wtest, fb)
    ds_test = data_test.shape
    test, est_clu = {}, {}
    est_week = np.zeros_like(data_test)
    K_n, max_d = 2, 100
    for i in range(7):
        # Collect the training and test days that fall on weekday i.
        train[i], test[i] = [], []
        days_train = []
        for k in range(ds[1]):
            if k % 7 == i:
                days_train.append(k)
        for s in handle_train:
            train[i].append(s[:, days_train, :])
        days_test = []
        for k in range(ds_test[1]):
            if k % 7 == i:
                days_test.append(k)
        for t in handle_test:
            test[i].append(t[:, days_test, :])
        # Two-way split by variance, using day i of the training set as a proxy
        # for weekday i.
        L = np.zeros(ds[0])
        for j in range(ds[0]):
            if np.var(data[j, i]) > 50:
                L[j] = 1
        #L = SC_1(train[i][0], 6, K_n, method='pearson')
        est_clu[i] = cluster_ha(L, test[i][1], test[i][2], K_n, halrtc_para)
        #est_clu[i] = test_cluster(train[i], test[i])[-1]
        # Write the per-weekday estimates back into the full-week tensor.
        for p in range(est_clu[i].shape[1]):
            est_week[:, p * 7 + i, :] = est_clu[i][:, p, :]
        eva_week = rmse_mape_rse(est_clu[i], data_test[:, days_test, :],
                                 (Wtest | (ori_Wtest == False))[:, days_test, :])
        print('clu', i, eva_week)
        eva_i = rmse_mape_rse(est_ori[:, days_test, :], data_test[:, days_test, :],
                              (Wtest | (ori_Wtest == False))[:, days_test, :])
        print('ori', i, eva_i)
    return
def clusters_train(handle_train, handle_test):
    # Cluster roads with KMeans on their variance features (var_mat from
    # traffic_info), complete each cluster separately, and compare against the
    # single full-tensor completion.
    K_n = 2
    # NOTE: the original list had only three values for four names; fb = 0.85 is
    # assumed here to match the value used in var_sepe.
    halrtc_para = [lou, K, conv_thre, fb] = [1e-3, 100, 1e-5, 0.85]
    data, miss_data1, W1, ori_W1 = handle_train
    data_test, miss_data_test, Wtest, ori_Wtest = handle_test
    est_ori = halrtc_cpt(miss_data_test, lou, conv_thre, K, Wtest, fb)
    time0 = time.time()
    var_mat, mean_mat = traffic_info(handle_test)
    clf = KMeans(n_clusters=K_n)
    S = clf.fit(var_mat)
    L = S.labels_
    print(L)
    '''
    L = np.zeros(209)
    var_list = []
    std_list = []
    var_mean = np.zeros((209, 2))
    for i in range(209):  # partition roads by variance and mean
        #if np.mean(var_mat[i]) > 50:
        #t = np.std(data[i]) / np.mean(data[i])
        t = np.mean(var_mat[i])
        var_list.append(t)
        var_mean[0] = np.var(data[i])
        var_mean[1] = np.mean(data[i])
        if t < 50:
            s = round(t**0.5 / np.mean(data[i]), 2)
            L[i] = 1
    return L
    '''
    #est_Kmeans[L==1] = est_ori[L==1]
    est_Kmeans = cluster_ha(L, miss_data_test, Wtest, K_n, halrtc_para)
    time1 = time.time()
    for i in range(K_n):
        print('ori_' + str(i),
              rmse_mape_rse(est_ori[L == i], data_test[L == i],
                            (Wtest | (ori_Wtest == False))[L == i]))
        print('KM_' + str(i),
              rmse_mape_rse(est_Kmeans[L == i], data_test[L == i],
                            (Wtest | (ori_Wtest == False))[L == i]))
    print('Kmeans:', rmse_mape_rse(est_Kmeans, data_test, Wtest | (ori_Wtest == False)))
    print('Kmeans_time:', str(time1 - time0) + 's')

    # Baseline: time a fresh full-tensor completion.
    time0 = time.time()
    est_ori = halrtc_cpt(miss_data_test, lou, conv_thre, K, Wtest, fb)
    time1 = time.time()
    eva_ori = rmse_mape_rse(est_ori, data_test, (Wtest | (ori_Wtest == False)))
    print('ori:', eva_ori)
    print('ori_time:', str(time1 - time0) + 's')
    '''
    time0 = time.time()
    L = SC_1(data, 10, K_n, method='pearson')
    est_SC = cluster_ha(L, miss_data_test, Wtest, K_n, halrtc_para)
    time1 = time.time()
    print('SC:', rmse_mape_rse(est_SC, data_test, Wtest | (ori_Wtest == False)))
    print('SC_time:', str(time1 - time0) + 's')

    time0 = time.time()
    L = road_Kmeans(data, ori_W1, K_n, W1, method='pearson')[0]
    est_Kmeans = cluster_ha(L, miss_data_test, Wtest, K_n, halrtc_para)
    time1 = time.time()
    print('Kmeans:', rmse_mape_rse(est_Kmeans, data_test, Wtest | (ori_Wtest == False)))
    print('Kmeans_time:', str(time1 - time0) + 's')
    '''
    return
def test_cluster(handle_train, handle_test, choise='var'):
    # Iteratively peel off small, well-separated road clusters with AHC, complete
    # each separately, and finish by KMeans-clustering whatever roads remain.
    p_tr = 0.9
    # NOTE: the original list had only three values for four names; fb = 0.85 is
    # assumed here to match the value used in var_sepe.
    halrtc_para = [lou, K, conv_thre, fb] = [1.3e-3, 100, 1e-5, 0.85]
    data_test, miss_data_test, Wtest, ori_Wtest = handle_test
    rW = Wtest | (ori_Wtest == False)
    time_a = time.time()
    #lou = 1 / (3 * T_SVD(miss_data_test, p_tr)[0])
    est_ori = halrtc_cpt(miss_data_test, lou, conv_thre, K, Wtest, fb)
    eva_ori = rmse_mape_rse(est_ori, data_test, rW)
    K_n = 2
    max_d = 100
    time_s = time.time()
    ori_time = time_s - time_a
    print('ori', eva_ori)
    print('ori_time', str(ori_time) + 's')

    ts = data_test.shape
    L = np.zeros(ts[0])
    Road = np.array(range(ts[0]))
    max_c = 0
    est_clu = np.zeros_like(data_test)
    #handle_train = handle_test  # change
    var_mat, mean_mat = traffic_info(handle_train)
    # Initialise HD_train so the final clustering step still has data even if the
    # AHC loop breaks on its first iteration (the original code left it undefined).
    HD_train = list(handle_train)
    iter = 0
    while True:
        # Keep only the largest cluster from the previous round.
        HD_test = []
        for temp in handle_test:
            HD_test.append(temp[L == max_c])
        var_mat = var_mat[L == max_c]
        mean_mat = mean_mat[L == max_c]
        L, c = AHC(handle_train, choise, var_mat, mean_mat, K_n, max_d)
        if c < 0.65 or iter > 3:
            break
        #test_labels(K_n, L, HD_test)
        HD_train = []
        # Find the largest cluster; the smaller ones are completed right away.
        max_c, max_n = -1, 0
        for i in range(K_n):
            if (L == i).sum() > max_n:
                max_c = i
                max_n = (L == i).sum()
        for i in range(K_n):
            if i != max_c:
                list_i = Road[L == i]
                lou = 1 / (3 * T_SVD(miss_data_test[list_i], p_tr)[0])
                est_clu[list_i] = halrtc_cpt(miss_data_test[list_i], lou, conv_thre,
                                             K, Wtest[list_i], fb)
                print('clu', rmse_mape_rse(est_clu[list_i], data_test[list_i], rW[list_i]))
                print('ori', rmse_mape_rse(est_ori[list_i], data_test[list_i], rW[list_i]))
        for temp in handle_train:
            HD_train.append(temp[L == max_c])
        handle_train = HD_train.copy()
        handle_test = HD_test.copy()
        Road = Road[L == max_c]
        iter += 1
        #print(Road)
        #if Road.size < 80:
        #    break

    # Whether (and how) to cluster the final remaining portion: KMeans on the
    # per-road, per-day means of the remaining training data.
    clf = KMeans(n_clusters=K_n)
    data, miss_data1, W1, ori_W1 = HD_train
    ds1 = data.shape
    mean_mat = np.zeros(ds1[:2])
    for r in range(ds1[0]):
        for d in range(ds1[1]):
            mean_mat[r, d] = np.mean(data[r, d, :])
            #mean_mat[r, d] = np.std(data[r, d, :]) / np.mean(data[r, d, :])
    s = clf.fit(mean_mat)
    L = np.array(s.labels_)
    for i in range(K_n):
        list_i = Road[L == i]
        #lou = 1 / (3 * T_SVD(miss_data_test[list_i], p_tr)[0])
        est_clu[list_i] = halrtc_cpt(miss_data_test[list_i], lou, conv_thre, K,
                                     Wtest[list_i], fb)
    #lou = 1 / (3 * T_SVD(miss_data_test[Road], p_tr)[0])
    #est_clu[Road] = halrtc_cpt(miss_data_test[Road], lou, conv_thre, K, Wtest[Road], fb)
    eva_clu = rmse_mape_rse(est_clu, data_test, rW)
    time_e = time.time()
    ori_time = time_s - time_a
    print('ori', eva_ori)
    print('ori_time', str(ori_time) + 's')
    print('clu', eva_clu)
    clu_time = time_e - time_s
    print('clu_time', str(clu_time) + 's')
    return eva_ori, ori_time, eva_clu, clu_time, est_clu
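
# Hedged usage sketch (not from the original code): test_cluster takes the same
# (data, miss_data, W, ori_W) tuples as the functions above and returns the
# baseline and clustered metrics plus the clustered estimate; the random tensors
# below are purely illustrative.
def _demo_test_cluster():
    rng = np.random.default_rng(3)
    train_raw = rng.uniform(10.0, 120.0, size=(30, 28, 288))
    test_raw = rng.uniform(10.0, 120.0, size=(30, 14, 288))
    eva_ori, ori_time, eva_clu, clu_time, est_clu = test_cluster(
        handle_data(train_raw), handle_data(test_raw))
    print(eva_ori, eva_clu)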