def pure_feature_classification():
    """Train and evaluate the classifier on the pure features alone.

    Loads arrays through ``get_pure_feature(False)``, keeps only index
    ``subSum - 1`` along the second axis (and column 0 of the labels),
    wraps both in DataFrames and hands them to
    ``lgb_train_model_with_split`` together with the constant 2011
    (presumably a random seed -- confirm against that helper).

    Returns:
        Whatever ``lgb_train_model_with_split`` returns; the same value is
        printed through ``print_res`` first.
    """
    features, labels, _test_x, _test_y = get_pure_feature(False)
    last_step = subSum - 1

    feature_frame = pd.DataFrame(features[:, last_step, :])
    label_frame = pd.DataFrame(labels[:, last_step, 0])

    result = lgb_train_model_with_split(feature_frame, label_frame, 2011)
    print_res(result)
    return result
def GCN_classfication():
    """Classify on pure features concatenated with stored GCN embeddings.

    Reads the embeddings through ``get_input_data`` from the directory
    ``GCNPathName + "<embSize>_<testNum>"``, reads the pure features through
    ``get_pure_feature(False)``, joins both at index ``subSum - 1`` of the
    second axis (pure features first, embeddings second) and trains via
    ``lgb_train_model_with_split``.

    NOTE(review): another function with this exact name is defined later in
    this file; when the module is executed the later definition replaces
    this one. Confirm which of the two is meant to be live.

    Returns:
        The value returned by ``lgb_train_model_with_split``. It is also
        printed through ``print_res``. Previously nothing was returned,
        although callers assign and print the return value.
    """
    emb_x, emb_y, _, _ = get_input_data(
        GCNPathName + "%d_%d" % (embSize, testNum),
        "/fGCNembedding", True, False)
    pure_x, _, _, _ = get_pure_feature(False)

    last_step = subSum - 1
    # Column order matters for the downstream model: pure features, then
    # the embedding columns.
    inputX = np.concatenate(
        (pure_x[:, last_step, :], emb_x[:, last_step, :]), axis=1)
    trainY_1D = emb_y[:, last_step, 0]

    result = lgb_train_model_with_split(
        pd.DataFrame(inputX), pd.DataFrame(trainY_1D), 2011)
    print_res(result)
    return result
def GCN_classfication():
    """Average the classification metrics over repeated GCN embedding runs.

    Each of the five repetitions:

    1. loads the feature table and graph pickles for index ``subSum - 1``;
    2. publishes the features, labels and adjacency matrix through the
       module globals ``train_x``, ``train_y`` and ``scipy_adj_matrix``
       (presumably read by ``get_GCN_embedding`` -- confirm);
    3. computes an embedding with ``esize=8`` and ``random_seed=7 + run``
       and pickles it under ``GCNPathName + "<testNum>"``;
    4. reloads the data through ``get_input_data``, trains via
       ``lgb_train_model_with_split`` and adds the first four returned
       metrics to a running total.

    Returns:
        list: the four accumulated metrics, each divided by the number of
        repetitions.
    """
    global scipy_adj_matrix, train_x, train_y

    repeats = 5
    base_seed = 7
    embedding_dim = 8
    metric_totals = [0, 0, 0, 0]
    label_columns = ['label']
    table_columns = [
        'label', 'AF1', 'AF2', 'AF3', 'AF4', 'AF5', 'AF6', 'AF7', 'AF8'
    ]

    for run in range(repeats):
        # This range covers exactly one index: subSum - 1.
        for idx in range(subSum - 1, subSum):
            feature_file = "/tolFeature_%d.pkl" % idx
            graph_file = "/G_%d.pkl" % idx

            print(idx)
            print(tolFeaturePathName + feature_file)
            feature_table = pd.DataFrame(
                load_pickle(tolFeaturePathName, feature_file),
                columns=table_columns,
            )
            directed_graph = load_pickle(muldigPathName, graph_file)
            # Only the path under mulgPathName is printed; nothing is
            # loaded from it here.
            print(mulgPathName + graph_file)

            predictor_columns = [
                col for col in feature_table.columns
                if col not in label_columns
            ]
            train_x = dcopy(feature_table[predictor_columns])
            train_y = dcopy(feature_table[label_columns])
            pos_cnt = int(train_y.sum())
            neg_cnt = int(len(train_y) - train_y.sum())
            scipy_adj_matrix = get_scipy_adj_matrix(directed_graph)

            print('pos node cnts:', pos_cnt)
            print('neg node cnts:', neg_cnt, 'pos/all ratio:',
                  pos_cnt / (pos_cnt + neg_cnt))

            fGCNembedding = get_GCN_embedding(
                epoch=6,
                lr=0.0035,
                weight_decay=1e-6,
                esize=embedding_dim,
                random_seed=base_seed + run,
            )
            print("finish calculate embedding data!")
            save_pickle(fGCNembedding, GCNPathName + "%d" % testNum,
                        "/fGCNembedding_%d.pkl" % idx)

        emb_x, emb_y, _test_x, _test_y = get_input_data(
            GCNPathName + "%d" % testNum, "/fGCNembedding",
            True, False, True)
        last_step = subSum - 1
        feature_frame = pd.DataFrame(emb_x[:, last_step, :])
        label_frame = pd.DataFrame(emb_y[:, last_step, 0])

        run_metrics = lgb_train_model_with_split(
            feature_frame, label_frame, 2011)
        print_res(run_metrics)

        # Index-based on purpose: only the first four metrics are summed.
        for slot, _ in enumerate(metric_totals):
            metric_totals[slot] += run_metrics[slot]

    averaged = [total / repeats for total in metric_totals]
    gc.collect()
    return averaged
def LSTM_classification(modelFileName):
    """Classify on embeddings produced by a saved RNN model.

    The data is not split into train and test parts in this function: the
    pure features from ``get_pure_feature(False)`` are embedded with the
    model restored by ``load_RNNmodel`` and the slice at index
    ``subSum - 1`` of the second axis is passed to
    ``lgb_train_model_with_split``.

    Args:
        modelFileName: Forwarded unchanged to ``load_RNNmodel``.

    Returns:
        The value returned by ``lgb_train_model_with_split``. It is also
        printed through ``print_res``. Previously nothing was returned,
        unlike ``pure_feature_classification``.
    """
    new_model = load_RNNmodel(modelFileName)
    trainX, trainY, _, _ = get_pure_feature(False)
    print(trainX.shape, trainY.shape)

    # No train/test split here, so trainX is passed in the "test" slot as
    # well and the second returned embedding is discarded.
    trainX_emb, _ = get_autoEncoder_Embedding_Layer(
        trainX, trainX, new_model)

    last_step = subSum - 1
    trainX_2D = trainX_emb[:, last_step, :]
    trainY_1D = trainY[:, last_step, 0]

    result = lgb_train_model_with_split(
        pd.DataFrame(trainX_2D), pd.DataFrame(trainY_1D), 2011)
    print_res(result)
    return result
def GCN_LSTM_classification(modelFileName):
    """Classify on pure features joined with RNN-embedded GCN embeddings.

    Loads the stored GCN embeddings through ``get_input_data`` (directory
    ``GCNPathName + "<embSize>_<testNum>"``), runs them through the model
    restored by ``load_RNNmodel`` and concatenates the result with the pure
    features from ``get_pure_feature(False)`` (pure features first). The
    combined matrix at index ``subSum - 1`` is passed to
    ``lgb_train_model_with_split``.

    Args:
        modelFileName: Forwarded unchanged to ``load_RNNmodel``.

    Returns:
        The value returned by ``lgb_train_model_with_split`` for the last
        processed index. It is also printed through ``print_res``.
        Previously nothing was returned, unlike
        ``pure_feature_classification``.
    """
    new_model = load_RNNmodel(modelFileName)
    trainX, trainY, _, _ = get_input_data(
        GCNPathName + "%d_%d" % (embSize, testNum),
        "/fGCNembedding", True, False)
    ftrainX, _, _, _ = get_pure_feature(False)
    print(trainX.shape, trainY.shape)

    # The same array is passed in both slots; only the first returned
    # embedding is used.
    trainX_emb, _ = get_autoEncoder_Embedding_Layer(
        trainX, trainX, new_model)
    print(trainX_emb.shape)

    result = None
    # This range covers exactly one index: subSum - 1.
    for i in range(subSum - 1, subSum):
        inputX = np.concatenate(
            (ftrainX[:, i, :], trainX_emb[:, i, :]), axis=1)
        print(inputX.shape)
        # Labels are always taken from index subSum - 1, independent of i.
        trainY_1D = trainY[:, subSum - 1, 0]
        result = lgb_train_model_with_split(
            pd.DataFrame(inputX), pd.DataFrame(trainY_1D), 2011)
        print_res(result)
    return result
'AF5', 'AF6', 'AF7', 'AF8' ]) sp_muldG = load_pickle(muldigPathName, "/G_%d.pkl" % idx) #print(mulgPathName+"/G_%d.pkl" % idx) y_cols_name = ['label'] x_cols_name = [x for x in tolFeature_df.columns if x not in y_cols_name] global train_x, train_y train_x = dcopy(tolFeature_df[x_cols_name]) train_y = dcopy(tolFeature_df[y_cols_name]) pos_cnt, neg_cnt = int(train_y.sum()), int(len(train_y) - train_y.sum()) print("start!") get_trainx_trainy() gcn_res = GCN_classfication() print_res(gcn_res) # Deepwalk_classification def Deepwalk_classification(): global train_x, train_y dw_res, rcnt = [0, 0, 0, 0], 5 for i in tqdm(range((rcnt))): node_feas, labels = dcopy(train_x.values), dcopy(train_y.values) embe_feas = read_embeds(DeepwalkPathName_new + '/embeds_dw_%d.dat' % i) np_fea_lab = np.hstack((node_feas, embe_feas, labels)) columns_name = ['f%02d' % i for i in range(np_fea_lab.shape[1] - 1)] + ['label'] df_fea_lab = pd.DataFrame(data=np_fea_lab, columns=columns_name, dtype=float)