def _format_prediction(pred_series, crossroad_id):
    """Build the per-crossroad output table from a predicted series.

    Args:
        pred_series: pandas Series of predicted values without NaN; its
            index entries must support ``strftime`` (they are formatted as
            day, ``HH:MM`` and minute).
        crossroad_id: value written to every row of the ``crossroadID``
            column.

    Returns:
        DataFrame with columns ``date``, ``crossroadID``, ``timeBegin`` and
        ``value``, restricted to rows whose timestamp minute is >= 30.
    """
    # Truncate toward zero in one vectorised step (the per-element
    # int(...) loop did the same thing), then cast back so the stored
    # dtype -- and therefore the CSV formatting -- stays as it was.
    values = pred_series.astype(int).astype(pred_series.dtype).values

    stamps = pd.Series(pred_series.index)
    minutes = stamps.apply(lambda ts: int(ts.strftime('%M')))
    table = pd.DataFrame({
        'date': stamps.apply(lambda ts: ts.strftime('%d')),
        'crossroadID': crossroad_id,
        'timeBegin': stamps.apply(lambda ts: ts.strftime('%H:%M')),
        'value': values,
    })
    # Select rows and columns in a single .loc call instead of filtering
    # and then dropping in place, which operated on a slice of the frame.
    return table.loc[(minutes >= 30).values,
                     ['date', 'crossroadID', 'timeBegin', 'value']]


def arma_ex(term='first'):
    """Predict flow for every mapped road and write one CSV per key.

    For each ``pre_id -> instand_id`` pair returned by
    ``get_testroad_adjoin``, ``predict`` is run on the road flow of
    ``instand_id``; the result is written to
    ``data/tmp/<term>/pred_<pre_id>.csv``. Pairs for which prediction raises
    are reported on stdout and skipped.

    Args:
        term: passed to ``PreProcessor`` and used as the output
            sub-directory name.
    """
    import os  # local import: only needed to build a portable output path

    prp = PreProcessor(term)  # data manager
    pre_mapping = get_testroad_adjoin(prp)
    out_dir = os.path.join('data', 'tmp', str(term))

    for pre_id, instand_id in tqdm.tqdm(pre_mapping.items()):
        try:
            pred_pre = predict(prp.get_roadflow_by_road(instand_id))
        except Exception as e:
            # Best effort: report the failing road and carry on.
            print(instand_id, e, sep='\t')
            continue

        pred_pre = pred_pre.dropna(axis=0, how="any")
        pred = _format_prediction(pred_pre, instand_id)
        pred.to_csv(os.path.join(out_dir, 'pred_{}.csv'.format(pre_id)),
                    index=False)
def plot_roadflow():
    """Plot the flow time series of the first crossroad for day 3."""
    # ---- load data ----
    day = 3
    manager = PreProcessor()  # data manager
    # get_roadflow returns the raw flow table and the flow time-series
    # mapping; only the latter is plotted here.
    _raw_table, flow_by_key = manager.get_roadflow(day)

    # ---- plotting example ----
    key = list(flow_by_key.keys())[0]
    flow_by_key[key].plot()

    plt.title(f'{day}号交通口{key}车流量时序图')
    plt.xlabel('时间/t')
    plt.ylabel('车流量/5min')
    plt.show()
def regression_ex(term='final'):
    """Fit a linear regression on the training data and predict the test set.

    Args:
        term: passed to ``PreProcessor`` to select the data set.

    Returns:
        The array returned by ``LinearRegression.predict`` for ``test_x``.
    """
    from sklearn.linear_model import LinearRegression

    prp = PreProcessor(term)  # data manager
    train_x, train_y, test_x = prp.load_traindata()

    # Train the model on the first feature column only.
    # NOTE(review): predict() below receives every column of test_x, so this
    # only works when test_x has exactly one column -- confirm against
    # PreProcessor.load_traindata.
    lr = LinearRegression()
    lr.fit(train_x.iloc[:, 0:1].values, train_y)
    return lr.predict(test_x.values)
def test_hmm_all():
    """Run the whole HMM workflow on the configured data sets.

    A PreProcessor supplies the label codebook, the feature codebook and the
    instance lists; the HMM is then passed through three evaluations: an
    80/20 split, n-fold cross validation, and a plain train/test run.
    """
    # Data set locations -- adjust these to the local checkout.
    train_set_path = "C:\\Users\\DIAOSHUO\\Dropbox\\SNLP\\cs134assn2\\np_chunking_wsj_15_18_train"
    test_set_path = "C:\\Users\\DIAOSHUO\\Dropbox\\SNLP\\cs134assn2\\np_chunking_wsj_20_test"

    # Preprocessing: hand the codebooks to the model, then load instances.
    preprocessor = PreProcessor()
    model = HMM()
    model.label_codebook = preprocessor.get_label_codebook()
    model.feature_codebook = preprocessor.get_feature_codebook()

    training_instances = preprocessor.get_instance_list(train_set_path)
    held_out_instances = preprocessor.get_instance_list(test_set_path)

    # The three evaluation modes, in order.
    split_train_test(model, training_instances, [0.8, 0.2])
    n_fold_cross_validation(model, training_instances)
    simple_train_test(model, training_instances, held_out_instances)
def test_hmm(self):
    """Train on the preprocessor's instances and dump the learned tables.

    Builds a PreProcessor, copies its codebooks onto ``self``, trains, prints
    the count tables and matrices, then classifies up to the first ten
    instances.
    """
    pp = PreProcessor()
    pp.test_preprocess()
    instance_list = pp.get_instance_list()
    self.label_codebook = pp.get_label_codebook()
    self.feature_codebook = pp.get_feature_codebook()
    self.train(instance_list)

    # (attribute, header) pairs; headers are kept exactly as before.
    # Single-argument print(...) behaves the same on Python 2 and 3.
    reports = (
        ('transition_count_table',
         "\ntransition_count_table--------------------"),
        ('transition_matrix',
         "\ntransition_matrix-------------------------"),
        ('initial_state_count_table',
         "\ninitial_state_count_table------------------"),
        ('termination_state_count_table',
         "\ntermination_state_count_table------------------"),
        ('emission_matrix',
         "\nemission matrix----------------------------"),
    )
    for attr_name, header in reports:
        print(header)
        print(getattr(self, attr_name))

    # Slice rather than index 0..9 so fewer than ten instances is not an
    # IndexError.
    for instance in instance_list[:10]:
        self.test_classify_instance(instance)
100193, 100066, 100457, 100343, 100217, 100434, 100249, 100316, 100329, 100019, 100340, 100041, 100069 ] keylst = [val for val in keylst for i in range(3024)] from sklearn.model_selection import train_test_split from sklearn.linear_model import LinearRegression from sklearn.metrics import mean_squared_error, r2_score prp = PreProcessor(term) # 数据管理器 train_x, train_y, test_x = prp.load_traindata() # 训练模型 lr = LinearRegression() # print(train_x.iloc[:, 0:1]) lr.fit(train_x.iloc[:, 0:1].values, train_y) test_y = lr.predict(test_x.values) # print(test_y) return test_y if __name__ == '__main__': term = 'final' # 初赛:first;复赛:final # term = 'first' # 初赛:first;复赛:final prp = PreProcessor(term) # 数据管理器 # train_x, train_y, test_x = prp.load_traindata() # arma_ex(term) # 时序模型 fe = FeatureEn(prp) # fe.get_train_data() # keylst = fe.get_text_data() # print(keylst) # ap(fe) regression_ex(term)
import os

# GPU selection. The variable name is CUDA_DEVICE_ORDER; the previous
# spelling was not a name CUDA reads, so the PCI_BUS_ID ordering never took
# effect and CUDA_VISIBLE_DEVICES was interpreted under the default ordering.
# NOTE(review): these are set before importing pre_process, presumably
# because that import initialises a CUDA-backed library -- keep this order.
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

from pre_process import PreProcessor

wav_dir = './NSynth'

# Experiment A
A_dist_list = []
A = PreProcessor()
A.experiment_A(wav_dir=wav_dir)
A.get_dist_meanstd(dist_list=A_dist_list)

# Experiment B
B_dist_list = []
B = PreProcessor()
B.experiment_B(wav_dir=wav_dir)
B.get_dist_meanstd(dist_list=B_dist_list)