Exemple #1
0
def arma_ex(term='first'):
    """Forecast every test road with the time-series model and dump one CSV each.

    ``get_testroad_adjoin`` yields a ``pre_id -> instand_id`` mapping. For each
    pair, the flow series of ``instand_id`` is passed to ``predict``; the
    forecast has its missing values dropped, is truncated to whole numbers,
    is reduced to the timestamps whose minute is >= 30, and is written to a
    per-``term`` file named ``pred_<pre_id>.csv`` with the columns
    ``date, crossroadID, timeBegin, value``.

    Args:
        term: Passed to ``PreProcessor`` and used as the output
            sub-directory name.

    Roads for which loading or prediction raises are reported on stdout and
    skipped; the remaining roads are still processed.
    """
    order = ['date', 'crossroadID', 'timeBegin', 'value']
    prp = PreProcessor(term)  # data manager
    preMapping = get_testroad_adjoin(prp)
    for pre_id, instand_id in tqdm.tqdm(preMapping.items()):
        try:
            pred_pre = predict(prp.get_roadflow_by_road(instand_id))
        except Exception as e:
            # Deliberate best effort: one failing road must not abort the run.
            print(instand_id, end='\t')
            print(e)
            continue
        pred_pre = pred_pre.dropna(axis=0, how="any")
        # Truncate every value in a single step instead of calling int() on
        # one-element Series slices (deprecated in pandas). Casting back keeps
        # the dtype the element-wise assignment used to leave in place, so the
        # values written to the CSV are formatted as before.
        pred_pre = pred_pre.map(int).astype(pred_pre.dtype)

        pred = pd.DataFrame(pred_pre.values, columns=['value'])
        pred['timestamp'] = pred_pre.index
        pred['date'] = pred['timestamp'].apply(lambda x: x.strftime('%d'))
        pred['timeBegin'] = pred['timestamp'].apply(
            lambda x: x.strftime('%H:%M'))
        pred['crossroadID'] = instand_id
        minutes = pred['timestamp'].apply(lambda x: int(x.strftime('%M')))
        # Keep only the second half of each hour and the output columns.
        pred = pred.loc[minutes >= 30, order]
        # NOTE(review): the path uses Windows separators and the target
        # directory is not created here.
        pred.to_csv(r'data\tmp\{}\pred_{}.csv'.format(term, pre_id),
                    index=False)
Exemple #2
0
def plot_roadflow():
    """Plot the traffic-flow time series of the first crossroad for day 3.

    The data comes from ``PreProcessor.get_roadflow``, which returns the raw
    flow table together with a mapping of flow time series; only the series
    under the first key of that mapping is drawn.
    """
    # ****** load data ******
    day = 3
    manager = PreProcessor()  # data manager
    # raw traffic-flow table, traffic-flow time-series data
    _raw_table, series_by_key = manager.get_roadflow(day)
    # ***** plotting example *****
    all_keys = list(series_by_key.keys())
    key = all_keys[0]
    flow_series = series_by_key[key]
    flow_series.plot()
    plt.title(f'{day}号交通口{key}车流量时序图')
    plt.ylabel('车流量/5min')
    plt.xlabel('时间/t')
    plt.show()
Exemple #3
0
def regression_ex(term='final'):
    """Fit a linear regression on the training data and predict the test set.

    The model is fitted on the first column of ``train_x`` only and then
    applied to ``test_x``.

    Args:
        term: Passed to ``PreProcessor`` to select which data to load.

    Returns:
        The predictions produced by ``LinearRegression.predict`` for
        ``test_x``.
    """
    from sklearn.linear_model import LinearRegression

    prp = PreProcessor(term)  # data manager
    train_x, train_y, test_x = prp.load_traindata()
    # Train the model on the first feature column only.
    lr = LinearRegression()
    lr.fit(train_x.iloc[:, 0:1].values, train_y)
    # NOTE(review): the model is fitted on one column but predicts from all
    # columns of test_x; this only works if test_x has exactly one column.
    # Confirm against load_traindata().
    test_y = lr.predict(test_x.values)
    return test_y
Exemple #4
0
def test_hmm_all(
	train_set_path="C:\\Users\\DIAOSHUO\\Dropbox\\SNLP\\cs134assn2\\np_chunking_wsj_15_18_train",
	test_set_path="C:\\Users\\DIAOSHUO\\Dropbox\\SNLP\\cs134assn2\\np_chunking_wsj_20_test"
):
	"""
	Run the entire HMM workflow end to end.

	A PreProcessor supplies the label codebook, the feature codebook and the
	instance lists of both datasets; the HMM is then exercised with three
	evaluation routines: a train/test split, n-fold cross validation and a
	plain train-then-test run.

	Args:
		train_set_path: Path of the training dataset. Defaults to the
			location that used to be hard-coded in this function.
		test_set_path: Path of the test dataset. Defaults to the location
			that used to be hard-coded in this function.
	"""
	# Preprocess
	pp = PreProcessor()
	hmm = HMM()
	# The codebooks are attached before the instance lists are built, exactly
	# as in the original call order.
	hmm.label_codebook = pp.get_label_codebook()
	hmm.feature_codebook = pp.get_feature_codebook()
	train_instance_list = pp.get_instance_list(train_set_path)
	test_instance_list = pp.get_instance_list(test_set_path)

	# Run the three kinds of test.
	split_train_test(hmm, train_instance_list, [0.8, 0.2])
	n_fold_cross_validation(hmm, train_instance_list)
	simple_train_test(hmm, train_instance_list, test_instance_list)
Exemple #5
0
	def test_hmm(self):
		"""
		Train on the preprocessor's instance list and print the learned tables.

		The label and feature codebooks are taken from a fresh PreProcessor,
		the model is trained, its count tables and matrices are printed, and
		up to the first ten instances are passed to
		``test_classify_instance``.
		"""
		pp = PreProcessor()
		pp.test_preprocess()
		instance_list = pp.get_instance_list()
		self.label_codebook = pp.get_label_codebook()
		self.feature_codebook = pp.get_feature_codebook()
		self.train(instance_list)
		# Single-argument print(...) produces the same output under Python 2
		# and is valid syntax under Python 3.
		print("\ntransition_count_table--------------------")
		print(self.transition_count_table)
		print("\ntransition_matrix-------------------------")
		print(self.transition_matrix)
		print("\ninitial_state_count_table------------------")
		print(self.initial_state_count_table)
		print("\ntermination_state_count_table------------------")
		print(self.termination_state_count_table)
		print("\nemission matrix----------------------------")
		print(self.emission_matrix)

		# Slice instead of indexing 0..9 so that fewer than ten instances do
		# not raise IndexError.
		for instance in instance_list[:10]:
			self.test_classify_instance(instance)
Exemple #6
0
        100193, 100066, 100457, 100343, 100217, 100434, 100249, 100316, 100329,
        100019, 100340, 100041, 100069
    ]
    keylst = [val for val in keylst for i in range(3024)]
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_squared_error, r2_score
    prp = PreProcessor(term)  # 数据管理器
    train_x, train_y, test_x = prp.load_traindata()
    # 训练模型
    lr = LinearRegression()
    # print(train_x.iloc[:, 0:1])
    lr.fit(train_x.iloc[:, 0:1].values, train_y)
    test_y = lr.predict(test_x.values)
    # print(test_y)
    return test_y


# Script entry point: build the data manager and the feature-engineering
# object for the selected competition stage, then run the regression example.
if __name__ == '__main__':
    term = 'final'  # preliminary round: 'first'; second round: 'final'
    # term = 'first'  # preliminary round: 'first'; second round: 'final'
    prp = PreProcessor(term)  # data manager
    # train_x, train_y, test_x = prp.load_traindata()
    # arma_ex(term)  # time-series model
    fe = FeatureEn(prp)
    # fe.get_train_data()
    # keylst = fe.get_text_data()
    # print(keylst)
    # ap(fe)
    regression_ex(term)
Exemple #7
0
import os

# Select the GPU before pre_process is imported.
# NOTE(review): presumably pre_process pulls in a CUDA-backed framework that
# reads these variables at import time; confirm.
# The variable name was misspelled ('CUDA_DIVICE_ORDER'), so the ordering
# setting was silently ignored and the index below could refer to a different
# physical device than intended.
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
from pre_process import PreProcessor

wav_dir = './NSynth'

# Experiment A: run it on wav_dir, then compute the distance mean/std from
# the list passed in (empty here).
A_dist_list = []
A = PreProcessor()
A.experiment_A(wav_dir=wav_dir)
A.get_dist_meanstd(dist_list=A_dist_list)

# Experiment B: same procedure with a fresh PreProcessor.
B_dist_list = []
B = PreProcessor()
B.experiment_B(wav_dir=wav_dir)
B.get_dist_meanstd(dist_list=B_dist_list)