def predict(config, model_name='-1'):
    """Restore a saved GCN checkpoint, run it on the test batch and plot the result.

    Args:
        config: settings object; this function reads ``train_path``,
            ``adj_path``, ``nodes``, ``ts``, ``gl``, ``dl`` and ``bs``.
        model_name: sub-directory of ``./model/`` that holds
            ``best_model.ckpt``.

    Side effects:
        Prints the value returned by ``analysis`` and opens a matplotlib
        window showing the prediction and the reference series.
    """
    data, label = data_read(config.train_path)
    adjacency = get_adj(config.adj_path)
    x, y = normalize(data, label)

    # Restrict both the features and the adjacency matrix to the first
    # ``config.nodes`` nodes.
    x = x[:, :config.nodes]
    adj = adjacency[:config.nodes, :config.nodes]

    ph_adj = tf.placeholder(tf.float32, [config.nodes, config.nodes], 'adj')
    ph_data = tf.placeholder(tf.float32, [None, config.nodes, config.ts],
                             'data')
    ph_label = tf.placeholder(tf.float32, [None, 1], 'label')

    model = GCN(config.ts, 1, config.nodes, config.gl, config.dl)
    out = model(ph_data, ph_adj)

    batch_data, batch_label = get_batch(x, y, config.bs, config.ts, 'test')
    _data = batch_data.transpose([0, 2, 1])

    saver = tf.train.Saver()
    # The context manager releases the session's resources even when the
    # restore or the forward pass raises.
    with tf.Session() as sess:
        saver.restore(sess, './model/' + model_name + '/best_model.ckpt')
        prediction = sess.run(out,
                              feed_dict={
                                  ph_adj: adj,
                                  ph_data: _data,
                                  ph_label: batch_label[0]
                              })

    pre = un_normalize(prediction, label)
    target = label[:-config.ts]
    print(analysis(pre, target))

    plt.plot(pre)
    plt.plot(target)
    plt.show()
def __init__(self, chain, normalize=False):
    """Store a chain of points together with its normalised width and height.

    :param chain: a list of points (tuples)
    :param normalize: when true, keep the points returned by
        ``func.normalize``; otherwise keep ``chain`` itself
    """
    normalized_points, self.width, self.height = func.normalize(chain)
    self.points = normalized_points if normalize else chain
def train(config, model_name='-1'):
    """Train the GCN model and save a checkpoint under ``./model/<model_name>/``.

    Args:
        config: settings object; this function reads ``train_path``,
            ``adj_path``, ``nodes``, ``ts``, ``gl``, ``dl``, ``lr``,
            ``epoch`` and ``bs``.
        model_name: name used for both the TensorBoard log directory
            (``./logdir/<model_name>``) and the checkpoint directory.

    Side effects:
        Writes loss summaries and the graph to the log directory, prints the
        mean loss every 100 epochs and saves ``best_model.ckpt`` at the end.
    """
    data, label = data_read(config.train_path)
    adjacency = get_adj(config.adj_path)
    x, y = normalize(data, label)
    x = x[:, :config.nodes]
    x, y = trans_gcn(x, y, config.ts)
    adj = adjacency[:config.nodes, :config.nodes]

    ph_adj = tf.placeholder(tf.float32, [config.nodes, config.nodes], 'adj')
    ph_data = tf.placeholder(tf.float32, [None, config.nodes, config.ts],
                             'data')
    ph_label = tf.placeholder(tf.float32, [None, 1], 'label')

    model = GCN(config.ts, 1, config.nodes, config.gl, config.dl)
    out = model(ph_data, ph_adj)
    loss_op = compute_loss(out, ph_label)
    tf.summary.scalar('loss', loss_op)
    train_op = tf.train.AdamOptimizer(config.lr).minimize(loss_op)
    merge_op = tf.summary.merge_all()

    # Both the session and the summary writer hold OS resources; close them
    # deterministically so buffered events reach disk even on failure.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter('./logdir/' + model_name)
        try:
            writer.add_graph(sess.graph)
            saver = tf.train.Saver()

            _loss = []
            for i in range(config.epoch):
                batch_data, batch_label = get_batch(x, y, config.bs,
                                                    config.ts)
                for j in range(len(batch_label)):
                    _data = batch_data[j].transpose([0, 2, 1])
                    _, loss, summary = sess.run(
                        [train_op, loss_op, merge_op],
                        feed_dict={
                            ph_adj: adj,
                            ph_data: _data,
                            ph_label: batch_label[j]
                        })
                    _loss.append(loss)
                    writer.add_summary(summary, i)

                # Report the mean loss accumulated since the last report.
                if i % 100 == 0:
                    print('epoch====={}\t\t\tloss======{}\n\n'.format(
                        i, np.mean(_loss)))
                    _loss = []

            saver.save(sess, './model/' + model_name + '/best_model.ckpt')
        finally:
            writer.close()
def load_file(url_or_file, audio_url, file_audio_path, speed, port):
    """Convert an audio source to 16-bit samples and stream them to a serial device.

    Args:
        url_or_file: ``"url"`` to download ``audio_url`` first; any other
            value makes the function read ``file_audio_path``.
        audio_url: location handed to ``wget.download`` in URL mode.
        file_audio_path: local audio file used when not in URL mode.
        speed: baud rate; converted with ``int``.
        port: device name appended to ``/dev/``.

    The function finishes by calling ``quit()``, so control does not return
    to the caller.
    """
    ser = serial.Serial("/dev/" + str(port), int(speed), 8, 'N', 1)
    try:
        if str(url_or_file) == "url":
            url_file = "/tmp/yt.mp3"
            wget.download(str(audio_url), url_file)
            func.audio2wav(str(url_file), tmp_file, SampleRate, max_len)
        else:
            func.audio2wav(str(file_audio_path), tmp_file, SampleRate,
                           max_len)

        file_wav = wave.open(tmp_file, mode='rb')
        try:
            audio = func.file2mono(file_wav)
        finally:
            # Always release the handle and drop the temporary WAV file,
            # even when the mono conversion fails.
            file_wav.close()
            os.remove(tmp_file)

        audio = func.normalize(audio)

        # Each sample is serialised as a signed little-endian 16-bit value.
        payload = bytearray()
        for sample in audio:
            payload += sample.to_bytes(2, 'little', signed=True)

        ser.write(bytes(max_len * 44100))  # ? clear ram
        ser.write(payload)  # ! write data to ram
        ser.flush()
    finally:
        ser.close()
    quit()
# Build the normalised feature matrices for the train and dev sets, plus the
# 0/1 target vector for the train set.
numTrainData = len(trainDataList)
numDevData = len(devDataList)
numAttr = len(labelList)

trainDataArray = np.zeros((numTrainData, numAttr))
trainKeyArray = np.zeros((numTrainData, 1))
devDataArray = np.zeros((numDevData, numAttr))

# (max, min) bounds passed to normalize() for feature columns 0..3.
_columnBounds = (
    (yearMax, yearMin),
    (recordLMax, recordLMin),
    (binaryMax, binaryMin),
    (binaryMax, binaryMin),
)

for i in range(numTrainData):
    for _col, (_upper, _lower) in enumerate(_columnBounds):
        trainDataArray[i, _col] = normalize(trainDataList[i][_col], _upper,
                                            _lower)

# The key stays 0 unless the label is exactly 'yes'.
for i in range(numTrainData):
    if trainKeyList[i][0] == 'yes':
        trainKeyArray[i][0] = 1

for i in range(numDevData):
    for _col, (_upper, _lower) in enumerate(_columnBounds):
        devDataArray[i, _col] = normalize(devDataList[i][_col], _upper,
                                          _lower)
# Build the normalised feature matrices for the train and dev sets, plus the
# scaled target vector for the train set.
numTrainData = len(trainDataList)
numDevData = len(devDataList)
numAttr = len(labelList)

trainDataArray = np.zeros((numTrainData, numAttr))
trainKeyArray = np.zeros((numTrainData, 1))
devDataArray = np.zeros((numDevData, numAttr))

# Feature columns 0..4 all share the same grade bounds.
_gradeColumns = range(5)

for i in range(numTrainData):
    for _col in _gradeColumns:
        trainDataArray[i, _col] = normalize(trainDataList[i][_col], gradeMax,
                                            gradeMin)

# The target is the key converted to float and divided by 100.
for i in range(numTrainData):
    trainKeyArray[i, 0] = float(trainKeyList[i][0]) / 100

for i in range(numDevData):
    for _col in _gradeColumns:
        devDataArray[i, _col] = normalize(devDataList[i][_col], gradeMax,
                                          gradeMin)
metadata, plusone, 0) # Remove all texts written by more than one person from data # RAW DATA SETS NLtrain_y, NLtrain_X, NLtest_y, NLtest_X = func.natlan(metadata, data) GRtrain_y, GRtrain_X, GRtest_y, GRtest_X = func.grade(metadata, data) LEtrain_y, LEtrain_X, LEtest_y, LEtest_X = func.level(metadata, data) AUtrain_y20, AUtrain_X20, AUtest_y20, AUtest_X20 = func.author( metadata, data, 4, 20, 2) AUtrain_yAH, AUtrain_XAH, AUtest_yAH, AUtest_XAH = func.author( metadata, data, 4, 20, 2, (7, "AH")) AUtrain_y100, AUtrain_X100, AUtest_y100, AUtest_X100 = func.author( metadata, data, 4, 100, 2) # NORMALIZED DATA SETS NLtrain_Xn, NLtest_Xn = func.normalize(NLtrain_X, NLtest_X) GRtrain_Xn, GRtest_Xn = func.normalize(GRtrain_X, GRtest_X) LEtrain_Xn, LEtest_Xn = func.normalize(LEtrain_X, LEtest_X) AUtrain_Xn20, AUtest_Xn20 = func.normalize(AUtrain_X20, AUtest_X20) AUtrain_XnAH, AUtest_XnAH = func.normalize(AUtrain_XAH, AUtest_XAH) AUtrain_Xn100, AUtest_Xn100 = func.normalize(AUtrain_X100, AUtest_X100) filepath = '2539/documentation/' filename = 'BAWE.xls' mdata = pd.read_excel(filepath + filename, 'Sheet1') # Clean data mdata.grade[mdata.grade == ' '] = 'unknown' mdata.L1[mdata.L1 != 'English'] = 'Other' #----------------------------------------------------------------------------------------
# Raw measurement columns that are normalised and fed to the poly/plus/div
# feature generators.
_BASE_COLUMNS = ['辐照度', '风速', '风向', '温度', '压强', '湿度']

# (method passed to numerical_4_feature, suffix used in the column name)
_DAILY_METHODS = (('var', '差'), ('std', 'std'), ('mean', 'mean'))

# (time_period passed to numerical_4_feature, prefix used in the column name)
_DAILY_PERIODS = (('allday', ''), ('daytime', '白天'), ('nighttime', '夜晚'))


def _add_time_columns(frame, file_name):
    """Add integer year/month/day/hour/min columns parsed from '时间' in place."""
    frame['year'] = frame['时间'].apply(lambda x: x[0:4]).astype('int32')
    frame['month'] = frame['时间'].apply(
        lambda x: get_month(x, file_name)).astype('int32')
    frame['day'] = frame['时间'].apply(
        lambda x: get_day(x, file_name)).astype('int32')
    frame['hour'] = frame['时间'].apply(lambda x: get_hour(x)).astype('int32')
    frame['min'] = frame['时间'].apply(lambda x: get_min(x)).astype('int32')


def _time_of_day(frame):
    """Return the int32 series produced by applying get_time to '时间'."""
    return frame['时间'].apply(lambda x: get_time(x)).astype('int32')


def _dis2peak_feature(frame):
    """Return the normalised dis2peak feature computed on the irradiance column."""
    return normalize(
        dis2peak(_time_of_day(frame), frame['辐照度'], frame['year'],
                 frame['month'], frame['day']))


def _daily_stat(frame, column, method, time_period):
    """Call numerical_4_feature for one column/method/period combination."""
    return numerical_4_feature(frame[column],
                               frame['year'],
                               frame['month'],
                               frame['day'],
                               frame['辐照度'],
                               method=method,
                               time_period=time_period)


def _add_daily_stats(features, frame, column):
    """Add the nine var/std/mean x allday/daytime/nighttime columns; return their names."""
    names = []
    for method, suffix in _DAILY_METHODS:
        for time_period, prefix in _DAILY_PERIODS:
            name = prefix + column + suffix
            features[name] = _daily_stat(frame, column, method, time_period)
            names.append(name)
    return names


def _add_engineered_stats(features, frame):
    """Add the temperature, humidity, pressure, wind-speed and irradiance statistics."""
    temperature_names = _add_daily_stats(features, frame, '温度')
    features = add_poly_features(features, temperature_names)

    for column in ('湿度', '压强', '风速'):
        _add_daily_stats(features, frame, column)

    # Irradiance only gets the daily maximum plus daytime statistics.
    features['max辐照度'] = _daily_stat(frame, '辐照度', 'max', 'allday')
    features['白天辐照度差'] = _daily_stat(frame, '辐照度', 'var', 'daytime')
    features['白天辐照度mean'] = _daily_stat(frame, '辐照度', 'mean', 'daytime')
    features['白天辐照度std'] = _daily_stat(frame, '辐照度', 'std', 'daytime')
    return features


def first_feature_engineering(train_old):
    """Build the first-stage feature frames for one train/test file pair.

    The test file is read from ``path + test_list[i]``; ``path``,
    ``train_list``, ``test_list`` and ``i`` are module-level names.

    Compared with the previous hand-expanded version, two copy-paste
    mistakes are corrected: the all-day humidity mean ('湿度mean') is now
    computed from the humidity column instead of the temperature column,
    and the test-set '温度std' uses the all-day period like the train set.

    Args:
        train_old: raw training frame; year/month/day/hour/min columns are
            added to it in place.

    Returns:
        ``(train_features, test_features, label_1, label_final)`` where
        ``label_1`` is the normalised '实发辐照度' column and ``label_final``
        is the '实际功率' column of ``train_old``.
    """
    ##########################################################
    ################ load train & data preprocessing #########
    ##########################################################
    train_features = pd.DataFrame()
    _add_time_columns(train_old, train_list[i])

    train_features['month'] = normalize(train_old['month'])
    train_features['day'] = normalize(train_old['day'])
    train_features['time'] = normalize(_time_of_day(train_old))
    for column in _BASE_COLUMNS:
        train_features[column] = normalize(train_old[column])
    train_features['dis2peak_辐照度'] = _dis2peak_feature(train_old)

    train_features = add_poly_features(train_features, list(_BASE_COLUMNS))
    train_features = add_plus_features(train_features, list(_BASE_COLUMNS))
    train_features = add_div_features(train_features, list(_BASE_COLUMNS))

    label_final = train_old['实际功率']  # label
    label_1 = normalize(train_old['实发辐照度'])

    train_features = _add_engineered_stats(train_features, train_old)

    print(train_list[i])
    print(train_features.columns.tolist())
    print(len(train_features.columns.tolist()))

    #########################################################
    ############# load test & data preprocessing ############
    #########################################################
    test_old = pd.read_csv(path + test_list[i])
    test_features = pd.DataFrame()
    _add_time_columns(test_old, test_list[i])

    # The test month/day features are sliced straight from the timestamp
    # text, and dis2peak precedes the base columns, as before.
    test_features['month'] = normalize(
        test_old['时间'].apply(lambda x: x[5:7]).astype('int32'))
    test_features['day'] = normalize(
        test_old['时间'].apply(lambda x: x[8:10]).astype('int32'))
    test_features['time'] = normalize(_time_of_day(test_old))
    test_features['dis2peak_辐照度'] = _dis2peak_feature(test_old)
    for column in _BASE_COLUMNS:
        test_features[column] = normalize(test_old[column])

    test_features = add_poly_features(test_features, list(_BASE_COLUMNS))
    test_features = add_plus_features(test_features, list(_BASE_COLUMNS))
    test_features = add_div_features(test_features, list(_BASE_COLUMNS))

    test_features = _add_engineered_stats(test_features, test_old)

    print(test_list[i])
    print(test_features.columns.tolist())
    print(len(test_features.columns.tolist()))

    if len(train_features.columns) != len(test_features.columns):
        print(
            "\n \n warning : \n please check your features in your first train!!! \n train features and test features don't match to each other!!! \n"
        )
        print("train features : ", train_features.columns.tolist())
        print("test features : ", test_features.columns.tolist())
        os.system("pause")

    return train_features, test_features, label_1, label_final
_RULE = "-" * 96  # decorative separator for nicer console output


def _column(name):
    """Return one dataset column as a fresh plain Python list."""
    return dataset[name].values.tolist()


print(_RULE)
print("Loading dataset".center(96))

# Load the CSV file with pandas. The file must be in the project directory
# (or change the path); the first column of the dataset is the index column.
path = './creditrisk.csv'
dataset = pd.read_csv(path, index_col=0)

print(_RULE)
print("Normalizing data".center(96))

# Normalise the data to the 0..1 range so the neural network does not favour
# variables that are multiples of others. normalize() lives in func.py.
age = normalize(_column('Age'))
cr_amnt = normalize(_column('Credit amount'))
duration = normalize(_column('Duration'))
risk = _column('Risk')

# For some purposes (e.g. user input) the dataset's min and max are needed.
age_min = min(_column('Age'))
age_max = max(_column('Age'))
cra_min = min(_column('Credit amount'))
cra_max = max(_column('Credit amount'))
dur_min = min(_column('Duration'))
dur_max = max(_column('Duration'))

# The risk column holds "good" and "bad" values, so it has to be remapped to
# 0 and 1 for the sigmoid function to produce a usable output.
# Load the pickled metadata and feature matrix, closing each file as soon as
# it has been read.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# confirm that both files come from a trusted source.
with open("metadata.p", "rb") as metadata_file:
    metadata = pickle.load(metadata_file)
with open("dataset.p", "rb") as dataset_file:
    data = pickle.load(dataset_file)

# Get indexes for all texts written by more than one person
plusone = np.where(metadata[:, 9] != "1")[0]
# Remove all texts written by more than one person from data
data, metadata = np.delete(data, plusone, 0), np.delete(metadata, plusone, 0)

# RAW DATA SETS
NLtrain_y, NLtrain_X, NLtest_y, NLtest_X = func.natlan(metadata, data)
GRtrain_y, GRtrain_X, GRtest_y, GRtest_X = func.grade(metadata, data)
LEtrain_y, LEtrain_X, LEtest_y, LEtest_X = func.level(metadata, data)
AUtrain_y20, AUtrain_X20, AUtest_y20, AUtest_X20 = func.author(
    metadata, data, 4, 20, 2)
AUtrain_yAH, AUtrain_XAH, AUtest_yAH, AUtest_XAH = func.author(
    metadata, data, 4, 20, 2, (7, "AH"))
AUtrain_y100, AUtrain_X100, AUtest_y100, AUtest_X100 = func.author(
    metadata, data, 4, 100, 2)

# NORMALIZED DATA SETS
NLtrain_Xn, NLtest_Xn = func.normalize(NLtrain_X, NLtest_X)
GRtrain_Xn, GRtest_Xn = func.normalize(GRtrain_X, GRtest_X)
LEtrain_Xn, LEtest_Xn = func.normalize(LEtrain_X, LEtest_X)
AUtrain_Xn20, AUtest_Xn20 = func.normalize(AUtrain_X20, AUtest_X20)
AUtrain_XnAH, AUtest_XnAH = func.normalize(AUtrain_XAH, AUtest_XAH)
AUtrain_Xn100, AUtest_Xn100 = func.normalize(AUtrain_X100, AUtest_X100)

# TREE SELECTION FEATURES EVALUATION PLOTS
func.inspect_tree_selection(NLtrain_Xn, NLtrain_y, "Native language")
func.inspect_tree_selection(GRtrain_Xn, GRtrain_y, "Grade")
func.inspect_tree_selection(LEtrain_Xn, LEtrain_y, "Academic level")
func.inspect_tree_selection(AUtrain_Xn20, AUtrain_y20, "Author 20")
func.inspect_tree_selection(AUtrain_XnAH, AUtrain_yAH, "Author AH20")
func.inspect_tree_selection(AUtrain_Xn100, AUtrain_y100, "Author 100")

#----------------------------------------------------------------------------------------
# Path of the converted file
start_time = time.time()
func.audio2wav(in_file, tmp_file, SampleRate, max_len)

# Wave_read object for the converted file
file_wav = wave.open(tmp_file, mode='rb')

# Stereo to mono
audio = func.file2mono(file_wav)
file_wav.close()
os.remove(tmp_file)

# Normalization
audio = func.normalize(audio)

# Flatten every sample into its two signed little-endian bytes.
num = []
for i, sample in enumerate(audio):
    a = sample.to_bytes(2, 'little', signed=True)
    num.extend((a[0], a[1]))

# (original author's note: this works absolutely fine)
listofzeros = [0] * max_len * 44100

print("Clearing RAM")
ser.write(listofzeros)  # ? clear ram
print("Writing RAM")