def main():
    """Load the German credit data, train kNN/LR models, and chart the results.

    Reads the raw CSV from ``data_map.datain``, cleans it via
    ``data_process.data_clean``, trains each model over its candidate
    hyper-parameters, and saves an accuracy/time bar chart to
    ``data_map.dataout``.
    """
    print("have a test")
    print(os.path)
    print(os.getcwd())

    # Load and clean the raw dataset.
    raw = pd.read_csv(os.path.join(data_map.datain, 'german_credit_data.csv'))
    print(raw.info())
    print(raw.head())
    cleaned = data_process.data_clean(raw)
    print(cleaned.info())
    print(cleaned.head())

    # Hold out a quarter of the rows for evaluation (fixed seed for repeatability).
    train_data, test_data = train_test_split(cleaned, test_size=1/4,
                                             random_state=10)
    data_process.dataset_view(train_data, test_data)
    X_train, y_train = data_process.data_trans(train_data)
    X_test, y_test = data_process.data_trans(test_data)

    # Candidate hyper-parameters per model name.
    candidates = {'kNN': [5, 10, 15], 'LR': [0.01, 1, 100]}
    results_df = pd.DataFrame(columns=['Accuracy (%)', 'Time (s)'],
                              index=list(candidates.keys()))
    results_df.index.name = 'Model'

    # Train each model over its parameter grid and record the best result.
    for name, params in candidates.items():
        best_acc, best_model, mean_duration = data_process.train_test_model(
            X_train, y_train, X_test, y_test, params, name)
        results_df.loc[name, 'Accuracy (%)'] = best_acc * 100
        results_df.loc[name, 'Time (s)'] = mean_duration

    # Side-by-side bar charts: accuracy (left) and training time (right).
    plt.figure()
    ax1 = plt.subplot(1, 2, 1)
    results_df.plot(y=['Accuracy (%)'], kind='bar', ylim=[60, 100], ax=ax1,
                    title='Accuracy(%)', legend=False)
    ax2 = plt.subplot(1, 2, 2)
    results_df.plot(y=['Time (s)'], kind='bar', ax=ax2, title='Time(s)',
                    legend=False)
    plt.tight_layout()
    plt.savefig(os.path.join(data_map.dataout, 'result.png'))
# FIX: sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn import metrics
import tensorflow as tf

from data_process import data_clean

# Cleaned feature matrix for all rows and the full frame carrying labels.
full_X, full = data_clean()

sourceRow = 891  # size of the labelled (training) portion of the dataset

# Labelled rows: features and target.
source_X = full_X.loc[0:sourceRow - 1, :]
source_y = full.loc[0:sourceRow - 1, 'Survived']
# Unlabelled rows reserved for later prediction.
pred_X = full_X.loc[sourceRow:, :]

# 75% of the labelled rows for training, 25% held out for evaluation.
train_X, test_X, train_Y, test_Y = train_test_split(source_X, source_y,
                                                    train_size=.75)

feature_columns = [tf.contrib.layers.real_valued_column("", dimension=27)]
model = tf.contrib.learn.DNNClassifier(hidden_units=[10, 20, 10],
                                       feature_columns=feature_columns)

# BUG FIX: the original fitted on the held-out TEST split (model.fit(test_X,
# test_Y)), so the reported accuracy was an in-sample score on the evaluation
# data. Train on the training split; evaluate on the test split.
model.fit(train_X, train_Y)
score = metrics.accuracy_score(test_Y, model.predict(test_X))
print(score)
def run_it():
    """Collect NSE/BSE market data over sockets, clean it, upload the files,
    and return a log line with the IST start/end timestamps.

    Returns:
        str: work log of the form ' (IST) start : <iso> end : <iso>' for
        flask's @app.route('/').
    """
    # Start timestamp in IST (UTC + 5:30).
    start_time = (datetime.datetime.utcnow()
                  + datetime.timedelta(hours=5, minutes=30)).isoformat()

    # Instrument lists for both exchanges.
    nse_instruments, bse_instruments = data_process.instruments_list()

    # Six market-data ('-m') sockets followed by six snapquote ('-s') sockets
    # per exchange.
    nse_quote_type = ['nse-m'] * 6 + ['nse-s'] * 6
    bse_quote_type = ['bse-m'] * 6 + ['bse-s'] * 6

    # Create the socket objects (NSE/BSE interleaved, as before).
    nse_sockets = []
    bse_sockets = []
    for idx in range(12):
        nse_sockets.append(alice_obj.socket(nse_quote_type[idx]))
        bse_sockets.append(alice_obj.socket(bse_quote_type[idx]))

    # Subscribe in chunks of 250 instruments. The market and snapquote
    # sockets (idx and idx+6) share the same chunk, hence idx % 6. Python
    # slices clamp past the end of the list, so no explicit length check is
    # needed.
    for idx in range(12):
        chunk = idx % 6
        lo = 250 * chunk
        hi = 250 * (chunk + 1)
        nse_sockets[idx].subscribe(nse_instruments[lo:hi])
        bse_sockets[idx].subscribe(bse_instruments[lo:hi])

    # Let the sockets stream data. NOTE(review): the original comment said
    # this should be 10 minutes, but the value is 5 seconds — confirm which
    # is intended.
    time.sleep(5)

    # Clean the collected data into files (exchange tag used for file names).
    nse_file_name = data_process.data_clean(nse_sockets, 'nse')
    bse_file_name = data_process.data_clean(bse_sockets, 'bse')

    # Upload, then remove the local copies.
    upload.upload(nse_file_name)
    upload.upload(bse_file_name)
    os.remove(nse_file_name)
    os.remove(bse_file_name)

    # End timestamp in IST.
    end_time = (datetime.datetime.utcnow()
                + datetime.timedelta(hours=5, minutes=30)).isoformat()

    # Work log returned to flask's @app.route('/').
    work_logs = ' (IST) ' + 'start : ' + start_time + ' end : ' + end_time
    return work_logs