def main():
    """Plot one line per name in ``y_name`` and show the figure.

    The data matrix and the name-to-column mapping come from
    ``rf.read_data`` and the names come from ``rf.read_label``, using the
    paths supplied by ``StatObj``.  Each line is labelled with the column
    index it was taken from.
    """
    # NOTE(review): the meaning of the 0.5 argument is defined by
    # rf.read_data and is not visible from here.
    _dates, datas, indexs, _indexs_inv = rf.read_data(StatObj.data_path(), 0.5)
    _fp_label, y_name = rf.read_label(StatObj.label_path())

    for name in y_name:
        id_value = indexs[name]
        plt.plot(datas[:, id_value], label=str(id_value))

    # Every line already carries its own label.  Passing a string to
    # legend() would be read as a sequence of one-character labels, so the
    # legend is built from the line labels instead.
    plt.legend()
    plt.show()
def test():
    """Run feature selection on the data set and print what was kept.

    The data is read from the cache files ``../Data/datas.npz`` and
    ``../Data/datas.pkl`` when both exist; otherwise it is read through
    ``rf`` and the cache files are written.  The columns named in
    ``y_name`` are split off as targets and the remaining columns are the
    features.  ``SelectKBest`` with ``mutual_info_regression`` then keeps
    ``k = 20`` features, scored against the first target column.  Progress
    messages and information about the selected features are printed.
    """
    if os.path.exists("../Data/datas.npz") and os.path.exists(
            "../Data/datas.pkl"):
        # Cache hit: the arrays live in the .npz archive, the remaining
        # objects were pickled one after another into the .pkl file.
        with np.load('../Data/datas.npz') as obj:
            datas = obj['datas']
            dates = obj['dates']
        with open('../Data/datas.pkl', 'rb') as infile:
            # Must be read back in the same order they were dumped below.
            indexs = pickle.load(infile)
            labels = pickle.load(infile)
            y_name = pickle.load(infile)
    else:
        # Cache miss: read the source files ...
        labels, y_name = rf.read_label(label_path)
        dates, datas, indexs = rf.read_data(data_path, y_name, 0)
        # ... and write the cache for the next run.
        np.savez('../Data/datas.npz', datas=datas, dates=dates)
        with open('../Data/datas.pkl', 'wb') as outfile:
            pickle.dump(indexs, outfile)
            pickle.dump(labels, outfile)
            pickle.dump(y_name, outfile)
    print("load %d data" % datas.shape[0])

    # Separate features (x_data) from the predicted columns (y_data).
    y_index = [indexs[name] for name in y_name]
    y_data = datas[:, y_index]
    x_data = np.delete(datas, y_index, axis=1)
    print("val  : %d label  " % y_data.shape[1])
    print("\n----------------------------------------------")
    print("train: %d feature" % x_data.shape[1])

    # Separate the training and validation sets.
    # NOTE(review): 0.8 is presumably the training fraction -- confirm
    # against seperate_dataset.
    print("\nseperate data...\n")
    x_train, y_train, x_val, y_val = seperate_dataset(x_data, y_data, 0.8)
    print("train: %d cases" % x_train.shape[0])
    print("val  : %d cases" % x_val.shape[0])
    print("\n----------------------------------------------")

    k = 20
    # k is passed by keyword: recent scikit-learn releases accept it only
    # as a keyword argument, and older releases accept the keyword as well.
    feature_eng = SelectKBest(mutual_info_regression, k=k)
    # The reduced matrices are not consumed further down in this function.
    x_train_new = feature_eng.fit_transform(x_train, y_train[:, 0])
    x_val_new = feature_eng.transform(x_val)
    print("keep %d feature" % k)
    print("\n----------------------------------------------")

    # Integer column positions of the features that were kept.
    feat_selected = feature_eng.get_support(True)
    print("-----------")
    # NOTE(review): these positions refer to x_data, i.e. to datas after
    # the y columns were deleted, whereas indexs was built for datas
    # itself -- verify that the two numberings line up.
    for column in feat_selected:
        print(indexs.inv[column])
    print("\n----------------------------------------------")

    print("train model...\n")
    # print(labels)
    a = labels.set_index('Unnamed: 0')['def'].to_dict()
    for column in feat_selected:
        print(a[column])
def write():
    """Write one text line per name in ``y_name`` to ``small_var.txt``.

    For every name returned by ``rf.read_label`` the matching column of
    the data matrix is converted with ``str()`` and written on its own
    line, minus the first character and the last two characters of that
    string.  An existing ``small_var.txt`` is overwritten.
    """
    # NOTE(review): the meaning of the 0.5 argument is defined by
    # rf.read_data and is not visible from here.
    _dates, datas, indexs = rf.read_data(StatObj.data_path(), 0.5)
    _fp_label, y_name = rf.read_label(StatObj.label_path())

    # Mode 'w' already truncates the file, and the with-block closes it
    # even when a lookup or write below raises.
    with open("small_var.txt", 'w') as target:
        for name in y_name:
            id_value = indexs[name]
            lines = str(datas[:, id_value])
            # NOTE(review): [1:-2] drops the leading bracket and the last
            # TWO characters, i.e. one character of data in addition to
            # the closing bracket.  Kept as is because readers of this
            # file may rely on it -- confirm whether [1:-1] was intended.
            # If datas is a NumPy array, str() may also wrap or abbreviate
            # long columns.
            target.write(lines[1:-2])
            target.write('\n')
def _load_pickle(path):
    """Return the object unpickled from the file at *path*."""
    with open(path, 'rb') as infile:
        return pickle.load(infile)


def _dump_pickle(obj, path):
    """Pickle *obj* into the file at *path*, replacing any existing file."""
    with open(path, 'wb') as outfile:
        pickle.dump(obj, outfile)


def read_fast():
    """Return the data set, using the cache under ``../Data`` when present.

    When ``../Data/datas.npy`` exists the arrays and both index mappings
    are read back from the cache files.  Otherwise the data is read through
    ``rf`` from ``label_path`` / ``data_path`` and every cache file is
    written: ``datas.npy``, ``dates.npy``, ``indexs.pkl``,
    ``indexs_inv.pkl``, ``labels.pkl`` and ``y_name.pkl``.

    Returns:
        The tuple ``(dates, datas, indexs, indexs_inv)``.
    """
    # Only datas.npy is probed; the other cache files are assumed to have
    # been written together with it.
    if os.path.exists("../Data/datas.npy"):
        datas = np.load('../Data/datas.npy')
        dates = np.load('../Data/dates.npy')
        indexs = _load_pickle('../Data/indexs.pkl')
        indexs_inv = _load_pickle('../Data/indexs_inv.pkl')
        # labels.pkl and y_name.pkl are not read here: this function
        # neither uses nor returns their contents.
    else:
        labels, y_name = rf.read_label(label_path)
        dates, datas, indexs, indexs_inv = rf.read_data(data_path, y_name, 0)

        # Populate the cache for the next call.
        np.save('../Data/datas.npy', datas)
        np.save('../Data/dates.npy', dates)
        _dump_pickle(indexs, '../Data/indexs.pkl')
        _dump_pickle(indexs_inv, '../Data/indexs_inv.pkl')
        _dump_pickle(labels, '../Data/labels.pkl')
        _dump_pickle(y_name, '../Data/y_name.pkl')
    return dates, datas, indexs, indexs_inv
import numpy as np
from sklearn.linear_model import Ridge

import read_file as rf

# Locations of the data table and of the label/dictionary table.
data_path = "../Data_M.csv"
label_path = "../Dico_M.csv"

labels, y_name = rf.read_label(label_path)
dates, datas, indexs = rf.read_data(data_path, 0.3)

# Split the columns named in y_name (y_train) from all other columns
# (x_train).
y_index = [indexs[name] for name in y_name]
y_train = datas[:, y_index]
x_train = np.delete(datas, y_index, axis=1)

# Pair every feature row with the first y column taken two rows later.
lagged_x = x_train[:-2, :]
lagged_y = y_train[2:, 0]

clf = Ridge(alpha=1.0)
clf.fit(lagged_x, lagged_y)
# The score is computed on the same rows the model was fitted on.
print(clf.score(lagged_x, lagged_y))
print(clf.coef_)
# Load the data set, preferring the cache files under ../Data when both
# are present, then load the object pickled in kmeans.pkl.
if os.path.exists("../Data/datas.npz") and os.path.exists("../Data/datas.pkl"):
    # print() call form: valid on Python 3 and prints the same text on
    # Python 2, matching the print(...) call further down.
    print("0")
    # Cache hit: the arrays live in the .npz archive, the remaining
    # objects were pickled one after another into the .pkl file.
    with np.load('../Data/datas.npz') as obj:
        datas = obj['datas']
        dates = obj['dates']
    with open('../Data/datas.pkl', 'rb') as infile:
        # Must be read back in the same order they were dumped below.
        indexs = pickle.load(infile)
        labels = pickle.load(infile)
        y_name = pickle.load(infile)
else:
    print("1")
    # Cache miss: read the source files ...
    labels, y_name = rf.read_label(label_path)
    dates, datas, indexs = rf.read_data(data_path, y_name, 0)
    # ... and write the cache for the next run.
    np.savez('../Data/datas.npz', datas=datas, dates=dates)
    with open('../Data/datas.pkl', 'wb') as outfile:
        pickle.dump(indexs, outfile)
        pickle.dump(labels, outfile)
        pickle.dump(y_name, outfile)

# kmeans.pkl is read unconditionally; it is not created by this fragment.
with open('../Data/kmeans.pkl', 'rb') as infile:
    tf_class = pickle.load(infile)
print("load %d data" % datas.shape[0])
# separate feature(x_train) and prediction(y_train)