def OpenFileDialog(self):
    self.remove_columnNames_from_comboBox(self.cmbx_x_axis_data)
    self.remove_columnNames_from_comboBox(self.cmbx_y_axis_data)
    # PyQt5's getOpenFileName returns a (path, selected_filter) tuple;
    # ('', '') means the dialog was cancelled
    open_file_dialog = QFileDialog.getOpenFileName(self, 'Open file', "", "CSV files (*.csv)")
    if open_file_dialog == ('', ''):
        print(f'>>Info: No file was selected: {self.fname}')
        # self.fname = None
        # self.dataframe = pd.DataFrame()
    else:
        self.fname = open_file_dialog[0]
        print(f'>>Info: Selected file: {self.fname}')
        self.file_size = str(os.path.getsize(self.fname) / 1e6)  # size in MB, kept as a string
        self.dataframe = read_data(self.fname, pd.DataFrame()).data()
        self.file_rows, self.file_cols = self.dataframe.shape
        # show the dataframe in the table view and refresh the axis selectors
        model = pandasModel(self.dataframe)
        self.table_data.setModel(model)
        self.textBox_filePath.setText(self.fname)
        self.textBox_fileSize.setText(self.file_size)
        self.add_columnNames_to_comboBox(self.cmbx_x_axis_data, self.dataframe)
        self.add_columnNames_to_comboBox(self.cmbx_y_axis_data, self.dataframe)
        self.textBox_fileColumns.setText(str(self.file_cols))
        self.textBox_fileRows.setText(str(self.file_rows))
def get_single_image(infile, nth_image):
    # read in the aps file; it comes in as shape (512, 620, 16)
    img = read_data(infile)
    # transpose so that the slice index is the first dimension, shape (16, 620, 512)
    img = img.transpose()
    # flip vertically so the image is displayed right side up
    return np.flipud(img[nth_image])
def main():
    dates, datas, indexs, indexs_inv = rf.read_data(StatObj.data_path(), 0.5)
    fp_label, y_name = rf.read_label(StatObj.label_path())
    # plot each labelled series, keyed by its column index
    for name in y_name:
        id_value = indexs[name]
        plt.plot(datas[:, id_value], label=str(id_value))
    plt.legend()  # labels were already set via the label= keyword above
    plt.show()
def test():
    if os.path.exists("../Data/datas.npz") and os.path.exists("../Data/datas.pkl"):
        # load cached data
        with np.load('../Data/datas.npz') as obj:
            datas = obj['datas']
            dates = obj['dates']
        with open('../Data/datas.pkl', 'rb') as infile:
            indexs = pickle.load(infile)
            labels = pickle.load(infile)
            y_name = pickle.load(infile)
    else:
        # load data from the CSV sources
        labels, y_name = rf.read_label(label_path)
        dates, datas, indexs = rf.read_data(data_path, y_name, 0)
        # cache the parsed data for the next run
        np.savez('../Data/datas.npz', datas=datas, dates=dates)
        with open('../Data/datas.pkl', 'wb') as outfile:
            pickle.dump(indexs, outfile)
            pickle.dump(labels, outfile)
            pickle.dump(y_name, outfile)

    print("load %d data" % datas.shape[0])

    # separate features (x_data) from prediction targets (y_data)
    y_index = [indexs[i] for i in y_name]
    y_data = datas[:, y_index]
    x_data = np.delete(datas, y_index, axis=1)
    print("val  : %d label" % y_data.shape[1])
    print("\n----------------------------------------------")
    print("train: %d feature" % x_data.shape[1])

    # separate train and validation datasets
    print("\nseparate data...\n")
    x_train, y_train, x_val, y_val = seperate_dataset(x_data, y_data, 0.8)
    print("train: %d cases" % x_train.shape[0])
    print("val  : %d cases" % x_val.shape[0])
    print("\n----------------------------------------------")

    # keep the k features with the highest mutual information with the first target
    k = 20
    feature_eng = SelectKBest(mutual_info_regression, k=k)
    x_train_new = feature_eng.fit_transform(x_train, y_train[:, 0])
    x_val_new = feature_eng.transform(x_val)
    print("keep %d feature" % k)
    print("\n----------------------------------------------")

    feat_selected = feature_eng.get_support(indices=True)
    print("-----------")
    for i in range(len(feat_selected)):
        print(indexs.inv[feat_selected[i]])
    print("\n----------------------------------------------")

    print("train model...\n")
    # map the selected feature indexes back to their definitions in the label table
    a = labels.set_index('Unnamed: 0')['def'].to_dict()
    for i in feat_selected:
        print(a[i])
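# A standalone sketch of the SelectKBest step above, on hypothetical toy
# arrays (only the sklearn calls mirror the function itself):
import numpy as np
from sklearn.feature_selection import SelectKBest, mutual_info_regression

rng = np.random.RandomState(0)
X_toy = rng.rand(100, 5)
y_toy = 3 * X_toy[:, 2] + 0.1 * rng.rand(100)  # only feature 2 is informative

selector = SelectKBest(mutual_info_regression, k=2)
X_kept = selector.fit_transform(X_toy, y_toy)
print(selector.get_support(indices=True))  # feature 2 should be among the survivors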
def write():
    dates, datas, indexs = rf.read_data(StatObj.data_path(), 0.5)
    fp_label, y_name = rf.read_label(StatObj.label_path())
    # mode 'w' already truncates, so no explicit truncate() is needed
    with open("small_var.txt", 'w') as target:
        for name in y_name:
            id_value = indexs[name]
            # strip the surrounding brackets from the array's string form
            lines = str(datas[:, id_value])
            target.write(lines[1:-2])
            target.write('\n')
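# Design note: the bracket-stripping in write() can be avoided with numpy's
# own text writer; a minimal sketch using the same loaders (write_savetxt is
# a hypothetical name, not part of the original module):
def write_savetxt():
    dates, datas, indexs = rf.read_data(StatObj.data_path(), 0.5)
    fp_label, y_name = rf.read_label(StatObj.label_path())
    cols = [indexs[name] for name in y_name]
    # one line per target series, space-separated values
    np.savetxt("small_var.txt", datas[:, cols].T, fmt="%g")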
def plot_image_set(infile):
    # read in the aps file; it comes in as shape (512, 620, 16)
    img = read_data(infile)
    # transpose so that the slice index is the first dimension, shape (16, 620, 512)
    img = img.transpose()

    # show all 16 slices in a 4x4 grid
    fig, axarr = plt.subplots(nrows=4, ncols=4, figsize=(10, 10))
    i = 0
    for row in range(4):
        for col in range(4):
            # resized_img = cv2.resize(img[i], (0, 0), fx=0.1, fy=0.1)
            # axarr[row, col].imshow(np.flipud(resized_img), cmap=COLORMAP)
            axarr[row, col].imshow(np.flipud(img[i]), cmap=COLORMAP)
            i += 1
    print('Done!')
def read_fast():
    if os.path.exists("../Data/datas.npy"):
        # load cached data
        datas = np.load('../Data/datas.npy')
        dates = np.load('../Data/dates.npy')
        with open('../Data/indexs.pkl', 'rb') as infile:
            indexs = pickle.load(infile)
        with open('../Data/indexs_inv.pkl', 'rb') as infile:
            indexs_inv = pickle.load(infile)
        with open('../Data/labels.pkl', 'rb') as infile:
            labels = pickle.load(infile)
        with open('../Data/y_name.pkl', 'rb') as infile:
            y_name = pickle.load(infile)
    else:
        # load data from the CSV sources
        labels, y_name = rf.read_label(label_path)
        dates, datas, indexs, indexs_inv = rf.read_data(data_path, y_name, 0)
        # cache the parsed data for the next run
        np.save('../Data/datas.npy', datas)
        np.save('../Data/dates.npy', dates)
        with open('../Data/indexs.pkl', 'wb') as outfile:
            pickle.dump(indexs, outfile)
        with open('../Data/indexs_inv.pkl', 'wb') as outfile:
            pickle.dump(indexs_inv, outfile)
        with open('../Data/labels.pkl', 'wb') as outfile:
            pickle.dump(labels, outfile)
        with open('../Data/y_name.pkl', 'wb') as outfile:
            pickle.dump(y_name, outfile)
    return dates, datas, indexs, indexs_inv
import numpy as np
from sklearn.linear_model import Ridge

import read_file as rf

data_path = "../Data_M.csv"
label_path = "../Dico_M.csv"

labels, y_name = rf.read_label(label_path)
dates, datas, indexs = rf.read_data(data_path, 0.3)

# separate features (x_train) from prediction targets (y_train)
y_index = [indexs[i] for i in y_name]
y_train = datas[:, y_index]
x_train = np.delete(datas, y_index, axis=1)

# regress the first target two time steps ahead of the features
clf = Ridge(alpha=1.0)
clf.fit(x_train[:-2, :], y_train[2:, 0])
print(clf.score(x_train[:-2, :], y_train[2:, 0]))
print(clf.coef_)
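# The slicing above pairs each feature row with the target two time steps
# later; a toy illustration of that alignment:
x_toy = np.arange(6).reshape(-1, 1)  # features at times t = 0..5
y_toy = 10 * np.arange(6)            # targets at times t = 0..5
pairs = [(int(a), int(b)) for a, b in zip(x_toy[:-2].ravel(), y_toy[2:])]
print(pairs)  # [(0, 20), (1, 30), (2, 40), (3, 50)]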
    classes.append((a, high + 1))
    return classes

def calculate_classes_count(classes, numbers):
    counts = [0] * len(classes)
    for number in numbers:
        for i, class_range in enumerate(classes):
            if class_range[0] <= number < class_range[1]:
                counts[i] += 1
                break
    return counts

def create_table(classes, counts):
    print('\tclasses\t\tcount')
    for i, class_range in enumerate(classes):
        print(str(class_range[0]) + '\t-\t' + str(class_range[1]) + '\t' + str(counts[i]))

if __name__ == '__main__':
    data = read_file.read_data('test.txt')
    try:
        n = float(input('Enter class size : '))
        classes = create_classes(data, n)
        counts = calculate_classes_count(classes, data)
        create_table(classes, counts)
    except ValueError:
        print('Invalid number')
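# A minimal sketch of the two complete helpers above, assuming class ranges
# are half-open (low, high) tuples as the comparison in calculate_classes_count
# implies (hand-built classes and hypothetical numbers):
sample_classes = [(0, 10), (10, 20), (20, 30)]
sample_numbers = [3, 9, 10, 15, 25, 29]
sample_counts = calculate_classes_count(sample_classes, sample_numbers)
print(sample_counts)  # [2, 2, 2]
create_table(sample_classes, sample_counts)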
print "0" # load datas with np.load('../Data/datas.npz') as obj: datas = obj['datas'] dates = obj['dates'] with open('../Data/datas.pkl', 'rb') as infile: indexs = pickle.load(infile) labels = pickle.load(infile) y_name = pickle.load(infile) else: print "1" # load datas labels, y_name = rf.read_label(label_path) dates, datas, indexs = rf.read_data(data_path, y_name, 0) # save datas np.savez('../Data/datas.npz', datas=datas, dates=dates) with open('../Data/datas.pkl', 'wb') as outfile: pickle.dump(indexs, outfile) pickle.dump(labels, outfile) pickle.dump(y_name, outfile) with open('../Data/kmeans.pkl', 'rb') as infile: tf_class = pickle.load(infile) print("load %d data" % datas.shape[0]) # seperate feature(x_train) and prediction(y_train) y_index = [indexs[i] for i in y_name]
                                               y, cv=5, scoring='accuracy')
    # accuracy scores are already in [0, 1]; report the plain fold mean
    print("Tree-Classifier with 5-fold cross-validation accuracy:",
          np.mean(scores6))

    model7 = DecisionTreeClassifier()
    scores7 = cross_validation.cross_val_score(model7, X, y, cv=5,
                                               scoring='accuracy')
    print("Decision-Tree-Classifier with 5-fold cross-validation accuracy:",
          np.mean(scores7))

    model8 = MLPClassifier(solver='adam', alpha=0.01,
                           hidden_layer_sizes=(10, 10))
    scores8 = cross_validation.cross_val_score(model8, X, y, cv=5,
                                               scoring='accuracy')
    print("MLP classifier with 5-fold cross-validation accuracy:",
          np.mean(scores8))

X, y = read_data("data/final_dataset.csv")
np.random.seed(0)
Cross_Validation(X, y)
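# Note: the sklearn.cross_validation module used above was removed in
# scikit-learn 0.20; on current versions the equivalent call is:
#   from sklearn.model_selection import cross_val_score
#   scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')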
def data_generators(data_generator: str, batch_repeat: int = 10, batch_sleep: float = 0.5,
                    timezone: str = 'utc', enable_timezone_range: bool = True,
                    token: str = None, tag: str = None, initial_configs: bool = False,
                    data_dir: str = os.path.join(ROOT_PATH, 'data'), compress: bool = False,
                    exception: bool = False) -> dict:
    """
    Generate a data set based on the given parameters
    :args:
        data_generator:str - which data set to generate
        batch_repeat:int - number of rows per batch
        batch_sleep:float - sleep time between rows of a specific batch
        timezone:str - whether to set the timezone to UTC or local
        enable_timezone_range:bool - whether or not to set the timestamp within a "range"
        token:str - linode token
        tag:str - group of linode nodes to get data from; if not set, data comes from all nodes associated with the token
        initial_configs:bool - whether this is the first time the configs are being deployed
        data_dir:str - for the 'file' generator, the directory containing data to read
        compress:bool - whether the content in data_dir is compressed
        exception:bool - whether or not to print error message(s)
    :params:
        payloads:dict - generated data
    :return:
        payloads
    """
    payloads = {}  # fall through to an empty dict for unknown generator names
    if data_generator == 'linode':
        import linode
        payloads = linode.get_linode_data(token=token, tag=tag, initial_configs=initial_configs,
                                          timezone=timezone,
                                          enable_timezone_range=enable_timezone_range,
                                          exception=exception)
    elif data_generator == 'percentagecpu':
        import percentagecpu_sensor
        payloads = percentagecpu_sensor.get_percentagecpu_data(
            timezone=timezone, enable_timezone_range=enable_timezone_range,
            sleep=batch_sleep, repeat=batch_repeat)
    elif data_generator == 'ping':
        import ping_sensor
        payloads = ping_sensor.get_ping_data(
            timezone=timezone, enable_timezone_range=enable_timezone_range,
            sleep=batch_sleep, repeat=batch_repeat)
    elif data_generator == 'power':
        import power_company
        payloads = power_company.data_generator(
            timezone=timezone, enable_timezone_range=enable_timezone_range,
            sleep=batch_sleep, repeat=batch_repeat)
    elif data_generator == 'synchrophasor':
        import power_company_synchrophasor
        payloads = power_company_synchrophasor.data_generator(
            timezone=timezone, enable_timezone_range=enable_timezone_range,
            sleep=batch_sleep, repeat=batch_repeat)
    elif data_generator == 'trig':
        import trig
        payloads = trig.trig_value(
            timezone=timezone, enable_timezone_range=enable_timezone_range,
            sleep=batch_sleep, repeat=batch_repeat)
    elif data_generator == 'aiops':
        import customer_aiops
        payloads = customer_aiops.get_aiops_data(timezone=timezone, sleep=batch_sleep,
                                                 repeat=batch_repeat)
    elif data_generator == 'file':
        import read_file
        payloads = read_file.read_data(dir_path=data_dir, compress=compress,
                                       exception=exception)
    return payloads
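# A minimal usage sketch; the 'trig' generator needs no token, tag, or
# data_dir, so only the batch parameters matter here:
payloads = data_generators(data_generator='trig', batch_repeat=5,
                           batch_sleep=0.1, timezone='utc',
                           enable_timezone_range=False)
print('%d payload entries generated' % len(payloads))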