def load_PD_data(path='data/train_data.txt'):
    """Load the PD training data and split off a fixed subset of features.

    Each non-empty row of the comma-separated file is read as:
    column 0 (dropped), feature columns ``1:-2``, the regression target in
    column ``-2`` and a final column that is ignored here.

    Args:
        path: Location of the comma-separated data file.

    Returns:
        A tuple ``(X, y, X_star)`` where ``X`` holds the feature columns that
        were *not* selected, ``y`` is the float target vector and ``X_star``
        holds the selected feature columns, in the order listed in
        ``sel_features``.
    """
    # `with` guarantees the handle is closed even if parsing fails;
    # newline='' is what the csv module expects for file objects.
    with open(path, newline='') as handle:
        rows = [row for row in csv.reader(handle) if row]  # skip blank lines

    # np.float was removed in NumPy 1.24; the builtin float is the same dtype.
    X = np.array([row[1:-2] for row in rows]).astype(float)
    y = np.array([row[-2] for row in rows]).astype(float)
    del rows  # free the parsed text before np.delete copies X

    # Column indices (relative to X) that are returned separately as X_star.
    # Alternatives tried previously: [11, 20, 19, 12, 7] and [0, 16].
    sel_features = [4, 1, 3, 0, 16]
    X_star = X[:, sel_features]
    X = np.delete(X, sel_features, axis=1)
    return X, y, X_star
def load_kc2_data(path='data/kc2_csv.csv'):
    """Load the KC2 CSV file and split its features into two blocks.

    The first row is treated as a header and skipped. In every other
    non-empty row the last column is the label (``'yes'`` / ``'no'``) and
    all preceding columns are numeric features.

    Args:
        path: Location of the CSV file.

    Returns:
        A tuple ``(X, y, x_star)`` where ``X`` is feature columns 0-13,
        ``y`` is the integer label (``'yes'`` -> 1, ``'no'`` -> 0) and
        ``x_star`` is feature columns 14-20.

    Raises:
        ValueError: If a feature is not numeric or a label is neither
            ``'yes'`` nor ``'no'``.
    """
    # `with` guarantees the handle is closed even if parsing fails.
    with open(path, newline='') as handle:
        reader = csv.reader(handle)
        next(reader, None)  # skip the header row
        rows = [row for row in reader if row]  # skip blank lines

    # np.float was removed in NumPy 1.24; the builtin float is the same dtype.
    X = np.array([row[:-1] for row in rows]).astype(float)
    print(X.shape)

    # Labels start as a string array; rewrite them in place to '1'/'0' so the
    # final astype(int) still fails loudly on any unexpected label.
    y = np.array([row[-1] for row in rows])
    y[y == 'yes'] = 1
    y[y == 'no'] = 0
    y = y.astype(int)

    x_star = X[:, 14:21]
    X = X[:, 0:14]
    return X, y, x_star
def load_CBM_data(path='data/CBM_data.txt'):
    """Load the CBM data file as a feature matrix and a target vector.

    No header row is skipped. For every non-empty row, column 0 and the
    second-to-last column are dropped, columns ``1:-2`` become the features
    and the last column becomes the target.

    Args:
        path: Location of the comma-separated data file.

    Returns:
        A tuple ``(X, y)`` of float arrays.
    """
    # `with` guarantees the handle is closed even if parsing fails.
    with open(path, newline='') as handle:
        rows = [row for row in csv.reader(handle, delimiter=',') if row]

    # np.float was removed in NumPy 1.24; the builtin float is the same dtype.
    X = np.array([row[1:-2] for row in rows]).astype(float)
    y = np.array([row[-1] for row in rows]).astype(float)

    print(X.shape)
    return X, y
def load_energy_data(path='data/ENB2012_data.csv'):
    """Load the ENB2012 CSV file.

    No header row is skipped, so the file must contain numeric rows only.
    In every non-empty row the last two columns are split off from the
    features.

    Args:
        path: Location of the CSV file.

    Returns:
        A tuple ``(X, y, x_star)`` where ``X`` is all columns except the last
        two, ``y`` is the second-to-last column and ``x_star`` is the last
        column, all as float arrays.
    """
    # `with` guarantees the handle is closed even if parsing fails.
    with open(path, newline='') as handle:
        rows = [row for row in csv.reader(handle) if row]  # skip blank lines

    # np.float was removed in NumPy 1.24; the builtin float is the same dtype.
    X = np.array([row[:-2] for row in rows]).astype(float)
    y = np.array([row[-2] for row in rows]).astype(float)
    x_star = np.array([row[-1] for row in rows]).astype(float)

    print(X.shape)
    return X, y, x_star
def load_concrete_data(path='data/Concrete_Data.csv'):
    """Load the concrete CSV file and split off feature columns 6 and 7.

    No header row is skipped, so the file must contain numeric rows only.
    In every non-empty row the last column is the target and all preceding
    columns are features.

    Args:
        path: Location of the CSV file.

    Returns:
        A tuple ``(X, y, X_star)`` where ``X`` holds the features without
        columns 6 and 7, ``y`` is the float target vector and ``X_star``
        holds feature columns 6 and 7.
    """
    # `with` guarantees the handle is closed even if parsing fails.
    with open(path, newline='') as handle:
        rows = [row for row in csv.reader(handle) if row]  # skip blank lines

    # np.float was removed in NumPy 1.24; the builtin float is the same dtype.
    X = np.array([row[:-1] for row in rows]).astype(float)
    y = np.array([row[-1] for row in rows]).astype(float)
    del rows  # free the parsed text before np.delete copies X

    # Column indices (relative to X) that are returned separately as X_star.
    sel_features = [6, 7]
    X_star = X[:, sel_features]
    X = np.delete(X, sel_features, axis=1)
    return X, y, X_star
def load_wpbc_data(path='data/wpbc.data'):
    """Load the WPBC data file.

    No header row is skipped. For every non-empty row, column 0 and the last
    three columns are dropped, column 1 is the outcome letter (``'R'`` /
    ``'N'``), column 2 is the numeric target and columns ``3:-3`` are the
    features.

    Args:
        path: Location of the comma-separated data file.

    Returns:
        A tuple ``(X, y, y_label)`` where ``X`` is the float feature matrix,
        ``y`` is the float target taken from column 2 and ``y_label`` is the
        integer outcome (``'R'`` -> 1, ``'N'`` -> 0).

    Raises:
        ValueError: If a feature or target is not numeric, or an outcome is
            neither ``'R'`` nor ``'N'``.
    """
    # `with` guarantees the handle is closed even if parsing fails.
    with open(path, newline='') as handle:
        rows = [row for row in csv.reader(handle, delimiter=',') if row]

    X = np.array([row[3:-3] for row in rows]).astype(float)
    y = np.array([row[2] for row in rows]).astype(float)

    # Outcomes start as a string array; rewrite them in place to '1'/'0' so
    # the final astype(int) still fails loudly on any unexpected value.
    y_label = np.array([row[1] for row in rows])
    y_label[y_label == 'R'] = 1
    y_label[y_label == 'N'] = 0
    y_label = y_label.astype(int)

    print(X.shape)
    return X, y, y_label
def load_ionosphere_data(path='data/ionosphere.data'):
    """Load the ionosphere data file and split off four feature columns.

    No header row is skipped. In every non-empty row the last column is the
    class letter (``'g'`` / ``'b'``) and all preceding columns are numeric
    features.

    Args:
        path: Location of the comma-separated data file.

    Returns:
        A tuple ``(X, y, x_star)`` where ``X`` holds the features without
        columns 4, 5, 20 and 21, ``y`` is the integer label (``'g'`` -> 1,
        ``'b'`` -> 0) and ``x_star`` holds feature columns 4, 5, 20 and 21.

    Raises:
        ValueError: If a feature is not numeric or a label is neither
            ``'g'`` nor ``'b'``.
    """
    # `with` guarantees the handle is closed even if parsing fails.
    with open(path, newline='') as handle:
        rows = [row for row in csv.reader(handle) if row]  # skip blank lines

    # np.float was removed in NumPy 1.24; the builtin float is the same dtype.
    X = np.array([row[:-1] for row in rows]).astype(float)
    print(X.shape)

    # Labels start as a string array; rewrite them in place to '1'/'0' so the
    # final astype(int) still fails loudly on any unexpected label.
    y = np.array([row[-1] for row in rows])
    y[y == 'g'] = 1
    y[y == 'b'] = 0
    y = y.astype(int)
    del rows  # free the parsed text before np.delete copies X

    # One list drives both the selection and the removal so they cannot drift.
    sel_features = [4, 5, 20, 21]
    x_star = X[:, sel_features]
    X = np.delete(X, sel_features, axis=1)
    return X, y, x_star
def load_gridStability_data(path='data/grid_stability.csv'):
    """Load the grid-stability CSV file.

    No header row is skipped, so the file must contain data rows only. In
    every non-empty row the last column is the class string (``'stable'`` /
    ``'unstable'``), the second-to-last column is the numeric target and all
    preceding columns are features.

    Args:
        path: Location of the CSV file.

    Returns:
        A tuple ``(X, y, y_label)`` where ``X`` is the float feature matrix,
        ``y`` is the float target and ``y_label`` is the integer class
        (``'stable'`` -> 1, ``'unstable'`` -> 0).

    Raises:
        ValueError: If a feature or target is not numeric, or a class is
            neither ``'stable'`` nor ``'unstable'``.
    """
    # `with` guarantees the handle is closed even if parsing fails.
    with open(path, newline='') as handle:
        rows = [row for row in csv.reader(handle) if row]  # skip blank lines

    # np.float was removed in NumPy 1.24; the builtin float is the same dtype.
    X = np.array([row[:-2] for row in rows]).astype(float)
    y = np.array([row[-2] for row in rows]).astype(float)

    # Classes start as a string array; rewrite them in place to '1'/'0' so
    # the final astype(int) still fails loudly on any unexpected value.
    y_label = np.array([row[-1] for row in rows])
    y_label[y_label == 'stable'] = 1
    y_label[y_label == 'unstable'] = 0
    y_label = y_label.astype(int)

    print(X.shape)
    return X, y, y_label
def load_parkinsons_data(path='data/parkinsons.data'):
    """Load the parkinsons data file and split off a fixed subset of features.

    The first row is treated as a header and skipped. In every other
    non-empty row column 0 is dropped, the column seventh from the end is the
    label and the remaining columns are numeric features.

    Args:
        path: Location of the comma-separated data file.

    Returns:
        A tuple ``(X, y_label, X_star)`` where ``X`` holds the feature columns
        that were *not* selected, ``y_label`` is the integer label vector and
        ``X_star`` holds the selected feature columns, in the order listed in
        ``sel_features``.
    """
    # `with` guarantees the handle is closed even if parsing fails.
    with open(path, newline='') as handle:
        reader = csv.reader(handle)
        next(reader, None)  # skip the header row
        rows = [row for row in reader if row]  # skip blank lines

    # np.float was removed in NumPy 1.24; the builtin float is the same dtype.
    X = np.array([row[1:] for row in rows]).astype(float)
    y_label = np.array([row[-7] for row in rows]).astype(float).astype(int)
    del rows  # free the parsed text before np.delete copies X

    # X was built from row[1:], so it still contains the label column at the
    # same position from the end; remove it before selecting features.
    X = np.delete(X, -7, axis=1)

    # Column indices (relative to the label-free X) returned as X_star.
    # Alternative tried previously: [21, 18].
    sel_features = [19, 4, 10, 13, 9, 12, 11, 8, 21, 18]
    X_star = X[:, sel_features]
    X = np.delete(X, sel_features, axis=1)
    return X, y_label, X_star