def prepare(filenames):
    """Load, normalize, and concatenate feature/target columns from CSV files.

    Parameters
    ----------
    filenames : iterable of str
        Paths to CSV files readable by ``pd.read_csv``.

    Returns
    -------
    (X, Y) : tuple of 1-D np.ndarray
        Normalized features and targets concatenated across all files.
        As in the original, multi-column features are flattened to 1-D
        (np.append flattened them implicitly).
    """
    # Accumulate per-file arrays and concatenate once at the end: the
    # original rebuilt X/Y with np.append on every file, copying the whole
    # array each time (accidental O(n^2)).
    xs, ys = [], []
    for path in filenames:
        data = pd.read_csv(path)
        x = data.iloc[:, :-1].values  # all columns but the last
        # NOTE(review): y is column 1, not the last column — for data with
        # more than 2 columns this overlaps x; confirm intent.
        y = data.iloc[:, 1].values
        xs.append(np.ravel(normalize(x)))
        ys.append(np.ravel(normalize(y)))
    # Preserve original behavior on empty input: empty float arrays.
    X = np.concatenate(xs) if xs else np.array([])
    Y = np.concatenate(ys) if ys else np.array([])
    return X, Y
def _normalize(self, x_train, y_train, x_test, y_test):
    """Normalize train/test splits, caching train extrema on first use.

    Parameters
    ----------
    x_train, y_train : array-like
        Training features/targets; normalized in place of the originals.
    x_test, y_test : array-like or None
        Optional test splits, scaled with the cached training min/max.

    Returns
    -------
    tuple
        (x_train, y_train, x_test, y_test) — normalized on the first call,
        passed through unchanged on subsequent calls.
    """
    if self.has_normalized:
        # Bug fix: the original fell through to an implicit `return None`
        # here, breaking any caller that unpacks a 4-tuple. Pass the data
        # through unchanged instead.
        print("has already normalized...")
        return x_train, y_train, x_test, y_test
    # Cache training extrema so test data is scaled on the same basis.
    self.max_x = np.max(x_train)
    self.min_x = np.min(x_train)
    self.max_y = np.max(y_train)
    self.min_y = np.min(y_train)
    x_train, y_train = normalize(x_train), normalize(y_train)
    if x_test is not None:
        # "+ 1" in the denominator guards against division by zero when
        # max == min. NOTE(review): this skews the test-set scale relative
        # to normalize() on the train set — confirm the two agree.
        x_test = (x_test - self.min_x) / (self.max_x - self.min_x + 1)
    if y_test is not None:
        y_test = (y_test - self.min_y) / (self.max_y - self.min_y + 1)
    # Bug fix: the flag was checked but never set, so the "already
    # normalized" guard could never trigger from this method.
    self.has_normalized = True
    return x_train, y_train, x_test, y_test
def generate_X(filename, sample=None):
    ''' take a sample from data '''
    print('Generating X...')
    # Read the two columns and normalize each independently.
    frame = pd.read_csv(filename)
    xs = normalize(frame.iloc[:, 0].values)
    ys = normalize(frame.iloc[:, 1].values)
    # Optionally keep a random subset of rows (without replacement).
    if sample:
        chosen = np.random.choice(np.arange(len(xs)), sample, replace=False)
        xs, ys = xs[chosen], ys[chosen]
    # Pack into an (n, 2) float array: column 0 = X, column 1 = Y.
    out = np.zeros((len(xs), 2))
    out[:, 0] = xs
    out[:, 1] = ys
    return out
def process_X(X, betas):
    """Score each sample by proximity to its nearest breakpoint.

    Parameters
    ----------
    X : np.ndarray
        2-D array whose first column holds the sample positions.
    betas : array-like
        Candidate breakpoint locations.

    Returns
    -------
    Y : np.ndarray
        One score per sample; after normalization of the nearest-breakpoint
        distances, ``(1 - d)**3`` maps samples near a breakpoint toward 1.
    """
    Xs = X[:, 0]
    # Vectorized nearest-distance: the original looped in Python and also
    # rebuilt np.asarray(betas) on every iteration (loop-invariant work).
    beta_arr = np.asarray(betas)
    dists = np.abs(Xs[:, None] - beta_arr).min(axis=1)
    dists = normalize(dists)
    # Cubic falloff sharpens the contrast between near and far samples.
    dists = np.power((1 - dists), 3)
    print(dists.max())
    Y = np.array(dists)
    return Y
def find_locations(self, predicted_betas):
    """Return the normalized positions of the top-scoring breakpoints, sorted ascending."""
    # argsort of the negated scores orders indices from highest score down;
    # keep the first self.num_breaks of them.
    top = (-predicted_betas).argsort()[:self.num_breaks]
    return sorted(normalize(top))
def find_locations(predicted_betas, num_breaks):
    """Return normalized indices of the num_breaks highest predicted betas.

    NOTE(review): unlike the method variant elsewhere in this file, the
    result is NOT sorted — confirm callers expect score order.
    """
    # Negating the scores makes ascending argsort yield a descending ranking.
    ranked = (-predicted_betas).argsort()
    top = ranked[:num_breaks]
    return normalize(top)