def test_standardization(self):
    """Check ``utils.standardization`` against reference results built from ``self.m``.

    Two calls are exercised: the default call, compared with
    ``(m - m.mean()) / m.std()``, and the ``axis=0`` call, compared with the
    output of ``preprocess.StandardScaler`` fitted on ``self.m``.
    """

    def joined(result):
        # Stack the entity table next to the first attribute table, with the
        # attribute rows selected through the first kfkds index array.
        return np.hstack((result.ent_table, result.att_table[0][result.kfkds[0]]))

    # Default call: reference uses the mean/std taken over the whole of self.m.
    default_result = utils.standardization(self.n_matrix)
    assert_equal(joined(default_result), (self.m - self.m.mean()) / self.m.std())

    # axis=0 call: reference is a StandardScaler fitted on self.m.
    by_axis_result = utils.standardization(self.n_matrix, axis=0)
    reference_scaler = preprocess.StandardScaler()
    reference_scaler.fit(self.m)
    assert_equal(joined(by_axis_result), reference_scaler.transform(self.m))
def edr_dist(X, Y, epsilon):
    """Return the EDR distance between two time series.

    Both inputs are passed through ``check_arrays`` and then standardized
    individually before the distance is delegated to ``_edr_dist``.

    :param X (array): time series feature array denoted by X
    :param Y (array): time series feature array denoted by Y
    :param epsilon (float): matching threshold
    :returns: distance between X and Y with the best alignment
    :Reference: L. Chen et al., "Robust and Fast Similarity Search for
        Moving Object Trajectories", 2005.
    """
    X, Y = check_arrays(X, Y)
    # Arguments are evaluated left to right, so X is standardized before Y.
    return _edr_dist(standardization(X), standardization(Y), epsilon)
def run(training_data, test_data, num_runs=10, num_kernels=100):
    """Repeatedly fit and score a ridge classifier on kernel-transformed data.

    Column 0 of each input array is taken as the integer class label; the
    remaining columns are the series values, which are passed through
    ``normalization`` and then ``standardization`` once, before any run.

    Every run generates a fresh set of kernels, transforms both splits with
    them, fits a ``RidgeClassifierCV`` on the training transform, and scores
    it on the test transform.

    :param training_data: 2-D array, labels in column 0
    :param test_data: 2-D array, labels in column 0
    :param num_runs: number of independent repetitions
    :param num_kernels: number of kernels passed to ``generate_kernels``
    :returns: ``(results, timings)`` where ``results[i]`` is the test score of
        run ``i`` and ``timings`` has shape ``(4, num_runs)`` with rows
        training transform, test transform, training, test (seconds).
    """
    results = np.zeros(num_runs)
    # Rows: training transform, test transform, training, test.
    timings = np.zeros((4, num_runs))

    Y_training = training_data[:, 0].astype(int)
    X_training = standardization(normalization(training_data[:, 1:]))
    Y_test = test_data[:, 0].astype(int)
    X_test = standardization(normalization(test_data[:, 1:]))

    for run_idx in range(num_runs):
        kernels = generate_kernels(X_training.shape[1], num_kernels)

        # -- transform training --
        started = time.perf_counter()
        X_training_transform = apply_kernels(X_training, kernels)
        timings[0, run_idx] = time.perf_counter() - started

        # -- transform test --
        started = time.perf_counter()
        X_test_transform = apply_kernels(X_test, kernels)
        timings[1, run_idx] = time.perf_counter() - started

        # -- training (classifier construction is part of the timed region) --
        started = time.perf_counter()
        # NOTE(review): `normalize` was deprecated in scikit-learn 1.0 and
        # removed in 1.2 -- confirm the pinned scikit-learn version.
        classifier = RidgeClassifierCV(
            alphas=10 ** np.linspace(-3, 3, 10),
            normalize=True,
        )
        classifier.fit(X_training_transform, Y_training)
        timings[2, run_idx] = time.perf_counter() - started

        # -- test --
        started = time.perf_counter()
        results[run_idx] = classifier.score(X_test_transform, Y_test)
        timings[3, run_idx] = time.perf_counter() - started

    return results, timings
def main(args):
    """Seed the RNGs, prepare output directories, and build the training dataset.

    Reads the following attributes from ``args``: ``seed``, ``weights_path``,
    ``plots_path``, ``numpy_path``, ``which_data``, ``feature`` and
    ``seq_length``.

    The selected feature data is standardized, split into train / validation /
    test frames at two fixed timestamps, and the training frame is wrapped in
    a ``SequenceDataset`` targeting the ``'value'`` column.
    """
    # Seed every random number generator in use.
    seed = args.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # Make sure the output directories for weights, plots and arrays exist.
    for out_dir in (args.weights_path, args.plots_path, args.numpy_path):
        make_dirs(out_dir)

    # Load the requested feature and work on a private copy of it.
    raw = load_data(args.which_data)[args.feature].copy()
    df = standardization(raw)
    print("Hello world!")  # NOTE(review): looks like leftover debug output.

    # Split boundaries used as labels in df.loc.
    # NOTE(review): if df is a pandas DataFrame, label slices include both
    # endpoints, so a boundary row would appear in two splits -- confirm.
    val_start = "2020-02-08 01:00:00"
    test_start = "2020-07-19 01:00:00"
    df_train = df.loc[:val_start].copy()
    df_val = df.loc[val_start:test_start].copy()
    df_test = df.loc[test_start:].copy()

    # NOTE(review): this re-seeds torch with a fixed value, replacing the
    # args.seed set above -- confirm this is intended.
    torch.manual_seed(101)

    # Every column other than the target is used as an input feature.
    features = list(df.columns.difference(['value']))
    display(df_train)

    train_dataset = SequenceDataset(
        df_train,
        target='value',
        features=features,
        sequence_length=args.seq_length,
    )