# assumed imports for the excerpts below (generate_kernels, apply_kernels and
# apply_kernels_jagged are the ROCKET transform functions defined elsewhere in
# this repository); note that RidgeClassifierCV(normalize = True) requires
# scikit-learn < 1.2 -- on newer versions, standardise the features instead

import time

import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeClassifierCV


def run(training_data, test_data, num_runs = 10, num_kernels = 10_000):

    results = np.zeros(num_runs)
    timings = np.zeros([4, num_runs]) # training transform, test transform, training, test

    # column 0 holds the class labels, the remaining columns the time series
    Y_training, X_training = training_data[:, 0].astype(np.int32), training_data[:, 1:]
    Y_test, X_test = test_data[:, 0].astype(np.int32), test_data[:, 1:]

    for i in range(num_runs):

        input_length = X_training.shape[1]
        kernels = generate_kernels(input_length, num_kernels)

        # -- transform training ------------------------------------------------

        time_a = time.perf_counter()
        X_training_transform = apply_kernels(X_training, kernels)
        time_b = time.perf_counter()
        timings[0, i] = time_b - time_a

        # -- transform test ----------------------------------------------------

        time_a = time.perf_counter()
        X_test_transform = apply_kernels(X_test, kernels)
        time_b = time.perf_counter()
        timings[1, i] = time_b - time_a

        # -- training ----------------------------------------------------------

        time_a = time.perf_counter()
        classifier = RidgeClassifierCV(alphas = 10 ** np.linspace(-3, 3, 10),
                                       normalize = True)
        classifier.fit(X_training_transform, Y_training)
        time_b = time.perf_counter()
        timings[2, i] = time_b - time_a

        # -- test ----------------------------------------------------------------

        time_a = time.perf_counter()
        results[i] = classifier.score(X_test_transform, Y_test)
        time_b = time.perf_counter()
        timings[3, i] = time_b - time_a

    return results, timings
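# -- usage sketch (illustrative; not part of the original experiments) --------
# a minimal smoke test for run() on synthetic data, assuming the label-first
# layout used throughout these scripts (column 0 is the class label, the
# remaining columns are the time series); _smoke_test_run is a hypothetical
# helper, not a function from this repository

def _smoke_test_run(num_examples = 20, input_length = 100):
    rng = np.random.RandomState(0)
    labels = rng.randint(0, 2, [num_examples, 1]).astype(np.float64)
    values = rng.normal(0, 1, [num_examples, input_length])
    data = np.hstack([labels, values])
    # train and test on the same synthetic data: this only checks that the
    # pipeline runs end to end, not that the accuracy is meaningful
    results, _ = run(data, data, num_runs = 1, num_kernels = 100)
    print(f"smoke test accuracy = {results.mean():.4f}")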
Y_test, X_test = test_data[:, 0].astype(np.int32), test_data[:, 1:]

print("Done.")

# -- run -----------------------------------------------------------------------

print("Performing runs".ljust(80 - 5, "."), end = "", flush = True)

_results = np.zeros(arguments.num_runs)
_timings = np.zeros([4, arguments.num_runs]) # trans. tr., trans. te., training, test

for i in range(arguments.num_runs):

    input_length = X_training.shape[-1]
    kernels = generate_kernels(input_length, arguments.num_kernels)

    # -- transform training ----------------------------------------------------

    time_a = time.perf_counter()
    X_training_transform = apply_kernels(X_training, kernels)
    time_b = time.perf_counter()
    _timings[0, i] = time_b - time_a

    # -- transform test --------------------------------------------------------

    time_a = time.perf_counter()
    X_test_transform = apply_kernels(X_test, kernels)
    time_b = time.perf_counter()
    _timings[1, i] = time_b - time_a

def run_additional(training_data, test_data, num_runs = 10, num_kernels = 10_000):

    # assumes variable length time series are padded with nan
    def get_input_lengths(X):
        return X.shape[1] - (~np.isnan(np.flip(X, 1))).argmax(1)

    # linearly rescale each series to reference_length
    def rescale(X, reference_length):
        _X = np.zeros([len(X), reference_length])
        input_lengths = get_input_lengths(X)
        for i in range(len(X)):
            _X[i] = np.interp(np.linspace(0, 1, reference_length),
                              np.linspace(0, 1, input_lengths[i]),
                              X[i][:input_lengths[i]])
        return _X

    # fill missing values by linear interpolation over the observed values
    def interpolate_nan(X):
        _X = X.copy()
        good = ~np.isnan(X)
        for i in np.where(np.any(~good, 1))[0]:
            _X[i] = np.interp(np.arange(len(X[i])), np.where(good[i])[0], X[i][good[i]])
        return _X

    results = np.zeros(num_runs)
    timings = np.zeros([4, num_runs]) # training transform, test transform, training, test

    Y_training, X_training = training_data[:, 0].astype(np.int32), training_data[:, 1:]
    Y_test, X_test = test_data[:, 0].astype(np.int32), test_data[:, 1:]

    variable_lengths = False

    # handle three cases: (1) same lengths, no missing values; (2) same lengths,
    # missing values; and (3) variable lengths, no missing values
    if np.any(np.isnan(X_training)):

        input_lengths_training = get_input_lengths(X_training)
        input_lengths_training_max = input_lengths_training.max()
        input_lengths_test = get_input_lengths(X_test)

        # missing values (same lengths)
        if np.all(input_lengths_training == input_lengths_training_max):
            X_training = interpolate_nan(X_training)
            X_test = interpolate_nan(X_test)

        # variable lengths (no missing values)
        else:
            variable_lengths = True
            num_folds = 10
            cross_validation_results = np.zeros([2, num_folds])

    # normalise time series
    X_training = ((X_training - np.nanmean(X_training, axis = 1, keepdims = True))
                  / (np.nanstd(X_training, axis = 1, keepdims = True) + 1e-8))
    X_test = ((X_test - np.nanmean(X_test, axis = 1, keepdims = True))
              / (np.nanstd(X_test, axis = 1, keepdims = True) + 1e-8))

    for i in range(num_runs):

        # -- variable lengths ----------------------------------------------------

        if variable_lengths:

            kernels = generate_kernels(input_lengths_training_max, num_kernels)

            # transform training data both ways: rescaled to a common length,
            # and "as is" (jagged), then choose between them by cross-validation
            time_a = time.perf_counter()
            X_training_transform_rescale = apply_kernels(
                rescale(X_training, input_lengths_training_max), kernels)
            X_training_transform_jagged = apply_kernels_jagged(
                X_training, kernels, input_lengths_training)
            time_b = time.perf_counter()
            timings[0, i] = time_b - time_a

            # indices for cross-validation folds
            I = np.random.permutation(len(X_training))
            I = np.array_split(I, num_folds)

            time_a = time.perf_counter()

            # j = 0 -> rescale
            # j = 1 -> "as is" ("jagged")
            for j in range(2):
                for k in range(num_folds):
                    # roll the fold indices (rather than the folds themselves,
                    # which may have unequal sizes) so each fold serves once
                    # for validation
                    fold_order = np.roll(np.arange(num_folds), k)
                    VA = I[fold_order[0]]
                    TR = np.concatenate([I[f] for f in fold_order[1:]])
                    classifier = RidgeClassifierCV(alphas = 10 ** np.linspace(-3, 3, 10),
                                                   normalize = True)
                    if j == 0: # rescale
                        classifier.fit(X_training_transform_rescale[TR], Y_training[TR])
                        cross_validation_results[j][k] = classifier.score(
                            X_training_transform_rescale[VA], Y_training[VA])
                    elif j == 1: # jagged
                        classifier.fit(X_training_transform_jagged[TR], Y_training[TR])
                        cross_validation_results[j][k] = classifier.score(
                            X_training_transform_jagged[VA], Y_training[VA])

            best = cross_validation_results.sum(1).argmax()

            time_b = time.perf_counter()
            timings[2, i] = time_b - time_a

            classifier = RidgeClassifierCV(alphas = 10 ** np.linspace(-3, 3, 10),
                                           normalize = True)

            if best == 0: # rescale

                time_a = time.perf_counter()
                X_test_transform_rescale = apply_kernels(
                    rescale(X_test, input_lengths_training_max), kernels)
                time_b = time.perf_counter()
                timings[1, i] = time_b - time_a

                time_a = time.perf_counter()
                classifier.fit(X_training_transform_rescale, Y_training)
                time_b = time.perf_counter()
                timings[2, i] += time_b - time_a

                time_a = time.perf_counter()
                results[i] = classifier.score(X_test_transform_rescale, Y_test)
                time_b = time.perf_counter()
                timings[3, i] = time_b - time_a

            elif best == 1: # jagged

                time_a = time.perf_counter()
                X_test_transform_jagged = apply_kernels_jagged(
                    X_test, kernels, input_lengths_test)
                time_b = time.perf_counter()
                timings[1, i] = time_b - time_a

                time_a = time.perf_counter()
                classifier.fit(X_training_transform_jagged, Y_training)
                time_b = time.perf_counter()
                timings[2, i] += time_b - time_a

                time_a = time.perf_counter()
                results[i] = classifier.score(X_test_transform_jagged, Y_test)
                time_b = time.perf_counter()
                timings[3, i] = time_b - time_a

        # -- same lengths --------------------------------------------------------

        else:

            kernels = generate_kernels(X_training.shape[1], num_kernels)

            # -- transform training ----------------------------------------------

            time_a = time.perf_counter()
            X_training_transform = apply_kernels(X_training, kernels)
            time_b = time.perf_counter()
            timings[0, i] = time_b - time_a

            # -- transform test --------------------------------------------------

            time_a = time.perf_counter()
            X_test_transform = apply_kernels(X_test, kernels)
            time_b = time.perf_counter()
            timings[1, i] = time_b - time_a

            # -- training --------------------------------------------------------

            time_a = time.perf_counter()
            classifier = RidgeClassifierCV(alphas = 10 ** np.linspace(-3, 3, 10),
                                           normalize = True)
            classifier.fit(X_training_transform, Y_training)
            time_b = time.perf_counter()
            timings[2, i] = time_b - time_a

            # -- test ------------------------------------------------------------

            time_a = time.perf_counter()
            results[i] = classifier.score(X_test_transform, Y_test)
            time_b = time.perf_counter()
            timings[3, i] = time_b - time_a

    return results, timings
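# -- usage sketch (illustrative; not part of the original experiments) --------
# a minimal smoke test for the variable-length path of run_additional(),
# padding synthetic series of different lengths with nan, as assumed above;
# _smoke_test_run_additional is a hypothetical helper, not from this repository

def _smoke_test_run_additional(num_examples = 20, max_length = 100):
    rng = np.random.RandomState(0)
    data = np.full([num_examples, 1 + max_length], np.nan)
    data[:, 0] = rng.randint(0, 2, num_examples) # labels in column 0
    lengths = rng.randint(max_length // 2, max_length + 1, num_examples)
    lengths[0] = max_length // 2 # guarantee at least one shorter series
    lengths[1] = max_length      # ... and one full-length series
    for i in range(num_examples):
        data[i, 1:1 + lengths[i]] = rng.normal(0, 1, lengths[i])
    results, _ = run_additional(data, data, num_runs = 1, num_kernels = 100)
    print(f"smoke test accuracy = {results.mean():.4f}")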
delimiter=",") test_data = np.loadtxt( f"{arguments.input_path}/{dataset_name}/{dataset_name}_TEST.txt", delimiter=",") print("Done.") # -- precompile ------------------------------------------------------------ if not compiled: print(f"Compiling ROCKET functions (once only)".ljust(80 - 5, "."), end="", flush=True) _ = generate_kernels(100, 10) apply_kernels(np.zeros_like(training_data)[:, 1:], _) apply_kernels_jagged( np.zeros_like(training_data)[:, 1:], _, np.array([training_data.shape[1]] * len(training_data))) compiled = True print("Done.") # -- run ------------------------------------------------------------------- print(f"Performing runs".ljust(80 - 5, "."), end="", flush=True) results, timings = run_additional(training_data, test_data, num_runs=arguments.num_runs,
# here, validation data is always the first 2 ** 11 = 2,048 examples
validation_data = pd.read_csv(arguments.training_path, header = None,
                              nrows = 2 ** 11).values
Y_validation, X_validation = validation_data[:, 0], validation_data[:, 1:]

# training data starts immediately after the validation examples
training_data = pd.read_csv(arguments.training_path, header = None,
                            skiprows = 2 ** 11,
                            nrows = num_training_examples).values
Y_training, X_training = training_data[:, 0], training_data[:, 1:]

# -- generate kernels -----------------------------------------------------------

kernels = generate_kernels(X_training.shape[1], arguments.num_kernels)

# -- train -----------------------------------------------------------------------

time_a = time.perf_counter()
model, f_mean, f_std = train(X_training, Y_training,
                             X_validation, Y_validation,
                             kernels, arguments.num_kernels * 2,
                             num_classes = 24)
time_b = time.perf_counter()

results.loc[num_training_examples, "time_training_seconds"] = time_b - time_a
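# illustrative note, not from the original script: train() is defined elsewhere
# in this repository, and f_mean / f_std appear to be the per-feature mean and
# std used to normalise the transformed training features, so the same
# statistics would presumably be reused to normalise any later features before
# prediction, e.g. (sketch, assuming apply_kernels and a test set X_test):
#
# X_test_transform = apply_kernels(X_test, kernels)
# X_test_transform = (X_test_transform - f_mean) / f_std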