def run(training_data, test_data, num_runs=10, num_kernels=10_000):
    """Benchmark ROCKET on a (training, test) split over multiple runs.

    Each run draws fresh random kernels, transforms both splits, fits a
    ridge classifier on the transformed training set and scores the test set.

    Parameters
    ----------
    training_data, test_data : 2d arrays
        Column 0 is the integer class label; the remaining columns are the
        time series values.
    num_runs : int
        Number of independent repetitions.
    num_kernels : int
        Number of random kernels generated per run.

    Returns
    -------
    (results, timings)
        ``results`` holds per-run test accuracy (shape ``[num_runs]``);
        ``timings`` holds wall-clock seconds (shape ``[4, num_runs]``:
        training transform, test transform, training, test).
    """
    results = np.zeros(num_runs)
    timings = np.zeros([4, num_runs])  # training transform, test transform, training, test

    # split labels from series values
    # (np.int was removed in NumPy 1.24; np.int64 keeps the original intent)
    Y_training, X_training = training_data[:, 0].astype(np.int64), training_data[:, 1:]
    Y_test, X_test = test_data[:, 0].astype(np.int64), test_data[:, 1:]

    for i in range(num_runs):

        input_length = X_training.shape[1]
        kernels = generate_kernels(input_length, num_kernels)

        # -- transform training ------------------------------------------------

        time_a = time.perf_counter()
        X_training_transform = apply_kernels(X_training, kernels)
        time_b = time.perf_counter()
        timings[0, i] = time_b - time_a

        # -- transform test ----------------------------------------------------

        time_a = time.perf_counter()
        X_test_transform = apply_kernels(X_test, kernels)
        time_b = time.perf_counter()
        timings[1, i] = time_b - time_a

        # -- training ----------------------------------------------------------

        time_a = time.perf_counter()
        # np.logspace(-3, 3, 10) is identical to 10 ** np.linspace(-3, 3, 10)
        # NOTE(review): the normalize= keyword was removed from
        # RidgeClassifierCV in scikit-learn 1.2 -- confirm the pinned version
        classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10),
                                       normalize=True)
        classifier.fit(X_training_transform, Y_training)
        time_b = time.perf_counter()
        timings[2, i] = time_b - time_a

        # -- test --------------------------------------------------------------

        time_a = time.perf_counter()
        results[i] = classifier.score(X_test_transform, Y_test)
        time_b = time.perf_counter()
        timings[3, i] = time_b - time_a

    return results, timings
# Timed benchmarking loop (script-level; `arguments`, `X_training`, `X_test`,
# `generate_kernels` and `apply_kernels` are defined outside this chunk).
print(f"Performing runs".ljust(80 - 5, "."), end="", flush=True)

_results = np.zeros(arguments.num_runs)
# per-phase wall times: transform training, transform test, training, test
_timings = np.zeros([4, arguments.num_runs ])  # trans. tr., trans. te., training, test

for i in range(arguments.num_runs):

    # fresh random kernels each run, sized to the series length
    input_length = X_training.shape[-1]
    kernels = generate_kernels(input_length, arguments.num_kernels)

    # -- transform training ------------------------------------------------

    time_a = time.perf_counter()
    X_training_transform = apply_kernels(X_training, kernels)
    time_b = time.perf_counter()
    _timings[0, i] = time_b - time_a

    # -- transform test ----------------------------------------------------

    time_a = time.perf_counter()
    X_test_transform = apply_kernels(X_test, kernels)
    time_b = time.perf_counter()
    _timings[1, i] = time_b - time_a

    # -- training ----------------------------------------------------------

    time_a = time.perf_counter()
    # NOTE(review): the normalize= keyword was removed from RidgeClassifierCV
    # in scikit-learn 1.2 -- confirm the pinned version supports it.
    # NOTE(review): this chunk ends here; the fit/score steps of the loop body
    # continue beyond the visible fragment.
    classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
def run_additional(training_data, test_data, num_runs=10, num_kernels=10_000):
    """Benchmark ROCKET on datasets with missing values or variable lengths.

    Assumes variable-length time series are padded with NaN.  Three cases are
    handled: (1) same lengths, no missing values (plain transform);
    (2) same lengths, missing values (NaNs filled by linear interpolation);
    (3) variable lengths, no missing values (10-fold cross-validation on the
    training set chooses between rescaling every series to the longest
    training length and using the series "as is" via the jagged transform).

    Parameters
    ----------
    training_data, test_data : 2d arrays
        Column 0 is the integer class label; remaining columns are the
        (possibly NaN-padded) time series values.
    num_runs, num_kernels : int
        Number of repetitions and number of random kernels per run.

    Returns
    -------
    (results, timings)
        Per-run test accuracy (shape ``[num_runs]``) and wall times
        (shape ``[4, num_runs]``: training transform, test transform,
        training, test).
    """

    def get_input_lengths(X):
        # true length = width minus the number of trailing NaN padding values
        return X.shape[1] - (~np.isnan(np.flip(X, 1))).argmax(1)

    def rescale(X, reference_length):
        # linearly resample every series to reference_length points
        _X = np.zeros([len(X), reference_length])
        input_lengths = get_input_lengths(X)
        for i in range(len(X)):
            _X[i] = np.interp(np.linspace(0, 1, reference_length),
                              np.linspace(0, 1, input_lengths[i]),
                              X[i][:input_lengths[i]])
        return _X

    def interpolate_nan(X):
        # fill NaNs by linear interpolation over the observed points
        _X = X.copy()
        good = ~np.isnan(X)
        for i in np.where(np.any(~good, 1))[0]:
            _X[i] = np.interp(np.arange(len(X[i])), np.where(good[i])[0],
                              X[i][good[i]])
        return _X

    results = np.zeros(num_runs)
    timings = np.zeros([4, num_runs])  # training transform, test transform, training, test

    # np.int was removed in NumPy 1.24; np.int64 keeps the original intent
    Y_training, X_training = training_data[:, 0].astype(np.int64), training_data[:, 1:]
    Y_test, X_test = test_data[:, 0].astype(np.int64), test_data[:, 1:]

    variable_lengths = False

    # handle three cases: (1) same lengths, no missing values; (2) same lengths,
    # missing values; and (3) variable lengths, no missing values
    if np.any(np.isnan(X_training)):

        input_lengths_training = get_input_lengths(X_training)
        input_lengths_training_max = input_lengths_training.max()
        input_lengths_test = get_input_lengths(X_test)

        # missing values (same lengths)
        if np.all(input_lengths_training == input_lengths_training_max):
            X_training = interpolate_nan(X_training)
            X_test = interpolate_nan(X_test)
        # variable lengths (no missing values)
        else:
            variable_lengths = True
            num_folds = 10
            cross_validation_results = np.zeros([2, num_folds])

    # normalise time series (NaN-aware so padding is ignored)
    X_training = (X_training - np.nanmean(X_training, axis=1, keepdims=True)) / \
                 (np.nanstd(X_training, axis=1, keepdims=True) + 1e-8)
    X_test = (X_test - np.nanmean(X_test, axis=1, keepdims=True)) / \
             (np.nanstd(X_test, axis=1, keepdims=True) + 1e-8)

    for i in range(num_runs):

        # -- variable lengths --------------------------------------------------

        if variable_lengths:

            kernels = generate_kernels(input_lengths_training_max, num_kernels)

            time_a = time.perf_counter()
            X_training_transform_rescale = apply_kernels(
                rescale(X_training, input_lengths_training_max), kernels)
            X_training_transform_jagged = apply_kernels_jagged(
                X_training, kernels, input_lengths_training)
            time_b = time.perf_counter()
            timings[0, i] = time_b - time_a

            # indices for cross-validation folds
            I = np.random.permutation(len(X_training))
            I = np.array_split(I, num_folds)

            time_a = time.perf_counter()

            # j = 0 -> rescale
            # j = 1 -> "as is" ("jagged")
            for j in range(2):
                for k in range(num_folds):
                    # fold k validates, the remainder trains; indexing the fold
                    # list directly avoids np.roll building a ragged object
                    # array (an error on modern NumPy when folds are unequal)
                    VA = I[k]
                    TR = np.concatenate(I[:k] + I[k + 1:])
                    classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10),
                                                   normalize=True)
                    if j == 0:  # rescale
                        classifier.fit(X_training_transform_rescale[TR],
                                       Y_training[TR])
                        cross_validation_results[j][k] = classifier.score(
                            X_training_transform_rescale[VA], Y_training[VA])
                    elif j == 1:  # jagged
                        classifier.fit(X_training_transform_jagged[TR],
                                       Y_training[TR])
                        cross_validation_results[j][k] = classifier.score(
                            X_training_transform_jagged[VA], Y_training[VA])

            # pick the variant with the higher total cross-validation score
            best = cross_validation_results.sum(1).argmax()

            time_b = time.perf_counter()
            timings[2, i] = time_b - time_a

            classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10),
                                           normalize=True)

            if best == 0:  # rescale

                time_a = time.perf_counter()
                X_test_transform_rescale = apply_kernels(
                    rescale(X_test, input_lengths_training_max), kernels)
                time_b = time.perf_counter()
                timings[1, i] = time_b - time_a

                time_a = time.perf_counter()
                classifier.fit(X_training_transform_rescale, Y_training)
                time_b = time.perf_counter()
                timings[2, i] += time_b - time_a

                time_a = time.perf_counter()
                results[i] = classifier.score(X_test_transform_rescale, Y_test)
                time_b = time.perf_counter()
                timings[3, i] = time_b - time_a

            elif best == 1:  # jagged

                time_a = time.perf_counter()
                X_test_transform_jagged = apply_kernels_jagged(
                    X_test, kernels, input_lengths_test)
                time_b = time.perf_counter()
                timings[1, i] = time_b - time_a

                time_a = time.perf_counter()
                classifier.fit(X_training_transform_jagged, Y_training)
                time_b = time.perf_counter()
                timings[2, i] += time_b - time_a

                time_a = time.perf_counter()
                results[i] = classifier.score(X_test_transform_jagged, Y_test)
                time_b = time.perf_counter()
                timings[3, i] = time_b - time_a

        # -- same lengths ------------------------------------------------------

        else:

            kernels = generate_kernels(X_training.shape[1], num_kernels)

            # -- transform training --------------------------------------------

            time_a = time.perf_counter()
            X_training_transform = apply_kernels(X_training, kernels)
            time_b = time.perf_counter()
            timings[0, i] = time_b - time_a

            # -- transform test ------------------------------------------------

            time_a = time.perf_counter()
            X_test_transform = apply_kernels(X_test, kernels)
            time_b = time.perf_counter()
            timings[1, i] = time_b - time_a

            # -- training ------------------------------------------------------

            time_a = time.perf_counter()
            classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10),
                                           normalize=True)
            classifier.fit(X_training_transform, Y_training)
            time_b = time.perf_counter()
            timings[2, i] = time_b - time_a

            # -- test ----------------------------------------------------------

            time_a = time.perf_counter()
            results[i] = classifier.score(X_test_transform, Y_test)
            time_b = time.perf_counter()
            timings[3, i] = time_b - time_a

    return results, timings
# Script fragment (inside a per-dataset loop whose header is outside this
# chunk; `arguments`, `dataset_name`, `training_data` and `compiled` are
# defined earlier).
test_data = np.loadtxt(
    f"{arguments.input_path}/{dataset_name}/{dataset_name}_TEST.txt",
    delimiter=",")

print("Done.")

# -- precompile ------------------------------------------------------------

# run each ROCKET function once on tiny inputs so JIT compilation cost is
# paid up front rather than inside the timed runs
if not compiled:

    print(f"Compiling ROCKET functions (once only)".ljust(80 - 5, "."),
          end="",
          flush=True)

    _ = generate_kernels(100, 10)
    apply_kernels(np.zeros_like(training_data)[:, 1:], _)
    # NOTE(review): training_data.shape[1] counts the label column, so the
    # claimed length exceeds the actual series width by one -- confirm
    # apply_kernels_jagged tolerates this for the compile-only call
    apply_kernels_jagged(
        np.zeros_like(training_data)[:, 1:], _,
        np.array([training_data.shape[1]] * len(training_data)))

    compiled = True

    print("Done.")

# -- run -------------------------------------------------------------------

print(f"Performing runs".ljust(80 - 5, "."), end="", flush=True)

results, timings = run_additional(training_data,
                                  test_data,
                                  num_runs=arguments.num_runs,
                                  num_kernels=arguments.num_kernels)
def train(
        X,
        Y,
        X_validation,
        Y_validation,
        kernels,
        num_features,
        num_classes,
        minibatch_size=256,
        max_epochs=100,
        patience=2,  # x10 minibatches; reset if loss improves
        tranche_size=2**11,
        cache_size=2**14):  # as much as possible
    """Train a softmax-regression head on ROCKET features, tranche by tranche.

    Data are processed in tranches of ``tranche_size`` rows; each tranche is
    normalised, transformed with ``apply_kernels`` and (up to ``cache_size``
    rows) cached so later epochs skip the transform.  Per-feature mean and
    standard deviation are estimated on the first tranche and applied to all
    subsequent (and validation) features.  A minimal learning-rate search on
    the first minibatch picks the Adam lr; validation loss is checked every
    10 minibatches for early stopping.

    Returns
    -------
    (model, f_mean, f_std)
        The trained ``nn.Sequential`` model and the per-feature
        normalisation statistics needed to transform test data.
    """

    # -- init ------------------------------------------------------------------

    def init(layer):
        # zero-initialise the linear layer's weights and bias
        if isinstance(layer, nn.Linear):
            nn.init.constant_(layer.weight.data, 0)
            nn.init.constant_(layer.bias.data, 0)

    # -- model -----------------------------------------------------------------

    model = nn.Sequential(nn.Linear(num_features, num_classes))  # logistic / softmax regression
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters())
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     factor=0.5,
                                                     min_lr=1e-8)
    model.apply(init)

    # -- run -------------------------------------------------------------------

    minibatch_count = 0
    best_validation_loss = np.inf
    stall_count = 0
    stop = False

    num_examples = len(X)
    # np.int was removed in NumPy 1.24; the builtin int is the replacement
    num_tranches = int(np.ceil(num_examples / tranche_size))

    cache = np.zeros((min(cache_size, num_examples), num_features))
    cache_count = 0

    for epoch in range(max_epochs):

        if epoch > 0 and stop:
            break

        for tranche_index in range(num_tranches):

            if epoch > 0 and stop:
                break

            a = tranche_size * tranche_index
            b = a + tranche_size

            Y_tranche = Y[a:b]

            # if cached, use cached transform; else transform and cache the result
            if b <= cache_count:

                X_tranche_transform = cache[a:b]

            else:

                X_tranche = X[a:b]
                X_tranche = (X_tranche - X_tranche.mean(axis=1, keepdims=True)) \
                            / X_tranche.std(axis=1, keepdims=True)  # normalise time series
                X_tranche_transform = apply_kernels(X_tranche, kernels)

                if epoch == 0 and tranche_index == 0:

                    # per-feature mean and standard deviation (estimated on first tranche)
                    f_mean = X_tranche_transform.mean(0)
                    f_std = X_tranche_transform.std(0) + 1e-8

                    # normalise and transform validation data
                    X_validation = (X_validation - X_validation.mean(axis=1, keepdims=True)) \
                                   / X_validation.std(axis=1, keepdims=True)  # normalise time series
                    X_validation_transform = apply_kernels(X_validation, kernels)
                    X_validation_transform = (X_validation_transform - f_mean) / f_std  # normalise transformed features
                    X_validation_transform = torch.FloatTensor(X_validation_transform)
                    Y_validation = torch.LongTensor(Y_validation)

                X_tranche_transform = (X_tranche_transform - f_mean) / f_std  # normalise transformed features

                if b <= cache_size:
                    cache[a:b] = X_tranche_transform
                    cache_count = b

            X_tranche_transform = torch.FloatTensor(X_tranche_transform)
            Y_tranche = torch.LongTensor(Y_tranche)

            minibatches = torch.randperm(
                len(X_tranche_transform)).split(minibatch_size)

            for minibatch_index, minibatch in enumerate(minibatches):

                if epoch > 0 and stop:
                    break

                # abandon undersized minibatches
                if minibatch_index > 0 and len(minibatch) < minibatch_size:
                    break

                # -- (optional) minimal lr search ------------------------------

                # default lr for Adam may cause training loss to diverge for a
                # large number of kernels; lr minimising training loss on first
                # update should ensure training loss converges
                if epoch == 0 and tranche_index == 0 and minibatch_index == 0:

                    candidate_lr = 10**np.linspace(-1, -6, 6)

                    best_lr = None
                    best_training_loss = np.inf

                    for lr in candidate_lr:

                        # fresh zero-initialised copy for each candidate lr
                        lr_model = nn.Sequential(
                            nn.Linear(num_features, num_classes))
                        lr_optimizer = optim.Adam(lr_model.parameters())
                        lr_model.apply(init)

                        for param_group in lr_optimizer.param_groups:
                            param_group["lr"] = lr

                        # perform a single update
                        lr_optimizer.zero_grad()
                        Y_tranche_predictions = lr_model(
                            X_tranche_transform[minibatch])
                        training_loss = loss_function(Y_tranche_predictions,
                                                      Y_tranche[minibatch])
                        training_loss.backward()
                        lr_optimizer.step()

                        # evaluate on the whole tranche after that one step
                        Y_tranche_predictions = lr_model(X_tranche_transform)
                        training_loss = loss_function(Y_tranche_predictions,
                                                      Y_tranche).item()

                        if training_loss < best_training_loss:
                            best_training_loss = training_loss
                            best_lr = lr

                    for param_group in optimizer.param_groups:
                        param_group["lr"] = best_lr

                # -- training --------------------------------------------------

                optimizer.zero_grad()
                Y_tranche_predictions = model(X_tranche_transform[minibatch])
                training_loss = loss_function(Y_tranche_predictions,
                                              Y_tranche[minibatch])
                training_loss.backward()
                optimizer.step()

                minibatch_count += 1

                # check validation loss every 10 minibatches
                if minibatch_count % 10 == 0:

                    Y_validation_predictions = model(X_validation_transform)
                    validation_loss = loss_function(Y_validation_predictions,
                                                    Y_validation)

                    scheduler.step(validation_loss)

                    if validation_loss.item() >= best_validation_loss:
                        stall_count += 1
                        if stall_count >= patience:
                            stop = True
                    else:
                        best_validation_loss = validation_loss.item()
                        if not stop:
                            stall_count = 0

    return model, f_mean, f_std
# Script fragment (inside the per-dataset-size loop; `time_a`, `results`,
# `model`, `kernels`, `f_mean`, `f_std` and `num_training_examples` are
# defined outside this chunk).
time_b = time.perf_counter()
results.loc[num_training_examples, "time_training_seconds"] = time_b - time_a

# -- test ------------------------------------------------------------------

# read test data (here, we test on a subset of the full test data)
test_data = pd.read_csv(arguments.test_path, header=None, nrows=2**11).values
# NOTE(review): np.int was removed in NumPy 1.24 -- should become np.int64
Y_test, X_test = test_data[:, 0].astype(np.int), test_data[:, 1:]

# normalise and transform test data
X_test = (X_test - X_test.mean(axis=1, keepdims=True)) / X_test.std(
    axis=1, keepdims=True)  # normalise time series
X_test_transform = apply_kernels(X_test, kernels)
X_test_transform = (X_test_transform - f_mean) / f_std  # normalise transformed features

# predict
model.eval()
Y_test_predictions = model(torch.FloatTensor(X_test_transform))
# accuracy = fraction of argmax predictions matching the labels
results.loc[num_training_examples, "accuracy"] = (
    Y_test_predictions.max(1)[1].numpy() == Y_test).mean()

print("Done.")

print(f" FINISHED ".center(80, "="))

# NOTE(review): this call is cut off at the chunk boundary; its arguments
# continue beyond the visible fragment.
results.to_csv(
# Script fragment (inside a per-dataset loop; `arguments`, `dataset_name`
# and `compiled` are defined outside this chunk).
print(f"Loading data".ljust(80 - 5, "."), end = "", flush = True)

training_data = np.loadtxt(f"{arguments.input_path}/{dataset_name}/{dataset_name}_TRAIN.txt")
test_data = np.loadtxt(f"{arguments.input_path}/{dataset_name}/{dataset_name}_TEST.txt")

print("Done.")

# -- precompile ------------------------------------------------------------

# run the ROCKET functions once on tiny inputs so JIT compilation cost is
# paid up front rather than inside the timed runs
if not compiled:

    print(f"Compiling ROCKET functions (once only)".ljust(80 - 5, "."), end = "", flush = True)

    _ = generate_kernels(100, 10)
    apply_kernels(np.zeros_like(training_data)[:, 1:].astype(np.float32), _)
    compiled = True

    print("Done.")

# -- run -------------------------------------------------------------------

print(f"Performing runs".ljust(80 - 5, "."), end = "", flush = True)

results, timings = run(training_data, test_data, num_runs = arguments.num_runs, num_kernels = arguments.num_kernels)

# mean wall time per phase across runs
timings_mean = timings.mean(1)

print("Done.")
# NOTE(review): this `return` is the tail of a function whose `def` lies
# outside this chunk.
return model, f_mean, f_std

# == run =======================================================================

# -- precompile ROCKET functions -----------------------------------------------

# run the ROCKET functions once on tiny inputs so JIT compilation cost is
# paid up front rather than inside the timed runs
print("Compiling ROCKET functions (once only)".ljust(80 - 5, "."),
      end="",
      flush=True)
training_data = pd.read_csv(arguments.training_path, header=None,
                            nrows=10).values
_ = generate_kernels(20, 10)
apply_kernels(np.zeros_like(training_data)[:, 1:], _)
print("Done.")

# -- run through dataset sizes -------------------------------------------------

# training-set sizes 2**8 .. 2**20, one row of results per size
all_num_training_examples = 2**np.arange(8, 20 + 1)

results = pd.DataFrame(index=all_num_training_examples,
                       columns=["accuracy", "time_training_seconds"],
                       data=0)
results.index.name = "num_training_examples"

print(f" {arguments.num_kernels:,} Kernels ".center(80, "="))

# NOTE(review): loop body is outside this chunk.
for num_training_examples in all_num_training_examples: