Beispiel #1
0
def run(training_data, test_data, num_runs = 10, num_kernels = 10_000):
    """Benchmark ROCKET over several independent runs on a fixed-length dataset.

    Parameters
    ----------
    training_data, test_data : 2d arrays where column 0 holds the integer
        class label and the remaining columns hold the time series values.
    num_runs : number of independent repetitions (fresh kernels each run).
    num_kernels : number of random convolutional kernels generated per run.

    Returns
    -------
    results : (num_runs,) array of test-set accuracies.
    timings : (4, num_runs) array of wall-clock seconds per run for
        [training transform, test transform, training, test].
    """

    results = np.zeros(num_runs)
    timings = np.zeros([4, num_runs]) # training transform, test transform, training, test

    # np.int was removed in NumPy 1.24; the builtin int is the documented
    # replacement and is behaviourally identical here.
    Y_training, X_training = training_data[:, 0].astype(int), training_data[:, 1:]
    Y_test, X_test = test_data[:, 0].astype(int), test_data[:, 1:]

    for i in range(num_runs):

        input_length = X_training.shape[1]
        kernels = generate_kernels(input_length, num_kernels)

        # -- transform training ------------------------------------------------

        time_a = time.perf_counter()
        X_training_transform = apply_kernels(X_training, kernels)
        time_b = time.perf_counter()
        timings[0, i] = time_b - time_a

        # -- transform test ----------------------------------------------------

        time_a = time.perf_counter()
        X_test_transform = apply_kernels(X_test, kernels)
        time_b = time.perf_counter()
        timings[1, i] = time_b - time_a

        # -- training ----------------------------------------------------------

        time_a = time.perf_counter()
        # NOTE(review): the `normalize` keyword was removed from
        # RidgeClassifierCV in scikit-learn 1.2 -- pin scikit-learn < 1.2 or
        # standardise the transformed features explicitly before fitting.
        classifier = RidgeClassifierCV(alphas = 10 ** np.linspace(-3, 3, 10), normalize = True)
        classifier.fit(X_training_transform, Y_training)
        time_b = time.perf_counter()
        timings[2, i] = time_b - time_a

        # -- test --------------------------------------------------------------

        time_a = time.perf_counter()
        results[i] = classifier.score(X_test_transform, Y_test)
        time_b = time.perf_counter()
        timings[3, i] = time_b - time_a

    return results, timings
Beispiel #3
0
def run_additional(training_data, test_data, num_runs=10, num_kernels=10_000):
    """Benchmark ROCKET on datasets with missing values or variable lengths.

    Handles three cases:
      (1) equal lengths, no missing values -- kernels applied directly;
      (2) equal lengths, missing values   -- NaNs filled by linear
          interpolation, then as case (1);
      (3) variable lengths (NaN-padded), no missing values -- per run, a
          10-fold cross-validation on the training set chooses between
          rescaling every series to the maximum training length and
          applying the kernels to the raw ("jagged") series.

    Parameters and returns are as in ``run``: label in column 0, series in
    the remaining columns; returns ``(results, timings)`` where ``timings``
    rows are [training transform, test transform, training, test].
    """

    # assumes variable length time series are padded with nan; length is the
    # position of the last non-NaN value, found by scanning from the end
    get_input_lengths = lambda X: X.shape[1] - (~np.isnan(np.flip(X, 1))).argmax(1)

    def rescale(X, reference_length):
        # linearly resample every series to exactly reference_length points
        _X = np.zeros([len(X), reference_length])
        input_lengths = get_input_lengths(X)
        for i in range(len(X)):
            _X[i] = np.interp(np.linspace(0, 1, reference_length),
                              np.linspace(0, 1, input_lengths[i]),
                              X[i][:input_lengths[i]])
        return _X

    def interpolate_nan(X):
        # fill NaNs by linear interpolation over the observed points of
        # each affected row (rows without NaNs are left untouched)
        _X = X.copy()
        good = ~np.isnan(X)
        for i in np.where(np.any(~good, 1))[0]:
            _X[i] = np.interp(np.arange(len(X[i])),
                              np.where(good[i])[0], X[i][good[i]])
        return _X

    results = np.zeros(num_runs)
    timings = np.zeros([4, num_runs])  # training transform, test transform, training, test

    # np.int was removed in NumPy 1.24; the builtin int is equivalent here
    Y_training, X_training = training_data[:, 0].astype(int), training_data[:, 1:]
    Y_test, X_test = test_data[:, 0].astype(int), test_data[:, 1:]

    variable_lengths = False

    # handle three cases: (1) same lengths, no missing values; (2) same lengths,
    # missing values; and (3) variable lengths, no missing values

    if np.any(np.isnan(X_training)):

        input_lengths_training = get_input_lengths(X_training)
        input_lengths_training_max = input_lengths_training.max()
        input_lengths_test = get_input_lengths(X_test)

        # missing values (same lengths)
        if np.all(input_lengths_training == input_lengths_training_max):

            X_training = interpolate_nan(X_training)
            X_test = interpolate_nan(X_test)

        # variable lengths (no missing values)
        else:

            variable_lengths = True
            num_folds = 10
            cross_validation_results = np.zeros([2, num_folds])

    # normalise time series (per series; nan-aware for the padded case;
    # +1e-8 guards against division by zero on constant series)
    X_training = (X_training - np.nanmean(X_training, axis=1, keepdims=True)
                  ) / (np.nanstd(X_training, axis=1, keepdims=True) + 1e-8)
    X_test = (X_test - np.nanmean(X_test, axis=1, keepdims=True)) / (
        np.nanstd(X_test, axis=1, keepdims=True) + 1e-8)

    for i in range(num_runs):

        # -- variable lengths --------------------------------------------------

        if variable_lengths:

            kernels = generate_kernels(input_lengths_training_max, num_kernels)

            time_a = time.perf_counter()
            X_training_transform_rescale = apply_kernels(
                rescale(X_training, input_lengths_training_max), kernels)
            X_training_transform_jagged = apply_kernels_jagged(
                X_training, kernels, input_lengths_training)
            time_b = time.perf_counter()
            timings[0, i] = time_b - time_a

            # indices for cross-validation folds
            I = np.random.permutation(len(X_training))
            I = np.array_split(I, num_folds)

            time_a = time.perf_counter()

            # j = 0 -> rescale
            # j = 1 -> "as is" ("jagged")
            for j in range(2):

                for k in range(num_folds):

                    # rotate the folds so each one serves as validation once
                    VA, *TR = np.roll(I, k, axis=0)
                    TR = np.concatenate(TR)

                    # NOTE(review): `normalize` was removed from
                    # RidgeClassifierCV in scikit-learn 1.2 (see `run`)
                    classifier = RidgeClassifierCV(alphas=10**np.linspace(-3, 3, 10),
                                                   normalize=True)

                    if j == 0:  # rescale

                        classifier.fit(X_training_transform_rescale[TR],
                                       Y_training[TR])
                        cross_validation_results[j][k] = classifier.score(
                            X_training_transform_rescale[VA], Y_training[VA])

                    elif j == 1:  # jagged

                        classifier.fit(X_training_transform_jagged[TR],
                                       Y_training[TR])
                        cross_validation_results[j][k] = classifier.score(
                            X_training_transform_jagged[VA], Y_training[VA])

            # pick the representation with the higher total cv accuracy
            best = cross_validation_results.sum(1).argmax()
            time_b = time.perf_counter()
            timings[2, i] = time_b - time_a

            classifier = RidgeClassifierCV(alphas=10**np.linspace(-3, 3, 10),
                                           normalize=True)

            if best == 0:  # rescale

                time_a = time.perf_counter()
                X_test_transform_rescale = apply_kernels(
                    rescale(X_test, input_lengths_training_max), kernels)
                time_b = time.perf_counter()
                timings[1, i] = time_b - time_a

                time_a = time.perf_counter()
                classifier.fit(X_training_transform_rescale, Y_training)
                time_b = time.perf_counter()
                timings[2, i] += time_b - time_a  # add to cv time

                time_a = time.perf_counter()
                results[i] = classifier.score(X_test_transform_rescale, Y_test)
                time_b = time.perf_counter()
                timings[3, i] = time_b - time_a

            elif best == 1:  # jagged

                time_a = time.perf_counter()
                X_test_transform_jagged = apply_kernels_jagged(
                    X_test, kernels, input_lengths_test)
                time_b = time.perf_counter()
                timings[1, i] = time_b - time_a

                time_a = time.perf_counter()
                classifier.fit(X_training_transform_jagged, Y_training)
                time_b = time.perf_counter()
                timings[2, i] += time_b - time_a  # add to cv time

                time_a = time.perf_counter()
                results[i] = classifier.score(X_test_transform_jagged, Y_test)
                time_b = time.perf_counter()
                timings[3, i] = time_b - time_a

        # -- same lengths ------------------------------------------------------

        else:

            kernels = generate_kernels(X_training.shape[1], num_kernels)

            # -- transform training --------------------------------------------

            time_a = time.perf_counter()
            X_training_transform = apply_kernels(X_training, kernels)
            time_b = time.perf_counter()
            timings[0, i] = time_b - time_a

            # -- transform test ------------------------------------------------

            time_a = time.perf_counter()
            X_test_transform = apply_kernels(X_test, kernels)
            time_b = time.perf_counter()
            timings[1, i] = time_b - time_a

            # -- training ------------------------------------------------------

            time_a = time.perf_counter()
            classifier = RidgeClassifierCV(alphas=10**np.linspace(-3, 3, 10),
                                           normalize=True)
            classifier.fit(X_training_transform, Y_training)
            time_b = time.perf_counter()
            timings[2, i] = time_b - time_a

            # -- test ----------------------------------------------------------

            time_a = time.perf_counter()
            results[i] = classifier.score(X_test_transform, Y_test)
            time_b = time.perf_counter()
            timings[3, i] = time_b - time_a

    return results, timings
Beispiel #4
0
        test_data = np.loadtxt(
            f"{arguments.input_path}/{dataset_name}/{dataset_name}_TEST.txt",
            delimiter=",")

    print("Done.")

    # -- precompile ------------------------------------------------------------

    if not compiled:

        print(f"Compiling ROCKET functions (once only)".ljust(80 - 5, "."),
              end="",
              flush=True)

        _ = generate_kernels(100, 10)
        apply_kernels(np.zeros_like(training_data)[:, 1:], _)
        apply_kernels_jagged(
            np.zeros_like(training_data)[:, 1:], _,
            np.array([training_data.shape[1]] * len(training_data)))
        compiled = True

        print("Done.")

    # -- run -------------------------------------------------------------------

    print(f"Performing runs".ljust(80 - 5, "."), end="", flush=True)

    results, timings = run_additional(training_data,
                                      test_data,
                                      num_runs=arguments.num_runs,
                                      num_kernels=arguments.num_kernels)
Beispiel #5
0
def train(
        X,
        Y,
        X_validation,
        Y_validation,
        kernels,
        num_features,
        num_classes,
        minibatch_size=256,
        max_epochs=100,
        patience=2,  # x10 minibatches; reset if loss improves
        tranche_size=2**11,
        cache_size=2**14):  # as much as possible
    """Train a softmax-regression head on ROCKET features, tranche by tranche.

    The raw series in ``X`` are processed in tranches: each tranche is
    normalised, transformed with ``apply_kernels``, feature-normalised using
    the mean/std of the *first* tranche, and cached (up to ``cache_size``
    rows) so later epochs can skip the transform. A one-off learning-rate
    search on the first minibatch picks the Adam lr; validation loss is
    checked every 10 minibatches for lr scheduling and early stopping.

    Returns ``(model, f_mean, f_std)`` -- the trained model and the
    per-feature normalisation statistics needed to transform test data.
    """

    # -- init ------------------------------------------------------------------

    def init(layer):
        # zero init: with a convex (softmax-regression) objective this is a
        # safe, deterministic starting point
        if isinstance(layer, nn.Linear):
            nn.init.constant_(layer.weight.data, 0)
            nn.init.constant_(layer.bias.data, 0)

    # -- model -----------------------------------------------------------------

    model = nn.Sequential(nn.Linear(
        num_features, num_classes))  # logistic / softmax regression
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters())
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     factor=0.5,
                                                     min_lr=1e-8)
    model.apply(init)

    # -- run -------------------------------------------------------------------

    minibatch_count = 0
    best_validation_loss = np.inf
    stall_count = 0
    stop = False

    num_examples = len(X)
    # np.int was removed in NumPy 1.24; the builtin int is equivalent here
    num_tranches = int(np.ceil(num_examples / tranche_size))

    cache = np.zeros((min(cache_size, num_examples), num_features))
    cache_count = 0  # rows [0, cache_count) of `cache` hold valid transforms

    for epoch in range(max_epochs):

        if epoch > 0 and stop:
            break

        for tranche_index in range(num_tranches):

            if epoch > 0 and stop:
                break

            a = tranche_size * tranche_index
            b = a + tranche_size

            Y_tranche = Y[a:b]

            # if cached, use cached transform; else transform and cache the result
            if b <= cache_count:

                X_tranche_transform = cache[a:b]

            else:

                X_tranche = X[a:b]
                X_tranche = (X_tranche - X_tranche.mean(
                    axis=1, keepdims=True)) / X_tranche.std(
                        axis=1, keepdims=True)  # normalise time series
                X_tranche_transform = apply_kernels(X_tranche, kernels)

                if epoch == 0 and tranche_index == 0:

                    # per-feature mean and standard deviation (estimated on first tranche)
                    f_mean = X_tranche_transform.mean(0)
                    f_std = X_tranche_transform.std(0) + 1e-8

                    # normalise and transform validation data (once, using the
                    # first-tranche statistics so train/validation match)
                    X_validation = (X_validation - X_validation.mean(
                        axis=1, keepdims=True)) / X_validation.std(
                            axis=1, keepdims=True)  # normalise time series
                    X_validation_transform = apply_kernels(
                        X_validation, kernels)
                    X_validation_transform = (
                        X_validation_transform -
                        f_mean) / f_std  # normalise transformed features
                    X_validation_transform = torch.FloatTensor(
                        X_validation_transform)
                    Y_validation = torch.LongTensor(Y_validation)

                X_tranche_transform = (
                    X_tranche_transform -
                    f_mean) / f_std  # normalise transformed features

                if b <= cache_size:

                    cache[a:b] = X_tranche_transform
                    cache_count = b

            X_tranche_transform = torch.FloatTensor(X_tranche_transform)
            Y_tranche = torch.LongTensor(Y_tranche)

            minibatches = torch.randperm(
                len(X_tranche_transform)).split(minibatch_size)

            for minibatch_index, minibatch in enumerate(minibatches):

                if epoch > 0 and stop:
                    break

                # abandon undersized minibatches
                if minibatch_index > 0 and len(minibatch) < minibatch_size:
                    break

                # -- (optional) minimal lr search ------------------------------

                # default lr for Adam may cause training loss to diverge for a
                # large number of kernels; lr minimising training loss on first
                # update should ensure training loss converges

                if epoch == 0 and tranche_index == 0 and minibatch_index == 0:

                    candidate_lr = 10**np.linspace(-1, -6, 6)

                    best_lr = None
                    best_training_loss = np.inf

                    for lr in candidate_lr:

                        # throwaway zero-initialised clone per candidate lr
                        lr_model = nn.Sequential(
                            nn.Linear(num_features, num_classes))
                        lr_optimizer = optim.Adam(lr_model.parameters())
                        lr_model.apply(init)

                        for param_group in lr_optimizer.param_groups:
                            param_group["lr"] = lr

                        # perform a single update
                        lr_optimizer.zero_grad()
                        Y_tranche_predictions = lr_model(
                            X_tranche_transform[minibatch])
                        training_loss = loss_function(Y_tranche_predictions,
                                                      Y_tranche[minibatch])
                        training_loss.backward()
                        lr_optimizer.step()

                        # evaluate the update on the whole tranche
                        Y_tranche_predictions = lr_model(X_tranche_transform)
                        training_loss = loss_function(Y_tranche_predictions,
                                                      Y_tranche).item()

                        if training_loss < best_training_loss:
                            best_training_loss = training_loss
                            best_lr = lr

                    for param_group in optimizer.param_groups:
                        param_group["lr"] = best_lr

                # -- training --------------------------------------------------

                optimizer.zero_grad()
                Y_tranche_predictions = model(X_tranche_transform[minibatch])
                training_loss = loss_function(Y_tranche_predictions,
                                              Y_tranche[minibatch])
                training_loss.backward()
                optimizer.step()

                minibatch_count += 1

                if minibatch_count % 10 == 0:

                    # NOTE(review): this forward pass could run under
                    # torch.no_grad() to save memory; numerics are unchanged
                    Y_validation_predictions = model(X_validation_transform)
                    validation_loss = loss_function(Y_validation_predictions,
                                                    Y_validation)

                    scheduler.step(validation_loss)

                    # early stopping: stop after `patience` consecutive
                    # non-improving checks (each check = 10 minibatches)
                    if validation_loss.item() >= best_validation_loss:
                        stall_count += 1
                        if stall_count >= patience:
                            stop = True
                    else:
                        best_validation_loss = validation_loss.item()
                        if not stop:
                            stall_count = 0

    return model, f_mean, f_std
Beispiel #6
0
    time_b = time.perf_counter()

    results.loc[num_training_examples,
                "time_training_seconds"] = time_b - time_a

    # -- test ------------------------------------------------------------------

    # read test data (here, we test on a subset of the full test data)
    test_data = pd.read_csv(arguments.test_path, header=None,
                            nrows=2**11).values
    Y_test, X_test = test_data[:, 0].astype(np.int), test_data[:, 1:]

    # normalise and transform test data
    X_test = (X_test - X_test.mean(axis=1, keepdims=True)) / X_test.std(
        axis=1, keepdims=True)  # normalise time series
    X_test_transform = apply_kernels(X_test, kernels)
    X_test_transform = (X_test_transform -
                        f_mean) / f_std  # normalise transformed features

    # predict
    model.eval()
    Y_test_predictions = model(torch.FloatTensor(X_test_transform))

    results.loc[num_training_examples, "accuracy"] = (
        Y_test_predictions.max(1)[1].numpy() == Y_test).mean()

    print("Done.")

print(f" FINISHED ".center(80, "="))

results.to_csv(
Beispiel #7
0
    print(f"Loading data".ljust(80 - 5, "."), end = "", flush = True)

    training_data = np.loadtxt(f"{arguments.input_path}/{dataset_name}/{dataset_name}_TRAIN.txt")
    test_data = np.loadtxt(f"{arguments.input_path}/{dataset_name}/{dataset_name}_TEST.txt")

    print("Done.")

    # -- precompile ------------------------------------------------------------

    if not compiled:

        print(f"Compiling ROCKET functions (once only)".ljust(80 - 5, "."), end = "", flush = True)

        _ = generate_kernels(100, 10)
        apply_kernels(np.zeros_like(training_data)[:, 1:].astype(np.float32), _)
        compiled = True

        print("Done.")

    # -- run -------------------------------------------------------------------

    print(f"Performing runs".ljust(80 - 5, "."), end = "", flush = True)

    results, timings = run(training_data, test_data,
                           num_runs = arguments.num_runs,
                           num_kernels = arguments.num_kernels)
    timings_mean = timings.mean(1)

    print("Done.")
Beispiel #8
0
    return model, f_mean, f_std


# == run =======================================================================

# -- precompile ROCKET functions -----------------------------------------------

print("Compiling ROCKET functions (once only)".ljust(80 - 5, "."),
      end="",
      flush=True)

training_data = pd.read_csv(arguments.training_path, header=None,
                            nrows=10).values
_ = generate_kernels(20, 10)
apply_kernels(np.zeros_like(training_data)[:, 1:], _)

print("Done.")

# -- run through dataset sizes -------------------------------------------------

all_num_training_examples = 2**np.arange(8, 20 + 1)

results = pd.DataFrame(index=all_num_training_examples,
                       columns=["accuracy", "time_training_seconds"],
                       data=0)
results.index.name = "num_training_examples"

print(f" {arguments.num_kernels:,} Kernels ".center(80, "="))

for num_training_examples in all_num_training_examples: