Exemple #1
0
def run(training_data, test_data, num_runs = 10, num_kernels = 10_000):
    """Evaluate ROCKET on one train/test split over several independent runs.

    Parameters
    ----------
    training_data, test_data : 2d arrays where column 0 holds the integer
        class label and the remaining columns hold the time-series values.
    num_runs : number of repetitions; each run draws a fresh set of kernels.
    num_kernels : number of random convolutional kernels per run.

    Returns
    -------
    (results, timings) : per-run test accuracy, shape ``(num_runs,)``, and a
        ``(4, num_runs)`` array of wall-clock seconds in the order
        (training transform, test transform, training, test).
    """

    results = np.zeros(num_runs)
    timings = np.zeros([4, num_runs]) # training transform, test transform, training, test

    # `np.int` was a deprecated alias for the builtin `int` (NumPy 1.20) and
    # was removed in NumPy 1.24 — use an explicit fixed-width dtype instead.
    Y_training, X_training = training_data[:, 0].astype(np.int64), training_data[:, 1:]
    Y_test, X_test = test_data[:, 0].astype(np.int64), test_data[:, 1:]

    for i in range(num_runs):

        # input_length is loop-invariant, but regenerating the kernels inside
        # the loop is intentional: each run uses a fresh random kernel set.
        input_length = X_training.shape[1]
        kernels = generate_kernels(input_length, num_kernels)

        # -- transform training ------------------------------------------------

        time_a = time.perf_counter()
        X_training_transform = apply_kernels(X_training, kernels)
        time_b = time.perf_counter()
        timings[0, i] = time_b - time_a

        # -- transform test ----------------------------------------------------

        time_a = time.perf_counter()
        X_test_transform = apply_kernels(X_test, kernels)
        time_b = time.perf_counter()
        timings[1, i] = time_b - time_a

        # -- training ----------------------------------------------------------

        time_a = time.perf_counter()
        # NOTE(review): the `normalize` parameter was removed from
        # RidgeClassifierCV in scikit-learn 1.2; on modern scikit-learn this
        # call needs a StandardScaler pipeline instead — confirm the pinned
        # scikit-learn version before changing.
        classifier = RidgeClassifierCV(alphas = 10 ** np.linspace(-3, 3, 10), normalize = True)
        classifier.fit(X_training_transform, Y_training)
        time_b = time.perf_counter()
        timings[2, i] = time_b - time_a

        # -- test --------------------------------------------------------------

        time_a = time.perf_counter()
        results[i] = classifier.score(X_test_transform, Y_test)
        time_b = time.perf_counter()
        timings[3, i] = time_b - time_a

    return results, timings
    # NOTE(review): the lines below appear to be a fragment of a *different*
    # script pasted in by the scraper (they reference `arguments.*`, use
    # `_results`/`_timings` names, and sit after the `return` above, so they
    # are unreachable as written). The loop is also truncated mid-body at the
    # next "Exemple" marker. Left byte-identical; only comments added.
    Y_test, X_test = test_data[:, 0].astype(np.int32), test_data[:, 1:]

    print("Done.")

    # -- run -------------------------------------------------------------------

    print(f"Performing runs".ljust(80 - 5, "."), end="", flush=True)

    # Per-run accuracy and a (4, num_runs) timing table.
    _results = np.zeros(arguments.num_runs)
    _timings = np.zeros([4, arguments.num_runs
                         ])  # trans. tr., trans. te., training, test

    for i in range(arguments.num_runs):

        # Fresh random kernels every run, sized to the series length.
        input_length = X_training.shape[-1]
        kernels = generate_kernels(input_length, arguments.num_kernels)

        # -- transform training ------------------------------------------------

        time_a = time.perf_counter()
        X_training_transform = apply_kernels(X_training, kernels)
        time_b = time.perf_counter()
        _timings[0, i] = time_b - time_a

        # -- transform test ----------------------------------------------------

        time_a = time.perf_counter()
        X_test_transform = apply_kernels(X_test, kernels)
        time_b = time.perf_counter()
        _timings[1, i] = time_b - time_a
        # (fragment truncated here in the scraped source)
Exemple #3
0
def run_additional(training_data, test_data, num_runs=10, num_kernels=10_000):
    """Evaluate ROCKET on datasets with missing values or variable lengths.

    Handles three cases: (1) equal lengths, no missing values; (2) equal
    lengths with missing values (linear interpolation over NaNs); and
    (3) variable lengths padded with NaN (cross-validation chooses between
    rescaling to a common length and applying kernels "as is" / jagged).

    Parameters
    ----------
    training_data, test_data : 2d arrays; column 0 is the integer class
        label, remaining columns the (possibly NaN-padded) series values.
    num_runs : number of repetitions; fresh kernels each run.
    num_kernels : number of random convolutional kernels per run.

    Returns
    -------
    (results, timings) : per-run test accuracy, shape ``(num_runs,)``, and a
        ``(4, num_runs)`` array of wall-clock seconds in the order
        (training transform, test transform, training, test).
    """

    # assumes variable length time series are padded with nan:
    # length = width minus the number of trailing NaNs per row.
    get_input_lengths = lambda X: X.shape[1] - (~np.isnan(np.flip(X, 1))
                                                ).argmax(1)

    def rescale(X, reference_length):
        # Linearly resample each (NaN-padded) series to reference_length.
        _X = np.zeros([len(X), reference_length])
        input_lengths = get_input_lengths(X)
        for i in range(len(X)):
            _X[i] = np.interp(np.linspace(0, 1, reference_length),
                              np.linspace(0, 1, input_lengths[i]),
                              X[i][:input_lengths[i]])
        return _X

    def interpolate_nan(X):
        # Fill interior NaNs by linear interpolation over observed points.
        _X = X.copy()
        good = ~np.isnan(X)
        for i in np.where(np.any(~good, 1))[0]:
            _X[i] = np.interp(np.arange(len(X[i])),
                              np.where(good[i])[0], X[i][good[i]])
        return _X

    results = np.zeros(num_runs)
    timings = np.zeros(
        [4, num_runs])  # training transform, test transform, training, test

    # `np.int` was a deprecated alias for the builtin `int` (NumPy 1.20) and
    # was removed in NumPy 1.24 — use an explicit fixed-width dtype instead.
    Y_training, X_training = training_data[:, 0].astype(np.int64), training_data[:, 1:]
    Y_test, X_test = test_data[:, 0].astype(np.int64), test_data[:, 1:]

    variable_lengths = False

    # handle three cases: (1) same lengths, no missing values; (2) same lengths,
    # missing values; and (3) variable lengths, no missing values

    if np.any(np.isnan(X_training)):

        input_lengths_training = get_input_lengths(X_training)
        input_lengths_training_max = input_lengths_training.max()
        input_lengths_test = get_input_lengths(X_test)

        # missing values (same lengths)
        if np.all(input_lengths_training == input_lengths_training_max):

            X_training = interpolate_nan(X_training)
            X_test = interpolate_nan(X_test)

        # variable lengths (no missing values)
        else:

            variable_lengths = True
            num_folds = 10
            cross_validation_results = np.zeros([2, num_folds])

    # normalise time series (per row, NaN-aware; +1e-8 avoids divide-by-zero
    # on constant series)
    X_training = (X_training - np.nanmean(X_training, axis=1, keepdims=True)
                  ) / (np.nanstd(X_training, axis=1, keepdims=True) + 1e-8)
    X_test = (X_test - np.nanmean(X_test, axis=1, keepdims=True)) / (
        np.nanstd(X_test, axis=1, keepdims=True) + 1e-8)

    for i in range(num_runs):

        # -- variable lengths --------------------------------------------------

        if variable_lengths:

            kernels = generate_kernels(input_lengths_training_max, num_kernels)

            # Transform the training set both ways; cross-validation below
            # picks whichever representation scores better.
            time_a = time.perf_counter()
            X_training_transform_rescale = apply_kernels(
                rescale(X_training, input_lengths_training_max), kernels)
            X_training_transform_jagged = apply_kernels_jagged(
                X_training, kernels, input_lengths_training)
            time_b = time.perf_counter()
            timings[0, i] = time_b - time_a

            # indices for cross-validation folds
            I = np.random.permutation(len(X_training))
            I = np.array_split(I, num_folds)

            time_a = time.perf_counter()

            # j = 0 -> rescale
            # j = 1 -> "as is" ("jagged")
            for j in range(2):

                for k in range(num_folds):

                    # Rotate the folds: first fold is validation, rest train.
                    VA, *TR = np.roll(I, k, axis=0)
                    TR = np.concatenate(TR)

                    # NOTE(review): `normalize` was removed from
                    # RidgeClassifierCV in scikit-learn 1.2 — confirm the
                    # pinned scikit-learn version.
                    classifier = RidgeClassifierCV(alphas=10**np.linspace(
                        -3, 3, 10),
                                                   normalize=True)

                    if j == 0:  # rescale

                        classifier.fit(X_training_transform_rescale[TR],
                                       Y_training[TR])
                        cross_validation_results[j][k] = classifier.score(
                            X_training_transform_rescale[VA], Y_training[VA])

                    elif j == 1:  # jagged

                        classifier.fit(X_training_transform_jagged[TR],
                                       Y_training[TR])
                        cross_validation_results[j][k] = classifier.score(
                            X_training_transform_jagged[VA], Y_training[VA])

            # Pick the representation with the higher summed CV score; the
            # CV time counts toward training time.
            best = cross_validation_results.sum(1).argmax()
            time_b = time.perf_counter()
            timings[2, i] = time_b - time_a

            classifier = RidgeClassifierCV(alphas=10**np.linspace(-3, 3, 10),
                                           normalize=True)

            if best == 0:  # rescale

                time_a = time.perf_counter()
                X_test_transform_rescale = apply_kernels(
                    rescale(X_test, input_lengths_training_max), kernels)
                time_b = time.perf_counter()
                timings[1, i] = time_b - time_a

                time_a = time.perf_counter()
                classifier.fit(X_training_transform_rescale, Y_training)
                time_b = time.perf_counter()
                timings[2, i] += time_b - time_a  # add final fit to CV time

                time_a = time.perf_counter()
                results[i] = classifier.score(X_test_transform_rescale, Y_test)
                time_b = time.perf_counter()
                timings[3, i] = time_b - time_a

            elif best == 1:  # jagged

                time_a = time.perf_counter()
                X_test_transform_jagged = apply_kernels_jagged(
                    X_test, kernels, input_lengths_test)
                time_b = time.perf_counter()
                timings[1, i] = time_b - time_a

                time_a = time.perf_counter()
                classifier.fit(X_training_transform_jagged, Y_training)
                time_b = time.perf_counter()
                timings[2, i] += time_b - time_a  # add final fit to CV time

                time_a = time.perf_counter()
                results[i] = classifier.score(X_test_transform_jagged, Y_test)
                time_b = time.perf_counter()
                timings[3, i] = time_b - time_a

        # -- same lengths ------------------------------------------------------

        else:

            kernels = generate_kernels(X_training.shape[1], num_kernels)

            # -- transform training --------------------------------------------

            time_a = time.perf_counter()
            X_training_transform = apply_kernels(X_training, kernels)
            time_b = time.perf_counter()
            timings[0, i] = time_b - time_a

            # -- transform test ------------------------------------------------

            time_a = time.perf_counter()
            X_test_transform = apply_kernels(X_test, kernels)
            time_b = time.perf_counter()
            timings[1, i] = time_b - time_a

            # -- training ------------------------------------------------------

            time_a = time.perf_counter()
            classifier = RidgeClassifierCV(alphas=10**np.linspace(-3, 3, 10),
                                           normalize=True)
            classifier.fit(X_training_transform, Y_training)
            time_b = time.perf_counter()
            timings[2, i] = time_b - time_a

            # -- test ----------------------------------------------------------

            time_a = time.perf_counter()
            results[i] = classifier.score(X_test_transform, Y_test)
            time_b = time.perf_counter()
            timings[3, i] = time_b - time_a

    return results, timings
Exemple #4
0
            delimiter=",")
        # NOTE(review): this fragment starts mid-statement (the opening of the
        # training-data np.loadtxt call is cut off by the scraper) and is
        # truncated again mid-call at the end. Left byte-identical; comments
        # only.
        test_data = np.loadtxt(
            f"{arguments.input_path}/{dataset_name}/{dataset_name}_TEST.txt",
            delimiter=",")

    print("Done.")

    # -- precompile ------------------------------------------------------------

    # Trigger JIT compilation once, on throwaway inputs, so compile time is
    # not charged to the timed runs below (presumably Numba — confirm).
    if not compiled:

        print(f"Compiling ROCKET functions (once only)".ljust(80 - 5, "."),
              end="",
              flush=True)

        _ = generate_kernels(100, 10)
        apply_kernels(np.zeros_like(training_data)[:, 1:], _)
        apply_kernels_jagged(
            np.zeros_like(training_data)[:, 1:], _,
            np.array([training_data.shape[1]] * len(training_data)))
        compiled = True

        print("Done.")

    # -- run -------------------------------------------------------------------

    print(f"Performing runs".ljust(80 - 5, "."), end="", flush=True)

    # (call truncated here in the scraped source)
    results, timings = run_additional(training_data,
                                      test_data,
                                      num_runs=arguments.num_runs,
Exemple #5
0
    # NOTE(review): script fragment cut at both ends by the scraper; it reads
    # `pd`, `arguments`, `num_training_examples`, `results`, `train` defined
    # outside this view. Left byte-identical; comments only.
    # here, validation data is always the first 2 ** 11 = 2,048 examples
    validation_data = pd.read_csv(arguments.training_path,
                                  header=None,
                                  nrows=2**11).values
    Y_validation, X_validation = validation_data[:, 0], validation_data[:, 1:]

    # Training data starts after the validation slice of the same file.
    training_data = pd.read_csv(arguments.training_path,
                                header=None,
                                skiprows=2**11,
                                nrows=num_training_examples).values
    Y_training, X_training = training_data[:, 0], training_data[:, 1:]

    # -- generate kernels ------------------------------------------------------

    kernels = generate_kernels(X_training.shape[1], arguments.num_kernels)

    # -- train -----------------------------------------------------------------

    time_a = time.perf_counter()
    # `arguments.num_kernels * 2` presumably reflects two features per kernel
    # (ROCKET's ppv + max) — confirm against train()'s signature.
    model, f_mean, f_std = train(X_training,
                                 Y_training,
                                 X_validation,
                                 Y_validation,
                                 kernels,
                                 arguments.num_kernels * 2,
                                 num_classes=24)
    time_b = time.perf_counter()

    # Record wall-clock training time indexed by training-set size.
    results.loc[num_training_examples,
                "time_training_seconds"] = time_b - time_a