def test_moving_block_bootstrap(
        time_series_sample: np.ndarray,
        number_of_time_series_boostrap_replications: int):
    """Run the generic block-bootstrap checks on both moving-block variants.

    The block length is the ceiling of the ``b_star_cb`` entry of the first
    result returned by ``optimal_block_length`` for the sample.  The loop
    implementation (``moving_block_bootstrap``) and the vectorized one
    (``moving_block_bootstrap_vectorized``) are then checked against the same
    confidence-interval and bootstrap-standard-error bounds; only the loop
    implementation is additionally run with ``test_rng_link=True``.

    Args:
        time_series_sample: the time series to resample.
        number_of_time_series_boostrap_replications: number of bootstrap
            replications requested from each implementation.
    """
    block_length = math.ceil(
        optimal_block_length(time_series_sample)[0].b_star_cb)

    # Arguments shared by both implementations under test.
    shared_kwargs = dict(
        y=time_series_sample,
        block_length=block_length,
        replications=number_of_time_series_boostrap_replications,
        lower_ci_bounds=(0.35, 0.38),
        upper_ci_bounds=(0.48, 0.50),
        bse_bounds=(0.03, 0.07))

    _test_block_bootstrap_generic(
        bootstrap_function=moving_block_bootstrap,
        test_rng_link=True,
        **shared_kwargs)

    _test_block_bootstrap_generic(
        bootstrap_function=moving_block_bootstrap_vectorized,
        **shared_kwargs)
# Example #2
# 0
def _fit_with_generators(model, X_train, y_train, X_valid, y_valid, period,
                         args, callbacks):
    """Fit ``model`` through ``ReccurentTrainingGenerator`` and return history.

    Training and validation data are each wrapped in a
    ``ReccurentTrainingGenerator`` with ``timesteps=period`` and ``delay=1``.
    The batch size is ``len(y_train) // args["nb_batch"]`` for both
    generators, and training runs for ``args["nb_epochs"]`` epochs.

    Returns:
        Whatever ``model.fit_generator`` returns (passed to ``save_lr_curve``
        by the callers that plot the learning curve).
    """
    batch_size = len(y_train) // args["nb_batch"]
    train_generator = ReccurentTrainingGenerator(X_train,
                                                 y_train,
                                                 batch_size=batch_size,
                                                 timesteps=period,
                                                 delay=1)
    valid_generator = ReccurentTrainingGenerator(X_valid,
                                                 y_valid,
                                                 batch_size=batch_size,
                                                 timesteps=period,
                                                 delay=1)
    return model.fit_generator(train_generator,
                               validation_data=valid_generator,
                               epochs=args["nb_epochs"],
                               verbose=1,
                               callbacks=callbacks)


def _predict_and_save_results(file_path, X_test, y_test, period, args,
                              write_result_out_dir):
    """Reload the checkpoint at ``file_path``, predict on test data, save logs.

    Writes the prediction plot, the yy-plot and the MSE into
    ``write_result_out_dir``, stores the MSE in ``args["mse"]`` (mutating
    ``args``) and finally saves ``args`` to the same directory.
    """
    best_model = load_model(file_path)
    predict_generator = ReccurentPredictingGenerator(X_test,
                                                     batch_size=1,
                                                     timesteps=period)
    y_test_pred = best_model.predict_generator(predict_generator)

    # Fewer predictions than test rows are produced; compare against the
    # trailing targets only.
    y_test = y_test[-len(y_test_pred):]
    save_prediction_plot(y_test, y_test_pred, write_result_out_dir)
    save_yy_plot(y_test, y_test_pred, write_result_out_dir)
    args["mse"] = save_mse(y_test,
                           y_test_pred,
                           write_result_out_dir,
                           model=best_model)
    save_arguments(args, write_result_out_dir)


def _bootstrap_training_subsets(X_train, y_train, replications):
    """Build circular-block-bootstrap replicas of the training data.

    The block length is the ceiling of ``b_star_cb`` from
    ``optimal_block_length(y_train)``.

    The RNG is re-seeded with the same value before *every* call to
    ``circular_block_bootstrap`` -- for the target as well as for each feature
    column -- so that all of them are resampled with the same random draws and
    each bootstrapped target stays paired with its bootstrapped features.
    NOTE(review): this assumes ``circular_block_bootstrap`` draws from NumPy's
    global RNG, which the per-column seeding already relied on -- confirm.

    Returns:
        ``(subsets_X_train, subsets_y_train)`` where ``subsets_X_train`` has
        the replication axis first and the feature axis last.
    """
    b_star = optimal_block_length(y_train)
    b_star_cb = math.ceil(b_star[0].b_star_cb)
    print(f'optimal block length for circular bootstrap = {b_star_cb}')

    def resample(series):
        np.random.seed(0)
        return circular_block_bootstrap(series,
                                        block_length=b_star_cb,
                                        replications=replications,
                                        replace=True)

    subsets_y_train = resample(y_train)
    per_feature = [resample(X_train[:, i]) for i in range(X_train.shape[1])]
    # (feature, replication, time) -> (replication, time, feature)
    subsets_X_train = np.array(per_feature).transpose(1, 2, 0)
    return subsets_X_train, subsets_y_train


def main():
    """Run the experiment selected by ``args["train_mode"]``.

    Results are written below ``<cwd>/reports/<args["out_dir"]>/<train_mode>``.
    Handled modes:

    * ``pre-train``: train one model per directory in ``dataset/source``
      (train and test data are merged before the train/validation split).
    * ``transfer-learning``: for every target in ``dataset/target`` and every
      pre-trained source model, build a model from the pre-trained one,
      train it and evaluate it on the target test data.
    * ``without-transfer-learning``: train and evaluate a fresh model per
      target.
    * ``bagging``: train one model per circular-block-bootstrap replica of
      each target's training data.
    * ``noise-injection``: like ``without-transfer-learning`` but the model is
      built with ``noise=args["noise_var"]``.

    In every mode, dataset directories without an ``X_train.pkl`` file are
    skipped.
    """
    # make analysis environment
    limit_gpu_memory()
    args = parse_arguments()
    seed_every_thing(args["seed"])
    write_out_dir = path.normpath(
        path.join(getcwd(), 'reports', args["out_dir"]))
    makedirs(write_out_dir, exist_ok=True)

    print('-' * 140)

    if args["train_mode"] == 'pre-train':

        for source in listdir('dataset/source'):

            # skip source dataset without pickle file
            data_dir_path = path.join('dataset', 'source', source)
            if not path.exists(f'{data_dir_path}/X_train.pkl'):
                continue

            # make output directory
            write_result_out_dir = path.join(write_out_dir, args["train_mode"],
                                             source)
            makedirs(write_result_out_dir, exist_ok=True)

            # load dataset
            X_train, y_train, X_test, y_test = \
                read_data_from_dataset(data_dir_path)
            # window length: one thirtieth of the whole series
            period = (len(y_train) + len(y_test)) // 30
            # no need for test data when pre-training
            X_train = np.concatenate((X_train, X_test), axis=0)
            y_train = np.concatenate((y_train, y_test), axis=0)
            X_train, X_valid, y_train, y_valid = train_test_split(
                X_train, y_train, test_size=args["valid_ratio"],
                shuffle=False)
            print(f'\nSource dataset : {source}')
            print(f'\nX_train : {X_train.shape[0]}')
            print(f'\nX_valid : {X_valid.shape[0]}')

            # construct the model
            file_path = path.join(write_result_out_dir, 'best_model.hdf5')
            callbacks = make_callbacks(file_path)
            input_shape = (period, X_train.shape[1])
            model = build_model(input_shape, args["gpu"], write_result_out_dir)

            # train the model
            H = _fit_with_generators(model, X_train, y_train, X_valid,
                                     y_valid, period, args, callbacks)
            save_lr_curve(H, write_result_out_dir)

            # clear memory up
            keras.backend.clear_session()
            save_arguments(args, write_result_out_dir)
            print('\n' * 2 + '-' * 140 + '\n' * 2)

    elif args["train_mode"] == 'transfer-learning':

        for target in listdir('dataset/target'):

            # skip target in the absence of pickle file
            if not path.exists(f'dataset/target/{target}/X_train.pkl'):
                continue

            for source in listdir(f'{write_out_dir}/pre-train'):

                # make output directory (only if a pre-trained model exists)
                write_result_out_dir = path.join(write_out_dir,
                                                 args["train_mode"], target,
                                                 source)
                pre_model_path = \
                    f'{write_out_dir}/pre-train/{source}/best_model.hdf5'
                if not path.exists(pre_model_path):
                    continue
                makedirs(write_result_out_dir, exist_ok=True)

                # load dataset
                data_dir_path = f'dataset/target/{target}'
                X_train, y_train, X_test, y_test = \
                    read_data_from_dataset(data_dir_path)
                period = (len(y_train) + len(y_test)) // 30
                X_train, X_valid, y_train, y_valid = train_test_split(
                    X_train, y_train, test_size=args["valid_ratio"],
                    shuffle=False)
                print(f'\nTarget dataset : {target}')
                print(f'\nX_train : {X_train.shape[0]}')
                print(f'\nX_valid : {X_valid.shape[0]}')
                print(f'\nX_test : {X_test.shape[0]}')

                # construct the model on top of the pre-trained one
                pre_model = load_model(pre_model_path)
                file_path = path.join(write_result_out_dir,
                                      'transferred_best_model.hdf5')
                callbacks = make_callbacks(file_path)
                input_shape = (period, X_train.shape[1])
                model = build_model(input_shape,
                                    args["gpu"],
                                    write_result_out_dir,
                                    pre_model=pre_model,
                                    freeze=args["freeze"])

                # train the model
                H = _fit_with_generators(model, X_train, y_train, X_valid,
                                         y_valid, period, args, callbacks)
                save_lr_curve(H, write_result_out_dir)

                # prediction and logs for the model
                _predict_and_save_results(file_path, X_test, y_test, period,
                                          args, write_result_out_dir)

                # clear memory up
                keras.backend.clear_session()
                print('\n' * 2 + '-' * 140 + '\n' * 2)

    elif args["train_mode"] == 'without-transfer-learning':

        for target in listdir('dataset/target'):

            # skip target in the absence of pickle file
            data_dir_path = path.join('dataset', 'target', target)
            if not path.exists(path.join(data_dir_path, 'X_train.pkl')):
                continue

            # make output directory
            write_result_out_dir = path.join(write_out_dir, args["train_mode"],
                                             target)
            makedirs(write_result_out_dir, exist_ok=True)

            # load dataset
            X_train, y_train, X_test, y_test = \
                read_data_from_dataset(data_dir_path)
            period = (len(y_train) + len(y_test)) // 30
            X_train, X_valid, y_train, y_valid = train_test_split(
                X_train, y_train, test_size=args["valid_ratio"],
                shuffle=False)
            print(f'\nTarget dataset : {target}')
            print(f'\nX_train : {X_train.shape[0]}')
            print(f'\nX_valid : {X_valid.shape[0]}')
            print(f'\nX_test : {X_test.shape[0]}')

            # construct the model
            file_path = path.join(write_result_out_dir, 'best_model.hdf5')
            callbacks = make_callbacks(file_path)
            input_shape = (period, X_train.shape[1])
            model = build_model(input_shape, args["gpu"], write_result_out_dir)

            # train the model
            H = _fit_with_generators(model, X_train, y_train, X_valid,
                                     y_valid, period, args, callbacks)
            save_lr_curve(H, write_result_out_dir)

            # prediction and logs for the model
            _predict_and_save_results(file_path, X_test, y_test, period, args,
                                      write_result_out_dir)

            # clear memory up
            keras.backend.clear_session()
            print('\n' * 2 + '-' * 140 + '\n' * 2)

    elif args["train_mode"] == 'bagging':

        for target in listdir('dataset/target'):

            # skip target in the absence of pickle file
            data_dir_path = path.join('dataset', 'target', target)
            if not path.exists(path.join(data_dir_path, 'X_train.pkl')):
                continue

            # make output directory
            write_result_out_dir = path.join(write_out_dir, args["train_mode"],
                                             target)
            makedirs(write_result_out_dir, exist_ok=True)

            # load dataset
            X_train, y_train, X_test, y_test = \
                read_data_from_dataset(data_dir_path)
            period = (len(y_train) + len(y_test)) // 30

            # make subsets
            subsets_X_train, subsets_y_train = _bootstrap_training_subsets(
                X_train, y_train, args["nb_subset"])

            # train the model for each subset
            model_dir = path.join(write_result_out_dir, 'model')
            makedirs(model_dir, exist_ok=True)
            for i_subset, (i_X_train, i_y_train) in enumerate(
                    zip(subsets_X_train, subsets_y_train)):

                i_X_train, i_X_valid, i_y_train, i_y_valid = train_test_split(
                    i_X_train, i_y_train, test_size=args["valid_ratio"],
                    shuffle=False)

                # construct the model
                file_path = path.join(model_dir, f'best_model_{i_subset}.hdf5')
                callbacks = make_callbacks(file_path, save_csv=False)
                # i_X_train.shape[1] is the number of variables
                input_shape = (period, i_X_train.shape[1])
                model = build_model(input_shape,
                                    args["gpu"],
                                    write_result_out_dir,
                                    savefig=False)

                # train the model
                _fit_with_generators(model, i_X_train, i_y_train, i_X_valid,
                                     i_y_valid, period, args, callbacks)

            keras.backend.clear_session()
            print('\n' * 2 + '-' * 140 + '\n' * 2)

    elif args["train_mode"] == 'noise-injection':

        for target in listdir('dataset/target'):

            # skip target in the absence of pickle file
            data_dir_path = path.join('dataset', 'target', target)
            if not path.exists(path.join(data_dir_path, 'X_train.pkl')):
                continue

            # make output directory
            write_result_out_dir = path.join(write_out_dir, args["train_mode"],
                                             target)
            makedirs(write_result_out_dir, exist_ok=True)

            # load dataset
            X_train, y_train, X_test, y_test = \
                read_data_from_dataset(data_dir_path)
            period = (len(y_train) + len(y_test)) // 30
            X_train, X_valid, y_train, y_valid = train_test_split(
                X_train, y_train, test_size=args["valid_ratio"],
                shuffle=False)
            print(f'\nTarget dataset : {target}')
            print(f'\nX_train : {X_train.shape}')
            print(f'\nX_valid : {X_valid.shape}')
            print(f'\nX_test : {X_test.shape[0]}')

            # construct the model
            file_path = path.join(write_result_out_dir, 'best_model.hdf5')
            callbacks = make_callbacks(file_path)
            input_shape = (period, X_train.shape[1])
            model = build_model(input_shape,
                                args["gpu"],
                                write_result_out_dir,
                                noise=args["noise_var"])

            # train the model
            H = _fit_with_generators(model, X_train, y_train, X_valid,
                                     y_valid, period, args, callbacks)
            save_lr_curve(H, write_result_out_dir)

            # prediction and logs for the model
            _predict_and_save_results(file_path, X_test, y_test, period, args,
                                      write_result_out_dir)

            # clear memory up
            keras.backend.clear_session()
            print('\n' * 2 + '-' * 140 + '\n' * 2)