def full_data_training(stockmodel, option_type, only_call=False, with_percentage=False):
    """
    Print the performance of the best cross-validated Gaussian Process Regressor, fitted on a
    random part of the dataset(*), for the given stock model and option type.

    (*) hardware problems when full dataset is given.

    Two values are printed: the MSE on the test data and the MSE on the training data that was
    not selected for the fit.

    :param stockmodel: str, "BS", "VG" or "H"
    :param option_type: str, "opt_standard", "opt_asianmean", "opt_lookbackmin" or "opt_lookbackmax"
    :param only_call: bool (default=False), if the dataset only contains the call options
    :param with_percentage: bool (default=False), if the dataset needs to contain the percentage
        of the stock price and the strike
    """
    # NOTE(review): this file shows two functions named full_data_training (this GPR variant and an
    # SVR variant); if both really live in one module, the later definition replaces this one - confirm.
    n_samples = 10000
    random_state = 9943

    base_file_name = "GPR-random_search_{0}_{1}_scaled.p".format(stockmodel, option_type)

    # Only the path of the packaged file is needed, so the handle is closed right after reading it.
    with pkg_resources.open_text(random_search_gpr, base_file_name) as resource:
        full_file_name = resource.name

    # get the best parameters (rank 1) from the cross validation
    dict_cv_results = modelsaver.get_model(full_file_name).cv_results_
    best_position = np.where(dict_cv_results['rank_test_neg_mean_squared_error'] == 1)
    best_model_parameters = np.array(dict_cv_results['params'])[best_position][0]

    # get a random training sample and keep the remaining training data for evaluation
    dm = dc.DataManager(stockmodel=stockmodel,
                        option_type=option_type,
                        only_call=only_call,
                        with_percent=with_percentage)
    X_train, y_train, x_not_selected, y_not_selected = dm.get_random_training_data(
        n_samples=n_samples,
        random_state=random_state,
        get_not_selected_data=True)

    # the scaler is fitted on the selected training data only and reused for all other inputs
    scaler = preprocessing.StandardScaler().fit(X_train, y_train)
    X_train = scaler.transform(X_train)

    gpr_model = gaussian_process.GaussianProcessRegressor(
        kernel=best_model_parameters["kernel"],
        normalize_y=best_model_parameters["normalize_y"],
        alpha=best_model_parameters["alpha"])
    gpr_model.fit(X_train, y_train)

    X_test, y_test = dm.get_test_data()
    X_test = scaler.transform(X_test)
    x_not_selected = scaler.transform(x_not_selected)

    y_pred = gpr_model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_pred=y_pred)

    y_pred_not_selected = gpr_model.predict(x_not_selected)
    mse_not_selected = mean_squared_error(y_not_selected, y_pred_not_selected)

    print(f"MSE(test data): {mse_test}")
    print(f"MSE(not selected): {mse_not_selected}")
def part_dataset_like_gpr(stockmodel, option_type, only_call=False):
    """
    Do the testings with a smaller set of datapoints, the same as the test for the Gaussian
    Process Regressor, but with a Support Vector Regressor built from the best cross-validated
    parameters.

    Print the mse of the Test data and the part of the training data which are not used.

    :param stockmodel: str, "BS", "VG" or "H"
    :param option_type: str, "opt_standard", "opt_asianmean", "opt_lookbackmin" or "opt_lookbackmax"
    :param only_call: bool (default=False), if the dataset only contains the call options
    """
    n_samples = 10000
    random_state = 9943

    base_file_name = "SVR-random_search_{0}_{1}_scaled.p".format(stockmodel, option_type)

    # Only the path of the packaged file is needed, so the handle is closed right after reading it.
    with pkg_resources.open_text(random_search_svr, base_file_name) as resource:
        full_file_name = resource.name

    # get the best parameters (rank 1) from the cross validation
    dict_cv_results = modelsaver.get_model(full_file_name).cv_results_
    best_position = np.where(dict_cv_results['rank_test_neg_mean_squared_error'] == 1)
    best_model_parameters = np.array(dict_cv_results['params'])[best_position][0]

    # get the training and test data
    dm = dc.DataManager(stockmodel=stockmodel, option_type=option_type, only_call=only_call)
    X_train, y_train, x_not_selected, y_not_selected = dm.get_random_training_data(
        n_samples=n_samples,
        random_state=random_state,
        get_not_selected_data=True)

    # the scaler is fitted on the selected training data only and reused for all other inputs
    scaler = preprocessing.StandardScaler().fit(X_train, y_train)
    X_train = scaler.transform(X_train)

    svr_model = SVR(cache_size=2000,
                    C=best_model_parameters['C'],
                    degree=best_model_parameters['degree'],
                    epsilon=best_model_parameters['epsilon'],
                    gamma=best_model_parameters['gamma'],
                    kernel=best_model_parameters['kernel'])
    svr_model.fit(X_train, y_train)

    X_test, y_test = dm.get_test_data()
    X_test = scaler.transform(X_test)
    x_not_selected = scaler.transform(x_not_selected)

    y_pred = svr_model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_pred=y_pred)

    y_pred_not_selected = svr_model.predict(x_not_selected)
    mse_not_selected = mean_squared_error(y_not_selected, y_pred_not_selected)

    print(f"MSE(test data): {mse_test:4.3f}")
    print(f"MSE(not selected): {mse_not_selected:4.3f}")
def plotting_results_cv_svr():
    """
    Print an overview of the cross-validation results stored in "SVR-random_search.p".

    Printed, in this order: the parameter sets whose rank on the negative mean squared error
    is at most 1, their mean test scores, and finally the complete ``cv_results_`` dictionary.

    :return: None
    """
    # todo: continue working on this
    cv_results = modelsaver.get_model("SVR-random_search.p").cv_results_

    top_ranked = np.where(cv_results['rank_test_neg_mean_squared_error'] <= 1)

    top_params = np.array(cv_results['params'])[top_ranked]
    print(top_params)

    top_scores = cv_results['mean_test_neg_mean_squared_error'][top_ranked]
    print(top_scores)
    # best results are a poly of degree 2 (mse=125), followed by rbf (mse=254)

    print(cv_results)
def get_best_model(stockmodel, option_type):
    """
    Function to return the best NN model from the cross validations.

    The stored results are ordered by ``cv_result["Mean"]`` and the entry with the smallest
    value is returned (on ties, the one that comes first in the stored list).

    :param stockmodel: str, "BS", "VG" or "H"
    :param option_type: str, "opt_standard", "opt_asianmean", "opt_lookbackmin" or "opt_lookbackmax"
    :return: tuple (size_layers, activations) of the selected entry
        (presumably two lists - verify against the stored results)
    :raises IndexError: if the stored result list is empty
    """
    base_file_name = f"NN-random_search_{stockmodel}_{option_type}_scaled.p"

    # Only the path of the packaged file is needed, so the handle is closed right after reading it.
    with pkg_resources.open_text(random_search_nn, base_file_name) as resource:
        full_file_name = resource.name

    results = modelsaver.get_model(full_file_name)

    # sorted() leaves the loaded list untouched; it is stable, so ties resolve as before.
    best = sorted(results, key=lambda entry: entry["cv_result"]["Mean"])[0]
    return best['size_layers'], best['activations']
def full_data_training(stockmodel, option_type, only_call=False, with_percentage=False):
    """
    Print the performance (MSE on the test data) of the best cross-validated Support Vector
    Regressor, fitted on the full training dataset, for the given stock model and option type.

    :param stockmodel: str, "BS", "VG" or "H"
    :param option_type: str, "opt_standard", "opt_asianmean", "opt_lookbackmin" or "opt_lookbackmax"
    :param only_call: bool (default=False), if the dataset only contains the call options
    :param with_percentage: bool (default=False), if the dataset needs to contain the percentage
        of the stock price and the strike
    """
    # NOTE(review): this file shows two functions named full_data_training (a GPR variant and this
    # SVR variant); if both really live in one module, this later definition replaces the other - confirm.
    base_file_name = "SVR-random_search_{0}_{1}_scaled.p".format(stockmodel, option_type)

    # Only the path of the packaged file is needed, so the handle is closed right after reading it.
    with pkg_resources.open_text(random_search_svr, base_file_name) as resource:
        full_file_name = resource.name

    # get the best parameters (rank 1) from the cross validation
    dict_cv_results = modelsaver.get_model(full_file_name).cv_results_
    best_position = np.where(dict_cv_results['rank_test_neg_mean_squared_error'] == 1)
    best_model_parameters = np.array(dict_cv_results['params'])[best_position][0]

    dm = dc.DataManager(stockmodel=stockmodel,
                        option_type=option_type,
                        only_call=only_call,
                        with_percent=with_percentage)
    X_train, y_train = dm.get_training_data()

    # the scaler is fitted on the training data only and reused for the test data
    scaler = preprocessing.StandardScaler().fit(X_train, y_train)
    X_train = scaler.transform(X_train)

    svr_model = SVR(cache_size=2000,
                    C=best_model_parameters['C'],
                    degree=best_model_parameters['degree'],
                    epsilon=best_model_parameters['epsilon'],
                    gamma=best_model_parameters['gamma'],
                    kernel=best_model_parameters['kernel'])
    svr_model.fit(X_train, y_train)

    X_test, y_test = dm.get_test_data()
    X_test = scaler.transform(X_test)

    y_pred = svr_model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred=y_pred)

    print(f"MSE: {mse:4.3f}")
def rf_plot_train_test(model, column_fitting, save_plot=False):
    """
    Plot the stored 'Test' results of the random forest runs, one series per entry of
    ``max_features`` ("auto", "log2" and 5).

    For every entry a pickle named "rf_50-1000-results_train_test-<model>-<code>-<feature>.p"
    is loaded from the ``train_test_rf`` package, where <code> is the short code of
    ``column_fitting``. The collected results are handed to ``plot_results``.

    :param model: value inserted in the file names and passed on to ``plot_results``
        (presumably the stock model, e.g. "BS", "VG" or "H" - verify against callers)
    :param column_fitting: str, "opt_standard", "opt_asianmean", "opt_lookbackmin",
        "opt_lookbackmax" or "opt_exact_standard"
    :param save_plot: bool (default=False), passed on to ``plot_results``
    :return: None
    :raises KeyError: if ``column_fitting`` is not one of the values listed above
    """
    max_features = ["auto", "log2", 5]

    # short codes of the option types as they appear in the pickle file names
    dict_codes = {
        "opt_standard": "S",
        "opt_asianmean": "A",
        "opt_lookbackmin": "Lmin",
        "opt_lookbackmax": "Lmax",
        "opt_exact_standard": "SE"
    }
    opt_type_code = dict_codes[column_fitting]

    base_file_name = "rf_50-1000-results_train_test-{0}-{1}-{2}.p"

    dict_plotting = {}
    for feature in max_features:
        pickle_file = base_file_name.format(model, opt_type_code, feature)

        # Only the path of the packaged file is needed, so the handle is closed right after reading it.
        with pkg_resources.open_text(train_test_rf, pickle_file) as resource:
            file_name = resource.name

        dict_plotting[feature] = modelsaver.get_model(file_name)['Test']

    # range(50, 1001, 50) presumably matches the "50-1000" in the file names - verify
    plot_results("RF", model, column_fitting, dict_plotting, range(50, 1001, 50), save_plot=save_plot)