def __init__(self, n_dim, price_table, dll_path, func_key_reward,
             func_reward_config=None, function_set=None):
    """Load the reward function from the DLL and store the model settings.

    :param n_dim: value forwarded to the reward function as its fifth
        argument when the fitness is evaluated.
    :param price_table: NumPy array whose buffer is handed to the DLL as a
        ``double*`` (assumes a C-contiguous float64 array -- TODO confirm
        against the caller).
    :param dll_path: path passed to ``Middleware`` to load the library.
    :param func_key_reward: exported symbol name of the reward function.
    :param func_reward_config: optional dict with ``"argtypes"`` and/or
        ``"restype"`` for the reward function. Missing keys fall back to
        the defaults below.
    :param function_set: function names given to the symbolic regressor;
        defaults to ``['add', 'sub', 'mul', 'div', 'sin']``.
    """
    self.n_dim = n_dim
    self.price_table = price_table
    self.price_table_ptr = self.price_table.ctypes.data_as(
        ctypes.POINTER(ctypes.c_double))
    self.len_price_table = len(price_table)

    self.mid = Middleware(dll_path)
    self.reward_func = self.mid.get_function(func_key_reward)

    # Default foreign signature:
    # double f(int*, double*, int, double*, int, int)
    reward_config = {
        "argtypes": [
            ctypes.POINTER(ctypes.c_int),
            ctypes.POINTER(ctypes.c_double),
            ctypes.c_int,
            ctypes.POINTER(ctypes.c_double),
            ctypes.c_int,
            ctypes.c_int,
        ],
        "restype": ctypes.c_double,
    }
    # Previously a caller-supplied config was built but never applied;
    # overlay it on the defaults so partial configs are accepted too.
    if func_reward_config is not None:
        reward_config.update(func_reward_config)

    if function_set is None:
        function_set = ['add', 'sub', 'mul', 'div', 'sin']
    self.function_set = function_set

    self.reward_func.argtypes = reward_config["argtypes"]
    self.reward_func.restype = reward_config["restype"]
class GPQuant:
    """Symbolic-regression model whose fitness is computed by a DLL function."""

    def __init__(self, n_dim, price_table, dll_path, func_key_reward,
                 func_reward_config=None, function_set=None):
        """Load the reward function from the DLL and store the model settings.

        :param n_dim: value forwarded to the reward function as its fifth
            argument when the fitness is evaluated.
        :param price_table: NumPy array whose buffer is handed to the DLL as
            a ``double*`` (assumes a C-contiguous float64 array -- TODO
            confirm against the caller).
        :param dll_path: path passed to ``Middleware`` to load the library.
        :param func_key_reward: exported symbol name of the reward function.
        :param func_reward_config: optional dict with ``"argtypes"`` and/or
            ``"restype"`` for the reward function. Missing keys fall back to
            the defaults below.
        :param function_set: function names given to the symbolic regressor;
            defaults to ``['add', 'sub', 'mul', 'div', 'sin']``.
        """
        self.n_dim = n_dim
        self.price_table = price_table
        self.price_table_ptr = self.price_table.ctypes.data_as(
            ctypes.POINTER(ctypes.c_double))
        self.len_price_table = len(price_table)

        self.mid = Middleware(dll_path)
        self.reward_func = self.mid.get_function(func_key_reward)

        # Default foreign signature:
        # double f(int*, double*, int, double*, int, int)
        reward_config = {
            "argtypes": [
                ctypes.POINTER(ctypes.c_int),
                ctypes.POINTER(ctypes.c_double),
                ctypes.c_int,
                ctypes.POINTER(ctypes.c_double),
                ctypes.c_int,
                ctypes.c_int,
            ],
            "restype": ctypes.c_double,
        }
        # Previously a caller-supplied config was built but never applied;
        # overlay it on the defaults so partial configs are accepted too.
        if func_reward_config is not None:
            reward_config.update(func_reward_config)

        if function_set is None:
            function_set = ['add', 'sub', 'mul', 'div', 'sin']
        self.function_set = function_set

        self.reward_func.argtypes = reward_config["argtypes"]
        self.reward_func.restype = reward_config["restype"]

    def make_explict_func(self):
        """Build the ``(y, y_pred, sample_weight)`` callable used as metric.

        :return: a function that forwards the selected samples to the DLL
            reward function and returns its result.
        """
        n_dim = self.n_dim
        price_table_ptr = self.price_table_ptr

        def explicit_fitness(y, y_pred, sample_weight):
            """Evaluate the DLL reward for the samples with non-zero weight.

            :param y: indices corresponding to ``y_pred`` (see ``fit()``
                below), e.g. ``y = [2, 5, 7]`` and
                ``y_pred = [1.23, 2.34, 8.12]`` means::

                    f(x[2]) = 1.23
                    f(x[5]) = 2.34
                    f(x[7]) = 8.12
            :param y_pred: predicted values, aligned with ``y``.
            :param sample_weight: per-sample weights; entries that are falsy
                are dropped before calling the DLL.
            :return: the value returned by the reward function.
            """
            selected = np.asarray(sample_weight, dtype=bool)
            # The DLL reads ``int*`` / ``double*``. NumPy's default integer
            # is often 64-bit, so coerce to the exact C types (and to
            # contiguous buffers) before taking raw pointers.
            indices = np.ascontiguousarray(
                np.asarray(y)[selected], dtype=np.intc)
            y_pred_arr = np.ascontiguousarray(
                np.asarray(y_pred)[selected], dtype=np.float64)

            indices_pointer = indices.ctypes.data_as(
                ctypes.POINTER(ctypes.c_int))
            y_pred_arr_pointer = y_pred_arr.ctypes.data_as(
                ctypes.POINTER(ctypes.c_double))
            # NOTE(review): the final argument is a literal 0; its meaning is
            # defined by the DLL -- confirm it should not be a table length.
            return self.reward_func(indices_pointer, y_pred_arr_pointer,
                                    len(indices), price_table_ptr, n_dim, 0)

        return explicit_fitness

    def fit(self, x_data):
        """Fit a ``SymbolicRegressor`` on ``x_data`` using the DLL fitness.

        The regression target is the row index of each sample, so the fitness
        function can tell the DLL which rows a prediction belongs to.

        :param x_data: 2-D feature array; one row per sample.
        :return: the fitted ``SymbolicRegressor``.
        """
        est_gp = SymbolicRegressor(
            population_size=500,
            generations=10,
            stopping_criteria=0.0001,
            p_crossover=0.7,
            p_subtree_mutation=0.1,
            p_hoist_mutation=0.05,
            p_point_mutation=0.1,
            metric=make_fitness(self.make_explict_func(), False),
            function_set=self.function_set,
            verbose=1,
            parsimony_coefficient=0.01)
        row_indices = np.arange(x_data.shape[0])
        est_gp.fit(x_data, row_indices)
        return est_gp
# Location of the compiled library and the exported symbol names used below.
CONFIG_DLL_PATH = "D:/sunao/workspace/cpp/GPQuant/x64/Release/GPQuant.dll"
CONFIG_REWARD_FUNC_KEY = "?get_reward_with_x@BackTesting@GPQuant@@SANPEAHPEANH1HH@Z"
CONFIG_CHEATING_FUNC_KEY = "?cheating@BackTesting@GPQuant@@SAPEANPEANHH@Z"
CONFIG_DOUBLE_GC_FUNC_KEY = "?delete_double_pointer@BackTesting@GPQuant@@SAXPEAN@Z"
CONFIG_INT_GC_FUNC_KEY = "?delete_int_pointer@BackTesting@GPQuant@@SAXPEAH@Z"
CONFIG_TEST_MEM_FUNC_KEY = "?test_mem@BackTesting@GPQuant@@SANPEAHPEANH@Z"


def read_data(file_path, header=None):
    """Read a CSV file and return its contents as a NumPy array.

    :param file_path: path of the CSV file.
    :param header: forwarded to ``pandas.read_csv``; ``None`` means the file
        has no header row.
    :return: the table as a 2-D ``numpy.ndarray``.
    """
    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` yields
    # the same array and exists in every pandas version.
    return pd.read_csv(file_path, header=header).values


mid = Middleware(CONFIG_DLL_PATH)

# double get_reward(int*, double*, int, double*, int, int)
get_reward_func = mid.get_function(CONFIG_REWARD_FUNC_KEY)
get_reward_func.argtypes = [
    ctypes.POINTER(ctypes.c_int),
    ctypes.POINTER(ctypes.c_double),
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_double),
    ctypes.c_int,
    ctypes.c_int,
]
get_reward_func.restype = ctypes.c_double

cheating_func = mid.get_function(CONFIG_CHEATING_FUNC_KEY)
cheating_func.restype = ctypes.POINTER(ctypes.c_double)

test_mem_func = mid.get_function(CONFIG_TEST_MEM_FUNC_KEY)

total_data = 1000000
y = range(total_data)
# One vectorised draw instead of a million Python-level calls; ``tolist()``
# keeps the result a list of plain Python ints (0 or 1) as before.
sample_weight = np.random.randint(0, 2, size=total_data).tolist()
from numpy.ctypeslib import ndpointer
# NOTE(review): ``ctypes`` and ``Middleware`` are used below but not imported
# by name here; presumably they come from these star imports -- confirm.
from gpquant.gp_dynamic import *
from gpquant.gp_fitness import *
from gplearn.genetic import SymbolicRegressor
from gplearn.fitness import make_fitness
from ctypes import Structure
import pydotplus


class DataPackage(Structure):
    """ctypes structure used as the return type of the DLL's data getter."""

    # n_data / n_dim: C ints that bound the loops over ``data`` below.
    # data: pointer to pointers to double, indexed as data[i][j].
    _fields_ = [('n_data', ctypes.c_int),
                ('n_dim', ctypes.c_int),
                ('data', ctypes.POINTER(ctypes.POINTER(ctypes.c_double)))]


mid = Middleware("GPQuant.dll")

# The data getter returns a DataPackage struct by value.
get_data_func = mid.get_function("?get_data@BackTesting@GPQuant@@SA?AUTestDataPackage@2@XZ")
get_data_func.restype = DataPackage

# Only the return type is declared for the reward function; no argtypes are
# set here.
get_reward_func = mid.get_function("?get_reward@BackTesting@GPQuant@@SANPEAHPEAN@Z")
get_reward_func.restype = ctypes.c_double

package = get_data_func()
n_dim = int(package.n_dim)
n_data = int(package.n_data)

# Copy values out of the foreign buffer: the outer index runs over n_dim and
# the inner index over n_data.
x_data = []
for i in range(n_dim):
    _x_data = []
    for j in range(n_data):
        _x_data.append(package.data[i][j])
# NOTE(review): within the visible code ``_x_data`` is never appended to
# ``x_data``; the script appears to continue beyond this point -- confirm.
def train():
    """Fit a symbolic regressor with the DLL reward as fitness and plot it.

    Steps: load the reward and "cheating" functions from the DLL, read the
    feature table from ``CONFIG_FILE_PATH``, fit a ``SymbolicRegressor`` whose
    metric calls the DLL, write the resulting program graph to
    ``outputs/gp-<timestamp>.png`` and draw predictions against the values
    returned by the cheating function.
    """
    import os

    mid = Middleware(CONFIG_DLL_PATH)

    # double get_reward(int*, double*, int, double*, int, int)
    get_reward_func = mid.get_function(CONFIG_FUNC_KEY_REWARD)
    get_reward_func.argtypes = [
        ctypes.POINTER(ctypes.c_int),
        ctypes.POINTER(ctypes.c_double),
        ctypes.c_int,
        ctypes.POINTER(ctypes.c_double),
        ctypes.c_int,
        ctypes.c_int,
    ]
    get_reward_func.restype = ctypes.c_double

    cheating_func = mid.get_function(CONFIG_FUNC_KEY_CHEAT)
    cheating_func.restype = ctypes.POINTER(ctypes.c_double)

    x_data = read_data(CONFIG_FILE_PATH)
    # ``flatten`` gives an independent contiguous copy; force float64 so the
    # buffer really holds C doubles even if the CSV parsed as integers.
    _x_data = x_data.flatten().astype(np.float64, copy=False)
    x_arr_pointer = _x_data.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    x_len = len(_x_data)
    n_data = x_len // CONFIG_N_DIM

    def explicit_fitness(y, _y_pred, sample_weight):
        """Pass the positions with non-zero weight, and their predictions,
        to the DLL reward function and return its result."""
        selected = np.asarray(sample_weight, dtype=bool)[:len(y)]
        # ``np.intc`` matches C ``int`` exactly; the default NumPy integer is
        # frequently 64-bit and would be misread through an ``int*``.
        _indices = np.ascontiguousarray(np.flatnonzero(selected),
                                        dtype=np.intc)
        _y_pred_arr = np.ascontiguousarray(np.asarray(_y_pred)[_indices],
                                           dtype=np.float64)
        indices_pointer = _indices.ctypes.data_as(
            ctypes.POINTER(ctypes.c_int))
        y_pred_arr_pointer = _y_pred_arr.ctypes.data_as(
            ctypes.POINTER(ctypes.c_double))
        return get_reward_func(indices_pointer, y_pred_arr_pointer,
                               len(_indices), x_arr_pointer, CONFIG_N_DIM,
                               x_len)

    # Not read anywhere in this function; kept in case external code
    # inspects these attributes.
    explicit_fitness.counter = 0
    explicit_fitness.res = 0

    function_set = ['add', 'sub', 'mul', 'div', 'sin']
    est_gp = SymbolicRegressor(
        population_size=5000,
        generations=10,
        stopping_criteria=0.01,
        p_crossover=0.7,
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.1,
        metric=make_fitness(explicit_fitness, False),
        function_set=function_set,
        max_samples=0.8,
        verbose=1,
        parsimony_coefficient=0.01,
        random_state=0)

    # The regression target is simply each row's index.
    row_indices = np.arange(x_data.shape[0])
    est_gp.fit(x_data, row_indices)

    ts = int(time.time())
    graph = pydotplus.graphviz.graph_from_dot_data(
        est_gp._program.export_graphviz())
    # Make sure the target directory exists before writing the image.
    if not os.path.isdir("outputs"):
        os.makedirs("outputs")
    graph.write_png("outputs/gp-{suffix}.png".format(suffix=ts))

    # NOTE(review): ``res`` is a raw ``double*`` returned by the DLL and is
    # never released here -- confirm who owns that buffer.
    res = cheating_func(x_arr_pointer, CONFIG_N_DIM, x_len)
    y_truth = np.array([res[i] for i in range(n_data)], dtype=float)
    y_pred = np.asarray(est_gp.predict(x_data))

    # Sampling without replacement raises if more points are requested than
    # exist, so cap the plot size at the number of rows.
    n_data_plot = min(200, n_data)
    indicies_plot = np.sort(
        np.random.choice(n_data, n_data_plot, replace=False))

    canvas = gp_plot.GPCanvas()
    canvas.draw_line_chart_2d(range(0, n_data_plot), y_truth[indicies_plot],
                              color="blue", label="y_truth",
                              line_style="solid")
    canvas.draw_line_chart_2d(range(0, n_data_plot), y_pred[indicies_plot],
                              color="red", label="y_pred")
    mse = ((y_truth - y_pred) ** 2).mean()
    canvas.set_x_label("Indices")
    canvas.set_y_label("Values")
    canvas.set_title("Fitting plot with MSE={:5f}".format(mse))
    canvas.set_legend()
    canvas.set_axis_invisible()
    canvas.froze()
# Run configuration: feature count, input file, library path and the exported
# symbol names looked up in the DLL.
CONFIG_N_DIM = 3
CONFIG_FILE_PATH = "_test.txt"
CONFIG_DLL_PATH = "D:/sunao/workspace/cpp/GPQuant/x64/Release/GPQuant.dll"
CONFIG_FUNC_KEY_REWARD = "?get_reward_with_x@BackTesting@GPQuant@@SANPEAHPEANH1HH@Z"
CONFIG_FUNC_KEY_CHEAT = "?cheating@BackTesting@GPQuant@@SAPEANPEANHH@Z"
CONFIG_FUNC_KEY_CONVERT = "?convert_1d_array_to_2d_array@BackTesting@GPQuant@@SAPEAPEANPEANHH@Z"


def read_data(file_path, header=None):
    """Read a CSV file and return its contents as a NumPy array.

    :param file_path: path of the CSV file.
    :param header: forwarded to ``pandas.read_csv``; ``None`` means the file
        has no header row.
    :return: the table as a 2-D ``numpy.ndarray``.
    """
    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` yields
    # the same array and exists in every pandas version.
    return pd.read_csv(file_path, header=header).values


mid = Middleware(CONFIG_DLL_PATH)

# double get_reward(int*, double*, int, double*, int, int)
get_reward_func = mid.get_function(CONFIG_FUNC_KEY_REWARD)
get_reward_func.argtypes = [
    ctypes.POINTER(ctypes.c_int),
    ctypes.POINTER(ctypes.c_double),
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_double),
    ctypes.c_int,
    ctypes.c_int,
]
get_reward_func.restype = ctypes.c_double

cheating_func = mid.get_function(CONFIG_FUNC_KEY_CHEAT)
cheating_func.restype = ctypes.POINTER(ctypes.c_double)

x_data = read_data(CONFIG_FILE_PATH)
# ``flatten`` gives an independent contiguous copy; force float64 so the
# buffer really holds C doubles even if the CSV parsed as integers.
_x_data = x_data.flatten().astype("float64", copy=False)
x_arr_pointer = _x_data.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
import ctypes
import time

import numpy as np
import pandas as pd

from gpquant import gp_model
from gpquant import gp_io

# Train the model, then re-evaluate the fitted program through the DLL reward
# function over every row and print both scores for comparison.
x_data, price_table = gp_io.read_data("./data/data.csv")

CONFIG_DLL_PATH = "D:/sunao/workspace/cpp/GPQuant/x64/Release/GPQuant.dll"
CONFIG_FUNC_KEY_REWARD = "?get_reward@BackTesting@GPQuant@@SANPEAHPEANH1HH@Z"

gp = gp_model.GPQuant(56, price_table, CONFIG_DLL_PATH, CONFIG_FUNC_KEY_REWARD)
est_gp = gp.fit(x_data)
y_pred = est_gp.predict(x_data)
print("---------------------------------------------")

# The DLL reads ``int*`` / ``double*``: build the index array as C ints
# (NumPy's default integer is often 64-bit) and make sure the predictions are
# a contiguous float64 buffer before taking raw pointers.
indices = np.arange(len(x_data), dtype=np.intc)
y_pred = np.ascontiguousarray(y_pred, dtype=np.float64)
indices_pointer = indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
y_pred_arr_pointer = y_pred.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
price_table_ptr = price_table.ctypes.data_as(ctypes.POINTER(ctypes.c_double))

# NOTE(review): ``Middleware`` is not imported in the visible part of this
# script -- confirm which module provides it and import it explicitly.
mid = Middleware(CONFIG_DLL_PATH)
reward_func = mid.get_function(CONFIG_FUNC_KEY_REWARD)
# Without an explicit ``restype`` ctypes assumes the foreign function returns
# a C int, which would turn the double result into garbage.
reward_func.argtypes = [
    ctypes.POINTER(ctypes.c_int),
    ctypes.POINTER(ctypes.c_double),
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_double),
    ctypes.c_int,
    ctypes.c_int,
]
reward_func.restype = ctypes.c_double

# NOTE(review): the trailing arguments (0, -1) are passed through unchanged;
# their meaning is defined by the DLL -- confirm.
result = reward_func(indices_pointer, y_pred_arr_pointer, len(x_data),
                     price_table_ptr, 0, -1)
print(result)
print(est_gp._program.fitness_)