def compare_agraph_potential(X, Y):
    """Check that an AGraph search converges on a pairwise atomic potential.

    X is passed as the ``configurations`` and Y as the ``potential_energy``
    of the pairwise atomic training data.  The function asserts that the
    island manager reports convergence to EPSILON within MAX_STEPS.
    """
    # Solution manipulator restricted to addition and multiplication.
    # Other node types (Subtract, Divide, Exp, Sin, Cos) are deliberately
    # left out of this comparison.
    graph_manip = agm(1, 16, nloads=2)
    for node in (AGNodes.Add, AGNodes.Multiply):
        graph_manip.add_node_type(node)

    # Fitness predictor manipulator, sized from the first dimension of X.
    predictor_manip = fpm(32, X.shape[0])

    # Training data and the matching fitness metric.
    atomic_data = PairwiseAtomicTrainingData(potential_energy=Y,
                                             configurations=X)
    fitness = PairwiseAtomicPotential()

    # Build the serial island manager and run until convergence.
    manager = SerialIslandManager(N_ISLANDS,
                                  solution_training_data=atomic_data,
                                  solution_manipulator=graph_manip,
                                  predictor_manipulator=predictor_manip,
                                  fitness_metric=fitness)
    assert manager.run_islands(MAX_STEPS, EPSILON,
                               step_increment=N_STEPS,
                               make_plots=False)
def compare_agraphcpp_explicit(X, Y):
    """Check that the ``agcm`` manipulator converges on explicit data.

    Runs a standard (explicit) regression of Y on X and asserts that the
    island manager reports convergence to EPSILON within MAX_STEPS.
    """
    # Solution manipulator with one variable per column of X; only the
    # integer node types 2 and 4 are enabled.
    graph_manip = agcm(X.shape[1], 16, nloads=2)
    for node_code in (2, 4):
        graph_manip.add_node_type(node_code)

    # Fitness predictor manipulator, sized from the number of Y entries.
    predictor_manip = fpm(32, Y.shape[0])

    # Training data and fitness metric for explicit regression.
    explicit_data = ExplicitTrainingData(X, Y)
    fitness = StandardRegression()

    manager = SerialIslandManager(N_ISLANDS,
                                  solution_training_data=explicit_data,
                                  solution_manipulator=graph_manip,
                                  predictor_manipulator=predictor_manip,
                                  fitness_metric=fitness)
    converged = manager.run_islands(MAX_STEPS, EPSILON,
                                    step_increment=N_STEPS,
                                    make_plots=False)
    assert converged
def compare_ag_implicit(X, Y, operator, params):
    """Check implicit regression convergence against a known equation.

    Y is appended to X as an extra column, a reference graph computing
    ``operator(params) - column 2`` is built by hand, and the search must
    reach a fitness within 5% (plus 1e-10) of that reference.
    """
    # Implicit regression works on a single array: append Y as a column.
    data = np.hstack((X, Y.reshape([-1, 1])))

    # Solution manipulator with the full set of node types enabled.
    graph_manip = agm(data.shape[1], 16, nloads=2)
    enabled_nodes = (AGNodes.Add, AGNodes.Subtract, AGNodes.Multiply,
                     AGNodes.Divide, AGNodes.Exp, AGNodes.Log,
                     AGNodes.Sin, AGNodes.Cos, AGNodes.Abs, AGNodes.Pow)
    for node in enabled_nodes:
        graph_manip.add_node_type(node)

    # Hand-build the true equation on top of a generated graph: load the
    # three data columns, apply the operator, then subtract column 2.
    true_equ = graph_manip.generate()
    for column in range(3):
        true_equ.command_list[column] = (AGNodes.LoadData, (column, ))
    true_equ.command_list[3] = (operator, params)
    true_equ.command_list[-1] = (AGNodes.Subtract, (3, 2))

    # Fitness predictor manipulator, sized from the number of data rows.
    predictor_manip = fpm(32, data.shape[0])

    # Training data and fitness metric for implicit regression.
    implicit_data = ImplicitTrainingData(data)
    fitness = ImplicitRegression()

    manager = SerialIslandManager(N_ISLANDS,
                                  solution_training_data=implicit_data,
                                  solution_manipulator=graph_manip,
                                  predictor_manipulator=predictor_manip,
                                  fitness_metric=fitness)

    # The convergence threshold is derived from the known equation's fitness.
    reference_fitness = manager.isles[0].solution_fitness_true(true_equ)
    epsilon = 1.05 * reference_fitness + 1.0e-10
    converged = manager.run_islands(MAX_STEPS, epsilon,
                                    step_increment=N_STEPS,
                                    make_plots=False)
    assert converged
def compare_ag_explicit(X, Y, operator, params):
    """Check explicit regression convergence against a known equation.

    A reference graph ending in ``operator(params)`` is built by hand, and
    the search must reach a fitness within 5% (plus 1e-10) of that
    reference.
    """
    # Solution manipulator with the full set of node types enabled.
    graph_manip = agm(X.shape[1], 16, nloads=2)
    enabled_nodes = (AGNodes.Add, AGNodes.Subtract, AGNodes.Multiply,
                     AGNodes.Divide, AGNodes.Exp, AGNodes.Log,
                     AGNodes.Sin, AGNodes.Cos, AGNodes.Abs, AGNodes.Pow)
    for node in enabled_nodes:
        graph_manip.add_node_type(node)

    # Hand-build the true equation: load columns 0 and 1, then make the
    # final command the operator under test.
    true_equ = graph_manip.generate()
    for column in range(2):
        true_equ.command_list[column] = (AGNodes.LoadData, (column, ))
    true_equ.command_list[-1] = (operator, params)

    # Fitness predictor manipulator, sized from the number of Y entries.
    predictor_manip = fpm(32, Y.shape[0])

    # Explicit training data takes Y as a single column.
    y_column = Y.reshape([-1, 1])
    explicit_data = ExplicitTrainingData(X, y_column)
    fitness = StandardRegression()

    manager = SerialIslandManager(N_ISLANDS,
                                  solution_training_data=explicit_data,
                                  solution_manipulator=graph_manip,
                                  predictor_manipulator=predictor_manip,
                                  fitness_metric=fitness)

    # The convergence threshold is derived from the known equation's fitness.
    reference_fitness = manager.isles[0].solution_fitness_true(true_equ)
    epsilon = 1.05 * reference_fitness + 1.0e-10
    converged = manager.run_islands(MAX_STEPS, epsilon,
                                    step_increment=N_STEPS,
                                    make_plots=False)
    assert converged
def compare_agcpp_implicit(X, Y, operator, params):
    """Check implicit regression convergence with integer node codes.

    Y is appended to X as an extra column, a reference command array is
    written by hand, and the search must reach a fitness within 5% (plus
    1e-10) of that reference.
    """
    # Implicit regression works on a single array: append Y as a column.
    data = np.hstack((X, Y.reshape([-1, 1])))

    # Solution manipulator with the integer node types 2 through 5 enabled.
    graph_manip = agm(data.shape[1], 16, nloads=2)
    for node_code in (2, 3, 4, 5):
        graph_manip.add_node_type(node_code)

    # Hand-build the true equation: rows 0-2 are (0, i, i), row 3 applies
    # the operator to its two parameters, and the last row is (3, 3, 2).
    true_equ = graph_manip.generate()
    for row in range(3):
        true_equ.command_array[row] = (0, row, row)
    first_param, second_param = params[0], params[1]
    true_equ.command_array[3] = (operator, first_param, second_param)
    true_equ.command_array[-1] = (3, 3, 2)

    # Fitness predictor manipulator, sized from the number of data rows.
    predictor_manip = fpm(32, data.shape[0])

    # Training data and fitness metric for implicit regression.
    implicit_data = ImplicitTrainingData(data)
    fitness = ImplicitRegression()

    manager = SerialIslandManager(N_ISLANDS,
                                  solution_training_data=implicit_data,
                                  solution_manipulator=graph_manip,
                                  predictor_manipulator=predictor_manip,
                                  fitness_metric=fitness)

    # The convergence threshold is derived from the known equation's fitness.
    reference_fitness = manager.isles[0].solution_fitness_true(true_equ)
    epsilon = 1.05 * reference_fitness + 1.0e-10
    converged = manager.run_islands(MAX_STEPS, epsilon,
                                    step_increment=N_STEPS,
                                    make_plots=False)
    assert converged
def compare_agcpp_explicit(X, Y, operator, params):
    """Check explicit regression convergence with integer node codes.

    A reference command array ending in ``operator(params[0], params[1])``
    is written by hand, and the search must reach a fitness within 5%
    (plus 1e-10) of that reference.
    """
    # Explicit training data takes Y as a single column.
    y_column = Y.reshape([-1, 1])

    # Solution manipulator with the integer node types 2 through 5 enabled.
    graph_manip = agm(X.shape[1], 16, nloads=2)
    for node_code in (2, 3, 4, 5):
        graph_manip.add_node_type(node_code)

    # Hand-build the true equation: rows 0-1 are (0, i, i) and the final
    # row applies the operator to its two parameters.
    true_equ = graph_manip.generate()
    for row in range(2):
        true_equ.command_array[row] = (0, row, row)
    true_equ.command_array[-1] = (operator, params[0], params[1])

    # Fitness predictor manipulator, sized from the number of rows in X.
    predictor_manip = fpm(32, X.shape[0])

    # Training data and fitness metric (with const_deriv enabled).
    explicit_data = ExplicitTrainingData(X, y_column)
    fitness = StandardRegression(const_deriv=True)

    manager = SerialIslandManager(N_ISLANDS,
                                  solution_training_data=explicit_data,
                                  solution_manipulator=graph_manip,
                                  predictor_manipulator=predictor_manip,
                                  fitness_metric=fitness)

    # The convergence threshold is derived from the known equation's fitness.
    reference_fitness = manager.isles[0].solution_fitness_true(true_equ)
    epsilon = 1.05 * reference_fitness + 1.0e-10
    converged = manager.run_islands(MAX_STEPS, epsilon,
                                    step_increment=N_STEPS,
                                    make_plots=False)
    assert converged
def main(max_steps, epsilon, data_size, n_islands):
    """Run a serial symbolic regression example on generated data.

    max_steps and epsilon are forwarded to ``run_islands``; data_size is
    used for data generation and for the fitness predictor manipulator;
    n_islands is the number of islands given to the serial manager.
    """
    # ------------------------------------------------------------------
    # DATA (enable exactly one of the generators below)
    # ------------------------------------------------------------------
    # Explicit regression data
    # (pair with ExplicitTrainingData and StandardRegression):
    # x_true, y_true = make_1d_data(data_size, 1)
    x_true, y_true = make_1d_data(data_size, 2)
    # x_true, y_true = make_1d_data(data_size, 3)
    # x_true, y_true = make_1d_data(data_size, 4)
    # x_true, y_true = make_norm_data(data_size)
    #
    # Implicit regression data
    # (pair with ImplicitTrainingData and ImplicitRegression):
    # x_true = make_circle_data(data_size)

    # ------------------------------------------------------------------
    # TRAINING DATA (enable exactly one)
    # ------------------------------------------------------------------
    training_data = ExplicitTrainingData(x_true, y_true)
    # training_data = ImplicitTrainingData(x_true)

    # ------------------------------------------------------------------
    # SOLUTION MANIPULATOR (enable exactly one of the two variants)
    # ------------------------------------------------------------------
    # Variant A: AGraph.py
    # sol_manip = agm(x_true.shape[1], 32, nloads=2)
    # sol_manip.add_node_type(AGNodes.Add)
    # sol_manip.add_node_type(AGNodes.Subtract)
    # sol_manip.add_node_type(AGNodes.Multiply)
    # sol_manip.add_node_type(AGNodes.Divide)
    # # sol_manip.add_node_type(AGNodes.Exp)
    # # sol_manip.add_node_type(AGNodes.Log)
    # # sol_manip.add_node_type(AGNodes.Sin)
    # # sol_manip.add_node_type(AGNodes.Cos)
    # # sol_manip.add_node_type(AGNodes.Abs)
    # sol_manip.add_node_type(AGNodes.Sqrt)
    #
    # Variant B: AGraphCpp.py
    sol_manip = AGraphCpp.AGraphCppManipulator(x_true.shape[1], 32, nloads=2)
    node_codes = (
        2,   # +
        3,   # -
        4,   # *
        5,   # /
        12,  # sqrt
    )
    for code in node_codes:
        sol_manip.add_node_type(code)

    # ------------------------------------------------------------------
    # FITNESS PREDICTOR MANIPULATOR
    # ------------------------------------------------------------------
    pred_manip = fpm(128, data_size)

    # ------------------------------------------------------------------
    # FITNESS METRIC (enable exactly one)
    # ------------------------------------------------------------------
    regressor = StandardRegression()
    # regressor = ImplicitRegression()

    # ------------------------------------------------------------------
    # SERIAL ISLAND MANAGER: build it, then run it
    # ------------------------------------------------------------------
    manager_settings = {
        "solution_training_data": training_data,
        "solution_manipulator": sol_manip,
        "predictor_manipulator": pred_manip,
        "solution_pop_size": 64,
        "fitness_metric": regressor,
        "solution_age_fitness": True,
    }
    islmngr = SerialIslandManager(n_islands, **manager_settings)
    islmngr.run_islands(max_steps, epsilon, step_increment=100)
def main(max_steps, epsilon, data_size):
    """Run an MPI-parallel symbolic regression example on generated data.

    Rank 0 generates the data and broadcasts it to every rank; each rank
    then builds the same manipulators and fitness metric and runs the
    parallel island manager.
    """
    comm = MPI.COMM_WORLD

    if comm.Get_rank() == 0:
        # --------------------------------------------------------------
        # DATA (enable exactly one of the generators below)
        # --------------------------------------------------------------
        # Explicit regression data
        # (pair with ExplicitTrainingData and StandardRegression):
        # x_true, y_true = make_1d_data(data_size, 1)
        x_true, y_true = make_1d_data(data_size, 2)
        # x_true, y_true = make_1d_data(data_size, 3)
        # x_true, y_true = make_1d_data(data_size, 4)
        # x_true, y_true = make_norm_data(data_size)
        #
        # Implicit regression data
        # (pair with ImplicitTrainingData and ImplicitRegression):
        # x_true = make_circle_data(data_size)
    else:
        # Non-root ranks receive their data from the broadcast below.
        x_true = None
        y_true = None

    # Share rank 0's data with every rank.
    x_true = comm.bcast(x_true, root=0)
    y_true = comm.bcast(y_true, root=0)

    # ------------------------------------------------------------------
    # TRAINING DATA (enable exactly one)
    # ------------------------------------------------------------------
    training_data = ExplicitTrainingData(x_true, y_true)
    # training_data = ImplicitTrainingData(x_true)

    # ------------------------------------------------------------------
    # SOLUTION MANIPULATOR (enable exactly one of the two variants)
    # ------------------------------------------------------------------
    # Variant A: AGraph.py
    # sol_manip = agm(x_true.shape[1], 32, nloads=2)
    # sol_manip.add_node_type(AGNodes.Add)
    # sol_manip.add_node_type(AGNodes.Subtract)
    # sol_manip.add_node_type(AGNodes.Multiply)
    # sol_manip.add_node_type(AGNodes.Divide)
    # # sol_manip.add_node_type(AGNodes.Exp)
    # # sol_manip.add_node_type(AGNodes.Log)
    # # sol_manip.add_node_type(AGNodes.Sin)
    # # sol_manip.add_node_type(AGNodes.Cos)
    # # sol_manip.add_node_type(AGNodes.Abs)
    # sol_manip.add_node_type(AGNodes.Sqrt)
    #
    # Variant B: AGraphCpp.py
    sol_manip = AGraphCpp.AGraphCppManipulator(x_true.shape[1], 32, nloads=2)
    node_codes = (
        2,   # +
        3,   # -
        4,   # *
        5,   # /
        12,  # sqrt
    )
    for code in node_codes:
        sol_manip.add_node_type(code)

    # ------------------------------------------------------------------
    # FITNESS PREDICTOR MANIPULATOR
    # ------------------------------------------------------------------
    pred_manip = fpm(128, data_size)

    # ------------------------------------------------------------------
    # FITNESS METRIC (enable exactly one)
    # ------------------------------------------------------------------
    regressor = StandardRegression(const_deriv=True)
    # regressor = ImplicitRegression()

    # ------------------------------------------------------------------
    # PARALLEL ISLAND MANAGER: build it, then run it
    # ------------------------------------------------------------------
    manager_settings = {
        "solution_training_data": training_data,
        "solution_manipulator": sol_manip,
        "predictor_manipulator": pred_manip,
        "solution_pop_size": 64,
        "fitness_metric": regressor,
        "solution_age_fitness": True,
    }
    islmngr = ParallelIslandManager(**manager_settings)
    islmngr.run_islands(max_steps, epsilon,
                        min_steps=1000, step_increment=1000)
def main(max_steps, epsilon, data_size, parallel):
    """Run an explicit symbolic regression example, single or MPI-parallel.

    Parameters
    ----------
    max_steps : maximum number of steps to run if no convergence happens.
    epsilon : error which defines convergence.
    data_size : number of points sampled in single mode; also passed to the
        fitness predictor manipulator.
    parallel : when truthy, rank 0 builds a fixed 16-point data set and
        broadcasts it to all ranks; otherwise data are sampled locally from
        a 3-variable grid.
    """
    # STEP 1
    # Create the x and y data. In parallel mode, broadcast it to all ranks.
    if parallel:
        comm = MPI.COMM_WORLD
        if comm.Get_rank() == 0:
            # Fixed tabulated data: x = 25, 50, ..., 400 (16 points).
            x_true = np.arange(25, 425, 25)
            y_true = np.array([
                5.38, 2.91, 2.07, 1.71, 1.46, 1.35, 1.29, 1.24, 1.2, 1.19,
                1.22, 1.23, 1.23, 1.23, 1.26, 1.26
            ])
            y_true = y_true.reshape(-1, 1)
        else:
            x_true = None
            y_true = None
        # then broadcast to all ranks
        x_true = comm.bcast(x_true, root=0)
        y_true = comm.bcast(y_true, root=0)
    else:
        # Sample data_size distinct points from a regular 3-D grid on
        # [0, 5]^3 that has at least data_size nodes.
        n_lin = int(math.pow(data_size, 1.0 / 3)) + 1
        axis = np.linspace(0, 5, n_lin)
        grid = np.array(np.meshgrid(axis, axis, axis)).T.reshape(-1, 3)
        x_true = grid[np.random.choice(grid.shape[0], data_size,
                                       replace=False), :]
        # make solution: y = x0^2 + 3.5 * x1
        y_true = x_true[:, 0] * x_true[:, 0] + 3.5 * x_true[:, 1]
        y_true = y_true.reshape(-1, 1)

    # STEP 2
    # Create the solution manipulator, which builds the acyclic-graph
    # representations of candidate functions.
    #   nvars         - number of independent variables
    #   ag_size       - length of the command stack
    #   nloads        - number of load operations required at the start of
    #                   the stack - Default 1
    #   float_lim     - (0, max) of floats which are generated - Default 10.0
    #   terminal_prob - probability that a new node will be a terminal
    #                   - Default .1
    #
    # nvars is taken from the data instead of being hard-coded to 1, so the
    # 3-column single-mode data can actually be used by the search.  The 1-D
    # parallel data still yields nvars == 1.
    n_vars = x_true.shape[1] if x_true.ndim > 1 else 1
    sol_manip = AGraphCpp.AGraphCppManipulator(n_vars, 64, nloads=2)

    # Operations available to the search.
    node_codes = (
        2,   # +
        3,   # -
        4,   # *
        5,   # /
        6,   # sin
        7,   # cos
        8,   # exp
        9,   # log
        10,  # pow
        11,  # abs
        12,  # sqrt
    )
    for code in node_codes:
        sol_manip.add_node_type(code)

    # Fitness predictor manipulator.
    # NOTE(review): the parallel data set always has 16 points regardless of
    # data_size - confirm that passing data_size here is intended.
    pred_manip = fpm(16, data_size)

    # STEP 3
    # Create the training data and the fitness metric; this example uses
    # explicit (standard) regression.
    training_data = ExplicitTrainingData(x_true, y_true)
    explicit_regressor = StandardRegression()

    # STEP 4
    # Create the island manager, which runs the steps on the population and
    # determines when to stop.
    # NOTE(review): ParallelIslandManager is used in single mode as well;
    # this is unchanged from the original.
    islmngr = ParallelIslandManager(
        # restart_file='test.p',
        solution_training_data=training_data,
        solution_manipulator=sol_manip,
        predictor_manipulator=pred_manip,
        solution_pop_size=64,
        fitness_metric=explicit_regressor)

    # Run the island manager.
    #   max_steps       - max amount to go if no convergence happens
    #   epsilon         - error which defines convergence
    #   min_steps       - minimum number of steps required - Default 0
    #   step_increment  - number of steps between convergence
    #                     checks / migration - Default 1000
    #   make_plots      - bool whether or not to produce plots - Default True
    #   checkpoint_file - base file name for checkpoint files
    # Parallel only:
    #   when_update     - how often rank 0 gets updated on ages - Default 10
    #   non_block       - bool to determine to run nonblocking - Default True
    if parallel:
        islmngr.run_islands(max_steps, epsilon, min_steps=500,
                            step_increment=500, when_update=50)
    else:
        islmngr.run_islands(max_steps, epsilon, min_steps=500,
                            step_increment=500)
def compare_cpp_agcpp_implicit(X, Y, operator, params):
    """Check bingocpp implicit regression convergence against a known equation.

    Y is appended to X as an extra column, a reference stack is written by
    hand and simplified, and the search must reach a fitness within 5%
    (plus 1e-10) of that reference.  Because the search is retried once on
    failure, the assertion fails only if both attempts fail to converge.

    Parameters
    ----------
    X : 2-D array of input data.
    Y : array of outputs; reshaped to a single column and appended to X.
    operator : node code written into row 3 of the reference stack.
    params : indexable pair of parameters for ``operator``.

    Raises
    ------
    AssertionError : if neither attempt converges.
    """
    max_attempts = 2

    # Implicit regression works on a single array: append Y as a column.
    data = np.hstack((X, Y.reshape([-1, 1])))

    # make solution manipulator with node types 2 through 12 enabled
    sol_manip = bingocpp.AcyclicGraphManipulator(data.shape[1], 16, nloads=2)
    for node_type in range(2, 13):
        sol_manip.add_node_type(node_type)

    # make true equation: rows 0-2 are (0, i, i), row 3 applies the operator,
    # and the last row is (3, 3, 2)
    equ = sol_manip.generate()
    stack = np.copy(equ.stack)
    for row in range(3):
        stack[row] = (0, row, row)
    stack[3] = (operator, params[0], params[1])
    stack[-1] = (3, 3, 2)
    equ.stack = np.copy(stack)
    sol_manip.simplify_stack(equ)
    print(stack)
    print("equstack\n", equ.stack)

    # make predictor manipulator
    pred_manip = fpm(32, data.shape[0])

    # make training data and fitness metric
    training_data = bingocpp.ImplicitTrainingData(data)
    implicit_regressor = bingocpp.ImplicitRegression()

    # make and run island manager; a fresh manager is built for the retry
    converged = False
    for _ in range(max_attempts):
        islmngr = SerialIslandManager(N_ISLANDS,
                                      solution_training_data=training_data,
                                      solution_manipulator=sol_manip,
                                      predictor_manipulator=pred_manip,
                                      fitness_metric=implicit_regressor)
        epsilon = (1.05 * islmngr.isles[0].solution_fitness_true(equ)
                   + 1.0e-10)
        print("EPSILON IS - ", epsilon, equ.latexstring())
        converged = islmngr.run_islands(MAX_STEPS, epsilon,
                                        step_increment=N_STEPS,
                                        make_plots=False)
        if converged:
            break

    # The original computed `converged` but never checked it, so this
    # comparison could not fail; assert it like the sibling compare_* helpers.
    assert converged
def main(max_steps, epsilon, data_size):
    """Time an MPI-parallel implicit regression using the bingocpp backend.

    Rank 0 builds the data from ``snake_walk()`` and broadcasts it.  The
    wall-clock time of ``run_islands`` is appended to ``timesN`` and the
    island manager's ``age`` to ``agesN``.
    """
    comm = MPI.COMM_WORLD

    # Build the data on rank 0 only.
    if comm.Get_rank() == 0:
        # Alternative explicit data set (disabled):
        # n_lin = int(math.pow(data_size, 1.0/3)) + 1
        # x_1 = np.linspace(0, 5, n_lin)
        # x_2 = np.linspace(0, 5, n_lin)
        # x_3 = np.linspace(0, 5, n_lin)
        # x = np.array(np.meshgrid(x_1, x_2, x_3)).T.reshape(-1, 3)
        # x = x[np.random.choice(x.shape[0], data_size, replace=False), :]
        # y = (x[:,0]*x[:,0]+3.5*x[:,1])
        # x_true = x
        # y_true = y

        # Implicit data: y = x0 + x1 is appended as an extra column.
        walk = snake_walk()
        walk_sum = walk[:, 0] + walk[:, 1]
        x_true = np.hstack((walk, walk_sum.reshape([-1, 1])))
        y_true = None
    else:
        x_true = None
        y_true = None

    # Share rank 0's data with every rank.
    x_true = comm.bcast(x_true, root=0)
    y_true = comm.bcast(y_true, root=0)

    # Solution manipulator from the bingocpp backend.  Disabled alternatives:
    # sol_manip = agm(x_true.shape[1], 64, nloads=2)
    # sol_manip2 = AGraphCpp.AGraphCppManipulator(x_true.shape[1], 64,
    #                                             nloads=2)
    # sol_manip2 = bingocpp.AcyclicGraphManipulator(x_true.shape[1], 64,
    #                                               nloads=2, opt_rate=0)
    sol_manip2 = bingocpp.AcyclicGraphManipulator(x_true.shape[1], 64,
                                                  nloads=2)
    node_codes = (
        2,   # +
        3,   # -
        4,   # *
        5,   # /
        6,   # sin
        7,   # cos
        8,   # exp
        9,   # log
        # 10 (pow) is intentionally left disabled
        11,  # abs
        12,  # sqrt
    )
    for code in node_codes:
        sol_manip2.add_node_type(code)

    # Fitness predictor manipulator.
    pred_manip = fpm(128, data_size)

    # Training data.  Disabled alternatives:
    # training_data = ImplicitTrainingData(x_true)
    # training_data = ExplicitTrainingData(x_true, y_true)
    # training_data2 = bingocpp.ExplicitTrainingData(x_true, y_true)
    training_data = bingocpp.ImplicitTrainingData(x_true)

    # Fitness metric.  Disabled alternatives:
    # implicit_regressor = ImplicitRegression()
    # explicit_regressor = StandardRegression(const_deriv=True)
    # explicit_regressor2 = bingocpp.StandardRegression()
    implicit_regressor = bingocpp.ImplicitRegression()

    # Parallel island manager (a restart_file='test.p' argument may be added
    # to resume from a checkpoint).
    manager_settings = {
        "solution_training_data": training_data,
        "solution_manipulator": sol_manip2,
        "predictor_manipulator": pred_manip,
        "solution_pop_size": 64,
        "fitness_metric": implicit_regressor,
    }
    islmngr = ParallelIslandManager(**manager_settings)

    # Time the run and record the elapsed seconds and the final age.
    started = time.time()
    islmngr.run_islands(max_steps, epsilon, min_steps=500,
                        step_increment=500, when_update=50)
    finished = time.time()
    timesN.append(finished - started)
    agesN.append(islmngr.age)