import pickle
import random
from datetime import datetime

import numpy as np

# Project-specific names (DataPipeline, ResultsPipeline, SimpleGP,
# LossFunctionEvoFitness) and the globals args, X, y, functions, terminals,
# heuristics, and device are assumed to be defined elsewhere in the module.


def single_run_algorithm(run_args):
    # Set seeds
    np.random.seed(run_args["seed"])
    random.seed(run_args["seed"])

    runid = args.id + '-' + args.dataset + '-' + str(
        run_args["seed"]) + '-' + datetime.now().strftime("%y%m%d_%H%M")

    gp_dataloaders, test_dataloaders, input_size = DataPipeline.preprocess_data(
        X, y, run_args["seed"])

    # Hyper-parameters
    long_params = {
        "hidden_size": 100,
        "num_epochs": 100,
        "input_size": input_size,
        "learning_rate": 1e-2
    }
    short_params = {
        "hidden_size": 100,
        "num_epochs": 10,
        "input_size": input_size,
        "learning_rate": 1e-2
    }

    fitness_func = LossFunctionEvoFitness(gp_dataloaders,
                                          test_dataloaders,
                                          short_params,
                                          long_params,
                                          seed=run_args["seed"])

    sgp = SimpleGP(fitness_func,
                   functions,
                   terminals,
                   heuristics,
                   pop_size=args.pop,
                   max_generations=args.gen,
                   crossover_rate=0.33,
                   mutation_rate=0.33,
                   op_mutation_rate=0.33,
                   initialization_max_tree_height=4,
                   runid=runid,
                   seed=run_args["seed"],
                   tournament_size=7,
                   max_tree_size=31,
                   verbose=True)
    sgp.Run()

    elite = sgp.fitness_function.elite
    budget = sgp.fitness_function.evaluations
    with open('logs/elites/' + runid, 'wb') as f:
        pickle.dump(elite, f)

    ResultsPipeline.evaluate_elite(elite, runid, gp_dataloaders,
                                   test_dataloaders, long_params,
                                   run_args["seed"], budget, device)
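# single_run_algorithm depends only on its run_args dict, so independent
# seeds can run in parallel. A minimal driver sketch, assuming the function
# above; the seed list and worker count are illustrative, not from the source.
from multiprocessing import Pool

if __name__ == '__main__':
    seeds = [0, 1, 2, 3, 4]                       # illustrative seed list
    run_args_list = [{"seed": s} for s in seeds]
    with Pool(processes=4) as pool:               # worker count is an assumption
        pool.map(single_run_algorithm, run_args_list)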
def do_experiment(experiment):
    i, (p, m, cr, mH, tSize, tim), (lr, initB, u, g, bIter, fG), \
        X_train, X_test, y_train, y_test = experiment

    # Set fitness function
    fitness_function = SymbolicRegressionFitness(X_train, y_train)

    # Run GP
    backprop_function = Backpropagation(X_train,
                                        y_train,
                                        iters=bIter,
                                        learning_rate=lr,
                                        decayFunction=Backpropagation.NoDecay)
    sgp = SimpleGP(fitness_function,
                   backprop_function,
                   functions,
                   terminals,
                   pop_size=p,
                   mutation_rate=m,
                   crossover_rate=cr,
                   initialization_max_tree_height=mH,
                   tournament_size=tSize,
                   max_time=tim,
                   uniform_k=u,
                   backprop_selection_ratio=1,
                   backprop_every_generations=g,
                   initialBackprop=initB,
                   first_generations=fG)
    # dir_name is assumed to be defined in the enclosing scope
    _, _, _, runtime = sgp.Run(applyBackProp=True, iterationNum=i,
                               dirName=dir_name)

    # Log results
    with open(sgp.dirName + "/" + sgp.logName, "a") as fp:
        # Show the evolved function
        final_evolved_function = fitness_function.elite
        nodes_final_evolved_function = final_evolved_function.GetSubtree()
        fp.write('Function found (' + str(len(nodes_final_evolved_function)) +
                 ' nodes):\n\t' + str(nodes_final_evolved_function) + "\n")

        # Results for the training set
        fp.write('Training\n\tMSE:' +
                 str(np.round(final_evolved_function.fitness, 3)) +
                 '\n\tRsquared:' +
                 str(np.round(
                     1.0 - final_evolved_function.fitness / np.var(y_train),
                     3)) + "\n")

        # Re-evaluate the evolved function on the test set
        test_prediction = final_evolved_function.GetOutput(X_test)
        test_mse = np.mean(np.square(y_test - test_prediction))
        fp.write('Test:\n\tMSE:' + str(np.round(test_mse, 3)) +
                 '\n\tRsquared:' +
                 str(np.round(1.0 - test_mse / np.var(y_test), 3)) + "\n")
        fp.write(str(runtime) + "\n")
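# A minimal sketch of how the experiment tuples unpacked above could be
# assembled. All parameter values are illustrative assumptions, and the
# pre-split X_train/X_test/y_train/y_test arrays are assumed to exist.
from itertools import product

gp_grid = product([100, 250], [0.1], [0.5], [4], [4], [30])   # p, m, cr, mH, tSize, tim
bp_grid = product([0.01], [False], [0.5], [1], [5], [0])      # lr, initB, u, g, bIter, fG
experiments = [(i, gp_cfg, bp_cfg, X_train, X_test, y_train, y_test)
               for i, (gp_cfg, bp_cfg) in enumerate(product(gp_grid, bp_grid))]
for experiment in experiments:
    do_experiment(experiment)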
def do_experiment(experiment):
    (i, train_index, test_index), (p, m, cr, mH, tSize, tim), _ = experiment

    # Cross validation
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Set fitness function
    fitness_function = SymbolicRegressionFitness(X_train, y_train)

    # Run GP
    backprop_function = Backpropagation(X_train,
                                        y_train,
                                        iters=5,
                                        learning_rate=0.001,
                                        decayFunction=Backpropagation.NoDecay)
    sgp = SimpleGP(fitness_function,
                   backprop_function,
                   functions,
                   terminals,
                   pop_size=p,
                   mutation_rate=m,
                   crossover_rate=cr,
                   initialization_max_tree_height=mH,
                   tournament_size=tSize,
                   max_time=tim)  # other parameters are optional
    _, _, _, runtime = sgp.Run(applyBackProp=False, iterationNum=i)

    # Log results
    with open(sgp.dirName + "/" + sgp.logName, "a") as fp:
        # Show the evolved function
        final_evolved_function = fitness_function.elite
        nodes_final_evolved_function = final_evolved_function.GetSubtree()
        fp.write('Function found (' + str(len(nodes_final_evolved_function)) +
                 ' nodes):\n\t' + str(nodes_final_evolved_function) + "\n")

        # Results for the training set
        fp.write('Training\n\tMSE:' +
                 str(np.round(final_evolved_function.fitness, 3)) +
                 '\n\tRsquared:' +
                 str(np.round(
                     1.0 - final_evolved_function.fitness / np.var(y_train),
                     3)) + "\n")

        # Re-evaluate the evolved function on the test set
        test_prediction = final_evolved_function.GetOutput(X_test)
        test_mse = np.mean(np.square(y_test - test_prediction))
        fp.write('Test:\n\tMSE:' + str(np.round(test_mse, 3)) +
                 '\n\tRsquared:' +
                 str(np.round(1.0 - test_mse / np.var(y_test), 3)) + "\n")
        fp.write(str(runtime) + "\n")
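# Minimal sketch of building the cross-validation experiments unpacked
# above, assuming scikit-learn's KFold; the GP settings tuple holds
# illustrative placeholder values, not values from the source.
from sklearn.model_selection import KFold

kf = KFold(n_splits=5, shuffle=True, random_state=42)
gp_settings = (100, 0.1, 0.5, 4, 4, 30)  # p, m, cr, mH, tSize, tim (assumed)
experiments = [((i, train_index, test_index), gp_settings, None)
               for i, (train_index, test_index) in enumerate(kf.split(X))]
for experiment in experiments:
    do_experiment(experiment)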
def run_with_population(pop_size):
    # Set functions and terminals
    functions = [AddNode(), SubNode(), MulNode(),
                 AnalyticQuotientNode()]  # chosen function nodes
    terminals = [EphemeralRandomConstantNode()]  # use one ephemeral random constant node

    # Run GP
    tuner = Tuner()
    sgp = SimpleGP(tuner=tuner,
                   functions=functions,
                   pop_size=pop_size,
                   max_generations=100)
    CrossValidation(sgp, terminals).validate()
def fit(self, X, y):
    # Check that X and y have correct shape
    X, y = check_X_y(X, y)
    self.X_ = X
    self.y_ = y

    fitness_function = SymbolicRegressionFitness(X, y, self.use_linear_scaling)

    terminals = []
    if self.use_erc:
        terminals.append(EphemeralRandomConstantNode())
    n_features = X.shape[1]
    for i in range(n_features):
        terminals.append(FeatureNode(i))

    sgp = SimpleGP(
        fitness_function,
        self.functions,
        terminals,
        pop_size=self.pop_size,
        max_generations=self.max_generations,
        max_time=self.max_time,
        max_evaluations=self.max_evaluations,
        crossover_rate=self.crossover_rate,
        mutation_rate=self.mutation_rate,
        min_height=self.min_height,
        initialization_max_tree_height=self.initialization_max_tree_height,
        max_tree_size=self.max_tree_size,
        max_features=self.max_features,
        tournament_size=self.tournament_size,
        verbose=self.verbose)
    sgp.Run()
    self.gp_ = sgp

    return self
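# Usage sketch for the scikit-learn-style fit() above. The wrapper class
# name SimpleGPRegressor is a hypothetical stand-in for whatever class owns
# fit(), assuming its constructor accepts the attributes fit() reads.
est = SimpleGPRegressor(pop_size=100, max_generations=100)
est.fit(X_train, y_train)
elite = est.gp_.fitness_function.elite  # best evolved expression
print(elite.GetSubtree())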
def run_with_range(range_settings):
    # Set functions and terminals
    functions = [AddNode(), SubNode(), MulNode(),
                 AnalyticQuotientNode()]  # chosen function nodes
    terminals = [EphemeralRandomConstantNode()]  # use one ephemeral random constant node

    # Run GP
    tuner = Tuner(scale_range=(range_settings[0], range_settings[1]),
                  translation_range=(range_settings[0], range_settings[1]),
                  run_generations=range(0, 20))
    sgp = SimpleGP(tuner=tuner,
                   functions=functions,
                   pop_size=100,
                   max_generations=20)
    CrossValidation(sgp, terminals).validate()
def run_in_gen(ls, run_gen):
    # Set functions and terminals
    functions = [AddNode(), SubNode(), MulNode(),
                 AnalyticQuotientNode()]  # chosen function nodes
    terminals = [EphemeralRandomConstantNode()]  # use one ephemeral random constant node

    # Run GP
    tuner = Tuner(scale_range=(-5, 5),
                  translation_range=(-5, 5),
                  run_generations=run_gen)
    sgp = SimpleGP(linear_scale=ls,
                   tuner=tuner,
                   functions=functions,
                   pop_size=100,
                   max_generations=100)
    CrossValidation(sgp, terminals).validate()
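# Minimal sketch of sweeping the three Tuner helpers defined above; the
# specific population sizes, range bounds, and generation sets are
# illustrative assumptions, not values from the source.
for pop_size in [50, 100, 200]:
    run_with_population(pop_size)
for bound in [1, 5, 10]:
    run_with_range((-bound, bound))
for ls in [True, False]:
    run_in_gen(ls, range(0, 100))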
                                                     random_state=seed_no)

# Set fitness function
fitness_function = SymbolicRegressionFitness(X_train, y_train)

# Set functions and terminals
functions = [AddNode(), SubNode(), MulNode(),
             AnalyticQuotientNode()]  # chosen function nodes
terminals = [EphemeralRandomConstantNode()]  # use one ephemeral random constant node
for i in range(X.shape[1]):
    terminals.append(FeatureNode(i))  # add a feature node for each feature

# Run GP
sgp = SimpleGP(fitness_function, functions, terminals)  # other parameters are optional
spreadsheet_string = sgp.Run()

# Print results

# Show the evolved function
final_evolved_function = fitness_function.elite
nodes_final_evolved_function = final_evolved_function.GetSubtree()
print('Function found (', len(nodes_final_evolved_function), 'nodes):\n\t',
      nodes_final_evolved_function)  # this is in Polish notation

# Print results for training set
training_MSE = np.round(final_evolved_function.fitness, 3)
training_Rsquared = np.round(
    1.0 - final_evolved_function.fitness / np.var(y_train), 3)
print('Training\n\tMSE:', training_MSE, '\n\tRsquared:', training_Rsquared)

# Re-evaluate the evolved function on the test set
populationSizes = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]
mutationRates = [0, 0.001, 0.01, 0.1]
crossoverRates = [0.1, 0.25, 0.5, 0.75, 1]
maxHeights = [2, 4, 8]
tourSize = [2, 4, 8]
# maxNumEval = [5000, 10000]
maxTime = [5, 10, 15, 20, 25, 30]
numRep = 10  # number of repetitions

# Set fitness function
fitness_function = SymbolicRegressionFitness(X_train, y_train)

# Run GP
backprop_function = Backpropagation(X_train,
                                    y_train,
                                    iters=3,
                                    learning_rate=0.5,
                                    decayFunction=Backpropagation.NoDecay,
                                    override_iterations=50)
sgp = SimpleGP(fitness_function,
               backprop_function,
               functions,
               terminals,
               pop_size=250,
               max_time=30,
               backprop_selection_ratio=1,
               backprop_every_generations=1)  # other parameters are optional
sgp.Run(applyBackProp=True)

# Print results

# Show the evolved function
final_evolved_function = fitness_function.elite
nodes_final_evolved_function = final_evolved_function.GetSubtree()
print('Function found (', len(nodes_final_evolved_function), 'nodes):\n\t',
      nodes_final_evolved_function)  # this is in Polish notation

# Print results for training set
print('Training\n\tMSE:', np.round(final_evolved_function.fitness, 3),
      '\n\tRsquared:',
      np.round(1.0 - final_evolved_function.fitness / np.var(y_train), 3))

# Re-evaluate the evolved function on the test set
test_prediction = final_evolved_function.GetOutput(X_test)
test_mse = np.mean(np.square(y_test - test_prediction))
print('Test:\n\tMSE:', np.round(test_mse, 3), '\n\tRsquared:',
      np.round(1.0 - test_mse / np.var(y_test), 3))
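# The hyper-parameter lists above suggest a grid search, although the run
# shown uses a single fixed setting. A minimal sketch of iterating the grid
# with numRep repetitions per setting; how each value feeds SimpleGP is an
# assumption based on the constructor calls elsewhere in this file.
from itertools import product

for p, m, cr, mH, tSize, tim in product(populationSizes, mutationRates,
                                        crossoverRates, maxHeights,
                                        tourSize, maxTime):
    for rep in range(numRep):
        fitness_function = SymbolicRegressionFitness(X_train, y_train)
        sgp = SimpleGP(fitness_function, backprop_function, functions,
                       terminals, pop_size=p, mutation_rate=m,
                       crossover_rate=cr, initialization_max_tree_height=mH,
                       tournament_size=tSize, max_time=tim)
        sgp.Run(applyBackProp=True)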
print(f"Running experiment {counter}/{total_experiments} with lr={lr}, iters={steps}")
for i in range(10):  # Run each experiment 10 times, because of stochasticity
    print(f"Running tests {i + 1}/10")
    backprop_function = Backpropagation(X_train,
                                        y_train,
                                        iters=steps,
                                        learning_rate=lr)
    sgp = SimpleGP(fitness_function,
                   backprop_function,
                   functions,
                   terminals,
                   pop_size=pop_size,
                   max_generations=100,
                   mutation_rate=mut_rate,
                   crossover_rate=cross_rate,
                   initialization_max_tree_height=max_height,
                   max_time=max_time,
                   tournament_size=tour_size)
    _, _, _, runtime = sgp.Run(applyBackProp=True)

    # Log results
    final_evolved_function = fitness_function.elite  # best individual of this run
    nodes_final_evolved_function = final_evolved_function.GetSubtree()
    test_prediction = final_evolved_function.GetOutput(X_test)
    train_mse = final_evolved_function.fitness
    test_mse = np.mean(np.square(y_test - test_prediction))
    evals = fitness_function.evaluations
fitness_function = SymbolicRegressionFitness(X_train, y_train)

# Set functions and terminals
functions = [AddNode(), SubNode(), MulNode(),
             AnalyticQuotientNode()]  # chosen function nodes
terminals = [EphemeralRandomConstantNode()]  # use one ephemeral random constant node
for i in range(X.shape[1]):
    terminals.append(FeatureNode(i))  # add a feature node for each feature

# Run GP
sgp = SimpleGP(fitness_function,
               functions,
               terminals,
               pop_size=100,
               max_generations=100)  # other parameters are optional
sgp.Run()

# Print results

# Show the evolved function
final_evolved_function = fitness_function.elite
nodes_final_evolved_function = final_evolved_function.GetSubtree()
print('Function found (', len(nodes_final_evolved_function), 'nodes):\n\t',
      nodes_final_evolved_function)  # this is in Polish notation

# Print results for training set
print('Training\n\tMSE:', np.round(final_evolved_function.fitness, 3),
      '\n\tRsquared:',
      np.round(1.0 - final_evolved_function.fitness / np.var(y_train), 3))

# Re-evaluate the evolved function on the test set
             SubNode(),
             MulNode(),
             AnalyticQuotientNode()]  # chosen function nodes
terminals = [EphemeralRandomConstantNode()]  # use one ephemeral random constant node
for i in range(X.shape[1]):
    terminals.append(FeatureNode(i))  # add a feature node for each feature

# Run GP
sgp = SimpleGP(
    fitness_function,
    functions,
    terminals,
    pop_size=GP_POP_SIZE,
    max_generations=GP_MAX_GENERATIONS,
    crossover_rate=GP_CROSSOVER_RATE,
    mutation_rate=GP_MUTATION_RATE,
    weight_tuning_individual_rate=WEIGHT_TUNING_INDIVIDUAL_RATE,
    weight_tuning_generation_rate=WEIGHT_TUNING_GENERATION_RATE,
    weight_tuning_max_generations=WEIGHT_TUNING_MAX_GENERATIONS,
    real_pop_size=REAL_POP_SIZE,
    real_crossover_rate=REAL_CROSSOVER_RATE,
    real_mutation_rate=REAL_MUTATION_RATE)  # other parameters are optional
sgp.Run()

# Print results

# Show the evolved function
final_evolved_function = fitness_function.elite
nodes_final_evolved_function = final_evolved_function.GetSubtree()
print('Function found (', len(nodes_final_evolved_function), 'nodes):\n\t',
      final_evolved_function)  # this is in Polish notation
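# The ALL_CAPS settings above are module-level constants defined outside
# this excerpt. Illustrative values (assumptions, not from the source):
GP_POP_SIZE = 100
GP_MAX_GENERATIONS = 100
GP_CROSSOVER_RATE = 0.5
GP_MUTATION_RATE = 0.1
WEIGHT_TUNING_INDIVIDUAL_RATE = 0.1
WEIGHT_TUNING_GENERATION_RATE = 1
WEIGHT_TUNING_MAX_GENERATIONS = 10
REAL_POP_SIZE = 20
REAL_CROSSOVER_RATE = 0.5
REAL_MUTATION_RATE = 0.1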