Example 1
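A per-seed experiment driver: it seeds NumPy's and Python's RNGs, builds the data loaders, evolves a loss function with SimpleGP, pickles the elite individual, and hands it to ResultsPipeline.evaluate_elite.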
def single_run_algorithm(run_args):

    # Set seeds
    np.random.seed(run_args["seed"])
    random.seed(run_args["seed"])

    runid = args.id + '-' + args.dataset + '-' + str(
        run_args["seed"]) + '-' + datetime.now().strftime("%y%m%d_%H%M")

    gp_dataloaders, test_dataloaders, input_size = DataPipeline.preprocess_data(
        X, y, run_args["seed"])

    # Hyper-parameters
    long_params = {
        "hidden_size": 100,
        "num_epochs": 100,
        "input_size": input_size,
        "learning_rate": 1e-2
    }

    short_params = {
        "hidden_size": 100,
        "num_epochs": 10,
        "input_size": input_size,
        "learning_rate": 1e-2
    }

    fitness_func = LossFunctionEvoFitness(gp_dataloaders,
                                          test_dataloaders,
                                          short_params,
                                          long_params,
                                          seed=run_args["seed"])

    sgp = SimpleGP(fitness_func,
                   functions,
                   terminals,
                   heuristics,
                   pop_size=args.pop,
                   max_generations=args.gen,
                   crossover_rate=0.33,
                   mutation_rate=0.33,
                   op_mutation_rate=0.33,
                   initialization_max_tree_height=4,
                   runid=runid,
                   seed=run_args["seed"],
                   tournament_size=7,
                   max_tree_size=31,
                   verbose=True)

    sgp.Run()

    elite = sgp.fitness_function.elite
    budget = sgp.fitness_function.evaluations
    with open('logs/elites/' + runid, 'wb') as f:
        pickle.dump(elite, f)

    ResultsPipeline.evaluate_elite(elite, runid, gp_dataloaders,
                                   test_dataloaders, long_params,
                                   run_args["seed"], budget, device)
Example 2
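A grid-search worker: each experiment tuple carries one hyper-parameter configuration, SimpleGP is run with backpropagation-based coefficient refinement enabled, and the train/test MSE and R-squared of the elite are appended to the run's log file.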
def do_experiment(experiment):
    i, (p, m, cr, mH, tSize,
        tim), (lr, initB, u, g, bIter,
               fG), X_train, X_test, y_train, y_test = experiment
    # Set fitness function
    fitness_function = SymbolicRegressionFitness(X_train, y_train)
    # Run GP
    backprop_function = Backpropagation(X_train,
                                        y_train,
                                        iters=bIter,
                                        learning_rate=lr,
                                        decayFunction=Backpropagation.NoDecay)
    sgp = SimpleGP(fitness_function,
                   backprop_function,
                   functions,
                   terminals,
                   pop_size=p,
                   mutation_rate=m,
                   crossover_rate=cr,
                   initialization_max_tree_height=mH,
                   tournament_size=tSize,
                   max_time=tim,
                   uniform_k=u,
                   backprop_selection_ratio=1,
                   backprop_every_generations=g,
                   initialBackprop=initB,
                   first_generations=fG)
    _, _, _, runtime = sgp.Run(applyBackProp=True,
                               iterationNum=i,
                               dirName=dir_name)

    # Print results
    with open(sgp.dirName + "/" + sgp.logName, "a") as fp:

        # Show the evolved function
        final_evolved_function = fitness_function.elite
        nodes_final_evolved_function = final_evolved_function.GetSubtree()
        fp.write('Function found (' + str(len(nodes_final_evolved_function)) +
                 ' nodes):\n\t' + str(nodes_final_evolved_function) + "\n")
        # Print results for training set
        fp.write('Training\n\tMSE: ' +
                 str(np.round(final_evolved_function.fitness, 3)) +
                 '\n\tRsquared: ' +
                 str(np.round(1.0 - final_evolved_function.fitness /
                              np.var(y_train), 3)) + "\n")
        # Re-evaluate the evolved function on the test set
        test_prediction = final_evolved_function.GetOutput(X_test)
        test_mse = np.mean(np.square(y_test - test_prediction))
        fp.write('Test:\n\tMSE: ' + str(np.round(test_mse, 3)) +
                 '\n\tRsquared: ' +
                 str(np.round(1.0 - test_mse / np.var(y_test), 3)) + "\n")
        fp.write(str(runtime))
Example 3
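A cross-validation worker: the train and test folds are sliced from precomputed indices, SimpleGP is run with backpropagation disabled, and the same train/test metrics are logged.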
def do_experiment(experiment):
    (i, train_index, test_index), (p, m, cr, mH, tSize, tim), _ = experiment
    # Cross validation
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # Set fitness function
    fitness_function = SymbolicRegressionFitness(X_train, y_train)
    # Run GP
    backprop_function = Backpropagation(X_train,
                                        y_train,
                                        iters=5,
                                        learning_rate=0.001,
                                        decayFunction=Backpropagation.NoDecay)
    sgp = SimpleGP(fitness_function,
                   backprop_function,
                   functions,
                   terminals,
                   pop_size=p,
                   mutation_rate=m,
                   crossover_rate=cr,
                   initialization_max_tree_height=mH,
                   tournament_size=tSize,
                   max_time=tim)  # other parameters are optional
    _, _, _, runtime = sgp.Run(applyBackProp=False, iterationNum=i)

    # Print results
    with open(sgp.dirName + "/" + sgp.logName, "a") as fp:

        # Show the evolved function
        final_evolved_function = fitness_function.elite
        nodes_final_evolved_function = final_evolved_function.GetSubtree()
        fp.write('Function found (' + str(len(nodes_final_evolved_function)) +
                 ' nodes):\n\t' + str(nodes_final_evolved_function) + "\n")
        # Print results for training set
        fp.write('Training\n\tMSE: ' +
                 str(np.round(final_evolved_function.fitness, 3)) +
                 '\n\tRsquared: ' +
                 str(np.round(1.0 - final_evolved_function.fitness /
                              np.var(y_train), 3)) + "\n")
        # Re-evaluate the evolved function on the test set
        test_prediction = final_evolved_function.GetOutput(X_test)
        test_mse = np.mean(np.square(y_test - test_prediction))
        fp.write('Test:\n\tMSE: ' + str(np.round(test_mse, 3)) +
                 '\n\tRsquared: ' +
                 str(np.round(1.0 - test_mse / np.var(y_test), 3)) + "\n")
        fp.write(str(runtime))
Example 4
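A small helper that varies only the population size and delegates evaluation to a CrossValidation wrapper around SimpleGP with a default Tuner.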
def run_with_population(pop_size):
    # Set functions and terminals
    functions = [AddNode(), SubNode(), MulNode(), AnalyticQuotientNode()]  # chosen function nodes
    terminals = [EphemeralRandomConstantNode()]  # use one ephemeral random constant node

    # Run GP
    tuner = Tuner()
    sgp = SimpleGP(tuner=tuner, functions=functions, pop_size=pop_size, max_generations=100)

    CrossValidation(sgp, terminals).validate()
Example 5
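A scikit-learn-style fit method: it validates X and y, assembles the terminal set (optionally an ephemeral random constant plus one FeatureNode per feature), runs SimpleGP, and stores the fitted run on the estimator.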
    def fit(self, X, y):

        # Check that X and y have correct shape
        X, y = check_X_y(X, y)
        self.X_ = X
        self.y_ = y

        fitness_function = SymbolicRegressionFitness(X, y,
                                                     self.use_linear_scaling)

        terminals = []
        if self.use_erc:
            terminals.append(EphemeralRandomConstantNode())
        n_features = X.shape[1]
        for i in range(n_features):
            terminals.append(FeatureNode(i))

        sgp = SimpleGP(
            fitness_function,
            self.functions,
            terminals,
            pop_size=self.pop_size,
            max_generations=self.max_generations,
            max_time=self.max_time,
            max_evaluations=self.max_evaluations,
            crossover_rate=self.crossover_rate,
            mutation_rate=self.mutation_rate,
            min_height=self.min_height,
            initialization_max_tree_height=self.initialization_max_tree_height,
            max_tree_size=self.max_tree_size,
            max_features=self.max_features,
            tournament_size=self.tournament_size,
            verbose=self.verbose)

        sgp.Run()

        self.gp_ = sgp

        return self
Example 6
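Like Example 4, but sweeps the Tuner's scale and translation ranges, with tuning active in every one of the 20 generations.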
def run_with_range(range_settings):
    # Set functions and terminals
    functions = [AddNode(), SubNode(), MulNode(), AnalyticQuotientNode()]  # chosen function nodes
    terminals = [EphemeralRandomConstantNode()]  # use one ephemeral random constant node

    # Run GP
    tuner = Tuner(
        scale_range=(range_settings[0], range_settings[1]),
        translation_range=(range_settings[0], range_settings[1]),
        run_generations=range(0, 20)
    )
    sgp = SimpleGP(tuner=tuner, functions=functions, pop_size=100, max_generations=20)

    CrossValidation(sgp, terminals).validate()
Example 7
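A variant of the previous helpers that toggles linear scaling and restricts the Tuner to a caller-supplied set of generations.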
def run_in_gen(ls, run_gen):
    # Set functions and terminals
    functions = [AddNode(), SubNode(), MulNode(), AnalyticQuotientNode()]  # chosen function nodes
    terminals = [EphemeralRandomConstantNode()]  # use one ephemeral random constant node

    # Run GP
    tuner = Tuner(
        scale_range=(-5, 5),
        translation_range=(-5, 5),
        run_generations=run_gen
    )
    sgp = SimpleGP(
        linear_scale=ls,
        tuner=tuner,
        functions=functions,
        pop_size=100,
        max_generations=100
    )

    CrossValidation(sgp, terminals).validate()
Example 8
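An excerpt that begins mid-way through a train/test split: it builds the function and terminal sets, runs SimpleGP with default parameters, and prints the training metrics of the elite.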
                                                        random_state=seed_no)
    # Set fitness function
    fitness_function = SymbolicRegressionFitness(X_train, y_train)

    # Set functions and terminals
    functions = [AddNode(),
                 SubNode(),
                 MulNode(),
                 AnalyticQuotientNode()]  # chosen function nodes
    terminals = [EphemeralRandomConstantNode()]  # use one ephemeral random constant node
    for i in range(X.shape[1]):
        terminals.append(FeatureNode(i))  # add a feature node for each feature

    # Run GP
    sgp = SimpleGP(fitness_function, functions,
                   terminals)  # other parameters are optional
    spreadsheet_string = sgp.Run()

    # Print results
    # Show the evolved function
    final_evolved_function = fitness_function.elite
    nodes_final_evolved_function = final_evolved_function.GetSubtree()
    print('Function found (', len(nodes_final_evolved_function),
          'nodes ):\n\t',
          nodes_final_evolved_function)  # this is in Polish notation
    # Print results for training set
    training_MSE = np.round(final_evolved_function.fitness, 3)
    training_Rsquared = np.round(
        1.0 - final_evolved_function.fitness / np.var(y_train), 3)
    print('Training\n\tMSE:', training_MSE, '\n\tRsquared:', training_Rsquared)
    # Re-evaluate the evolved function on the test set
    test_prediction = final_evolved_function.GetOutput(X_test)
    test_mse = np.mean(np.square(y_test - test_prediction))
    print('Test:\n\tMSE:', np.round(test_mse, 3),
          '\n\tRsquared:', np.round(1.0 - test_mse / np.var(y_test), 3))
Example 9
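A hyper-parameter grid followed by a single SimpleGP run with aggressive backpropagation (every generation, on the whole population) and printed train/test metrics.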
populationSizes = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]
mutationRates = [0, 0.001, 0.01, 0.1]
crossoverRates = [0.1, 0.25, 0.5, 0.75, 1]
maxHeights = [2, 4, 8]
tourSize = [2, 4, 8]
#maxNumEval = [5000, 10000]
maxTime = [5, 10, 15, 20, 25, 30]
numRep = 10 # number of repetitions


# Set fitness function
fitness_function = SymbolicRegressionFitness(X_train, y_train)
# Run GP
backprop_function = Backpropagation(X_train,
                                    y_train,
                                    iters=3,
                                    learning_rate=0.5,
                                    decayFunction=Backpropagation.NoDecay,
                                    override_iterations=50)
sgp = SimpleGP(fitness_function,
               backprop_function,
               functions,
               terminals,
               pop_size=250,
               max_time=30,
               backprop_selection_ratio=1,
               backprop_every_generations=1)  # other parameters are optional
sgp.Run(applyBackProp=True)

# Print results
# Show the evolved function
final_evolved_function = fitness_function.elite
nodes_final_evolved_function = final_evolved_function.GetSubtree()
print('Function found (', len(nodes_final_evolved_function), 'nodes ):\n\t',
      nodes_final_evolved_function)  # this is in Polish notation
# Print results for training set
print('Training\n\tMSE:', np.round(final_evolved_function.fitness, 3),
      '\n\tRsquared:',
      np.round(1.0 - final_evolved_function.fitness / np.var(y_train), 3))
# Re-evaluate the evolved function on the test set
test_prediction = final_evolved_function.GetOutput(X_test)
test_mse = np.mean(np.square(y_test - test_prediction))
print('Test:\n\tMSE:', np.round(test_mse, 3),
      '\n\tRsquared:', np.round(1.0 - test_mse / np.var(y_test), 3))
Example 10
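The inner loop of a learning-rate/iteration sweep: each configuration is repeated 10 times to average out stochasticity, and the train/test MSE and evaluation budget of the elite are collected.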
        print(
            f"Running experiment {counter}/{total_experiments} with lr={lr}, iters={steps}"
        )
        for i in range(
                10):  # Run each experiment 10 times, because of stochasticity
            print(f"Running tests {i+1}/10")
            backprop_function = Backpropagation(X_train,
                                                y_train,
                                                iters=steps,
                                                learning_rate=lr)
            sgp = SimpleGP(fitness_function,
                           backprop_function,
                           functions,
                           terminals,
                           pop_size=pop_size,
                           max_generations=100,
                           mutation_rate=mut_rate,
                           crossover_rate=cross_rate,
                           initialization_max_tree_height=max_height,
                           max_time=max_time,
                           tournament_size=tour_size)
            _, _, _, runtime = sgp.Run(applyBackProp=True)

            # Log results
            final_evolved_function = fitness_function.elite
            nodes_final_evolved_function = final_evolved_function.GetSubtree()
            test_prediction = final_evolved_function.GetOutput(X_test)

            train_mse = final_evolved_function.fitness
            test_mse = np.mean(np.square(y_test - test_prediction))
            evals = fitness_function.evaluations
Example 11
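The canonical usage pattern: build the fitness function, function set, and terminal set, run SimpleGP for 100 generations with a population of 100, and print the elite (in Polish notation) along with its train/test metrics.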
fitness_function = SymbolicRegressionFitness(X_train, y_train)

# Set functions and terminals
functions = [AddNode(),
             SubNode(),
             MulNode(),
             AnalyticQuotientNode()]  # chosen function nodes
terminals = [EphemeralRandomConstantNode()]  # use one ephemeral random constant node
for i in range(X.shape[1]):
    terminals.append(FeatureNode(i))  # add a feature node for each feature

# Run GP
sgp = SimpleGP(fitness_function,
               functions,
               terminals,
               pop_size=100,
               max_generations=100)  # other parameters are optional
sgp.Run()

# Print results
# Show the evolved function
final_evolved_function = fitness_function.elite
nodes_final_evolved_function = final_evolved_function.GetSubtree()
print('Function found (', len(nodes_final_evolved_function), 'nodes ):\n\t',
      nodes_final_evolved_function)  # this is in Polish notation
# Print results for training set
print('Training\n\tMSE:', np.round(final_evolved_function.fitness, 3),
      '\n\tRsquared:',
      np.round(1.0 - final_evolved_function.fitness / np.var(y_train), 3))
# Re-evaluate the evolved function on the test set
test_prediction = final_evolved_function.GetOutput(X_test)
test_mse = np.mean(np.square(y_test - test_prediction))
print('Test:\n\tMSE:', np.round(test_mse, 3),
      '\n\tRsquared:', np.round(1.0 - test_mse / np.var(y_test), 3))
Example 12
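A weight-tuning variant (the excerpt starts mid-way through the function list): real-valued population parameters for coefficient tuning are passed alongside the standard GP settings.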
                 SubNode(),
                 MulNode(),
                 AnalyticQuotientNode()]  # chosen function nodes
    terminals = [EphemeralRandomConstantNode()]  # use one ephemeral random constant node
    for i in range(X.shape[1]):
        terminals.append(FeatureNode(i))  # add a feature node for each feature

    # Run GP
    sgp = SimpleGP(
        fitness_function,
        functions,
        terminals,
        pop_size=GP_POP_SIZE,
        max_generations=GP_MAX_GENERATIONS,
        crossover_rate=GP_CROSSOVER_RATE,
        mutation_rate=GP_MUTATION_RATE,
        weight_tuning_individual_rate=WEIGHT_TUNING_INDIVIDUAL_RATE,
        weight_tuning_generation_rate=WEIGHT_TUNING_GENERATION_RATE,
        weight_tuning_max_generations=WEIGHT_TUNING_MAX_GENERATIONS,
        real_pop_size=REAL_POP_SIZE,
        real_crossover_rate=REAL_CROSSOVER_RATE,
        real_mutation_rate=REAL_MUTATION_RATE)  # other parameters are optional
    sgp.Run()

    # Print results
    # Show the evolved function
    final_evolved_function = fitness_function.elite
    nodes_final_evolved_function = final_evolved_function.GetSubtree()
    print('Function found (', len(nodes_final_evolved_function),
          'nodes ):\n\t', final_evolved_function)  # this is in Polish notation