from pybrain.optimization import HillClimber

# NeuralNetLearner is assumed to be defined elsewhere in the project; it builds the
# network and provides the PyBrain datasets used below.


class HillClimbingOptimizer():

    def learn_nnet(self, num_restarts):
        self.learner = NeuralNetLearner()
        self.neural_net = self.learner.network
        self.dataset = self.learner.ds
        self.training_set, self.testing_set = self.learner.get_datasets()

        # Optimizer will take 2000 steps and restart, saving the best model from the restarts
        self.optimizer = HillClimber(self.training_set.evaluateModuleMSE, self.neural_net,
                                     minimize=True, verbose=True, numParameters=661,
                                     maxLearningSteps=2000, storeAllEvaluations=True)

        # Save best model and lowest MSE for random restarting
        best_model = self.neural_net
        min_MSE = float('inf')  # sentinel: any real MSE will be lower
        for i in range(num_restarts):
            temp, best_estimate = self.optimizer.learn()

            with open('out/nnet_hc_evaluations.csv', 'a') as nnet_hc_evaluations_file:
                for item in self.optimizer._allEvaluations:
                    nnet_hc_evaluations_file.write("%s\n" % item)
                nnet_hc_evaluations_file.write("Restart %d\n" % i)

            # Re-create the optimizer for the next restart
            self.optimizer = HillClimber(self.training_set.evaluateModuleMSE, self.neural_net,
                                         minimize=True, verbose=True, numParameters=661,
                                         maxLearningSteps=1000, storeAllEvaluations=True)
            if best_estimate <= min_MSE:
                best_model = temp
                min_MSE = best_estimate

        self.neural_net = best_model
        return best_model

    def learn_optimizationproblem(self, num_restarts, problem, fitness_function, minimize=False):
        # Optimizer will take 250 steps and restart, saving the best model from the restarts
        self.optimizer = HillClimber(fitness_function, problem, verbose=True,
                                     maxLearningSteps=250, minimize=minimize,
                                     storeAllEvaluations=True)

        best_model = problem
        max_fitness = -float('inf')
        for i in range(num_restarts):
            print("Restart", i)
            temp, best_estimate = self.optimizer.learn()

            out_name = 'out/opt_hc_evaluations_' + problem.__class__.__name__ + '.csv'
            with open(out_name, 'a') as opt_hc_evaluations_file:
                for item in self.optimizer._allEvaluations:
                    opt_hc_evaluations_file.write("%s\n" % item)
                opt_hc_evaluations_file.write("Restart %d\n" % i)

            # Re-create the optimizer for the next restart (keep the caller's minimize setting)
            self.optimizer = HillClimber(fitness_function, problem, verbose=True,
                                         maxLearningSteps=250, minimize=minimize,
                                         storeAllEvaluations=True)
            if best_estimate >= max_fitness:
                best_model = temp
                max_fitness = best_estimate

        return best_model
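# A minimal usage sketch for the class above (hypothetical, not from the original
# source): both methods drive PyBrain's HillClimber with random restarts and
# return the best evaluable found across all restarts.
#
#   optimizer = HillClimbingOptimizer()
#   best_net = optimizer.learn_nnet(num_restarts=5)
#   best_solution = optimizer.learn_optimizationproblem(
#       num_restarts=5, problem=some_evolvable, fitness_function=some_fitness)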
import sys

from pybrain.optimization import HillClimber

# Universe is assumed to be defined elsewhere in the project; it runs the
# simulation that scores a candidate "faller".


def generate(start):
    u = Universe()

    def fitness(f):
        return u.do_simulation(f, False)

    l = HillClimber(fitness, start, maxEvaluations=int(sys.argv[1]))
    best, fitn = l.learn()
    print(f"fitness: {fitn}")
    #u.do_simulation(u.bestfaller, noStop=False)
    u.record.finish()
from random import random
from pybrain.structure.evolvables.evolvable import Evolvable
from pybrain.optimization import HillClimber


class SimpleEvo(Evolvable):
    def __init__(self, x):
        self.x = max(0, min(x, 20))

    def mutate(self):
        self.x = max(0, min(self.x + random() - 0.3, 20))

    def copy(self):
        return SimpleEvo(self.x)

    def randomize(self):
        self.x = 20 * random()

    def __repr__(self):
        return '<-%.2f->' % (self.x)


if __name__ == "__main__":
    x0 = SimpleEvo(1.2)
    l = HillClimber(lambda x: x.x, x0, maxEvaluations=500)
    result = l.learn()
    print result
from pybrain.optimization import HillClimber, CMAES #@UnusedImport
# from pybrain.rl.learners.continuous.policygradients import ENAC
# from pybrain.rl.agents.learning import LearningAgent
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment
# imports for the task and network builder (not in the original fragment):
from pybrain.rl.environments.cartpole.balancetask import BalanceTask
from pybrain.tools.shortcuts import buildNetwork

# any episodic task
task = BalanceTask()
# any neural network controller
net = buildNetwork(task.outdim, 1, task.indim)
# any optimization algorithm to be plugged in, for example:
# learner = CMAES(storeAllEvaluations = True)
# or:
learner = HillClimber(storeAllEvaluations=True)

# in a non-optimization case the agent would be a LearningAgent:
# agent = LearningAgent(net, ENAC())
# here it is an OptimizationAgent:
agent = OptimizationAgent(net, learner)

# the agent and task are linked in an Experiment
# and everything else happens under the hood.
exp = EpisodicExperiment(task, agent)
exp.doEpisodes(100)

print('Episodes learned from:', len(learner._allEvaluations))
n, fit = learner._bestFound()
print('Best fitness found:', fit)
print('with this network:')
print(n)
import numpy as np

from pybrain.optimization import HillClimber

# checknn, nnet, game, filename and NetworkWriter (pybrain.tools.customxml) are
# defined elsewhere in the original file; the lines below are the tail of the
# checknn fitness function, which scores a 2048 game played by the network.
    error = np.log2(2048 - game.max_block)
    print 'score', game.score
    print 'max block', game.max_block
    print 'error', error
    print
    return error

hc_params = []
for m in nnet.connections.values():
    for c in m:
        hc_params.extend(c.params) #extend concatenates 2 arrays

opt = HillClimber(checknn, hc_params)
opt.minimize = True
opt.maxEvaluations = 100000
opt.learn()
NetworkWriter.writeToFile(nnet, filename)

# r = 15
# xvalues = np.arange(-r, r, 0.1)
# yvalues = [nnet.activate([x]) for x in xvalues]
#
# plot.figure(0)
# plot.plot(xvalues, yvalues)
# xvalues = np.arange(-r, r, 0.1)
# yvalues = [f(x) for x in xvalues]
# plot.plot(xvalues, yvalues)
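# Note: the snippet above flattens the connection weights into hc_params and lets
# HillClimber mutate that parameter array directly. Other snippets in this
# collection instead pass the network module itself as the evaluable, together
# with an evaluator that scores a module (e.g. a task, or a dataset method such
# as evaluateModuleMSE); both styles are supported by PyBrain's optimizers.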
from pybrain.rl.environments.cartpole.balancetask import BalanceTask
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.optimization import HillClimber

task = BalanceTask()
net = buildNetwork(task.outdim, 3, task.indim)
HillClimber(task, net, maxEvaluations=100).learn()

agent = OptimizationAgent(net, HillClimber())
exp = EpisodicExperiment(task, agent)
print(exp.doEpisodes(100))
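# Note: HillClimber() is constructed here with default arguments; to keep the
# per-episode fitness values for later inspection, construct it as
# HillClimber(storeAllEvaluations=True) and read learner._allEvaluations after
# the episodes, as the surrounding examples do.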
from random import random
from pybrain.structure.evolvables.evolvable import Evolvable
from pybrain.optimization import HillClimber


class SimpleEvo(Evolvable):
    def __init__(self, x):
        self.x = max(0, min(x, 10))

    def mutate(self):
        self.x = max(0, min(self.x + random() - 0.3, 10))

    def copy(self):
        return SimpleEvo(self.x)

    def randomize(self):
        self.x = 10 * random()

    def __repr__(self):
        # use %-formatting so the value is actually substituted into the string
        return "<-%.2f->" % self.x


x0 = SimpleEvo(5)
l = HillClimber(lambda x: x.x, x0, maxEvaluations=50)
print l.learn()
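# learn() returns a (best_evaluable, best_fitness) tuple, so the result can also
# be unpacked explicitly, e.g.:
#   best, best_fitness = l.learn()
#   print best, best_fitness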
import random
from operator import itemgetter

from pybrain.optimization import HillClimber

# PolyEvolve and nCoeffs are assumed to be defined earlier in this script;
# PolyEvolve is an Evolvable polynomial with eval() and fitness() methods.

target = PolyEvolve()
target.randomize(n=nCoeffs)

func = lambda x: 5 * (x ** 3) + -3 * (x ** 2) + 1 * (x ** 1) + 1 * (x ** 0)
point = lambda x: (x, func(x))
data = sorted([point(random.random() * 200 - 100) for x in xrange(25)], key=itemgetter(0))

seed = PolyEvolve()
seed.randomize(n=nCoeffs)

maxIters = 10000
L = HillClimber(lambda x: x.fitness(data), seed, maxEvaluations=maxIters)
result, fitness = L.learn()

fmt = '{:>12}{:>24}{:>24}'
s = fmt.format('X', 'Y', 'V')
fmt = '{:>12.2f}{:>24.2f}{:>24.2f}'
print '-' * len(s)
print s
print '-' * len(s)
for x, y in data:
    v = result.eval(x)
    print fmt.format(x, y, v)
print '-' * len(s)
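# The table printed above compares, for each sampled X, the true polynomial
# value Y with the value V produced by the hill-climbed PolyEvolve candidate.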
def run_experiment():
    # Create the controller network
    HIDDEN_NODES = 4

    RUNS = 2
    BATCHES = 1
    PRINTS = 1
    EPISODES = 500

    env = None
    start_state_net = None
    run_results = []

    # Set up plotting tools for the experiments
    tools = ExTools(BATCHES, PRINTS)

    # Run the experiment
    for run in range(RUNS):
        if run == 0:
            continue

        # If an environment already exists, shut it down
        if env:
            env.closeSocket()

        # Create the environment
        env = create_environment()

        # Create the task
        task = Pa10MovementTask(env)

        # Create the neural network. Only create the network once so it retains
        # the same starting values for each run.
        if start_state_net:
            net = start_state_net.copy()
        else:
            # Create the initial neural network
            net = create_network(
                in_nodes=env.obsLen,
                hidden_nodes=HIDDEN_NODES,
                out_nodes=env.actLen
            )
            start_state_net = net.copy()

        # Create the learning agent
        learner = HillClimber(storeAllEvaluations=True)
        agent = OptimizationAgent(net, learner)
        tools.agent = agent

        # Create the experiment
        experiment = EpisodicExperiment(task, agent)

        # Perform all episodes in the run
        for episode in range(EPISODES):
            experiment.doEpisodes(BATCHES)

        # Calculate results
        all_results = agent.learner._allEvaluations
        max_result = np.max(all_results)
        min_result = np.min(all_results)
        avg_result = np.sum(all_results) / len(all_results)
        run_results.append((run, max_result, min_result, avg_result))

        # Make the results directory if it does not exist
        if not os.path.exists(G_RESULTS_DIR):
            os.mkdir(G_RESULTS_DIR)

        # Write all results to the results file
        with open(os.path.join(G_RESULTS_DIR, 'run_%d.txt' % run), 'w+') as f:
            # Store the calculated max, min, avg
            f.write('RUN, MAX, MIN, AVG\n')
            f.write('%d, %f, %f, %f\n' % (run, max_result, min_result, avg_result))

            # Store all results from this run
            f.write('EPISODE, REWARD\n')
            for episode, result in enumerate(all_results):
                f.write('%d, %f\n' % (episode, result))

    return
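# Usage sketch (hypothetical, not from the original source): the experiment is
# started simply by calling run_experiment(); each run writes its summary and
# per-episode rewards to run_<n>.txt under G_RESULTS_DIR.
#   if __name__ == '__main__':
#       run_experiment()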
from pybrain.tools.shortcuts import buildNetwork
from pybrain.optimization import HillClimber

# ChemotaxisTask, ChemotaxisEnv and MAX_TRIALS are assumed to be defined
# elsewhere in this script.

MAX_STEPS = 1200

#all_evals = []
#reward_avgs = []   # keep track of the average fitness per trial
#reward_maxes = []  # keep track of the maximum fitness per trial
#movement = []      # keep track of the seeker's movement
#food_loc = []      # keep track of the location of the food

# pybrain initialization
task = ChemotaxisTask(ChemotaxisEnv(), MAX_STEPS)
module = buildNetwork(2, 2, 2)  # create a feed-forward neural network with 3 layers: 2 input neurons, 2 hidden neurons, and 2 output neurons

#learner = HillClimber(minimize=True, storeAllEvaluations=True, verbose=False)
#agent = OptimizationAgent(module, learner)
#exp = EpisodicExperiment(task, agent)
#exp.doEpisodes(MAX_TRIALS)

learner = HillClimber(task, module, maxEvaluations=MAX_TRIALS, mustMinimize=True,
                      storeAllEvaluations=True, storeAllEvaluated=True, verbose=False)
learner.learn()

# _allEvaluations is a list of the sum of rewards for each trial, i.e. the fitness of each trial's network
# _allEvaluated is a list of the networks for each trial
#for network in learner._allEvaluated:
#    print network.params

reward_avgs = [e / MAX_STEPS for e in learner._allEvaluations]

"""
for i in range(0, MAX_TRIALS):
    exp.doInteractions(MAX_STEPS)
    agent.learn()
    print exp.agent.learner.module.params

    total_reward = agent.history.getSumOverSequences("reward")[0][0]