def train(self, episodes, maxSteps):
    avgReward = 0

    # set up environment and task
    self.env = InfoMaxEnv(self.objectNames, self.actionNames, self.numCategories)
    self.task = InfoMaxTask(self.env, maxSteps=maxSteps,
                            do_decay_beliefs=True, uniformInitialBeliefs=True)

    # create neural net and learning agent
    self.params = buildNetwork(self.task.outdim, self.task.indim,
                               bias=True, outclass=SoftmaxLayer)
    if self._PGPE:
        self.agent = OptimizationAgent(self.params, PGPE(minimize=False, verbose=False))
    elif self._CMAES:
        self.agent = OptimizationAgent(self.params, CMAES(minimize=False, verbose=False))

    # init and perform experiment
    exp = EpisodicExperiment(self.task, self.agent)
    for i in range(episodes):
        exp.doEpisodes(1)
        avgReward += self.task.getTotalReward()
        print "reward episode ", i, self.task.getTotalReward()

    # print summary info
    print "\naverage reward over training = ", avgReward / episodes

    # save trained network
    self._saveWeights()
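# For reference: a minimal, self-contained sketch of the same
# OptimizationAgent + EpisodicExperiment pattern, run end-to-end on PyBrain's
# built-in cart-pole balancing task (InfoMaxEnv/InfoMaxTask above are
# project-specific and not part of PyBrain). Module paths are standard
# PyBrain 0.3; hyperparameters are illustrative only.
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import PGPE
from pybrain.rl.experiments import EpisodicExperiment

env = CartPoleEnvironment()
task = BalanceTask(env, maxsteps=200)
# the controller network maps observations (task.outdim) to actions (task.indim)
net = buildNetwork(task.outdim, 3, task.indim)
agent = OptimizationAgent(net, PGPE(minimize=False, storeAllEvaluations=True))
experiment = EpisodicExperiment(task, agent)
experiment.doEpisodes(100)  # the optimizer evaluates and updates inside doEpisodes
print(max(agent.learner._allEvaluations))  # best episodic return seen so far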
def simulate(self, random_state):
    """ Simulates agent behavior in 'n_sim' episodes. """
    logger.debug("Simulating user actions ({} episodes)".format(
        self.rl_params.n_simulation_episodes))
    self.experiment = EpisodicExperiment(self.task, self.agent)
    # set training flag off
    self.task.env.training = False
    # deactivate learning for experiment
    self.agent.learning = False
    # deactivate exploration
    explorer = self.agent.learner.explorer
    self.agent.learner.explorer = EGreedyExplorer(
        epsilon=0, decay=1, random_state=random_state)
    self.agent.learner.explorer.module = self.agent.module
    # activate logging
    self.task.env.start_logging()
    # simulate behavior
    self.experiment.doEpisodes(self.rl_params.n_simulation_episodes)
    # store log data
    dataset = self.task.env.log
    # deactivate logging
    self.task.env.end_logging()
    # reactivate exploration
    self.agent.learner.explorer = explorer
    # reactivate learning for experiment
    self.agent.learning = True
    # set training flag back on
    self.task.env.training = True
    return dataset
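# The EGreedyExplorer used above is project-specific (it accepts a
# random_state); the same evaluate-greedily pattern can be sketched with
# PyBrain's stock EpsilonGreedyExplorer. This assumes `agent` is a
# value-based LearningAgent whose learner exposes an `explorer`, as above.
from contextlib import contextmanager
from pybrain.rl.explorers import EpsilonGreedyExplorer

@contextmanager
def greedy_evaluation(agent):
    # swap in a zero-epsilon explorer, restore the original afterwards
    saved = agent.learner.explorer
    agent.learner.explorer = EpsilonGreedyExplorer(epsilon=0.0, decay=1.0)
    agent.learner.explorer.module = agent.module
    try:
        yield agent
    finally:
        agent.learner.explorer = saved

# usage:
#   with greedy_evaluation(agent):
#       experiment.doEpisodes(n)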
def someEpisodes(game_env, net, discountFactor=0.99, maxSteps=100,
                 avgOver=1, returnEvents=False, exploretoo=True):
    """ Return the fitness value for one episode of play, given the policy defined
    by a neural network. """
    task = GameTask(game_env)
    game_env.recordingEnabled = True
    game_env.reset()
    net.reset()
    task.maxSteps = maxSteps
    agent = LearningAgent(net)
    agent.learning = False
    agent.logging = False
    exper = EpisodicExperiment(task, agent)
    fitness = 0
    for _ in range(avgOver):
        rs = exper.doEpisodes(1)
        # add a slight bonus for more exploration, if rewards are identical
        if exploretoo:
            fitness += len(set(game_env._allEvents)) * 1e-6
        # the true, discounted reward
        fitness += sum([sum([v * discountFactor ** step for step, v in enumerate(r)])
                        for r in rs])
    fitness /= avgOver
    if returnEvents:
        return fitness, game_env._allEvents
    return fitness
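# The comprehension above relies on doEpisodes(n) returning one per-step
# reward sequence per episode; the discounted-return computation in
# isolation, on dummy reward data (illustration only):
rs = [[0.0, 0.0, 1.0], [0.0, 1.0]]  # two episodes of per-step rewards
discountFactor = 0.99
fitness = sum(sum(v * discountFactor ** step for step, v in enumerate(r))
              for r in rs)
print(fitness)  # 1.9701 = 1.0 * 0.99**2 + 1.0 * 0.99**1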
def _train_model(self):
    """ Uses reinforcement learning to find the optimal strategy. """
    self.experiment = EpisodicExperiment(self.task, self.agent)
    n_epochs = int(self.rl_params.n_training_episodes /
                   self.rl_params.n_episodes_per_epoch)
    logger.debug("Fitting user model over {} epochs, each {} episodes, total {} episodes."
                 .format(n_epochs, self.rl_params.n_episodes_per_epoch,
                         n_epochs * self.rl_params.n_episodes_per_epoch))
    for i in range(n_epochs):
        logger.debug("RL epoch {}".format(i))
        self.experiment.doEpisodes(self.rl_params.n_episodes_per_epoch)
        self.agent.learn()
        self.agent.reset()  # reset buffers
def main():
    client_id = Utils.connectToVREP()

    # Define RL elements
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)
    controller = MyActionValueTable()
    learner = Q(0.5, 0.9)  # alpha=0.5, gamma=0.9
    learner.explorer = EpsilonGreedyExplorer(0.15, 1)  # alternative: EpsilonGreedyBoltzmannExplorer()
    agent = LearningAgent(controller, learner)
    experiment = EpisodicExperiment(task, agent)
    controller.initialize(agent)

    i = 0
    try:
        while True:
            i += 1
            print('Episode ' + str(i))
            experiment.doEpisodes()
            agent.learn()
            agent.reset()
            print('mean: ' + str(numpy.mean(controller.params)))
            print('max: ' + str(numpy.max(controller.params)))
            print('min: ' + str(numpy.min(controller.params)))
            if i % 500 == 0:  # save the Q-table every 500 episodes
                print('Save q-table')
                controller.save()
                task.t_table.save()
    except (KeyboardInterrupt, SystemExit):
        with open('../data/standing-up-q.pkl', 'wb') as handle:
            pickle.dump(controller.params, handle)
        task.t_table.save()
        controller.save()
        vrep.simxFinish(client_id)
def main():
    vrep.simxFinish(-1)  # just in case, close all opened connections
    client_id = vrep.simxStart('127.0.0.1', 19997, True, True, 5000, 5)  # connect to V-REP
    if client_id < 0:
        print('Failed connecting to remote API server')
        return -1
    print('Connected to remote API server')

    # Define RL elements
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)
    controller = ActionValueTable(task.get_state_space_size(), task.get_action_space_size())
    controller.initialize(1.)

    # load a previously trained Q-table and replay it (no learner attached)
    with open('standing-up-q.pkl', 'rb') as file:
        controller._params = pickle.load(file)

    agent = LearningAgent(controller)
    experiment = EpisodicExperiment(task, agent)

    i = 0
    while True:
        i += 1
        print('Iteration n° ' + str(i))
        experiment.doEpisodes(1)

    vrep.simxFinish(client_id)
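# Persisting a tabular policy, as the two V-REP scripts above do: pickle just
# the parameter array of the ActionValueTable. A minimal sketch; the table
# sizes and file name here are illustrative only.
import pickle
from pybrain.rl.learners.valuebased import ActionValueTable

table = ActionValueTable(1000, 27)  # (number of states, number of actions)
table.initialize(1.0)

with open('q-table.pkl', 'wb') as f:  # save after training
    pickle.dump(table.params, f)

with open('q-table.pkl', 'rb') as f:  # restore before replay
    table._params = pickle.load(f)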
for runs in range(numbExp):
    # create environment
    # Options: Bool(OpenGL), Bool(realtime simulation while client is connected),
    # ServerIP (default: localhost), Port (default: 21560)
    if env is not None:
        env.closeSocket()
    env = ShipSteeringEnvironment()
    # create task
    task = GoNorthwardTask(env, maxsteps=500)
    # create controller network
    net = buildNetwork(task.outdim, task.indim, outclass=TanhLayer)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, PGPE(learningRate=0.3,
                                        sigmaLearningRate=0.15,
                                        momentum=0.0,
                                        epsilon=2.0,
                                        rprop=False,
                                        storeAllEvaluations=True))
    et.agent = agent
    # create experiment
    experiment = EpisodicExperiment(task, agent)

    # do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults(agent.learner._allEvaluations[-50:-1], runs, updates)
    et.addExps()
et.showExps()

# To watch what the simulation is doing, create the environment with rendering
# set to True, then go to pybrain/rl/environments/ode/ and start viewer.py
# (python-opengl must be installed; see the PyBrain documentation).
def main():
    if len(sys.argv) != 2:
        print 'Please provide a path to a model data directory.'
        print ('The script will load the newest model data from the directory, '
               'then continue to improve that model')
        sys.exit(0)

    model_directory = sys.argv[1]
    existing_models = sorted(glob(os.path.join(model_directory, '*.rlmdl')))
    if existing_models:
        newest_model_name = existing_models[-1]
        iteration_count = int(newest_model_name[-12:-6]) + 1
        print 'Loading model {}'.format(newest_model_name)
        newest_model = open(newest_model_name, 'r')
        agent = pickle.load(newest_model)
    else:
        net = buildNetwork(Environment.outdim,
                           Environment.outdim + Environment.indim,
                           Environment.indim)
        agent = OptimizationAgent(net, PGPE())
        iteration_count = 1

    environment = Environment(LOCAL_HOST, PORT, PATH_TO_SCENE)
    task = Task(environment)
    experiment = EpisodicExperiment(task, agent)

    def signal_handler(signal, frame):
        print 'Exiting gracefully'
        environment.teardown()
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)

    while True:
        time.sleep(1)
        print '>>>>> Running iteration {}'.format(iteration_count)

        # NOTE: hacky, but needed to hook autosaving in properly: advance the
        # optimizer by exactly one batch per loop iteration.
        experiment.optimizer.maxEvaluations = \
            experiment.optimizer.numEvaluations + experiment.optimizer.batchSize

        try:
            experiment.doEpisodes()
        except Exception as e:
            print 'ERROR RUNNING SIMULATION: \n{}'.format(e)
            environment.teardown()
        else:
            if iteration_count % AUTOSAVE_INTERVAL == 0:
                filename = str(iteration_count).zfill(6) + '.rlmdl'
                filename = os.path.join(model_directory, filename)
                print 'Saving model to {}'.format(filename)
                with open(filename, 'w+') as f:
                    pickle.dump(agent, f)
            iteration_count += 1
            print 'Iteration finished <<<<<'
import numpy as np
import matplotlib.pyplot as plt

from environments.continous_maze_discrete import CTS_Maze
from tasks.CTS_TASK import CTS_MazeTask
from pybrain.rl.experiments import EpisodicExperiment
from learners.baseline_learner import GP_SARSA
from agents.baseline_agent import GPSARSA_Agent

env = CTS_Maze([0.40, 0.40])  # goal position
task = CTS_MazeTask(env)
learner = GP_SARSA(gamma=0.95)
learner.batchMode = False  # not in use here; set to True for batch learning
agent = GPSARSA_Agent(learner)
agent.logging = True
# epsilon-greedy exploration (with and without use of uncertainty)
exp = EpisodicExperiment(task, agent)

plt.ion()
performance = []  # reward accumulation; dump variable for any evaluation metric
sum = []  # note: shadows the builtin `sum`
agent.reset()
for num_exp in range(100):
    performance = exp.doEpisodes(1)
    sum = np.append(sum, np.sum(performance))
    if num_exp % 10 == 0:
        # decay exploration by 10% every 10 episodes
        agent.init_exploration -= agent.init_exploration * 0.10
def run_experiment():
    # Experiment parameters
    HIDDEN_NODES = 4
    RUNS = 2
    BATCHES = 1
    PRINTS = 1
    EPISODES = 500

    env = None
    start_state_net = None
    run_results = []

    # Set up plotting tools for the experiments
    tools = ExTools(BATCHES, PRINTS)

    # Run the experiment
    for run in range(RUNS):
        if run == 0:
            continue

        # If an environment already exists, shut it down
        if env:
            env.closeSocket()

        # Create the environment
        env = create_environment()

        # Create the task
        task = Pa10MovementTask(env)

        # Create the neural network. Only create the network once so it
        # retains the same starting values for each run.
        if start_state_net:
            net = start_state_net.copy()
        else:
            # Create the initial neural network
            net = create_network(
                in_nodes=env.obsLen,
                hidden_nodes=HIDDEN_NODES,
                out_nodes=env.actLen
            )
            start_state_net = net.copy()

        # Create the learning agent
        learner = HillClimber(storeAllEvaluations=True)
        agent = OptimizationAgent(net, learner)
        tools.agent = agent

        # Create the experiment
        experiment = EpisodicExperiment(task, agent)

        # Perform all episodes in the run
        for episode in range(EPISODES):
            experiment.doEpisodes(BATCHES)

        # Calculate results
        all_results = agent.learner._allEvaluations
        max_result = np.max(all_results)
        min_result = np.min(all_results)
        avg_result = np.sum(all_results) / len(all_results)
        run_results.append((run, max_result, min_result, avg_result))

        # Make the results directory if it does not exist
        if not os.path.exists(G_RESULTS_DIR):
            os.mkdir(G_RESULTS_DIR)

        # Write all results to the results file
        with open(os.path.join(G_RESULTS_DIR, 'run_%d.txt' % run), 'w+') as f:
            # Store the calculated max, min, avg
            f.write('RUN, MAX, MIN, AVG\n')
            f.write('%d, %f, %f, %f\n' % (run, max_result, min_result, avg_result))

            # Store all results from this run
            f.write('EPISODE, REWARD\n')
            for episode, result in enumerate(all_results):
                f.write('%d, %f\n' % (episode, result))

    return
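# The storeAllEvaluations / _allEvaluations pattern used above (and in the
# ship-steering example) also works on a plain objective function instead of
# an episodic task. A minimal sketch; note _allEvaluations is an internal
# PyBrain attribute, used here the same way the snippets above use it.
from scipy import array
from pybrain.optimization import HillClimber

def objective(x):
    return -(x ** 2).sum()  # maximum at the origin

optimizer = HillClimber(objective, array([1.0, -1.0]),
                        minimize=False, storeAllEvaluations=True,
                        maxEvaluations=100)
best, fitness = optimizer.learn()
print(len(optimizer._allEvaluations))  # one entry per evaluation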
from pybrain_extension.environment.morse_environment import ContinuousControllerEnvironment
from pybrain_extension.task.red_cube_task import CameraPixelsRedCubeTask

# Python 2/3 compatibility: alias raw_input to input on Python 2
try:
    import __builtin__
    input = getattr(__builtin__, 'raw_input')
except (ImportError, AttributeError):
    pass

with CurrentController(3) as control:
    environment = ContinuousControllerEnvironment(control)
    task = CameraPixelsRedCubeTask(environment, True)
    experiment = EpisodicExperiment(task, None)
    # control.calibrate()
    start = time()
    bias = True

    def eval_fitness(genomes):
        for g in genomes:
            # visualize.draw_net(g, view=False)
            agent = NeatAgent(g, bias=bias)
            g.fitness = 0
            for state in range(control.get_randomize_states()):
                control.randomize(state)
                g.fitness += task.f(agent)
                if not task.found_cube: