def learn_option(option, environment_name, num_episodes, max_steps):
    """ Learn the policy of an option with Sarsa(lambda) over its pseudo-reward.

    :param option: the option whose policy should be learned
    :param environment_name: the Pinball configuration file
    :type environment_name: str
    :param num_episodes: the number of episodes to execute, or a false value
        to run one episode per initial state of the option
    :type num_episodes: int
    :param max_steps: the maximum number of steps per episode
    :type max_steps: int

    """
    from pyrl.agents.sarsa_lambda import sarsa_lambda
    from pyrl.rlglue import RLGlueLocal as RLGlueLocal
    from pyrl.environments.pinball import PinballRLGlue
    import numpy as np
    import options
    import cPickle
    import csv

    prefix = 'option-%d-to-%d' % (option.label, option.target)
    score_file = csv.writer(open(prefix + '-score.csv', 'wb'))

    # Create the learning agent
    agent = sarsa_lambda(epsilon=0.01, alpha=0.001, gamma=0.9, lmbda=0.9,
                         params={'name': 'fourier', 'order': 4})

    # Wrap the environment with the option's pseudo-reward and record the
    # trajectories followed while learning
    environment = options.TrajectoryRecorder(
        options.PseudoRewardEnvironment(
            PinballRLGlue(environment_name), option, 10000),
        prefix + '-trajectory')

    # Connect the agent to the environment through RL-Glue
    rlglue = RLGlueLocal.LocalGlue(environment, agent)
    rlglue.RL_init()

    # Default to one episode per initial state of the option
    if not num_episodes:
        num_episodes = np.alen(option.initial_states)

    print 'Learning %d episodes' % (num_episodes,)
    for i in xrange(num_episodes):
        # Start every episode from one of the option's initial states
        initial_state = option.initial_state()
        rlglue.RL_env_message('set-start-state %f %f %f %f'
                              % (initial_state[0], initial_state[1],
                                 initial_state[2], initial_state[3]))

        terminated = rlglue.RL_episode(max_steps)
        total_steps = rlglue.RL_num_steps()
        total_reward = rlglue.RL_return()

        score_file.writerow([i, total_steps, total_reward, terminated])

    rlglue.RL_cleanup()

    # Save the function approximation with the option
    option.basis = agent.basis
    option.weights = agent.weights[0, :, :]
    with open(prefix + '-policy.pl', 'wb') as f:
        cPickle.dump(option, f)

    return option
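# A minimal usage sketch, not part of the original module: it assumes an
# option object (with 'label', 'target', 'initial_states' and an
# 'initial_state()' sampler) was pickled by an earlier step. The file names
# 'option-3-to-7-init.pl' and 'pinball_simple_single.cfg' are hypothetical
# placeholders.
def _example_learn_option():
    import cPickle
    with open('option-3-to-7-init.pl', 'rb') as f:
        option = cPickle.load(f)
    # Run 100 episodes of at most 10000 steps each
    return learn_option(option, 'pinball_simple_single.cfg', 100, 10000)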
def learn_policy(environment_name, nepisodes, max_steps, prefix):
    """ Learn a flat policy over the whole domain with Sarsa(lambda).

    :param environment_name: the Pinball configuration file
    :type environment_name: str
    :param nepisodes: the number of episodes to execute
    :type nepisodes: int
    :param max_steps: the maximum number of steps per episode
    :type max_steps: int
    :param prefix: the prefix to use for the output files
    :type prefix: str

    """
    from pyrl.agents.sarsa_lambda import sarsa_lambda
    from pyrl.rlglue import RLGlueLocal as RLGlueLocal
    from pyrl.environments.pinball import PinballRLGlue
    from options import TrajectoryRecorder
    import cPickle
    import csv

    # Create the learning agent
    agent = sarsa_lambda(epsilon=0.01, alpha=0.001, gamma=1.0, lmbda=0.9,
                         params={'name': 'fourier', 'order': 4})

    # Record the trajectories followed while learning
    environment = TrajectoryRecorder(PinballRLGlue(environment_name),
                                     prefix + '-trajectory')
    score_file = csv.writer(open(prefix + '-scores.csv', 'wb'))

    # Connect the agent to the environment through RL-Glue
    rlglue = RLGlueLocal.LocalGlue(environment, agent)
    rlglue.RL_init()

    # Execute episodes
    scores = []
    for i in xrange(nepisodes):
        print 'Episode ', i

        terminated = rlglue.RL_episode(max_steps)
        total_steps = rlglue.RL_num_steps()
        total_reward = rlglue.RL_return()
        print '\t%d steps, %f reward, %d terminated' % (total_steps,
                                                        total_reward,
                                                        terminated)

        score = [i, total_steps, total_reward, terminated]
        scores.append(score)
        score_file.writerow(score)

    rlglue.RL_cleanup()

    with open(prefix + '.pl', 'wb') as f:
        cPickle.dump(agent, f)

    return scores
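# A minimal usage sketch, not part of the original module: it learns a flat
# policy for 100 episodes and writes 'flat-trajectory', 'flat-scores.csv'
# and 'flat.pl'. The configuration file name is a hypothetical placeholder.
def _example_learn_policy():
    scores = learn_policy('pinball_simple_single.cfg', 100, 10000, 'flat')
    print 'Average return: %f' % (sum(s[2] for s in scores) / len(scores))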