def launch_rlglue_agent(parameters):
    """Start the rlglue agent.

    (This function is executed in a separate process using multiprocessing.)
    """
    import rl_glue_ale_agent
    agent = rl_glue_ale_agent.NeuralAgent(parameters.discount,
                                          parameters.learning_rate,
                                          parameters.rms_decay,
                                          parameters.rms_epsilon,
                                          parameters.momentum,
                                          parameters.epsilon_start,
                                          parameters.epsilon_min,
                                          parameters.epsilon_decay,
                                          parameters.phi_length,
                                          parameters.replay_memory_size,
                                          parameters.experiment_prefix,
                                          parameters.nn_file,
                                          parameters.pause,
                                          parameters.network_type,
                                          parameters.update_rule,
                                          parameters.batch_accumulator,
                                          parameters.freeze_interval,
                                          parameters.batch_size,
                                          parameters.replay_start_size,
                                          parameters.update_frequency,
                                          parameters.image_resize)
    AgentLoader.loadAgent(agent)
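# Hedged usage sketch (not part of the original snippet): the docstring above
# says launch_rlglue_agent runs in a separate process via multiprocessing, so
# a caller might spawn it roughly like this. The `parameters` argparse
# namespace is whatever the surrounding script builds; every name below other
# than launch_rlglue_agent is an assumption.
import multiprocessing

def start_agent_process(parameters):
    # Run launch_rlglue_agent(parameters) in a child process so the
    # environment and experiment can run alongside it.
    proc = multiprocessing.Process(target=launch_rlglue_agent,
                                   args=(parameters,))
    proc.start()
    return proc  # the caller can proc.join() once the experiment finishes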
def runAgent(agent_class):
    """Use the agent_parameters function to parse command line arguments and
    run the RL agent in network mode.
    """
    parser = argparse.ArgumentParser(parents=[agent_class.agent_parameters()],
                                     add_help=True)
    params = vars(parser.parse_args())
    AgentLoader.loadAgent(agent_class(**params))
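# Hedged sketch (not from the original code): runAgent expects the agent class
# to expose an agent_parameters() parser whose option names map onto the
# constructor keywords. ExampleAgent and its options below are hypothetical,
# just to illustrate that contract; a real agent would also implement the
# RL-Glue methods (agent_init, agent_start, agent_step, agent_end,
# agent_cleanup, agent_message).
import argparse

class ExampleAgent(object):
    def __init__(self, learning_rate=0.01, batch_size=32):
        self.learning_rate = learning_rate
        self.batch_size = batch_size

    @staticmethod
    def agent_parameters():
        # add_help=False so this parser can be used as a parent parser above.
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument('--learning-rate', dest='learning_rate',
                            type=float, default=0.01)
        parser.add_argument('--batch-size', dest='batch_size',
                            type=int, default=32)
        return parser

# runAgent(ExampleAgent) would then parse sys.argv and call
# AgentLoader.loadAgent(ExampleAgent(learning_rate=..., batch_size=...)).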
def main(args):
    """
    Mostly just read command line arguments here.

    We do this here instead of agent_init to make it possible to use --help
    from the command line without starting an experiment.
    """
    from logutils import setupLogging

    # Handle command line argument:
    parser = addScriptArguments()
    # ignore unknowns
    parameters, _ = parser.parse_known_args(args)
    setupLogging(parameters.verbosity)

    if not parameters.recording:
        best_video = epoch_network = learning_log = inner_video = every_video = False
    else:
        best_video = parameters.video
        inner_video = parameters.inner_video
        every_video = parameters.every_video
        epoch_network = learning_log = True

    if parameters.nips:
        default_parameters = NIPSParameters
    else:
        default_parameters = DefaultParameters

    AgentLoader.loadAgent(NeuralAgent(
        parameters.game_name,
        network_size=default_parameters.get_default(parameters, 'network_size'),
        learning_rate=default_parameters.get_default(parameters, 'learning_rate'),
        batch_size=default_parameters.get_default(parameters, 'batch_size'),
        discount_rate=default_parameters.get_default(parameters, 'discount_rate'),
        momentum=default_parameters.get_default(parameters, 'momentum'),
        rms_decay=default_parameters.get_default(parameters, 'RMS_decay'),
        experiment_prefix=default_parameters.get_default(parameters, 'experiment_prefix'),
        experiment_directory=default_parameters.get_default(parameters, 'experiment_directory'),
        nn_file=default_parameters.get_default(parameters, 'nn_file'),
        pause=default_parameters.get_default(parameters, 'pause'),
        epsilon_start=default_parameters.get_default(parameters, 'epsilon_start'),
        epsilon_min=default_parameters.get_default(parameters, 'epsilon_min'),
        epsilon_decay=default_parameters.get_default(parameters, 'epsilon_decay'),
        testing_epsilon=default_parameters.get_default(parameters, 'testing_epsilon'),
        history_length=default_parameters.get_default(parameters, 'history_length'),
        max_history=default_parameters.get_default(parameters, 'history_max'),
        best_video=best_video,
        every_video=every_video,
        inner_video=inner_video,
        keep_epoch_network=epoch_network,
        learning_log=learning_log,
        target_reset_frequency=default_parameters.get_default(parameters, 'target_reset_frequency')))
def main(args):
    """
    Mostly just read command line arguments here.

    We do this here instead of agent_init to make it possible to use --help
    from the command line without starting an experiment.
    """
    from logutils import setupLogging

    # Handle command line argument:
    parser = argparse.ArgumentParser(description='Neural rl agent.')
    parser.add_argument("-v", "--verbose", dest="verbosity", default=0, action="count",
                        help="Verbosity. Invoke many times for higher verbosity")
    parser.add_argument("-g", '--game-name', dest="game_name", default=None,
                        help='Name of the game')
    parser.add_argument('-b', '--batch-size', dest="batch_size", type=int,
                        default=TestingNeuralAgent.DefaultBatchSize,
                        help='Batch size (default: %(default)s)')
    parser.add_argument('-e', '--experiment-directory', dest="experiment_directory",
                        type=str, required=True,
                        help='Directory where experiment details were saved')
    parser.add_argument('-t', '--test-epsilon', dest="testing_epsilon", type=float,
                        default=TestingNeuralAgent.DefaultTestingEpsilon,
                        help='Epsilon to use during testing (default: %(default)s)')
    parser.add_argument("-p", '--pause', dest="pause", type=float,
                        default=TestingNeuralAgent.DefaultPauseTime,
                        help='Amount of time to pause display while testing. (default: %(default)s)')
    parser.add_argument("-hl", '--history-length', dest="history_length", type=int,
                        default=TestingNeuralAgent.DefaultHistoryLength,
                        help='History length (default: %(default)s)')
    parser.add_argument('--no-video', dest="video", default=True, action="store_false",
                        help='Do not make a "video" record of the best run in each game')
    parser.add_argument('--no-records', dest="recording", default=True, action="store_false",
                        help='Do not record anything about the experiment '
                             '(best games, epoch networks, test results, etc)')
    # ignore unknowns
    parameters, _ = parser.parse_known_args(args)
    setupLogging(parameters.verbosity)

    if not parameters.recording:
        best_video = learning_log = False
    else:
        best_video = parameters.video
        learning_log = True

    AgentLoader.loadAgent(TestingNeuralAgent(parameters.game_name,
                                             batch_size=parameters.batch_size,
                                             experiment_directory=parameters.experiment_directory,
                                             testing_epsilon=parameters.testing_epsilon,
                                             pause=parameters.pause,
                                             history_length=parameters.history_length,
                                             best_video=best_video,
                                             learning_log=learning_log))
    def agent_start(self, observation):
        return Action()

    def agent_step(self, reward, observation):
        return Action()

    def agent_end(self, reward):
        pass

    def agent_cleanup(self):
        pass

    def agent_message(self, inMessage):
        if inMessage == None:
            return "null"
        if inMessage == "":
            return "empty"
        if inMessage == "null":
            return None
        if inMessage == "empty":
            return ""
        return inMessage

if __name__ == "__main__":
    AgentLoader.loadAgent(test_message_agent())
        states = numpy.vstack([e.prev_state for e in self.experiences])
        actions = numpy.array([e.prev_action for e in self.experiences], dtype='int32')
        targets = numpy.zeros(len(self.experiences))
        costs = []
        for n in xrange(10):
            # Recompute target Q values with current estimate
            for i in xrange(len(self.experiences) - 1):
                max_q = self.max_action(self.experiences[i].state)[0]
                targets[i] = self.experiences[i].reward + discount_factor * max_q
            # The last experience is terminal: its target is just its own reward
            targets[-1] = self.experiences[-1].reward
            cost = self.update(states, actions, targets)
            costs.append(cost)
        print 'Costs:', costs
        self.experiences = []
        self.p_exploration *= p_exploration_decay
        if self.p_exploration < 1:
            self.p_exploration = 0
        print 'p_exploration', self.p_exploration

    def agent_cleanup(self):
        pass

    def agent_message(self, message):
        pass

if __name__ == "__main__":
    AgentLoader.loadAgent(mlp_agent())
def main():
    AgentLoader.loadAgent(PredictiveMockAgent())
print "########### MODEL UPDATED ######################" self.DN.target_model_update() # Simple text based visualization print ' REWARD %.1f / EPSILON %.5f' % (np.sign(reward), self.epsilon) # Time count if self.policyFrozen is False: self.time += 1 def agent_cleanup(self): pass def agent_message(self, inMessage): if inMessage.startswith("freeze learning"): self.policyFrozen = True return "message understood, policy frozen" if inMessage.startswith("unfreeze learning"): self.policyFrozen = False return "message understood, policy unfrozen" if inMessage.startswith("save model"): serializers.save_npz('resume.model', self.DN.model) # save current model np.savez('stored_D012.npz', D0=self.DN.D[0], D1=self.DN.D[1], D2=self.DN.D[2]) np.savez('stored_D34.npz', D3=self.DN.D[3], D4=self.DN.D[4]) return "message understood, model saved" if __name__ == "__main__": AgentLoader.loadAgent(dn_agent())
            action = 'E'
            self.window.refresh()
        except KeyboardInterrupt:
            RLGlue.RL_cleanup()

        a = Action()
        if action:
            a.charArray = [action]
        return a

    # (double) -> void
    def agent_end(self, reward):
        pass

    # () -> void
    def agent_cleanup(self):
        curses.endwin()
        print 'BYE!'

    # (string) -> string
    def agent_message(self, message):
        pass

if __name__ == "__main__":
    AgentLoader.loadAgent(ManualAgent())
        self.write_data(observation.doubleArray, "observation")
        return returnAction

    def agent_step(self, reward, observation):
        print "Observation: ", observation.doubleArray
        print "Reward: ", reward
        returnAction = Action()
        returnAction.doubleArray = self.agent_policy(observation)

        self.lastAction = copy.deepcopy(returnAction)
        self.lastObservation = copy.deepcopy(observation)

        self.write_data(observation.doubleArray, "observation")
        self.write_data([reward], "reward")
        return returnAction

    def agent_end(self, reward):
        print "Agent Down!"

    def agent_cleanup(self):
        pass

    def agent_message(self, inMessage):
        print inMessage
        return "Message received"

if __name__ == "__main__":
    AgentLoader.loadAgent(weak_baseline())
def main():
    AgentLoader.loadAgent(CaclaAgentExperimenter())
def main():
    AgentLoader.loadAgent(cacla_agent())
        return returnAction

    def agent_step(self, reward, observation):
        # Generate random action, 0 or 1
        thisIntAction = self.randGenerator.randint(0, 1)
        returnAction = Action()
        returnAction.intArray = [thisIntAction]

        last_action = copy.deepcopy(returnAction)
        last_observation = copy.deepcopy(observation)

        return returnAction

    def agent_end(self, reward):
        pass

    def agent_cleanup(self):
        pass

    def agent_message(self, inMessage):
        print inMessage

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Run DQN Recurrent experiment')
    parser.add_argument('--learn_start', metavar='L', type=int, default=5 * 10 ** 4,
                        help='only start learning after an amount of steps in order to build a db')
    args = parser.parse_args()
    AgentLoader.loadAgent(QAgent(args))
""" pass def save_params(self, filename="cnnparams.pkl"): the_file = open(filename, "w") cPickle.dump(self.cnn, the_file, -1) def agent_message(self, in_message): """ The experiment will cause this method to be called. Used to save data to the indicated file. """ if in_message.startswith("save_data"): total_time = time.time() - self.start_time file_name=in_message.split(" ")[1] the_file = open(file_name, "w") all_data = (self.cnn.get_params()) print "PICKLING: " + file_name #cPickle.dump(all_data, the_file, -1) #print "Simulated at a rate of {}/s".format(len(self.rewards) / # total_time) return "File saved successfully" else: return "I don't know how to respond to your message" if __name__=="__main__": AgentLoader.loadAgent(NeuralQLearnAgent())
import os, sys, time
import numpy as np
import scipy.misc as spm
from rlglue.agent import AgentLoader

sys.path.append(os.path.split(os.getcwd())[0])
from PIL import Image
from config import config
from agent import Agent

# Override config
config.apply_batchnorm = True
config.ale_actions = [0, 3, 4]
config.ale_screen_size = [210, 160]
config.ale_scaled_screen_size = [84, 84]
config.rl_replay_memory_size = 10 ** 5
config.rl_replay_start_size = 10 ** 4
config.q_conv_hidden_channels = [32, 64, 64]
config.q_conv_strides = [4, 2, 1]
config.q_conv_filter_sizes = [8, 4, 3]
config.q_conv_output_vector_dimension = 512
config.q_fc_hidden_units = [256, 128]

# Override agent
class PongAgent(Agent):
    pass

AgentLoader.loadAgent(PongAgent())
        # Message Description
        # unfreeze learning
        # Action: Set flag to resume updating policy
        #
        if inMessage.startswith("unfreeze learning"):
            self.policyFrozen = False
            return "message understood, policy unfrozen"

        #Message Description
        # freeze exploring
        # Action: Set flag to stop exploring (greedy actions only)
        #
        if inMessage.startswith("freeze exploring"):
            self.exploringFrozen = True
            return "message understood, exploring frozen"

        #Message Description
        # unfreeze exploring
        # Action: Set flag to resume exploring (e-greedy actions)
        #
        if inMessage.startswith("unfreeze exploring"):
            self.exploringFrozen = False
            return "message understood, exploring unfrozen"

        return "Invasive agent does not understand your message."

if __name__ == "__main__":
    AgentLoader.loadAgent(InvasiveAgent())
        states, actions, rewards, next_states, next_actions = samples
        repeats = 1 if self.replay_size is not None else self.replay_times
        for _ in xrange(repeats):
            for i in xrange(sample_size):
                state, action, reward, next_state, next_action = states[i], \
                    actions[i], rewards[i], next_states[i], next_actions[i]
                n_rew = self.normalize_reward(reward)
                # assert np.unique(state), 'state contains duplicate values'
                delta = n_rew - self.get_value(state, action, self.sparse)
                assert not (np.any(np.isnan(delta)) or np.any(np.isinf(delta))), \
                    'delta is nan or infinite: %s' % str(delta)
                ns_values = self.get_all_values(next_state, self.sparse)
                # Here's the difference with Q-learning: next_action is used
                delta += self.gamma * ns_values[next_action]
                # Normalize alpha with # of active features
                alpha = self.alpha / float(np.sum(state != 0.))
                # TODO I might be missing out on something, compare formula
                # Maybe trace made up for the fact that a factor is missing
                self.theta += alpha * delta * self.trace

    def create_projector(self):
        return RAMALEFeatures()

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='run Sarsa Replay Agent')
    ALEReplayAgent.register_with_parser(parser)
    args = parser.parse_args()
    AgentLoader.loadAgent(ALEReplayAgent(args))
def run_agent(agent=None):
    AgentLoader.loadAgent(agent)
    agent.agent_start(observation)
    agent.agent_train(False)

    for i in range(2, 256):
        print "Round %d" % i
        reward = float(i)
        color = i
        observation = Observation()
        observation.intArray = np.ones(size_of_observation, dtype=np.uint8)
        observation.intArray *= color
        agent.agent_step(reward, observation)
        agent.agent_train(False)

        reward = float(i)
        color = i
        observation = Observation()
        observation.intArray = np.ones(size_of_observation, dtype=np.uint8)
        observation.intArray *= color
        agent.agent_step(reward, observation)
        agent.agent_train(True)
        #ipdb.set_trace()

if __name__ == '__main__':
    #test_agent_step()
    agent = setup()
    AgentLoader.loadAgent(agent)
from rlglue.types import Observation
from rlglue.agent import AgentLoader as AgentLoader
from rlglue.agent.Agent import Agent
from rlglue.utils.TaskSpecVRLGLUE3 import TaskSpecParser
from LambdaSARSA import LambdaSARSA
import tool
import pickle
from ModelAgent import ModelAgent

if __name__ == "__main__":
    import atexit
    agent = tool.Load('mario_sarsa_981_0.04_0.db')
    #agent = LinearSarsaAgent()
    #atexit.register(lambda: saveObj(agent))  # workaround for the NoneType error in the destructor
    #agent = tool.Load("Speed.db")
    #while True:
    AgentLoader.loadAgent(agent)
    #time.sleep(2)
def main():
    AgentLoader.loadAgent(NeuralAgent())
    def agent_step(self, reward, observation):
        # print "Observation: ", observation.doubleArray
        # print "Reward: ", reward
        returnAction = Action()
        returnAction.doubleArray = self.agent_policy(observation)

        self.lastAction = copy.deepcopy(returnAction)
        self.lastObservation = copy.deepcopy(observation)

        self.write_data(observation.doubleArray, "observation")
        self.write_data([reward], "reward")
        return returnAction

    def agent_end(self, reward):
        self.episode += 1
        print "Agent Down!"

    def agent_cleanup(self):
        pass

    def agent_message(self, inMessage):
        print inMessage
        return "Message received"

if __name__ == "__main__":
    AgentLoader.loadAgent(random_agent())
parser.add_argument('--potential', metavar='F', type=str, default='less_enemies',
                    help='potentials to use: less_enemies or lowest_enemy')
parser.add_argument('--actions', metavar='C', type=int, default=None, nargs='*',
                    help='list of allowed actions')
args = parser.parse_args()

act = None
if args.actions is not None:
    act = np.array(args.actions)

if args.potential == 'less_enemies':
    AgentLoader.loadAgent(ALESarsaShapingAgent(agent_id=args.id,
                                               alpha=args.alpha,
                                               lambda_=args.lambda_,
                                               eps=args.eps,
                                               gamma=args.gamma,
                                               save_path=args.savepath,
                                               actions=act,
                                               selected_potential=args.potential))
elif args.potential == 'lowest_enemy':
    AgentLoader.loadAgent(ALESarsaShapingAgent(agent_id=args.id,
                                               alpha=args.alpha,
                                               lambda_=args.lambda_,
                                               eps=args.eps,
                                               gamma=args.gamma,
                                               save_path=args.savepath,
                                               actions=act,
                                               selected_potential=args.potential))
else:
    print 'unknown potential type'
def main():
    AgentLoader.loadAgent(CaclaAgentNolearn())
return "message understood, exploring frozen" #Message Description # save_policy FILENAME # Action: Save current value function in binary format to # file called FILENAME # if inMessage.startswith("save_policy"): splitString=inMessage.split(" "); self.save_value_function(splitString[1]); print "Saved."; return "message understood, saving policy" #Message Description # load_policy FILENAME # Action: Load value function in binary format from # file called FILENAME # if inMessage.startswith("load_policy"): splitString=inMessage.split(" ") self.load_value_function(splitString[1]) print "Loaded." return "message understood, loading policy" return "SampleqAgent(Python) does not understand your message." if __name__=="__main__": AgentLoader.loadAgent(q_agent())
        a = np.asarray(a).astype(np.int32)
        r = np.asarray(r).astype(np.float32)
        s2 = np.asarray(s2).astype(np.float32)
        t = np.asarray(t).astype(np.float32)

        # Get the target network targetQ used for Q-value estimation and
        # compute the Q values of s2
        s2 = chainer.Variable(self.xp.asarray(s2))
        Q = self.targetQ.value(s2)
        Q_data = Q.data

        if type(Q_data).__module__ == np.__name__:
            max_Q_data = np.max(Q_data, axis=1)
        else:
            max_Q_data = np.max(self.xp.asnumpy(Q_data).astype(np.float32), axis=1)

        # Build the training targets t from the Q values estimated by targetQ
        t = np.sign(r) + (1 - t) * self.gamma * max_Q_data

        self.optimizer.update(self.Q, s, a, t)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Deep Q-Learning')
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--size', '-s', default=6, type=int,
                        help='Reversi board size')
    args = parser.parse_args()
    AgentLoader.loadAgent(KmoriReversiAgent(args.gpu, args.size))
        self.qvalues = pickle.load(theFile)
        theFile.close()

    def agent_message(self, inMessage):
        #Message Description
        # save_policy FILENAME
        # Action: Save current value function in binary format to
        # file called FILENAME
        #
        if inMessage.startswith("save_policy"):
            splitString = inMessage.split(" ")
            self.save_value_function(splitString[1])
            print "Saved."
            return "message understood, saving policy"

        #Message Description
        # load_policy FILENAME
        # Action: Load value function in binary format from
        # file called FILENAME
        #
        if inMessage.startswith("load_policy"):
            splitString = inMessage.split(" ")
            self.load_value_function(splitString[1])
            print "Loaded."
            return "message understood, loading policy"

        return "QLearnAgent(Python) does not understand your message."

if __name__ == "__main__":
    AgentLoader.loadAgent(QLearnAgent())
        F = self.gamma * (current_potential - self.last_potential)
        if not self.allow_negative_rewards:
            F = max(0, F)

        a_ns = self.step(reward, phi_ns)
        # log state data
        self.last_phi = copy.deepcopy(phi_ns)
        self.last_action = copy.deepcopy(a_ns)
        self.last_potential = current_potential
        return self.create_action(self.actions[a_ns])  # create RLGLUE action

    def agent_end(self, reward):
        with open(self.shaping_data_filename, 'a') as f:
            for i in range(len(self.outstanding_shaping_data['alien_bonus'])):
                f.write(','.join(map(str, [
                    self.outstanding_shaping_data['alien_bonus'][i],
                    self.outstanding_shaping_data['laser_penalty'][i],
                    self.outstanding_shaping_data['shield_bonus'][i],
                    self.outstanding_shaping_data['lowest_enemy_penalty'][i]
                ])) + '\n')
        super(ALEShapingAgent, self).agent_end(reward)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='run Sarsa Agent')
    ALEShapingAgent.register_with_parser(parser)
    args = parser.parse_args()
    AgentLoader.loadAgent(ALEShapingAgent(args))
    def agent_freeze(self):
        pass

    def agent_message(self, inMessage):
        return None

    def randomify(self):
        self.action.intArray = []
        self.action.doubleArray = []
        for min_action, max_action in self.int_action_ranges:
            act = random.randrange(min_action, max_action + 1)
            self.action.intArray.append(act)
        for min_action, max_action in self.double_action_ranges:
            act = random.uniform(min_action, max_action)
            self.action.doubleArray.append(act)
        self.action.charArray = GenPasswd2(self.action.numChars)
        #print self.action.intArray
        #print self.action.doubleArray
        #print self.action.charArray

if __name__ == "__main__":
    AgentLoader.loadAgent(RandomAgent())
        maze = detect_maze(screen)
        self.image = pacman_image(maze)

        return_action = Action()
        action = randrange(self.numActions)
        return_action.intArray = [action]

        self.lastAction = copy.deepcopy(return_action)
        self.lastObservation = copy.deepcopy(observation)
        return return_action

    def agent_step(self, reward, observation):
        screen = observation.intArray[128:]
        screen = np.reshape(screen, (210, -1))
        self.image.new_image(screen)

        return_action = Action()
        action = randrange(self.numActions)
        return_action.intArray = [action]

        self.lastAction = copy.deepcopy(return_action)
        self.lastObservation = copy.deepcopy(observation)
        return return_action

    def agent_end(self, reward):
        pass

    def agent_cleanup(self):
        pass

if __name__ == "__main__":
    AgentLoader.loadAgent(pacmanAgent())
        if not self.allow_negative_rewards:
            F = max(0, F)

        a_ns = self.step(reward, phi_ns)
        # log state data
        self.last_phi = copy.deepcopy(phi_ns)
        self.last_action = copy.deepcopy(a_ns)
        self.last_potential = current_potential
        return self.create_action(self.actions[a_ns])  # create RLGLUE action

    def agent_end(self, reward):
        with open(self.shaping_data_filename, 'a') as f:
            for i in range(len(self.outstanding_shaping_data['alien_bonus'])):
                f.write(','.join(map(str, [
                    self.outstanding_shaping_data['alien_bonus'][i],
                    self.outstanding_shaping_data['laser_penalty'][i],
                    self.outstanding_shaping_data['shield_bonus'][i],
                    self.outstanding_shaping_data['lowest_enemy_penalty'][i]
                ])) + '\n')
        super(ALEShapingAgent, self).agent_end(reward)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='run Sarsa Agent')
    ALEShapingAgent.register_with_parser(parser)
    args = parser.parse_args()
    AgentLoader.loadAgent(ALEShapingAgent(args))
        # log state data
        self.last_phi = copy.deepcopy(phi_ns)
        self.last_action = copy.deepcopy(a_ns)
        return self.create_action(self.actions[a_ns])  # create RLGLUE action

    def agent_end(self, reward):
        super(ALESarsaAgent, self).agent_end(reward)
        self.step(reward)


class BasicALESarsaAgent(BasicALEAgent, ALESarsaAgent):
    pass


class RAMALESarsaAgent(RAMALEAgent, ALESarsaAgent):
    pass


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="run Sarsa Agent")
    parser.add_argument("--features", metavar="F", type=str, default="RAM",
                        help="features to use: RAM or BASIC")
    ALESarsaAgent.register_with_parser(parser)
    args = parser.parse_args()
    if args.features == "RAM":
        AgentLoader.loadAgent(RAMALESarsaAgent(args))
    elif args.features == "BASIC":
        AgentLoader.loadAgent(BasicALESarsaAgent(args))
    else:
        raise Exception("unknown feature type")
return "message understood, exploring frozen"; #Message Description # save_policy FILENAME # Action: Save current value function in binary format to # file called FILENAME # if inMessage.startswith("save_policy"): splitString=inMessage.split(" "); self.save_value_function(splitString[1]); print "Saved."; return "message understood, saving policy"; #Message Description # load_policy FILENAME # Action: Load value function in binary format from # file called FILENAME # if inMessage.startswith("load_policy"): splitString=inMessage.split(" "); self.load_value_function(splitString[1]); print "Loaded."; return "message understood, loading policy"; return "SampleSarsaAgent(Python) does not understand your message."; if __name__=="__main__": AgentLoader.loadAgent(sarsa_agent())
        self.nonEmptyAction.intArray = (0, 1, 2, 3, 4, 5, 6)
        self.nonEmptyAction.doubleArray = (0.0 / 3.0, 1.0 / 3.0, 2.0 / 3.0)
        self.nonEmptyAction.charArray = "a"

    def agent_start(self, observation):
        self.whichEpisode = self.whichEpisode + 1
        if self.whichEpisode % 2 == 0:
            return self.emptyAction
        else:
            return self.nonEmptyAction

    def agent_step(self, reward, observation):
        if self.whichEpisode % 2 == 0:
            return self.emptyAction
        else:
            return self.nonEmptyAction

    def agent_end(self, reward):
        pass

    def agent_cleanup(self):
        pass

    def agent_message(self, inMessage):
        return ""

if __name__ == "__main__":
    AgentLoader.loadAgent(test_empty_agent())
            self.exploringFrozen = False
            return "message understood, exploring unfrozen"

        #Message Description
        # save_policy FILENAME
        # Action: Save current value function in binary format to
        # file called FILENAME
        #
        if inMessage.startswith("save_policy"):
            splitString = inMessage.split(" ")
            self.save_value_function(splitString[1])
            print "Saved."
            return "message understood, saving policy"

        #Message Description
        # load_policy FILENAME
        # Action: Load value function in binary format from
        # file called FILENAME
        #
        if inMessage.startswith("load_policy"):
            splitString = inMessage.split(" ")
            self.load_value_function(splitString[1])
            print "Loaded."
            return "message understood, loading policy"

        return "SampleSarsaAgent(Python) does not understand your message."

if __name__ == "__main__":
    AgentLoader.loadAgent(Qlearning_agent())
parser.add_argument('--actions', metavar='C', type=int, default=None, nargs='*',
                    help='list of allowed actions')
args = parser.parse_args()

act = None
if args.actions is not None:
    act = np.array(args.actions)

if args.features == 'RAM':
    AgentLoader.loadAgent(RAMALEERSarsaAgent(agent_id=args.id,
                                             alpha=args.alpha,
                                             lambda_=args.lambda_,
                                             eps=args.eps,
                                             gamma=args.gamma,
                                             save_path=args.savepath,
                                             actions=act,
                                             db_size=args.db_size,
                                             trajectory_length=args.trajectory_length,
                                             replays=args.replays))
elif args.features == 'BASIC':
    AgentLoader.loadAgent(BasicALEERSarsaAgent(agent_id=args.id,
                                               alpha=args.alpha,
                                               lambda_=args.lambda_,
                                               eps=args.eps,
                                               gamma=args.gamma,
                                               save_path=args.savepath,
                                               actions=act,
                                               db_size=args.db_size,
                                               trajectory_length=args.trajectory_length,
                                               replays=args.replays))
def main():
    AgentLoader.loadAgent(CaclaAgentLasagne())
            self.exploringFrozen = False
            return "message understood, exploring unfrozen"

        #Message Description
        # save_policy FILENAME
        # Action: Save current value function in binary format to
        # file called FILENAME
        #
        if inMessage.startswith("save_policy"):
            splitString = inMessage.split(" ")
            self.save_value_function(splitString[1])
            print "Saved."
            return "message understood, saving policy"

        #Message Description
        # load_policy FILENAME
        # Action: Load value function in binary format from
        # file called FILENAME
        #
        if inMessage.startswith("load_policy"):
            splitString = inMessage.split(" ")
            self.load_value_function(splitString[1])
            print "Loaded."
            return "message understood, loading policy"

        return "SampleSarsaAgent(Python) does not understand your message."

if __name__ == "__main__":
    AgentLoader.loadAgent(sarsa_agent())
        return returnAction

    def agent_step(self, reward, observation):
        #print observation.intArray
        #Generate random action, 0 or 1
        thisIntAction = self.randGenerator.randint(0, 1)
        returnAction = Action()
        returnAction.intArray = [thisIntAction]

        lastAction = copy.deepcopy(returnAction)
        lastObservation = copy.deepcopy(observation)

        return returnAction

    def agent_end(self, reward):
        pass

    def agent_cleanup(self):
        pass

    def agent_message(self, inMessage):
        if inMessage == "what is your name?":
            return "my name is skeleton_agent, Python edition!"
        else:
            return "I don't know how to respond to your message"

if __name__ == "__main__":
    AgentLoader.loadAgent(skeleton_agent())
        self.DQN.experienceReplay(self.time)

        # Simple text based visualization
        print ' REWARD %.1f / EPSILON %.5f' % (np.sign(reward), self.epsilon)

        # Time count
        if not self.policyFrozen:
            self.time += 1

    def agent_cleanup(self):
        pass

    def agent_message(self, inMessage):
        if inMessage.startswith("freeze learning"):
            self.policyFrozen = True
            return "message understood, policy frozen"

        if inMessage.startswith("unfreeze learning"):
            self.policyFrozen = False
            return "message understood, policy unfrozen"

        if inMessage.startswith("save model"):
            # Open in binary mode so pickle can serialize the model safely
            with open('dqn_model.dat', 'wb') as f:
                pickle.dump(self.DQN.model, f)
            return "message understood, model saved"

if __name__ == "__main__":
    AgentLoader.loadAgent(dqn_agent())
            if(reward > action_reward):
                action = randAction
                action_reward = reward

        if(action_reward > numpy.inner(self.reward_weight, self.lastObservation.doubleArray)):
            #print "predicted state==== ", numpy.add(numpy.inner(self.value_function_weight, randAction), self.last_observation_list[-1])
            return action

    def randomAction(self):
        """ generate random action.--- test purpose """
        action = []
        action_length = len(self.rangeAction)
        for i in range(0, action_length):
            action.append(self.randGenerator.uniform(self.rangeAction[i][0], self.rangeAction[i][1]))
        return action

if __name__ == "__main__":
    AgentLoader.loadAgent(helicopter_agent())
    def printQue(self):
        if self.strategyIndex == 0:  # BFS
            h = Queue.Queue()
        elif self.strategyIndex == 1 or self.strategyIndex == 2:  # DFS, ID
            h = Queue.LifoQueue()
        elif self.strategyIndex == 3 or self.strategyIndex == 4:  # UCS, A*
            h = Queue.PriorityQueue()
        while not self.heapQueue.empty():
            temp = self.heapQueue.get()
            h.put(temp)
            print temp[1],
        print

    def agent_end(self, reward):
        print reward

    def agent_cleanup(self):
        pass

    def agent_message(self, inMessage):
        if inMessage == "what is your name?":
            return "my name is skeleton_agent, Python edition!"
        else:
            return "I don't know how to respond to your message"

if __name__ == "__main__":
    AgentLoader.loadAgent(skeleton_agent())
    def agent_step(self, reward, observation):
        self.stepCount = self.stepCount + 1
        action = Action()
        action.intArray = observation.intArray
        action.doubleArray = observation.doubleArray
        action.charArray = observation.charArray
        return action

    def agent_end(self, reward):
        pass

    def agent_cleanup(self):
        pass

    def agent_message(self, inMessage):
        timesToPrint = self.stepCount % 3
        outMessage = inMessage + "|"
        for i in range(0, timesToPrint):
            outMessage = outMessage + "%d" % (self.stepCount)
            outMessage = outMessage + "."
        outMessage = outMessage + "|" + inMessage
        return outMessage

if __name__ == "__main__":
    AgentLoader.loadAgent(test_1_agent())
            plt.savefig('plots/%s_episode_qvals.png' % self.prefix)
            plt.close()
        except:
            print "Failed to render plots"

    def resize_image(self, observation):
        image = observation[128:].reshape(IMAGE_HEIGHT, IMAGE_WIDTH, 3)
        image = np.array(image, dtype='uint8')

        offset = 10  # remove ACTIVISION logo
        width = RESIZED_WIDTH
        height = int(round(float(IMAGE_HEIGHT) * RESIZED_HEIGHT / IMAGE_WIDTH))

        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        image = cv2.resize(image, (width, height), interpolation=cv2.INTER_LINEAR)
        image = image[height - offset - RESIZED_HEIGHT:height - offset, :]

        if False:
            plt.figure()
            plt.imshow(image, cmap=cm.Greys_r)
            plt.show()

        return image

if __name__ == '__main__':
    prefix = sys.argv[1] if len(sys.argv) > 1 else ""
    network_file = sys.argv[2] if len(sys.argv) > 2 else None
    AgentLoader.loadAgent(DeepQLearningAgent(prefix, network_file))
        theFile.close()

    def agent_message(self, inMessage):
        #Message Description
        # save_policy FILENAME
        # Action: Save current value function in binary format to
        # file called FILENAME
        #
        if inMessage.startswith("save_policy"):
            splitString = inMessage.split(" ")
            self.save_value_function(splitString[1])
            print "Saved."
            return "message understood, saving policy"

        #Message Description
        # load_policy FILENAME
        # Action: Load value function in binary format from
        # file called FILENAME
        #
        if inMessage.startswith("load_policy"):
            splitString = inMessage.split(" ")
            self.load_value_function(splitString[1])
            print "Loaded."
            return "message understood, loading policy"

        return "QLearnAgent(Python) does not understand your message."

if __name__ == "__main__":
    AgentLoader.loadAgent(QLearnAgent())
                    default=None, help='image height')
parser.add_argument('--width', metavar='W', type=int, default=None,
                    help='image width')
parser.add_argument('--color', metavar='L', type=str, default='ale',
                    help='frame color mode')
parser.add_argument('--name', metavar='N', type=str, default='frames',
                    help='output file name')
args = parser.parse_args()

if (args.width is None) or (args.height is None):
    resize = None
else:
    resize = (args.height, args.width)

print "Vision agent with capacity " + str(args.capacity)
AgentLoader.loadAgent(ALEVisionAgent(mem_capacity=args.capacity,
                                     resize=resize,
                                     color_mode=args.color,
                                     name=args.name))
print "--------------------------------------------------" s = hf.getOkolica(observation,ok,ok,ok,ok) print "step: %d reward: %.2f " % \ (self.trial_steps, self.trial_reward) print "\n".join(["".join(i) for i in s]) print "x: %2.2f y: %2.2f q-len: %d " % \ (mario.x, mario.y, len(self.Q)) print "" def print_stats(self): time_passed = time.time() - self.trial_start self.best_reward = max(self.best_reward,self.trial_reward) self.print_world() print "trial number: %d -" % (self.trial_number) print "number of steps: %d" % (self.trial_steps) print "steps per second: %d" % (self.trial_steps/time_passed) print "trial reward pos: %.2f" % (self.trial_reward_pos) print "trial reward neg: %.2f" % (self.trial_reward_neg) print "trial reward: %.2f" % (self.trial_reward) print "best score so far: %.2f" % (self.best_reward) print "" if __name__=="__main__": AgentLoader.loadAgent(FixedPolicyAgent())
        alpha = self.alpha / float(np.sum(self.phi != 0.))
        self.theta += alpha * delta * self.trace
        if not greedy:
            self.trace *= 0.  # reset trace
        return a_ns  # a_ns is action index (not action value)


class BasicALEQLearningAgent(BasicALEAgent, ALEQLearningAgent):
    pass


class RAMALEQLearningAgent(RAMALEAgent, ALEQLearningAgent):
    pass


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='run Sarsa Agent')
    parser.add_argument('--features', metavar='F', type=str, default='RAM',
                        help='features to use: RAM or BASIC')
    ALEQLearningAgent.register_with_parser(parser)
    args = parser.parse_args()
    if args.features == 'RAM':
        AgentLoader.loadAgent(RAMALEQLearningAgent(args))
    elif args.features == 'BASIC':
        AgentLoader.loadAgent(BasicALEQLearningAgent(args))
    else:
        raise Exception('unknown feature type')
                    help='features to use: RAM or BASIC')
parser.add_argument('--actions', metavar='C', type=int, default=None, nargs='*',
                    help='list of allowed actions')
args = parser.parse_args()

act = None
if args.actions is not None:
    act = np.array(args.actions)

if args.features == 'RAM':
    AgentLoader.loadAgent(RAMALESarsaAgent(agent_id=args.id,
                                           alpha=args.alpha,
                                           lambda_=args.lambda_,
                                           eps=args.eps,
                                           gamma=args.gamma,
                                           save_path=args.savepath,
                                           actions=act))
elif args.features == 'BASIC':
    AgentLoader.loadAgent(BasicALESarsaAgent(agent_id=args.id,
                                             alpha=args.alpha,
                                             lambda_=args.lambda_,
                                             eps=args.eps,
                                             gamma=args.gamma,
                                             save_path=args.savepath,
                                             actions=act))
else:
    print 'unknown feature type'
            alpha = 0.1
            self.thetax[x][last_action] = self.thetax[x][last_action] + alpha * delta * self.ex[x]
            self.thetax[y][last_action] = self.thetax[y][last_action] + alpha * delta * self.ey[y]
            self.thetaxy[x][y][last_action] = self.thetaxy[x][y][last_action] + alpha * delta * self.exy[x][y]
            #print Reward, self.thetax, self.thetay

    def maxim(self, state):
        return max(self.qfunction[state])

    def epsilon_greedy(self, state):
        if random.random() < self.epsilon:
            return random.randint(0, 3)
        else:
            k = self.qfunction[state].index(max(self.qfunction[state]))
            #print k
            return k

    def agent_cleanup(self):
        pass

    def agent_message(self, Message):
        pass

if __name__ == "__main__":
    AgentLoader.loadAgent(q_agent())
parser.add_argument("--alpha", metavar="A", type=float, default=0.5, help="learning rate") parser.add_argument("--lambda_", metavar="L", type=float, default=0.9, help="trace decay") parser.add_argument("--eps", metavar="E", type=float, default=0.05, help="exploration rate") parser.add_argument("--savepath", metavar="P", type=str, default=".", help="save path") parser.add_argument("--features", metavar="F", type=str, default="BASIC", help="features to use: RAM or BASIC") parser.add_argument("--actions", metavar="C", type=int, default=None, nargs="*", help="list of allowed actions") args = parser.parse_args() if args.features == "RAM": AgentLoader.loadAgent( RAMALEQlearningAgent( agent_id=args.id, alpha=args.alpha, lambda_=args.lambda_, eps=args.eps, gamma=args.gamma, save_path=args.savepath, actions=args.actions, ) ) elif args.features == "BASIC": AgentLoader.loadAgent( BasicALEQlearningAgent( agent_id=args.id, alpha=args.alpha, lambda_=args.lambda_, eps=args.eps, gamma=args.gamma, save_path=args.savepath, actions=args.actions,