class AgentThread(StoppableThread, RunnerListener):
    """Worker thread: owns its own environment, (optionally) a per-thread copy of
    the Q-model, and a Runner; reports every step back through on_step()."""

    def __init__(self, threadId, sess, graph):
        StoppableThread.__init__(self)
        self.threadId = threadId
        self.sess = sess
        self.graph = graph
        with self.graph.as_default():
            # Build the environment and the pre-processing pipelines.
            if args.atari:
                env = gym_env(args.game + 'NoFrameskip-v0')
                env = WarmUp(env, min_step=0, max_step=30)
                env = ActionRepeat(env, 4)
                proproc = PreProPipeline([GrayPrePro(), ResizePrePro(modelOps.INPUT_SIZE)])
                rewproc = PreProPipeline([RewardClipper(-1, 1)])
                #q_model = A3CModel(modelOps)
            else:
                if args.game == "Grid":
                    env = GridEnv()
                else:
                    env = gym_env(args.game)
                proproc = None
                rewproc = None
                #q_model = TabularQModel(modelOps)
            for trans in args.env_transforms:
                env = globals()[trans](env)

            # Either share the global model or create a per-thread copy that is
            # synchronized from it.
            if 'shared_model' in kargs and kargs['shared_model']:
                q_model = model
            else:
                q_model = globals()[args.model](modelOps)
                q_model.model_update = model.model
                q_model.set_weights(model.get_weights())

            summary_writer = tf.summary.FileWriter(
                args.logdir + '/thread-' + str(threadId),
                K.get_session().graph) if args.logdir is not None else None

            agentOps = DqnAgentOps()
            agentOps.double_dqn = args.double_dqn
            agentOps.REPLAY_START_SIZE = 1
            #agentOps.INITIAL_EXPLORATION = 0
            # Target-network cloning is handled globally in on_step(), so the
            # per-agent update frequency is effectively disabled here.
            agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = 1e10

            #replay_buffer = ReplayBuffer(int(1e6), 4, 4, agentOps.REPLAY_START_SIZE, 32)
            replay_buffer = None
            #if threadId > 0:
            if args.nstep > 0:
                replay_buffer = NStepBuffer(modelOps.AGENT_HISTORY_LENGTH, args.nstep)
            else:
                replay_buffer = ReplayBuffer(args.replay_buffer_size,
                                             modelOps.AGENT_HISTORY_LENGTH,
                                             args.update_frequency,
                                             args.replay_start_size,
                                             args.batch_size)
            #print(kargs['agent'])
            if kargs['agent'] == 'ActorCriticAgent':
                agent = ActorCriticAgent(env.action_space, q_model, replay_buffer,
                                         rewproc, agentOps, summary_writer,
                                         ac_model_update=model)
            else:
                agent = DqnAgent(env.action_space, q_model, replay_buffer, rewproc,
                                 agentOps, summary_writer, model_eval=model_eval)

            egreedyAgent = None
            if threadId > 0 and kargs['agent'] != 'ActorCriticAgent':  # first thread is for testing
                egreedyOps = EGreedyOps()
                egreedyOps.REPLAY_START_SIZE = replay_buffer.REPLAY_START_SIZE
                #egreedyOps.FINAL_EXPLORATION_FRAME = int(args.egreedy_final_step / args.thread_count)
                #if args.egreedy_decay < 1:
                #    egreedyAgent = EGreedyAgentExp(env.action_space, egreedyOps, agent)
                #else:
                if len(args.egreedy_props) > 1 and args.egreedy_props[0] == round(args.egreedy_props[0]):
                    # Integer proportions are treated as per-schedule thread counts:
                    # pick the exploration schedule for this thread's slot.
                    cs = np.array(args.egreedy_props)
                    cs = np.cumsum(cs)
                    idx = np.searchsorted(cs, threadId)
                    print('Egreedyagent selected', idx, args.egreedy_final[idx],
                          args.egreedy_decay[idx], args.egreedy_final_step[idx])
                    egreedyAgent = MultiEGreedyAgent(
                        env.action_space, egreedyOps, agent,
                        [1], [args.egreedy_final[idx]],
                        [args.egreedy_decay[idx]], [args.egreedy_final_step[idx]])
                else:
                    egreedyAgent = MultiEGreedyAgent(
                        env.action_space, egreedyOps, agent,
                        args.egreedy_props, args.egreedy_final,
                        args.egreedy_decay, args.egreedy_final_step)

            self.runner = Runner(env,
                                 egreedyAgent if egreedyAgent is not None else agent,
                                 proproc, modelOps.AGENT_HISTORY_LENGTH)
            if replay_buffer is not None:
                self.runner.listen(replay_buffer, proproc)
            self.runner.listen(agent, None)
            if egreedyAgent is not None:
                self.runner.listen(egreedyAgent, None)
            self.runner.listen(self, proproc)
        self.agent = agent
        self.q_model = q_model

    def run(self):
        with self.graph.as_default():
            self.runner.run()

    def on_step(self, ob, action, next_ob, reward, done):
        global T
        global model, model_eval
        with tLock:
            T = T + 1
            if T % target_network_update_freq == 0 and kargs['agent'] != 'ActorCriticAgent':
                #print('CLONE TARGET: ' + str(T))
                model_eval.set_weights(model.get_weights())
                for agent in agents:
                    agent.model_eval = model_eval
            if T % SAVE_FREQ == 0 and args.mode == "train":
                if args.output_dir is not None:
                    model.model.save_weights(args.output_dir + '/weights_{0}.h5'.format(T))
            #if T % 1000 == 0:
            #    print('STEP', T)
            #if self.threadId == 0 and T % 10 == 0:
            #    self.q_model.set_weights(model.get_weights())
            if T % args.render_step == 0 and ENABLE_RENDER:
                viewer.imshow(np.repeat(np.reshape(ob, ob.shape + (1,)), 3, axis=2))
            if T > args.max_step:
                self.stop()
        #print(T)

    def stop(self):
        super(AgentThread, self).stop()
        self.runner.stop()
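# ---------------------------------------------------------------------------
# A second, simpler variant of AgentThread follows (presumably from a separate
# CartPole/Grid training script): it hard-codes CartPoleModel, an NStepBuffer,
# and fixed epsilon-greedy settings instead of reading them from args/kargs.
# ---------------------------------------------------------------------------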
class AgentThread(StoppableThread, RunnerListener):
    def __init__(self, threadId, sess, graph):
        StoppableThread.__init__(self)
        self.threadId = threadId
        self.sess = sess
        self.graph = graph
        with self.graph.as_default():
            if args.game == "Grid":
                env = GridEnv()
            else:
                env = gym_env(args.game)
            env = Penalizer(env)
            proproc = None
            rewproc = None

            # Per-thread copy of the network, synchronized from the shared model.
            q_model = CartPoleModel(modelOps)
            q_model.model_update = model.model
            q_model.set_weights(model.get_weights())

            summary_writer = tf.summary.FileWriter(
                args.logdir + '/thread-' + str(threadId),
                K.get_session().graph) if args.logdir is not None else None

            agentOps = DqnAgentOps()
            agentOps.double_dqn = args.double_dqn
            agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = 20
            agentOps.REPLAY_START_SIZE = 1
            #agentOps.INITIAL_EXPLORATION = 0
            # Note: this overrides the value of 20 set above; target-network
            # cloning is handled globally in on_step() instead.
            agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = 1e10

            #replay_buffer = ReplayBuffer(int(1e6), 4, 4, agentOps.REPLAY_START_SIZE, 32)
            replay_buffer = NStepBuffer(modelOps.AGENT_HISTORY_LENGTH, args.nstep)

            agent = DqnAgent(env.action_space, q_model, replay_buffer, rewproc,
                             agentOps, summary_writer, model_eval=model_eval)

            egreedyOps = EGreedyOps()
            egreedyOps.REPLAY_START_SIZE = 1
            egreedyOps.FINAL_EXPLORATION_FRAME = 5000
            egreedyOps.FINAL_EXPLORATION = 0.01
            egreedyOps.DECAY = 0.999
            egreedyAgent = MultiEGreedyAgent(env.action_space, egreedyOps, agent,
                                             [0.4, 0.3, 0.3], [0.1, 0.01, 0.5])

            self.runner = Runner(env, egreedyAgent, proproc,
                                 modelOps.AGENT_HISTORY_LENGTH)
            self.runner.listen(replay_buffer, proproc)
            self.runner.listen(agent, None)
            self.runner.listen(egreedyAgent, None)
            self.runner.listen(self, proproc)

    def run(self):
        with self.graph.as_default():
            self.runner.run()

    def on_step(self, ob, action, next_ob, reward, done):
        global T
        global model, model_eval
        with tLock:
            T = T + 1
            #if T % 1000 == 0:
            #    print('STEP', T)
            if T % target_network_update_freq == 0:
                print('CLONE TARGET')
                model_eval.set_weights(model.get_weights())
                for agent in agents:
                    agent.model_eval = model_eval
            if T % args.render_step == 0 and ENABLE_RENDER:
                viewer.imshow(np.repeat(np.reshape(ob, ob.shape + (1,)), 3, axis=2))
            if T % SAVE_FREQ == 0 and args.mode == "train":
                if args.output_dir is not None:
                    model.model.save_weights(args.output_dir + '/weights_{0}.h5'.format(T))
        #print(T)

    def stop(self):
        super(AgentThread, self).stop()
        self.runner.stop()
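# Usage sketch (illustration only, not part of the original file): the module
# is assumed to define the globals referenced above (`args`, `kargs`, `model`,
# `model_eval`, `modelOps`, `agents`, `tLock`, `T`, `target_network_update_freq`,
# `SAVE_FREQ`, `ENABLE_RENDER`, `viewer`) before any threads are started.
#
#     graph = K.get_session().graph
#     threads = [AgentThread(i, K.get_session(), graph)
#                for i in range(args.thread_count)]
#     for t in threads:
#         t.start()
#     for t in threads:
#         t.join()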