def main():
    """Parse command-line options, configure logging, and start the bot Runner.

    Flags mirror the runtime configuration: a debug switch (raises the log
    level), the Discord bot token, and the database user/password/name.
    """
    parser = argparse.ArgumentParser(
        description="HasukoRPG parser version: {}".format(VERSION))
    parser.add_argument('-d', '--debug', action='store_true', help="Debug mode")
    parser.add_argument('-t', '--token', help="Specify the discord bot token")
    parser.add_argument('-u', '--user', help="Specify the user to the database")
    parser.add_argument('-p', '--password', help="Specify the password to the database")
    parser.add_argument('-n', '--db-name', help="Specify the database name")
    args = parser.parse_args()

    # Ensure the log directory exists. exist_ok=True avoids the
    # check-then-create race of the previous os.path.exists() guard.
    # NOTE(review): basicConfig below logs to stderr only — nothing here
    # writes into logs/; confirm whether a FileHandler was intended.
    os.makedirs("logs", exist_ok=True)

    level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(message)s', level=level)

    runner = Runner(args)
    runner.run()
def do_test_example_sets(self, algorithm):
    """Run *algorithm* against every configured example instance and assert
    completeness, tour length, and wall-clock time limits.

    Relies on instance attributes: TESTS_TO_RUN (indices to run),
    EXAMPLE_FILES, EXPECTED_TOTALS (negative means "no optimum posted"),
    EXPECTED_TIMES (seconds allowed per instance).
    NOTE(review): Python 2 code — uses the print statement and time.clock().
    """
    runner = Runner(algorithm)
    for i in self.TESTS_TO_RUN:
        input_file = self.EXAMPLE_FILES[i]
        output_file = "%s.tour" % input_file
        expected_total = self.EXPECTED_TOTALS[i]
        expected_time = self.EXPECTED_TIMES[i]
        # Run the algorithm and generate output file.
        # NOTE(review): mutates global sys.argv so runner.load() picks up
        # '-f <input_file>'; assumes argv already holds >= 3 entries.
        sys.argv[1] = '-f'
        sys.argv[2] = input_file
        runner.load()
        start_time = time.clock()
        runner.run()
        elapsed_time = time.clock() - start_time
        # verify that all cities are visited
        (all_match, problems) = visit.main(input_file, output_file)
        # output_file[3:] strips a leading path prefix for display —
        # presumably a "../"-style prefix; TODO confirm.
        message = "For %s, possible problems include:\n" % output_file[3:]
        for each in problems:
            message = "%s%s\n" % (message, problems[each])
        self.assertTrue(all_match, message)
        # verify correct solution length
        cities = self.readinstance(input_file)
        solution = self.readsolution(output_file)
        self.checksolution(cities, solution[0][0], solution[1])
        # verify that the problem was solved within the allotted time frame
        message = "For %s, execution time of %f exceeded allotted time of %f" % (output_file[3:], elapsed_time, expected_time)
        self.assertTrue(elapsed_time < expected_time, message)
        # verify that the solution is reasonably optimal if an optimal value is posted
        if expected_total >= 0:
            message = "For %s, the total distance %f exceeded the allowed %f" % (output_file[3:], solution[0][0], expected_total)
            self.assertTrue(solution[0][0] <= expected_total, message)
        print "%s passed with distance %d/%d and time %d/%d" % (self.EXAMPLE_FILES[i][3:], solution[0][0], expected_total, elapsed_time, expected_time)
# Build a single-process DQN training pipeline: agent options, replay
# buffer, epsilon-greedy exploration wrapper, and the Runner event loop.
# NOTE: these are module-level names (agent, runner, ...) that later code
# may reference, so the structure is documented rather than reshaped.
agentOps = DqnAgentOps()
agentOps.double_dqn = args.double_dqn
agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = 20  # sync target net every 20 steps
#agentOps.REPLAY_START_SIZE = 100
#agentOps.FINAL_EXPLORATION_FRAME = 10000
# Positional args presumably (capacity=2000, history=1, update_freq=1,
# start_size=1000, batch=64) by analogy with the commented call elsewhere —
# verify against ReplayBuffer's signature.
replay_buffer = ReplayBuffer(int(2000), 1, 1, 1000, 64)
#replay_buffer = NStepBuffer(modelOps.AGENT_HISTORY_LENGTH, 8)
agent = DqnAgent(env.action_space, q_model, replay_buffer, rewproc, agentOps, summary_writer)
# Exponentially decaying epsilon-greedy exploration, floor at 1%.
egreedyOps = EGreedyOps()
egreedyOps.REPLAY_START_SIZE = replay_buffer.REPLAY_START_SIZE
egreedyOps.FINAL_EXPLORATION_FRAME = 10000
egreedyOps.FINAL_EXPLORATION = 0.01
egreedyOps.DECAY = 0.999
egreedyAgent = EGreedyAgentExp(env.action_space, egreedyOps, agent)
# The Runner drives the environment; each listener receives step callbacks.
runner = Runner(env, egreedyAgent, proproc, 1)
runner.listen(replay_buffer, proproc)
runner.listen(agent, None)
runner.listen(egreedyAgent, None)
if viewer is not None:
    runner.listen(viewer, None)
if args.logdir is not None:
    # Periodically checkpoint network weights (every 50000 steps).
    networkSaver = NetworkSaver(50000, args.logdir, q_model.model)
    runner.listen(networkSaver, None)
runner.run()
class AgentThread(StoppableThread, RunnerListener):
    """Worker thread for asynchronous DQN/actor-critic training.

    Each thread builds its own environment and a local Q-model mirroring the
    globally shared ``model``, then streams experience through a Runner.
    Depends on module-level globals: args, kargs, model, model_eval,
    modelOps, T, tLock, agents, target_network_update_freq, SAVE_FREQ,
    ENABLE_RENDER, viewer.
    """

    def __init__(self, threadId, sess, graph):
        StoppableThread.__init__(self)
        self.threadId = threadId
        self.sess = sess
        self.graph = graph
        with self.graph.as_default():
            # --- Environment + preprocessing pipelines -------------------
            if args.atari:
                env = gym_env(args.game + 'NoFrameskip-v0')
                env = WarmUp(env, min_step=0, max_step=30)  # random no-op start
                env = ActionRepeat(env, 4)                  # frame-skip of 4
                proproc = PreProPipeline([GrayPrePro(), ResizePrePro(modelOps.INPUT_SIZE)])
                rewproc = PreProPipeline([RewardClipper(-1, 1)])
                #q_model = A3CModel(modelOps)
            else:
                if args.game == "Grid":
                    env = GridEnv()
                else:
                    env = gym_env(args.game)
                proproc = None
                rewproc = None
                #q_model = TabularQModel(modelOps)
            # Apply extra wrappers by class name, looked up in globals().
            for trans in args.env_transforms:
                env = globals()[trans](env)
            # --- Local Q-model, optionally the shared global model -------
            if 'shared_model' in kargs and kargs['shared_model']:
                q_model = model
            else:
                q_model = globals()[args.model](modelOps)
                q_model.model_update = model.model
                q_model.set_weights(model.get_weights())
            summary_writer = tf.summary.FileWriter(args.logdir + '/thread-' + str(threadId), K.get_session().graph) if not args.logdir is None else None
            # --- Agent options -------------------------------------------
            agentOps = DqnAgentOps()
            agentOps.double_dqn = args.double_dqn
            agentOps.REPLAY_START_SIZE = 1
            #agentOps.INITIAL_EXPLORATION = 0
            # Effectively disable the per-agent target sync; on_step() below
            # performs the global target clone instead.
            agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = 1e10
            #replay_buffer = ReplayBuffer(int(1e6), 4, 4, agentOps.REPLAY_START_SIZE, 32)
            replay_buffer = None
            #if threadId > 0:
            if args.nstep > 0:
                replay_buffer = NStepBuffer(modelOps.AGENT_HISTORY_LENGTH, args.nstep)
            else:
                replay_buffer = ReplayBuffer(args.replay_buffer_size, modelOps.AGENT_HISTORY_LENGTH, args.update_frequency, args.replay_start_size, args.batch_size)
            #print(kargs['agent'])
            if kargs['agent'] == 'ActorCriticAgent':
                agent = ActorCriticAgent(env.action_space, q_model, replay_buffer, rewproc, agentOps, summary_writer, ac_model_update=model)  #
            else:
                agent = DqnAgent(env.action_space, q_model, replay_buffer, rewproc, agentOps, summary_writer, model_eval=model_eval)  #
            egreedyAgent = None
            if threadId > 0 and kargs['agent'] != 'ActorCriticAgent':  # first thread is for testing
                egreedyOps = EGreedyOps()
                # NOTE(review): when args.nstep > 0 this reads
                # REPLAY_START_SIZE off an NStepBuffer — confirm that class
                # defines the attribute.
                egreedyOps.REPLAY_START_SIZE = replay_buffer.REPLAY_START_SIZE
                #egreedyOps.FINAL_EXPLORATION_FRAME = int(args.egreedy_final_step / args.thread_count)
                #if args.egreedy_decay<1:
                #	egreedyAgent = EGreedyAgentExp(env.action_space, egreedyOps, agent)
                #else:
                # If egreedy_props[0] is a whole number the props appear to be
                # treated as per-thread counts: cumulative-sum bucketing picks
                # one epsilon schedule for this thread — TODO confirm intent.
                if len(args.egreedy_props) > 1 and args.egreedy_props[0] == round(args.egreedy_props[0]):
                    cs = np.array(args.egreedy_props)
                    cs = np.cumsum(cs)
                    idx = np.searchsorted(cs, threadId)
                    print('Egreedyagent selected', idx, args.egreedy_final[idx], args.egreedy_decay[idx], args.egreedy_final_step[idx])
                    egreedyAgent = MultiEGreedyAgent(env.action_space, egreedyOps, agent, [1], [args.egreedy_final[idx]], [args.egreedy_decay[idx]], [args.egreedy_final_step[idx]])
                else:
                    # Otherwise pass the full schedule mixture through.
                    egreedyAgent = MultiEGreedyAgent(env.action_space, egreedyOps, agent, args.egreedy_props, args.egreedy_final, args.egreedy_decay, args.egreedy_final_step)
            # --- Wire everything into the Runner event loop --------------
            self.runner = Runner(env, egreedyAgent if egreedyAgent is not None else agent, proproc, modelOps.AGENT_HISTORY_LENGTH)
            if replay_buffer is not None:
                self.runner.listen(replay_buffer, proproc)
            self.runner.listen(agent, None)
            if egreedyAgent is not None:
                self.runner.listen(egreedyAgent, None)
            self.runner.listen(self, proproc)  # receive on_step() callbacks
            self.agent = agent
            self.q_model = q_model
        pass

    def run(self):
        # Thread entry point: run the episode loop under this thread's graph.
        with self.graph.as_default():
            self.runner.run()

    def on_step(self, ob, action, next_ob, reward, done):
        """Shared-step bookkeeping: bumps the global step counter under tLock,
        clones the target network, checkpoints weights, renders, and enforces
        the global step budget."""
        global T
        global model, model_eval
        with tLock:
            T = T + 1
            if T % target_network_update_freq == 0 and kargs['agent'] != 'ActorCriticAgent':
                #print('CLONE TARGET: ' + str(T))
                model_eval.set_weights(model.get_weights())
                # Point every agent at the freshly cloned evaluation model.
                for agent in agents:
                    agent.model_eval = model_eval
            if T % SAVE_FREQ == 0 and args.mode == "train":
                if not args.output_dir is None:
                    model.model.save_weights(args.output_dir + '/weights_{0}.h5'.format(T))
        #if T % 1000 == 0:
        #	print('STEP', T)
        #if self.threadId == 0 and T % 10 == 0:
        #	self.q_model.set_weights(model.get_weights())
        if T % args.render_step == 0 and ENABLE_RENDER:
            # Tile the (presumably grayscale) observation to 3 channels.
            viewer.imshow(np.repeat(np.reshape(ob, ob.shape + (1, )), 3, axis=2))
        if T > args.max_step:
            self.stop()
        #print(T)

    def stop(self):
        # Stop both the thread's flag and the runner loop it drives.
        super(AgentThread, self).stop()
        self.runner.stop()
class AgentThread(StoppableThread, RunnerListener):
    """Worker thread for asynchronous n-step DQN on CartPole-style tasks.

    Builds a per-thread environment and a CartPoleModel mirror of the shared
    global ``model``. Depends on module-level globals: args, model,
    model_eval, modelOps, T, tLock, agents, target_network_update_freq,
    SAVE_FREQ, ENABLE_RENDER, viewer.
    """

    def __init__(self, threadId, sess, graph):
        StoppableThread.__init__(self)
        self.threadId = threadId
        self.sess = sess
        self.graph = graph
        with self.graph.as_default():
            if args.game == "Grid":
                env = GridEnv()
            else:
                env = gym_env(args.game)
            env = Penalizer(env)
            proproc = None
            rewproc = None
            # Local model mirrors the shared global model's weights.
            q_model = CartPoleModel(modelOps)
            q_model.model_update = model.model
            q_model.set_weights(model.get_weights())
            summary_writer = tf.summary.FileWriter(args.logdir + '/thread-' + str(threadId), K.get_session().graph) if not args.logdir is None else None
            agentOps = DqnAgentOps()
            agentOps.double_dqn = args.double_dqn
            # NOTE(review): dead assignment — immediately overwritten by the
            # 1e10 value a few lines below.
            agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = 20
            agentOps.REPLAY_START_SIZE = 1
            #agentOps.INITIAL_EXPLORATION = 0
            # Per-agent target sync disabled; on_step() clones globally.
            agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = 1e10
            #replay_buffer = ReplayBuffer(int(1e6), 4, 4, agentOps.REPLAY_START_SIZE, 32)
            replay_buffer = NStepBuffer(modelOps.AGENT_HISTORY_LENGTH, args.nstep)
            agent = DqnAgent(env.action_space, q_model, replay_buffer, rewproc, agentOps, summary_writer, model_eval=model_eval)  #
            egreedyOps = EGreedyOps()
            egreedyOps.REPLAY_START_SIZE = 1
            egreedyOps.FINAL_EXPLORATION_FRAME = 5000
            egreedyOps.FINAL_EXPLORATION = 0.01
            egreedyOps.DECAY = 0.999
            # Mixture of epsilon schedules: weights 0.4/0.3/0.3 paired with
            # final epsilons 0.1/0.01/0.5.
            egreedyAgent = MultiEGreedyAgent(env.action_space, egreedyOps, agent, [0.4, 0.3, 0.3], [0.1, 0.01, 0.5])
            self.runner = Runner(env, egreedyAgent, proproc, modelOps.AGENT_HISTORY_LENGTH)
            self.runner.listen(replay_buffer, proproc)
            self.runner.listen(agent, None)
            self.runner.listen(egreedyAgent, None)
            self.runner.listen(self, proproc)  # receive on_step() callbacks
        pass

    def run(self):
        # Thread entry point: run the episode loop under this thread's graph.
        with self.graph.as_default():
            self.runner.run()

    def on_step(self, ob, action, next_ob, reward, done):
        """Shared-step bookkeeping: bumps the global step counter under tLock,
        clones the target network, renders, and checkpoints weights."""
        global T
        global model, model_eval
        with tLock:
            T = T + 1
            #if T % 1000 == 0:
            #	print('STEP', T)
            if T % target_network_update_freq == 0:
                print('CLONE TARGET')
                model_eval.set_weights(model.get_weights())
                # Point every agent at the freshly cloned evaluation model.
                for agent in agents:
                    agent.model_eval = model_eval
        if T % args.render_step == 0 and ENABLE_RENDER:
            # Tile the (presumably grayscale) observation to 3 channels.
            viewer.imshow(np.repeat(np.reshape(ob, ob.shape + (1, )), 3, axis=2))
        if T % SAVE_FREQ == 0 and args.mode == "train":
            if not args.output_dir is None:
                model.model.save_weights(args.output_dir + '/weights_{0}.h5'.format(T))
        #print(T)

    def stop(self):
        # Stop both the thread's flag and the runner loop it drives.
        super(AgentThread, self).stop()
        self.runner.stop()
def run(conf):
    """Read the INI configuration at *conf* and launch a Runner with it."""
    config = ConfigParser.ConfigParser()
    config.read(conf)
    Runner(config).run()
from runner.runner import Runner
from algorithm.collective_algorithm import RealTSPAlgorithm

# Entry script: load and solve the real TSP instance with the collective
# algorithm. Renamed so the variable no longer shadows the `runner` package.
tsp_runner = Runner(RealTSPAlgorithm())
tsp_runner.load()
tsp_runner.run()