def run_thread(agent_cls, map_name, visualize):
    with sc2_env.SC2Env(
            map_name=map_name,
            step_mul=FLAGS.step_mul,
            game_steps_per_episode=FLAGS.game_steps_per_episode,
            feature_screen_size=FLAGS.screen_resolution,
            feature_minimap_size=FLAGS.minimap_resolution,
            visualize=visualize,
            use_feature_units=True) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        agent = agent_cls()
        agent_name = FLAGS.agent_file
        # set the path to save the models and graphs
        path = 'models/' + agent_name
        # restore the model only if you have previously trained a model
        # agent.dqn.load_model(path)
        # run the steps
        run_loop.run_loop([agent], env, FLAGS.max_agent_steps)
        # save the model
        agent.dqn.save_model(path, 1)
        # plot cost and reward
        agent.dqn.plot_cost(path, save=True)
        agent.plot_reward(path, save=True)
        agent.plot_player_hp(path, save=True)
        agent.plot_enemy_hp(path, save=True)
        if FLAGS.save_replay:
            env.save_replay(agent_cls.__name__)

def run_thread(agent_cls, map_name, visualize):
    with sc2_env.SC2Env(
            map_name=map_name,
            step_mul=FLAGS.step_mul,
            game_steps_per_episode=FLAGS.game_steps_per_episode,
            feature_screen_size=FLAGS.screen_resolution,
            feature_minimap_size=FLAGS.minimap_resolution,
            visualize=visualize,
            use_feature_units=True) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        agent = agent_cls()
        # set the paths to save the models and graphs
        path1 = 'models/'
        path2 = 'graphs/'
        # restore the model only if you have previously trained a model
        if LOAD_MODEL:
            agent.ddpg.load_model(path1)
        # run the steps
        run_loop.run_loop([agent], env, FLAGS.max_agent_steps)
        # save the model
        if SAVE_MODEL:
            agent.ddpg.save_model(path1, 1)
        # plot cost and reward
        agent.ddpg.plot_cost(path2, save=SAVE_PIC)
        agent.ddpg.plot_reward(path2, save=SAVE_PIC)
        agent.plot_hp(path2, save=SAVE_PIC)
        if FLAGS.save_replay:
            env.save_replay(agent_cls.__name__)

def evaluate_k_episodes_and_avg(agent, env, k=5, write_results=False):
    # Evaluate multiple episodes and compute scores.
    agent.training = False
    avg_score, max_score, min_score = 0, float('-inf'), float('inf')
    print("Evaluating...")
    for epi in range(k):
        print("Running episode {}/{}.".format(epi + 1, k))
        for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
            if not is_done:
                continue
            obs = recorder[0].observation
            score = obs["score_cumulative"].score
            avg_score += score
            max_score = max(max_score, score)
            min_score = min(min_score, score)
            break
    avg_score /= k
    print("Max/Min/Avg score: {} / {} / {}".format(max_score, min_score, avg_score))
    agent.training = True
    # write results to the score_log file for visualization
    if write_results:
        log_fn = SCORE_LOG + '/log.dat'
        with open(log_fn, 'a') as fout:
            fout.write('\t'.join(
                map(str, [COUNTER, max_score, min_score, avg_score])) + '\n')
    return avg_score

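# A sketch (not part of the original code) of reading back the tab-separated
# log that evaluate_k_episodes_and_avg appends to; plot_score_log is a
# hypothetical helper and assumes matplotlib is available.
import matplotlib.pyplot as plt


def plot_score_log(log_fn):
    # Each line is: COUNTER <tab> max <tab> min <tab> avg.
    steps, max_s, min_s, avg_s = [], [], [], []
    with open(log_fn) as fin:
        for line in fin:
            c, hi, lo, avg = line.rstrip('\n').split('\t')
            steps.append(int(c))
            max_s.append(float(hi))
            min_s.append(float(lo))
            avg_s.append(float(avg))
    plt.plot(steps, avg_s, label='avg score')
    plt.fill_between(steps, min_s, max_s, alpha=0.3, label='min/max range')
    plt.xlabel('update counter')
    plt.ylabel('score')
    plt.legend()
    plt.show()
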
def run_thread(agent_classes, players, map_name, visualize):
    """Run one thread worth of the environment with agents."""
    with sc2_env.SC2Env(
            map_name=map_name,
            players=players,
            agent_interface_format=sc2_env.parse_agent_interface_format(
                feature_screen=FLAGS.feature_screen_size,
                feature_minimap=FLAGS.feature_minimap_size,
                rgb_screen=FLAGS.rgb_screen_size,
                rgb_minimap=FLAGS.rgb_minimap_size,
                action_space=FLAGS.action_space,
                use_feature_units=FLAGS.use_feature_units),
            step_mul=FLAGS.step_mul,
            game_steps_per_episode=FLAGS.game_steps_per_episode,
            visualize=visualize) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        agents = [agent_cls() for agent_cls in agent_classes]
        run_loop(agents, env, FLAGS.max_agent_steps, FLAGS.max_episodes,
                 FLAGS.feature_screen_size, FLAGS.save_replay)
        if FLAGS.save_replay:
            env.save_replay(agent_classes[0].__name__)

def run_thread(agent_classes, players, map_name, visualize):
    """Run one thread worth of the environment with agents."""
    SNAPSHOT = FLAGS.snapshot_path + FLAGS.map + '/' + FLAGS.net
    LOG = FLAGS.log_path + FLAGS.map + '/' + FLAGS.net
    if not os.path.exists(LOG):
        os.makedirs(LOG)
    with sc2_env.SC2Env(
            map_name=map_name,
            players=players,
            agent_interface_format=sc2_env.parse_agent_interface_format(
                feature_screen=FLAGS.feature_screen_size,
                feature_minimap=FLAGS.feature_minimap_size,
                rgb_screen=FLAGS.rgb_screen_size,
                rgb_minimap=FLAGS.rgb_minimap_size,
                action_space=FLAGS.action_space,
                use_feature_units=FLAGS.use_feature_units),
            step_mul=FLAGS.step_mul,
            game_steps_per_episode=FLAGS.game_steps_per_episode,
            disable_fog=FLAGS.disable_fog,
            visualize=False) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        # wouldn't work for agent vs. bot
        agents = [agent_cls(int(FLAGS.feature_minimap_size.x),
                            int(FLAGS.feature_screen_size.x), LOG)
                  for agent_cls in agent_classes]
        # run_loop.run_loop(agents, env, FLAGS.max_agent_steps, FLAGS.max_episodes)
        replay_buffer = []
        for recorder, is_done in run_loop.run_loop(
                agents, env, FLAGS.max_agent_steps, FLAGS.max_episodes):
            if FLAGS.training:
                replay_buffer.append(recorder)
                if is_done:
                    counter = 0
                    # with LOCK:
                    global COUNTER
                    COUNTER += 1
                    counter = COUNTER
                    # Learning rate schedule
                    learning_rate = FLAGS.learning_rate * (1 - 0.9 * counter / FLAGS.max_steps)
                    # print(replay_buffer)
                    agents[0].update(replay_buffer, FLAGS.discount, learning_rate, counter)
                    if counter % FLAGS.snapshot_step == 1:
                        agents[0].save_model()
                    replay_buffer = []
                    # if counter % FLAGS.snapshot_step == 1:
                    #     agents[0].save_model(SNAPSHOT, counter)
                    if counter >= FLAGS.max_steps:
                        break
            elif is_done:
                obs = recorder[-1].observation
                score = obs["score_cumulative"][0]
                print('Your score is ' + str(score) + '!')
        if FLAGS.save_replay:
            env.save_replay(agent_classes[0].__name__)

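# The same linear learning-rate schedule recurs in most of the loops in this
# file: the rate decays from the base value at counter=0 to 10% of it at
# max_steps (the loops break once counter >= max_steps). A self-contained
# sketch, for illustration only:
def scheduled_lr(base_lr, counter, max_steps):
    # Linear decay from base_lr down to 0.1 * base_lr.
    return base_lr * (1 - 0.9 * counter / max_steps)


assert abs(scheduled_lr(1e-3, 0, 1000) - 1e-3) < 1e-12     # full rate at the start
assert abs(scheduled_lr(1e-3, 1000, 1000) - 1e-4) < 1e-12  # 10% at max_steps
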
def run_thread(agent, map_name, agent_id=0):
    '''Set up and run the sc2_env loop.'''
    try:
        while True:
            with sc2_env.SC2Env(
                    map_name=FLAGS.map_name,
                    step_mul=FLAGS.step_mul,
                    visualize=FLAGS.render,
                    players=PLAYERS,
                    agent_interface_format=features.AgentInterfaceFormat(
                        feature_dimensions=features.Dimensions(
                            screen=FLAGS.screen_res,
                            minimap=FLAGS.minimap_res),
                        use_feature_units=True),
                    game_steps_per_episode=0) as env:
                env = available_actions_printer.AvailableActionsPrinter(env)
                # Only for a single player!
                # snapshot_dir = SNAPSHOT + str(id)  # if I need it later
                replay_buffer = []
                for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
                    if FLAGS.training:
                        replay_buffer.append(recorder)
                        if is_done:
                            counter = 0
                            with LOCK:
                                global COUNTER
                                COUNTER += 1
                                counter = COUNTER
                            # Learning rate schedule
                            learning_rate = FLAGS.learning_rate * (
                                1 - 0.9 * counter / FLAGS.max_steps)
                            agent.update(replay_buffer, FLAGS.gamma, learning_rate, counter)
                            replay_buffer = []
                            if counter % FLAGS.snapshot_step == 1:
                                agent.save_model(SNAPSHOT, counter)
                            if counter >= FLAGS.max_steps:
                                break
                    elif is_done:
                        obs = recorder[-1].observation
                        score = obs["score_cumulative"][0]
                        print('Your score is ' + str(score) + '!')  # does this ever print?
                if FLAGS.save_replay:
                    env.save_replay(agent.name)
    except KeyboardInterrupt:
        pass

def run_thread(agent_cls, map_name, visualize):
    with sc2_env.SC2Env(
            map_name=map_name,
            step_mul=FLAGS.step_mul,
            game_steps_per_episode=FLAGS.game_steps_per_episode,
            feature_screen_size=FLAGS.screen_resolution,
            feature_minimap_size=FLAGS.minimap_resolution,
            visualize=visualize,
            use_feature_units=True) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        agent = agent_cls()
        agent_name = FLAGS.agent_file
        # set the path to save the models and graphs
        path = 'graphs/'
        # run the steps
        run_loop.run_loop([agent], env, FLAGS.max_agent_steps)
        agent.plot_hp(path, save=SAVE_PIC)
        if FLAGS.save_replay:
            env.save_replay(agent_cls.__name__)

def run_thread(agent, map_name, visualize):
    with sc2_env.SC2Env(
            map_name=map_name,
            players=[
                sc2_env.Agent(sc2_env.Race.terran),
                # sc2_env.Agent(sc2_env.Race.zerg)
                # sc2_env.Bot(race=sc2_env.Race.zerg,
                #             difficulty=sc2_env.Difficulty.very_easy)
            ],
            agent_interface_format=features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(screen=64, minimap=64),
                use_feature_units=True),
            step_mul=FLAGS.step_mul,
            visualize=False) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        # Only for a single player!
        agent.set_action_spec(env.action_spec())
        replay_buffer = []
        # zerg_agent = ZergAgent()
        for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
            if FLAGS.training:
                replay_buffer.append(recorder)
                if is_done:
                    counter = 0
                    with LOCK:
                        global COUNTER
                        COUNTER += 1
                        counter = COUNTER
                    # Learning rate schedule
                    learning_rate = FLAGS.learning_rate * (
                        1 - 0.9 * counter / FLAGS.max_steps)
                    agent.update(replay_buffer, FLAGS.discount, learning_rate, counter)
                    replay_buffer = []
                    if counter % FLAGS.snapshot_step == 1:
                        agent.save_model(SNAPSHOT, counter)
                        print('saving model {}'.format(counter))
                    if counter >= FLAGS.max_steps:
                        break
            elif is_done:
                obs = recorder[-1].observation
                score = obs["score_cumulative"][0]
                print('Your score is ' + str(score) + '!')
        if FLAGS.save_replay:
            env.save_replay(agent.name)

def run_thread(agent, map_name, visualize):
    players = []
    agent_module, agent_name = FLAGS.agent.rsplit(".", 1)
    players.append(sc2_env.Agent(sc2_env.Race[FLAGS.agent_race],
                                 FLAGS.agent_name or agent_name))
    with sc2_env.SC2Env(
            map_name=map_name,
            players=players,
            step_mul=FLAGS.step_mul,
            agent_interface_format=sc2_env.parse_agent_interface_format(
                feature_screen=FLAGS.feature_screen_size,
                feature_minimap=FLAGS.feature_minimap_size,
                rgb_screen=FLAGS.rgb_screen_size,
                rgb_minimap=FLAGS.rgb_minimap_size,
                action_space=FLAGS.action_space,
                use_feature_units=FLAGS.use_feature_units),
            visualize=visualize) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        # Only for a single player!
        replay_buffer = []
        for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
            if FLAGS.training:
                replay_buffer.append(recorder)
                if is_done:
                    counter = 0
                    with LOCK:
                        global COUNTER
                        COUNTER += 1
                        counter = COUNTER
                    # Learning rate schedule
                    learning_rate = FLAGS.learning_rate * (
                        1 - 0.9 * counter / FLAGS.max_steps)
                    agent.update(replay_buffer, FLAGS.discount, learning_rate, counter)
                    replay_buffer = []
                    if counter % FLAGS.snapshot_step == 1:
                        agent.save_model(SNAPSHOT, counter)
                    if counter >= FLAGS.max_steps:
                        break
            elif is_done:
                obs = recorder[-1].observation
                score = obs["score_cumulative"][0]
                print('Your score is ' + str(score) + '!')
        if FLAGS.save_replay:
            env.save_replay(agent.name)

def run_thread(agent, map_name, visualize):
    with sc2_env.SC2Env(
            map_name=map_name,
            agent_race=FLAGS.agent_race,
            bot_race=FLAGS.bot_race,
            difficulty=FLAGS.difficulty,
            step_mul=FLAGS.step_mul,
            screen_size_px=(FLAGS.screen_resolution, FLAGS.screen_resolution),
            minimap_size_px=(FLAGS.minimap_resolution, FLAGS.minimap_resolution),
            visualize=visualize) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        # Only for a single player!
        replay_buffer = []
        for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
            if FLAGS.training:
                replay_buffer.append(recorder)
                if is_done:
                    counter = 0
                    with LOCK:
                        global COUNTER
                        COUNTER += 1
                        counter = COUNTER
                    # Learning rate schedule
                    learning_rate = FLAGS.learning_rate * (
                        1 - 0.9 * counter / FLAGS.max_steps)
                    agent.update(replay_buffer, FLAGS.discount, learning_rate, counter)
                    replay_buffer = []
                    if counter % FLAGS.snapshot_step == 1:
                        agent.save_model(SNAPSHOT, counter)
                    if counter >= FLAGS.max_steps:
                        break
                    if counter % 5 == 0:
                        obs = recorder[-1].observation
                        score = obs["score_cumulative"][0]
                        with open('scorelog', 'a') as f:
                            f.write(str(counter) + ' ' + str(score) + '\n')
            elif is_done:
                obs = recorder[-1].observation
                score = obs["score_cumulative"][0]
                print('Your score is ' + str(score) + '!')
        if FLAGS.save_replay:
            env.save_replay(agent.name)

def run_thread(agent, map_name, visualize):
    with sc2_env.SC2Env(
            map_name=map_name,
            agent_race=FLAGS.agent_race,
            bot_race=FLAGS.bot_race,
            difficulty=FLAGS.difficulty,
            step_mul=FLAGS.step_mul,
            screen_size_px=(FLAGS.screen_resolution, FLAGS.screen_resolution),
            minimap_size_px=(FLAGS.minimap_resolution, FLAGS.minimap_resolution),
            visualize=visualize) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        # TODO: don't discard replay_buffer, sample (20) from (2000)
        pc_buffer = deque()
        if FLAGS.training:
            for recorder, is_done in random_run_loop(env, REPLAY_BUFFER_SIZE):
                pc_buffer.append(recorder)
                if is_done:
                    break
            # A TimeStep is an immutable namedtuple, so mark the final recorded
            # step as terminal by rebuilding it rather than assigning to .last()
            # (assumes recorder[3] is a mutable list of TimeSteps and StepType
            # comes from pysc2.env.environment).
            last_ts = pc_buffer[-1][3][0]
            pc_buffer[-1][3][0] = last_ts._replace(step_type=StepType.LAST)
        # Only for a single player!
        env.reset()
        replay_buffer = []
        for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
            if FLAGS.training:
                pc_buffer.append(recorder)
                pc_buffer.popleft()
                replay_buffer.append(recorder)
                if is_done:
                    counter = 0
                    with LOCK:
                        global COUNTER
                        COUNTER += 1
                        counter = COUNTER
                    # Learning rate schedule
                    learning_rate = FLAGS.learning_rate * (1 - 0.9 * counter / FLAGS.max_steps)
                    agent.update(replay_buffer, FLAGS.discount, pc_buffer,
                                 learning_rate, counter)
                    replay_buffer = []
                    if counter % FLAGS.snapshot_step == 1:
                        agent.save_model(SNAPSHOT, counter)
                    if counter >= FLAGS.max_steps:
                        break
            elif is_done:
                obs = recorder[-1].observation
                score = obs["score_cumulative"][0]
                print('Your score is ' + str(score) + '!')
        if FLAGS.save_replay:
            env.save_replay(agent.name)

def run_thread(agent, players, map_name, visualize):
    """Run one thread worth of the environment with agents."""
    with sc2_env.SC2Env(
            map_name=map_name,
            players=players,
            agent_interface_format=sc2_env.parse_agent_interface_format(
                feature_screen=FLAGS.feature_screen_size,
                feature_minimap=FLAGS.feature_minimap_size,
                rgb_screen=FLAGS.rgb_screen_size,
                rgb_minimap=FLAGS.rgb_minimap_size,
                action_space=None,
                use_feature_units=FLAGS.use_feature_units,
                use_raw_units=FLAGS.use_raw_units),
            step_mul=FLAGS.step_mul,
            game_steps_per_episode=FLAGS.game_steps_per_episode,
            visualize=visualize) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        replay_buffer = []
        for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
            if FLAGS.training:
                replay_buffer.append(recorder)
                if is_done:
                    counter = 0
                    with LOCK:
                        global COUNTER
                        COUNTER += 1
                        counter = COUNTER
                    # Learning rate schedule
                    learning_rate = FLAGS.learning_rate * (
                        1 - 0.9 * counter / FLAGS.max_steps)
                    agent.update(replay_buffer, FLAGS.discount, learning_rate, counter)
                    replay_buffer = []
                    if counter % FLAGS.output_step == 1:
                        agent.save_model(OUTPUT, counter)
                    if counter >= FLAGS.max_steps:
                        break
                    obs = recorder[-1].observation
                    score = obs["score_cumulative"][0]
                    print('Your score is ' + str(score) + '!')
            elif is_done:
                obs = recorder[-1].observation
                score = obs["score_cumulative"][0]
                print('Your score is ' + str(score) + '!')
        if FLAGS.save_replay:
            env.save_replay(agent.name)

def train_one_episode(agent, env):
    # Step & update weights in one episode.
    # Only for a single player!
    global COUNTER
    replay_buffer = []
    for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS, COUNTER):
        replay_buffer.append(recorder)
        if not is_done:
            continue
        counter = 0
        with LOCK:
            COUNTER += 1
            counter = COUNTER
        # Learning rate schedule
        learning_rate = FLAGS.learning_rate * (1 - 0.9 * counter / FLAGS.max_steps)
        agent.update(replay_buffer, FLAGS.discount, learning_rate, counter)
        break

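# A hypothetical driver for train_one_episode, assuming the same globals
# (COUNTER, LOCK, FLAGS, SNAPSHOT) used elsewhere in this file; a sketch,
# not the original training entry point.
def train(agent, env):
    while COUNTER < FLAGS.max_steps:
        train_one_episode(agent, env)
        if COUNTER % FLAGS.snapshot_step == 1:
            agent.save_model(SNAPSHOT, COUNTER)
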
def run_thread(agent, map_name, visualize):
    with sc2_env.SC2Env(
            map_name=map_name,
            agent_race=FLAGS.agent_race,
            bot_race=FLAGS.bot_race,
            difficulty=FLAGS.difficulty,
            step_mul=FLAGS.step_mul,
            agent_interface_format=sc2_env.AgentInterfaceFormat(
                feature_dimensions=sc2_env.Dimensions(screen=64, minimap=64)),
            visualize=visualize) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        # Only for a single player!
        replay_buffer = []
        for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
            if FLAGS.training:
                replay_buffer.append(recorder)
                if is_done:
                    counter = 0
                    with LOCK:
                        global COUNTER
                        COUNTER += 1
                        counter = COUNTER
                    # Learning rate schedule
                    learning_rate = FLAGS.learning_rate * (1 - 0.9 * counter / FLAGS.max_steps)
                    agent.update(replay_buffer, FLAGS.discount, learning_rate, counter)
                    replay_buffer = []
                    if counter % FLAGS.snapshot_step == 1:
                        agent.save_model(SNAPSHOT, counter)
                    if counter >= FLAGS.max_steps:
                        break
            elif is_done:
                obs = recorder[-1].observation
                score = obs["score_cumulative"][0]
                print('Your score is ' + str(score) + '!')
        if FLAGS.save_replay:
            env.save_replay(agent.name)

def run_thread(agent, map_name, visualize):
    with sc2_env.SC2Env(
            map_name=map_name,
            agent_race=FLAGS.agent_race,
            bot_race=FLAGS.bot_race,
            difficulty=FLAGS.difficulty,
            step_mul=FLAGS.step_mul,
            screen_size_px=(FLAGS.screen_resolution, FLAGS.screen_resolution),
            minimap_size_px=(FLAGS.minimap_resolution, FLAGS.minimap_resolution),
            visualize=visualize) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        # Only for a single player!
        replay_buffer = []
        for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
            if FLAGS.training:
                replay_buffer.append(recorder)
                if is_done:
                    counter = 0
                    with LOCK:
                        global COUNTER
                        COUNTER += 1
                        counter = COUNTER
                    # Learning rate schedule
                    learning_rate = FLAGS.learning_rate * (1 - 0.9 * counter / FLAGS.max_steps)
                    agent.update(replay_buffer, FLAGS.discount, learning_rate, counter)
                    replay_buffer = []
                    if counter % FLAGS.snapshot_step == 1:
                        agent.save_model(SNAPSHOT, counter)
                    if counter >= FLAGS.max_steps:
                        break
            elif is_done:
                obs = recorder[-1].observation
                score = obs["score_cumulative"][0]
                print('Your score is ' + str(score) + '!')
        if FLAGS.save_replay:
            env.save_replay(agent.name)

def run_thread(agent, map_name, visualize, mlsh=False):
    scores = list()
    logger.info('Launching new SC2 environment...')
    with sc2_env.SC2Env(
            map_name=map_name,
            agent_race=FLAGS.agent_race,
            bot_race=FLAGS.bot_race,
            difficulty=FLAGS.difficulty,
            step_mul=FLAGS.step_mul,
            screen_size_px=(FLAGS.screen_resolution, FLAGS.screen_resolution),
            minimap_size_px=(FLAGS.minimap_resolution, FLAGS.minimap_resolution),
            visualize=visualize) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        logger.info('New SC2 environment launched successfully')
        logger.info('Minigame: %s', map_name)
        ep_counter = 0  # counts episodes for this particular thread
        # Gets the observations of each step during an episode, to learn from
        # once the episode is done.
        replay_buffer = []
        for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS,
                                          mlsh=mlsh, warmup=FLAGS.warmup_len,
                                          joint=FLAGS.joint_len):
            if FLAGS.training:
                replay_buffer.append(recorder)
                if is_done:
                    # End of an episode: the agent has interacted with the env
                    # and now we learn from the "replay".
                    counter = 0
                    with LOCK:
                        # counter counts episodes across all threads:
                        global COUNTER
                        COUNTER += 1
                        counter = COUNTER
                    # Learning rate schedule
                    learning_rate = FLAGS.learning_rate * (
                        1 - 0.9 * counter / FLAGS.max_steps)
                    agent.update(replay_buffer, FLAGS.discount, learning_rate, counter)
                    replay_buffer = []
                    if counter % FLAGS.snapshot_step == 1:
                        logger.info('Saving model to %s', SNAPSHOT)
                        agent.save_model(SNAPSHOT, counter)
                    if counter >= FLAGS.max_steps:
                        break
            if is_done:
                ep_counter += 1
                obs = recorder[-1].observation
                score = obs["score_cumulative"][0]
                scores.append(score)
                # ep_counter is per-thread, COUNTER is shared across threads
                logger.info(
                    '[Episode %s] Episode score: %.2f, mean score: %.2f, max score: %.2f',
                    ep_counter, score, np.mean(scores[-300:]), np.max(scores))
        if FLAGS.save_replay:
            env.save_replay(agent.name)

def run_thread(agent, players, map_name, visualize):
    global COUNTER
    with sc2_env.SC2Env(
            map_name=map_name,
            players=players,
            step_mul=FLAGS.step_mul,
            # screen_size_px=(FLAGS.screen_resolution, FLAGS.screen_resolution),
            # minimap_size_px=(FLAGS.minimap_resolution, FLAGS.minimap_resolution),
            agent_interface_format=sc2_env.parse_agent_interface_format(
                feature_screen=FLAGS.feature_screen_size,
                feature_minimap=FLAGS.feature_minimap_size,
                rgb_screen=FLAGS.rgb_screen_size,
                rgb_minimap=FLAGS.rgb_minimap_size,
                action_space=FLAGS.action_space),
            game_steps_per_episode=FLAGS.game_steps_per_episode,
            visualize=visualize) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        max_avg_score = 0.
        # pure evaluation
        if not FLAGS.training:
            evaluate_k_episodes_and_avg(agent, env, k=20)
            exit(0)
        # Only for a single player!
        replay_buffer = []
        for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS, COUNTER):
            if FLAGS.training:
                replay_buffer.append(recorder)
                if is_done:
                    counter = 0
                    with LOCK:
                        COUNTER += 1
                        counter = COUNTER
                    # Learning rate schedule
                    learning_rate = FLAGS.learning_rate * (
                        1 - 0.9 * counter / FLAGS.max_steps)
                    agent.update(replay_buffer, FLAGS.discount, learning_rate, counter)
                    replay_buffer = []
                    if counter % FLAGS.snapshot_step == 1:
                        agent.save_model(SNAPSHOT, counter)
                    if counter >= FLAGS.max_steps:
                        break
                    # eval with interval
                    if COUNTER % FLAGS.evaluate_every == 1 and COUNTER >= 0:
                        avg_sc = evaluate_k_episodes_and_avg(
                            agent, env, k=10, write_results=True)
                        if avg_sc > max_avg_score:
                            max_avg_score = avg_sc
                            agent.save_model(SNAPSHOT, COUNTER)
                        print("Current max average score: {}".format(max_avg_score))
            elif is_done:
                obs = recorder[-1].observation
                score = obs["score_cumulative"][0]
                print('Your score is ' + str(score) + '!')
        if FLAGS.save_replay:
            env.save_replay(agent.name)

def run_thread(agent, players, map_name, visualize):
    """Run one thread worth of the environment with agents."""
    with sc2_env.SC2Env(
            map_name=map_name,
            players=players,
            agent_interface_format=sc2_env.parse_agent_interface_format(
                feature_screen=FLAGS.feature_screen_size,
                feature_minimap=FLAGS.feature_minimap_size,
                rgb_screen=FLAGS.rgb_screen_size,
                rgb_minimap=FLAGS.rgb_minimap_size,
                action_space=FLAGS.action_space,
                use_feature_units=FLAGS.use_feature_units),
            step_mul=FLAGS.step_mul,
            game_steps_per_episode=FLAGS.game_steps_per_episode,
            disable_fog=FLAGS.disable_fog,
            visualize=visualize) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        # agents = [agent_cls() for agent_cls in agent_classes]
        start_at = 0
        global total_score
        replay_buffer = []
        for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
            if FLAGS.training:
                replay_buffer.append(recorder)
                if is_done:
                    counter = 0
                    obs = recorder[-1].observation
                    score = obs["score_cumulative"][0]
                    with LOCK:
                        global COUNTER
                        COUNTER += 1
                        if start_at == 0:
                            # Remember where this thread started counting, so
                            # COUNTER - start_at is the number of episodes seen
                            # (including this one, avoiding division by zero).
                            start_at = COUNTER - 1
                        counter = COUNTER
                        total_score += score
                        mean_score = total_score / (COUNTER - start_at)
                    # Learning rate schedule
                    learning_rate = FLAGS.learning_rate * (1 - 0.9 * counter / FLAGS.max_steps)
                    agent.update(replay_buffer, FLAGS.discount, learning_rate, counter)
                    replay_buffer = []
                    if counter % FLAGS.snapshot_step == 1:
                        agent.save_model(SNAPSHOT, counter)
                    if counter >= FLAGS.max_steps:
                        break
                    # I want a diagram!
                    summary = tf.Summary()
                    summary.value.add(tag='episode_score', simple_value=score)
                    summary_writer.add_summary(summary, COUNTER)
                    summary.value.add(tag='mean_score', simple_value=mean_score)
                    summary_writer.add_summary(summary, COUNTER)
                    logging.info("Your score is: %s, mean score is %s !",
                                 str(score), str(mean_score))
                    # print('Your score is ' + str(score) + '!')
            elif is_done:
                start_at += 1
                obs = recorder[-1].observation
                score = obs["score_cumulative"][0]
                total_score += score
                mean_score = total_score / start_at
                # I want a diagram!
                summary = tf.Summary()
                summary.value.add(tag='episode_score', simple_value=score)
                summary_writer.add_summary(summary, COUNTER)
                summary.value.add(tag='mean_score', simple_value=mean_score)
                summary_writer.add_summary(summary, COUNTER)
                logging.info("Your score is: %s, mean score is %s !",
                             str(score), str(mean_score))
        if FLAGS.save_replay:
            env.save_replay(agent.name)

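# The snippet above assumes a module-level, TF1-style summary_writer shared by
# the threads; a minimal sketch of its setup (the log directory is
# illustrative, not from the original code):
import tensorflow as tf

summary_writer = tf.summary.FileWriter('logs/run1')
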
print('action space: {}'.format(env.action_space))
print('act low/high: {} {}'.format(env.action_space.low, env.action_space.high))
print('obs low/high: {} {}'.format(env.observation_space.low, env.observation_space.high))

observers = _build_observers(env)
exploration_rate = decaying_value.DecayingValue(0.2, 0.1, TRAIN_EPISODES)
replay_buffer = memory.Memory(100000, env.observation_space.high.shape)
positive_demos = memory.from_demonstrations('positive_demos/',
                                            env.observation_space.high.shape)
# negative_demos = memory.from_demonstrations('negative_demos/',
#                                             env.observation_space.high.shape)
agent = ddpg_agent.DDPGAgent(env.action_space, env.observation_space,
                             exploration_rate, replay_buffer, positive_demos, None)
agent.pretrain_actor(2000)
agent.pretrain_critic(2000)
run_loop.run_loop(env, agent, TRAIN_EPISODES, MAX_STEPS_PER_EPISODE, observers)

wait = raw_input("Finished Training")
agent.set_learning(False)
observers.append(observer.Renderer(env, 20.))
# agent = keyboard_agent.KeyboardAgent(env)
run_loop.run_loop(env, agent, 10, None, observers)

def run_thread(agent, map_name, visualize, update_unitsel=True,
               max_steps=FLAGS.max_steps, update_main=True,
               use_unitsel=False, update_both=True):
    with sc2_env.SC2Env(
            map_name=map_name,
            agent_race=FLAGS.agent_race,
            bot_race=FLAGS.bot_race,
            difficulty=FLAGS.difficulty,
            step_mul=FLAGS.step_mul,
            screen_size_px=(FLAGS.screen_resolution, FLAGS.screen_resolution),
            minimap_size_px=(FLAGS.minimap_resolution, FLAGS.minimap_resolution),
            visualize=visualize) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        # Only for a single player!
        replay_buffer = []
        if agent_name == 'UnitSelAgent':
            for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS, use_unitsel):
                if FLAGS.training:
                    replay_buffer.append(recorder)
                    if is_done:
                        counter = 0
                        with LOCK:
                            global COUNTER, SAVE_COUNTER, MAIN_UPDATE_COUNTER, UNIT_SEL_COUNTER
                            COUNTER += 1
                            SAVE_COUNTER += 1
                            counter = COUNTER
                            if update_unitsel:
                                UNIT_SEL_COUNTER += 1
                            if update_main:
                                MAIN_UPDATE_COUNTER += 1
                        # Learning rate schedule
                        learning_rate = FLAGS.learning_rate * (1 - 0.9 * counter / FLAGS.max_steps)
                        if update_unitsel:
                            agent.update_unitsel(replay_buffer, FLAGS.discount, learning_rate, counter)
                        if update_main:
                            agent.update_main_policy(replay_buffer, FLAGS.discount, learning_rate, counter)
                        replay_buffer = []
                        if SAVE_COUNTER % FLAGS.snapshot_step == 1:
                            agent.save_model(SNAPSHOT, SAVE_COUNTER)
                        if counter >= max_steps:
                            break
                if is_done:
                    agent.reset_init_counter()
                    obs = recorder[-1].observation
                    score = obs["score_cumulative"][0]
                    global SCORE_BUFFER, MAX_SCORE
                    if len(SCORE_BUFFER) == 100:
                        SCORE_BUFFER.pop(0)
                    SCORE_BUFFER.append(score)
                    if len(SCORE_BUFFER) == 100:
                        avg_score = sum(SCORE_BUFFER) / 100
                    else:
                        avg_score = 0
                    if score > MAX_SCORE:
                        MAX_SCORE = score
                    print('Agent has been trained a total of ' + str(SAVE_COUNTER) + ' times')
                    print('Unit selector has been updated ' + str(UNIT_SEL_COUNTER) + ' times')
                    print('Main policy has been updated ' + str(MAIN_UPDATE_COUNTER) + ' times')
                    print('Your score is ' + str(score) + '!')
                    print('Average score in the last 100 runs: ' + str(avg_score))
                    print('Maximum score is: ' + str(MAX_SCORE))
        else:
            for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
                if FLAGS.training:
                    replay_buffer.append(recorder)
                    if is_done:
                        counter = 0
                        with LOCK:
                            global COUNTER, SAVE_COUNTER, BOTH_UPDATE_COUNTER, MASTER_UPDATE_COUNTER
                            COUNTER += 1
                            SAVE_COUNTER += 1
                            counter = COUNTER
                            if update_both:
                                BOTH_UPDATE_COUNTER += 1
                            else:
                                MASTER_UPDATE_COUNTER += 1
                        # Learning rate schedule
                        learning_rate = FLAGS.learning_rate * (1 - 0.9 * counter / FLAGS.max_steps)
                        if update_both:
                            agent.update_master_policy(replay_buffer, FLAGS.discount, learning_rate, counter)
                            agent.update(replay_buffer, FLAGS.discount, learning_rate, counter)
                        else:
                            agent.update_master_policy(replay_buffer, FLAGS.discount, learning_rate, counter)
                        replay_buffer = []
                        if SAVE_COUNTER % FLAGS.snapshot_step == 1:
                            agent.save_model(SNAPSHOT, SAVE_COUNTER)
                        if counter >= max_steps:
                            break
                if is_done:
                    obs = recorder[-1].observation
                    score = obs["score_cumulative"][0]
                    print('Agent has been trained a total of ' + str(SAVE_COUNTER) + ' times')
                    print('Master has been updated ' + str(MASTER_UPDATE_COUNTER) + ' times')
                    print('Both policies have been updated ' + str(BOTH_UPDATE_COUNTER) + ' times')
                    print('Your score is ' + str(score) + '!')
        if FLAGS.save_replay:
            env.save_replay(agent.name)

class RewardTracker(object):
    # Observer that accumulates the reward of each episode and reports it
    # when the next episode starts.
    def __init__(self):
        self._episode_reward = 0.0

    def __call__(self, env, agent, episode, cur_iter, obs, action, reward):
        if cur_iter == 0:
            print("agent average reward {}: {}".format(episode, self._episode_reward))
            self._episode_reward = 0.0
        self._episode_reward += reward


def render_observer(env, agent, episode, cur_iter, obs, action, reward):
    env.render()
    time.sleep(0.025)


def _build_observers():
    observers = []
    # observers.append(lambda env, agent, episode, cur_iter, obs, action, reward: )
    observers.append(RewardTracker())
    return observers


# env = gym.make('MountainCar-v0')
env = gym.make('CartPole-v0')
exploration_rate = decaying_value.DecayingValue(1.0, 0.1, TRAIN_EPISODES)
beta = decaying_value.DecayingValue(0.4, 1.0, TRAIN_EPISODES)
memory = memory.Memory(50000, env.observation_space.high.shape, 0.6, beta)
agent = dqn_learner.DQNLearner(env.action_space, env.observation_space,
                               exploration_rate, memory)
observers = _build_observers()
run_loop.run_loop(env, agent, TRAIN_EPISODES, MAX_STEPS_PER_EPISODE, observers)

wait = raw_input("Finished Training")
agent.set_learning(False)
run_loop.run_loop(env, agent, 2, None, [render_observer])