import sys

from absl import flags
from pysc2.env import sc2_env
from baselines import deepq

import dqfd  # local DQfD (deep Q-learning from demonstrations) module assumed by these scripts

FLAGS = flags.FLAGS

# Module-level settings assumed by the variants below (values are illustrative).
step_mul = 8   # game steps per agent step
steps = 2000   # agent steps per episode


# Variant 1: fixed hyperparameters, 64x64 feature layers, DQfD with an
# (initially empty) demonstration replay buffer.
def main():
  FLAGS(sys.argv)
  with sc2_env.SC2Env(
      map_name="DefeatZerglingsAndBanelings",
      step_mul=step_mul,
      visualize=True,
      players=[sc2_env.Agent(sc2_env.Race.terran)],
      agent_interface_format=sc2_env.AgentInterfaceFormat(
          feature_dimensions=sc2_env.Dimensions(screen=64, minimap=64)),
      game_steps_per_episode=steps * step_mul) as env:

    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=True)

    demo_replay = []
    act = dqfd.learn(
        env,
        q_func=model,
        num_actions=3,
        lr=1e-4,
        max_timesteps=10000000,
        buffer_size=100000,
        exploration_fraction=0.5,
        exploration_final_eps=0.01,
        train_freq=2,
        learning_starts=100000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=True,
        demo_replay=demo_replay)
    act.save("defeat_zerglings.pkl")
def main(): FLAGS(sys.argv) with sc2_env.SC2Env( "DefeatZerglingsAndBanelings", step_mul=step_mul, visualize=True, game_steps_per_episode=steps * step_mul) as env: model = deepq.models.cnn_to_mlp( convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)], hiddens=[256], dueling=True ) demo_replay = [] act = dqfd.learn( env, q_func=model, num_actions=3, lr=1e-4, max_timesteps=10000000, buffer_size=100000, exploration_fraction=0.5, exploration_final_eps=0.01, train_freq=2, learning_starts=100000, target_network_update_freq=1000, gamma=0.99, prioritized_replay=True, demo_replay=demo_replay ) act.save("defeat_zerglings.pkl")
def main(): FLAGS(sys.argv) logdir = "tensorboard" if(FLAGS.algorithm == "deepq"): logdir = "./tensorboard/zergling/%s/%s_%s_prio%s_duel%s_lr%s/%s" % ( FLAGS.algorithm, FLAGS.timesteps, FLAGS.exploration_fraction, FLAGS.prioritized, FLAGS.dueling, FLAGS.lr, start_time ) if(FLAGS.log == "tensorboard"): Logger.DEFAULT \ = Logger.CURRENT \ = Logger(dir='log.txt', output_formats=[TensorBoardOutputFormat(logdir)]) elif(FLAGS.log == "stdout"): os.mkdir(logdir) Logger.DEFAULT \ = Logger.CURRENT \ = Logger(dir=None, output_formats=[HumanOutputFormat(logdir+"/log.txt")]) with sc2_env.SC2Env( map_name="DefeatZerglingsAndBanelings", minimap_size_px = (FLAGS.minimap_size_px, FLAGS.minimap_size_px), step_mul=FLAGS.step_mul, visualize=FLAGS.visualize, game_steps_per_episode= FLAGS.episode_steps) as env: model = deepq.models.cnn_to_mlp( convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1), (64, 3, 1), (64, 3, 1), (32, 3, 1)], hiddens=[256], dueling=True ) act = dqfd.learn( env, q_func=model, num_actions=FLAGS.num_actions, lr=FLAGS.lr, print_freq= FLAGS.print_freq, max_timesteps=FLAGS.timesteps, buffer_size=FLAGS.buffer_size, exploration_fraction=FLAGS.exploration_fraction, exploration_final_eps=FLAGS.exploration_final_eps, train_freq=FLAGS.train_freq, learning_starts=FLAGS.learning_starts, target_network_update_freq=FLAGS.target_network_update_freq, gamma=FLAGS.gamma, prioritized_replay=FLAGS.prioritized, callback=deepq_callback ) act.save("defeat_zerglings.pkl")
def main(): FLAGS(sys.argv) logdir = "tensorboard" if (FLAGS.algorithm == "deepq"): logdir = "tensorboard/zergling/%s/%s_%s_prio%s_duel%s_lr%s/%s" % ( FLAGS.algorithm, FLAGS.timesteps, FLAGS.exploration_fraction, FLAGS.prioritized, FLAGS.dueling, FLAGS.lr, start_time) elif (FLAGS.algorithm == "acktr"): logdir = "tensorboard/zergling/%s/%s_num%s_lr%s/%s" % ( FLAGS.algorithm, FLAGS.timesteps, FLAGS.num_cpu, FLAGS.lr, start_time) if (FLAGS.log == "tensorboard"): Logger.DEFAULT \ = Logger.CURRENT \ = Logger(dir=None, output_formats=[TensorBoardOutputFormat(logdir)]) elif (FLAGS.log == "stdout"): Logger.DEFAULT \ = Logger.CURRENT \ = Logger(dir=None, output_formats=[HumanOutputFormat(sys.stdout)]) with sc2_env.SC2Env( map_name="DefeatZerglingsAndBanelings", step_mul=step_mul, visualize=True, agent_interface_format=sc2_env.AgentInterfaceFormat( feature_dimensions=sc2_env.Dimensions(screen=32, minimap=32)), game_steps_per_episode=steps * step_mul) as env: obs = env.reset() #print(obs[0].observation) model = deepq.models.cnn_to_mlp(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)], hiddens=[256], dueling=True) demo_replay = [] act = dqfd.learn(env, q_func=model, num_actions=3, lr=1e-4, max_timesteps=10000000, buffer_size=100000, exploration_fraction=0.5, exploration_final_eps=0.01, train_freq=2, learning_starts=100000, target_network_update_freq=1000, gamma=0.99, prioritized_replay=True, callback=deepq_callback) act.save("defeat_zerglings.pkl")
def main(): FLAGS(sys.argv) logdir = "tensorboard" if(FLAGS.algorithm == "deepq"): logdir = "tensorboard/zergling/%s/%s_%s_prio%s_duel%s_lr%s/%s" % ( FLAGS.algorithm, FLAGS.timesteps, FLAGS.exploration_fraction, FLAGS.prioritized, FLAGS.dueling, FLAGS.lr, start_time ) elif(FLAGS.algorithm == "acktr"): logdir = "tensorboard/zergling/%s/%s_num%s_lr%s/%s" % ( FLAGS.algorithm, FLAGS.timesteps, FLAGS.num_cpu, FLAGS.lr, start_time ) if(FLAGS.log == "tensorboard"): Logger.DEFAULT \ = Logger.CURRENT \ = Logger(dir=None, output_formats=[TensorBoardOutputFormat(logdir)]) elif(FLAGS.log == "stdout"): Logger.DEFAULT \ = Logger.CURRENT \ = Logger(dir=None, output_formats=[HumanOutputFormat(sys.stdout)]) with sc2_env.SC2Env( map_name="DefeatZerglingsAndBanelings", step_mul=step_mul, visualize=True, game_steps_per_episode=steps * step_mul) as env: model = deepq.models.cnn_to_mlp( convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)], hiddens=[256], dueling=True ) demo_replay = [] act = dqfd.learn( env, q_func=model, num_actions=3, lr=1e-4, max_timesteps=10000000, buffer_size=100000, exploration_fraction=0.5, exploration_final_eps=0.01, train_freq=2, learning_starts=100000, target_network_update_freq=1000, gamma=0.99, prioritized_replay=True, callback=deepq_callback ) act.save("defeat_zerglings.pkl")