def test2_greedy(self):
    env = Environment("map_init.txt", "population_init.txt")
    state = EnvState(env, None, None, GamePlayId.P1)
    player1 = Greedy(GamePlayId.P1)
    player2 = PacifistAgent(GamePlayId.P2)
    player1.search(state, player2)
    print(player1.writeout_path())
def test3_Astar(self):
    env = Environment("map_init.txt", "population_init.txt")
    state = EnvState(env, None, None, GamePlayId.P1)
    player1 = AStar(GamePlayId.P2)
    player2 = PassiveAgent(GamePlayId.P1)
    res = player1.search(state, player2)
    print(player1.writeout_path())
def test_invade_1(self):
    env = Environment("map_init.txt", "population_init.txt")
    p2 = Human(GamePlayId.P2)
    p2.invade(env, 2, 3, 6)
    self.assertEqual(env.country_list[1].troops_count, 1)
    self.assertEqual(env.country_list[2].troops_count, 1)
    self.assertEqual(env.country_list[1].owner_id, GamePlayId.P2)
def test_invade_2(self):
    env = Environment("map_init.txt", "population_init.txt")
    p2 = Human(GamePlayId.P2)
    try:
        p2.invade(env, 2, 4, 1)
        self.fail()  # invading your own country must raise
    except Exception as error:
        self.assertEqual(str(error), "Can't invade your own country")
def test_invade_3(self):
    env = Environment("map_init.txt", "population_init.txt")
    try:
        env.invade(GamePlayId.P2, 1, 4, 1)
        self.fail()  # invading without a route must raise
    except Exception as error:
        self.assertEqual(
            str(error),
            "Can't invade no route from ( " + str(1) + " ) to ( " + str(4) + " )")
def test_march_5(self):
    env = Environment("map_init.txt", "population_init.txt")
    try:
        env.march_troops(GamePlayId.P1, 1, 5, 1)
        self.fail()
    except Exception as error:
        self.assertEqual(
            str(error),
            "Can't march no route from ( " + str(1) + " ) to ( " + str(5) + " )")
def test_march_4(self):
    env = Environment("map_init.txt", "population_init.txt")
    try:
        env.march_troops(GamePlayId.P1, 1, 3, 13)
        self.fail()
    except Exception as error:
        self.assertEqual(
            str(error),
            "Not enough troops to march from ( " + str(1) + " ) to ( " + str(3)
            + " ) troops are in country are " + str(7))
def __init__(self, input_stream: TextIOBase, output_stream: TextIOBase, debug=False):
    self.env = Environment()
    extend_environment(self.env)
    self.handlers_net = HandlersNet(self.env, input_stream, output_stream)
    if debug:
        enable_debug_logging()
def test_copy(self):
    env = Environment("map_init.txt", "population_init.txt")
    env_c = copy.deepcopy(env)
    env_c.country_list[0].owner_id = None
    env_c.border_list[0].country1 = None
    env_c.continent_list[0].owner_id = None
    self.assertNotEqual(env.country_list[0].owner_id, None)
    self.assertNotEqual(env.border_list[0].country1, None)
    self.assertNotEqual(env.continent_list[0].owner_id, None)
def test1_RTAstar(self):
    env = Environment("map_init.txt", "population_init.txt")
    state = EnvState(env, None, None, GamePlayId.P1)
    player1 = RTAStar(GamePlayId.P1)
    state = player1.search(state, MoveType.DEPLOY)
    print(state.env.change, state)
    state = player1.search(state, MoveType.MARCH)
    print(state.env.change, state)
    state = player1.search(state, MoveType.INVADE)
    print(state.env.change, state)
def test_march_3(self):
    env = Environment("map_init.txt", "population_init.txt")
    p1 = Human(GamePlayId.P1)
    try:
        p1.march_troops(env, 1, 4, 1)
        self.fail()
    except Exception as error:
        self.assertEqual(
            str(error),
            "Can't march troops to unowned country : country owner ("
            + str(GamePlayId.P2) + ") ," + "player (" + str(GamePlayId.P1) + ")")
def test_march_2(self):
    env = Environment("map_init.txt", "population_init.txt")
    try:
        env.march_troops(GamePlayId.P1, 2, 3, 1)
        self.fail()
    except Exception as error:
        self.assertEqual(
            str(error),
            "Can't march troops from unowned country : country owner ("
            + str(GamePlayId.P2) + ") ," + "player (" + str(GamePlayId.P1) + ")")
def test_deploy_1(self):
    env = Environment("map_init.txt", "population_init.txt")
    p1 = Human(GamePlayId.P1)
    p2 = Human(GamePlayId.P2)
    p1.deploy_reserve_troops(env, 1, 1)
    try:
        p2.deploy_reserve_troops(env, 1, 1)
        self.fail()
    except Exception as error:
        self.assertEqual(
            str(error),
            "Can't deploy troops to unowned country : country owner ("
            + str(GamePlayId.P1) + ") ," + "player " + str(GamePlayId.P2) + ")")
def test_deploy_1(self):
    env = Environment("map_init.txt", "population_init.txt")
    env.reserve_1 = 1
    env.deploy_reserve_troops(GamePlayId.P1, 1)
    try:
        env.deploy_reserve_troops(GamePlayId.NONE, 1)
        self.fail()
    except Exception as error:
        self.assertEqual(
            str(error),
            "Can't deploy troops to unowned country : country owner ("
            + str(GamePlayId.P1) + ") ," + "player " + str(GamePlayId.NONE) + ")")
def __init__(self, player1_type, player2_type):
    self.player1 = self.get_player(player1_type, GamePlayId.P1)
    self.player2 = self.get_player(player2_type, GamePlayId.P2)
    self.env = Environment("/server_files/map_init.txt", "/server_files/population_init.txt")
    self.env.reserve_1 = 2
    self.env.reserve_2 = 2
    self.turn = True
    self.state = EnvState(self.env, None, None, GamePlayId.P1)
    self.history = []
    self.history_counter = 0
    # search-based agents precompute their full move history up front
    if player1_type in ("astar", "greedy"):
        self.history = self.player1.search(self.state, self.player2)
    elif player2_type in ("astar", "greedy"):
        self.history = self.player2.search(self.state, self.player1)
class ShellTransformerTest(unittest.TestCase):
    command_factory = CommandFactory(Environment())

    def setUp(self) -> None:
        self.parser = ShellParser(ShellTransformerTest.command_factory)

    def parse(self, string):
        return self.parser.parse(string)

    def testEmpty(self):
        result = self.parse('')
        self.assertEqual(0, len(result))

    def testEcho(self):
        result = self.parse('echo 123 | echo "hey" | echo 7')
        self.assertEqual(3, len(result))
        args = ['123', 'hey', '7']
        for command, arg in zip(result, args):
            self.assertTrue(isinstance(command, EchoCommand))
            self.assertEqual([arg], command.args)

    def testExit(self):
        result = self.parse('exit')
        self.assertEqual(1, len(result))
        command = result[0]
        self.assertTrue(isinstance(command, ExitCommand))

    @parameterized.expand([
        ('echo', 'echo 123', EchoCommand, ['123']),
        ('exit', 'exit', ExitCommand, []),
        ('cat', 'cat main.py', CatCommand, ['main.py']),
        ('custom command', '/bin/sh main.sh', CustomCommand, ['/bin/sh', 'main.sh']),
        ('equality', 'x = 3', AssignmentCommand, ['x', '3']),
        ('pwd', 'pwd', PwdCommand, []),
        ('wc', 'wc main.py', WcCommand, ['main.py']),
    ])
    def test(self, _, string, command_type, args):
        result = self.parse(string)
        self.assertEqual(1, len(result))
        command = result[0]
        self.assertTrue(isinstance(command, command_type))
        self.assertEqual(args, command.args)
def test_deploy_2(self):
    env = Environment("map_init.txt", "population_init.txt")
    p1 = Human(GamePlayId.P1)
    p1.deploy_reserve_troops(env, 1, 1)
    self.assertEqual(env.country_list[0].troops_count, 4)
def test_init(self):
    env = Environment("map_init.txt", "population_init.txt")
    for i, continent in enumerate(env.continent_list):
        for country_id in continent.country_list:
            self.assertEqual(env.country_list[country_id - 1].continent_id, i + 1)
def optimize_joint(system_nn, pol_nn, log_writer, **kwargs):
    # unpack kwargs
    horizon = kwargs.get("horizon")
    nb_iterations = kwargs.get("nb_iterations")
    batch_size = kwargs.get("batch_size")
    policy_fit = kwargs.get("policy", False)
    system_fit = kwargs.get("system", False)
    mc_samples = kwargs.get("mc_samples", 128)

    env = Environment(system_nn)
    agent = Agent(pol_nn, env, horizon)

    # Optimizers
    parameters_list = []
    if policy_fit:
        parameters_list = parameters_list + list(pol_nn.parameters())
    if system_fit:
        parameters_list = parameters_list + list(system_nn.parameters())
    if parameters_list:
        lr = kwargs.get("learning_rate", .001)
        optimizer = Adam(parameters_list, lr=lr)

    for it in range(nb_iterations):
        loss = {}
        params = {}

        # set gradient to zero
        optimizer.zero_grad()

        # generate the batch
        _, states_batch, dist_batch, _, oha_batch, rew_batch = agent.sample_trajectory(batch_size)

        # Loss
        system_loss = system_error(system_nn, pol_nn, states_batch, dist_batch, oha_batch, rew_batch)
        system_loss.backward(retain_graph=policy_fit)
        optimizer.step()
        system_nn.project_parameters()
        pol_nn.project_parameters()

        if system_fit and log_writer is not None:
            params['system'] = system_nn.unwrapped.named_parameters()
            log_writer.add_system_parameters(system_nn.parameters_dict(), step=it)
        if policy_fit and log_writer is not None:
            params['policy'] = pol_nn.named_parameters()
            actions = pol_nn(states_batch)  # (B, H, A), need to stack along the B dim
            log_writer.add_policy_histograms(actions.view(-1, actions.shape[2]), step=it)
        if log_writer is not None:
            loss['loss'] = system_loss.item()
            log_writer.add_grad_histograms(params, step=it)
            log_writer.add_loss(loss, step=it)

            # performance of the agent on the epoch
            ep_perf, return_estimate = agent.avg_performance(mc_samples)
            log_writer.add_expected_return(ep_perf, step=it)
            log_writer.add_return(return_estimate, step=it)

    return env, agent
def setUp(self) -> None:
    self.parser = SubstitutionParser()
    self.env = Environment()
def setUp(self) -> None:
    self.env = Environment()
def setUp(self) -> None:
    self.environment = Environment()
    self.input_stream = io.StringIO()
    self.output_stream = io.StringIO()
def main():
    RECORD_GIF = True
    V_LEN = 500  # record last 500 frames
    V_N = 3  # record v of every n'th frame
    MEMORY_SIZE = 3
    SCREEN_REGION = (3, 33, 1024, 606)
    # RECORD_REGION = (3, 33, 1024, 606)  # record for gif
    RECORD_REGION = (3, 33, 1624, 606)
    try:
        OD_init = os.path.join(os.getcwd(), 'logdir', 'OD')
        AE_init = os.path.join(os.getcwd(), 'logdir', 'AE', '0.0001_64', 'saved_model', 'model.ckpt')
        SL_init = os.path.join(os.getcwd(), 'logdir', 'SL', 'EXPERT_0.001_512_3', 'saved_model', 'model.ckpt')
        OD = OD_network(init_folder_path=OD_init)
        AE = AE_network(init_model_path=AE_init, testing=True)
        SL = SL_network(init_model_path=SL_init, testing=True, memory_size=MEMORY_SIZE)

        GIF = []
        GIF_N = 4  # every N frames record
        GIF_K = 3  # times lower resolution
        GIF_counter = 0
        plot = LivePlot(MAXLEN=V_LEN, N=V_N)
        state_memory = deque(maxlen=MEMORY_SIZE)
        env = Environment(SCREEN_REGION)
        new_frame, done = env.reset()
        paused = True
        while True:
            if not paused:
                GIF_counter += 1
                start_time = time.time()
                if RECORD_GIF:
                    if GIF_counter > GIF_N - 1:
                        GIF_counter = 0
                        r_screen = grab_screen(RECORD_REGION)
                        new_h, new_w = int(r_screen.shape[0] / GIF_K), int(r_screen.shape[1] / GIF_K)
                        GIF.append(cv2.resize(r_screen, (new_w, new_h)))
                        # GIF.append(r_screen)
                # run OD and AE nn's
                objects, features = OD.forward(new_frame)
                encoded_features = AE.forward(features)
                s = concat_outputs(objects, encoded_features, OD)
                state_memory.append(s[0])
                if len(state_memory) > MEMORY_SIZE - 1:
                    s_input = np.expand_dims(np.array(state_memory), axis=0)  # (1,m,261)
                    a_probs, v_normalized = SL.forward(s_input)  # (1,nA) (1,1)
                    v = v_normalized * (500 + 600) - 600
                    plot.emit(v[0][0])  # (1,1) -> (,)
                    a = probs_to_onehot_action(a_probs)
                else:
                    a = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1])  # 'nothing' action
                new_frame, r, done = env.step(a)
                if len(state_memory) > MEMORY_SIZE - 1:
                    total_time = time.time() - start_time
                    a_probs = np.array2string(a_probs, formatter={'float_kind': lambda x: "%.2f" % x})
                    print('time:{:>20} a:{}'.format(total_time, a_probs), end='\r')
            else:
                time.sleep(0.3)
                print('paused')
            paused, GIF = check_pause(paused, GIF, RECORD_GIF)
    except KeyboardInterrupt:
        env.stop()
        sys.exit()
def test_march_1(self):
    env = Environment("map_init.txt", "population_init.txt")
    p1 = Human(GamePlayId.P1)
    p1.march_troops(env, 1, 3, 1)
    self.assertEqual(env.country_list[0].troops_count, 2)
    self.assertEqual(env.country_list[2].troops_count, 6)
def main():
    SUMM_FOLDER_NAME = 'original'
    PLOT_LEN = 500
    PLOT_N = 3  # skip N frames
    MEMORY_SIZE = 3
    SCREEN_REGION = (3, 33, 1024, 606)
    BATCHES = 64
    EPISODE_LENGTH = 20
    REPLAY_MEMORY_SIZE = 100  # total=this*episode_length
    try:
        plot = LivePlot(MAXLEN=PLOT_LEN, N=PLOT_N)
        OD_init = os.path.join(os.getcwd(), 'logdir', 'OD')
        AE_init = os.path.join(os.getcwd(), 'logdir', 'AE', '0.0001_64', 'saved_model', 'model.ckpt')
        RL_init = None
        RL_LOGDIR = os.path.join(os.getcwd(), 'logdir', 'RL', SUMM_FOLDER_NAME)
        OD = OD_network(init_folder_path=OD_init)
        AE = AE_network(init_model_path=AE_init, testing=True)
        RL = RL_network(LOGDIR=RL_LOGDIR, memory_size=MEMORY_SIZE, init_model_path=RL_init)
        env = Environment(SCREEN_REGION)
        REPLAY_MEMORY = deque(maxlen=REPLAY_MEMORY_SIZE)
        while True:
            new_frame, done = env.reset()
            state_memory = deque(maxlen=MEMORY_SIZE)
            r_array = []  # (n,)
            v_array = []  # (n,)
            s_array = []  # (n,[5,261])
            a_array = []  # (n,[10,])
            start_time = time.time()
            for counter in range(EPISODE_LENGTH + MEMORY_SIZE + 1):  # +1 for last v
                # calculate current s
                objects, features = OD.forward(new_frame)
                encoded_features = AE.forward(features)
                s = concat_outputs(objects, encoded_features, OD)  # (1,261)
                s = s[0]  # (261)
                state_memory.append(s)
                # calc next a
                if len(state_memory) > MEMORY_SIZE - 1:
                    s_input = np.expand_dims(np.array(state_memory), axis=0)  # (1,m,261)
                    a_probs, v_normalized = RL.forward(s_input)  # (1,nA) (1,1)
                    v_normalized = v_normalized[0][0]  # (1,1) -> ()
                    v = v_normalized * (500 + 600) - 600
                    plot.emit(v)
                else:
                    a_probs = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])
                a = probs_to_onehot_action(a_probs)
                # take step
                new_frame, r, done = env.step(a)
                # save
                if len(state_memory) > MEMORY_SIZE - 1:
                    r_array.append(r)
                    v_array.append(np.expand_dims(v_normalized, axis=0))
                    s_array.append(state_memory)
                    a_array.append(a)
                # check if done
                if done:
                    break
                # print time
                total_time = time.time() - start_time
                a_probs = np.array2string(a_probs, formatter={'float_kind': lambda x: "%.2f" % x})
                print('time:{:>20} frame:{:>6} a:{}'.format(total_time, counter, a_probs), end='\r')
            if len(r_array) < 10:
                continue  # if insta done then skip to next frame
            print('')
            # when finished
            env.stop()
            if len(r_array) < EPISODE_LENGTH:
                r_array[-2] = -50
            v_correct = calc_v(r_array, v_end=v_array[-1])
            # create advantage array
            adv_array = calc_advantage(v_correct, np.array(v_array[:-1]))
            # shuffle arrays
            random_index = np.arange(len(v_array[:-1]))
            np.random.shuffle(random_index)
            adv_array = np.array(adv_array)[random_index]
            v_array = np.array(v_array[:-1])[random_index]
            s_array = np.array(s_array[:-1])[random_index]
            a_array = np.array(a_array[:-1])[random_index]
            # split into batches
            adv_array = split(adv_array, BATCHES)
            v_array = split(v_array, BATCHES)
            s_array = split(s_array, BATCHES)
            a_array = split(a_array, BATCHES)
            # update weights
            for i in range(len(a_array)):
                REPLAY_MEMORY.append([s_array[i], a_array[i], v_array[i], adv_array[i]])
            indexes = np.random.choice(np.arange(len(REPLAY_MEMORY)), len(REPLAY_MEMORY))
            for i in indexes:
                s_array, a_array, v_array, adv_array = REPLAY_MEMORY[i]
                RL.update(s_array, a_array, v_array, adv_array)
                print('updating weights {}/{} adv_mean: {}'.format(i, len(REPLAY_MEMORY), np.mean(adv_array)), end='\r')
            print('')
    except KeyboardInterrupt:
        env.stop()
        sys.exit()
def test_reference(self):
    # smoke test: constructing the environment and a player should not raise
    env = Environment("map_init.txt", "population_init.txt")
    p1 = Player(GamePlayId.P1)
    self.assertEqual(1, 1)
import torch

from environment.Environment import Environment
from policy.Policy import Policy

env = Environment('LunarLander-v2')
policy: Policy = Policy(env.observation_space(), env.action_space())
policy.load_state_dict(torch.load('saved_policy/policy.pt'))
policy.eval()

for episode in range(500):
    state = env.reset()
    done = False
    for time in range(1000):
        action = policy.select_action(state)
        state, reward, done, _ = env.step(action)
        env.render()
        if done:
            break

env.close()
parser.add_argument('--checkpoint-path', type=str, help="Path to checkpoint", default="")
parser.add_argument('--mode', type=str, help='Mode', default="train", choices=["train", "infer"])
args = parser.parse_args()

ray.init()

config = {}
with open(args.agent_config_path) as config_file:
    config.update(json.load(config_file))
config["explore"] = args.mode == "train"
with open(args.environment_config_path) as config_file:
    config.update(json.load(config_file))

env = Environment(config)
agent = Agent(config)

if args.mode == "train":
    config.update({
        "num_gpus": 0,
        "num_workers": 1,
        "monitor": False
    })
    tune.run(
        agent.__class__,
        name=env.__class__.__name__ + "_" + agent.__class__.__name__,
from tqdm import trange

from agents.agents import *
from environment.Environment import Environment

if __name__ == '__main__':
    fst_agent = GreedyVFuncAgent(1, 0.7, 0.1)
    snd_agent = BasicVFuncAgent(2, 0.7)
    for _ in trange(1000):
        env = Environment()
        turn = 0
        while not env.is_finished(env.field):
            # First agent's turn
            current_state = env.get_state_description(env.field)
            possible_actions = env.get_possible_actions(fst_agent.player_num)
            chosen_action = fst_agent.chose_action(current_state, possible_actions)
            prev_field, action, new_field, is_done, rewards = env.step(chosen_action, fst_agent.player_num)
            fst_reward, snd_reward = rewards
            if is_done:
                fst_agent.learn(fst_reward)
                snd_agent.learn(snd_reward)
                break
            # Second agent's turn
            current_state = env.get_state_description(env.field)
def setUp(self) -> None:
    self.parser = ShellParser(CommandFactory(Environment()))