Code Example #1
 def test2_greedy(self):
     env = Environment("map_init.txt", "population_init.txt")
     state = EnvState(env, None, None, GamePlayId.P1)
     player1 = Greedy(GamePlayId.P1)
     player2 = PacifistAgent(GamePlayId.P2)
     player1.search(state, player2)
     print(player1.writeout_path())
Code Example #2
 def test3_Astar(self):
     env = Environment("map_init.txt", "population_init.txt")
     state = EnvState(env, None, None, GamePlayId.P1)
     player1 = AStar(GamePlayId.P2)
     player2 = PassiveAgent(GamePlayId.P1)
     res = player1.search(state, player2)
     print(player1.writeout_path())
Code Example #3
 def test_invade_1(self):
     env = Environment("map_init.txt", "population_init.txt")
     p2 = Human(GamePlayId.P2)
     p2.invade(env, 2, 3, 6)
     self.assertEqual(env.country_list[1].troops_count, 1)
     self.assertEqual(env.country_list[2].troops_count, 1)
     self.assertEqual(env.country_list[1].owner_id , GamePlayId.P2)
Code Example #4
 def test_invade_2(self):
     env = Environment("map_init.txt", "population_init.txt")
     p2 = Human(GamePlayId.P2)
     try:
         p2.invade(env, 2, 4, 1)
         self.fail()
     except Exception as error:
         self.assertEqual(str(error), "Can't invade your own country")
Code Example #5
 def test_invade_3(self):
     env = Environment("map_init.txt", "population_init.txt")
     try:
         env.invade(GamePlayId.P2, 1, 4, 1)
         self.fail()
     except Exception as error:
         self.assertEqual(
             str(error), "Can't invade no route from ( " + str(1) +
             " ) to ( " + str(4) + " )")
Code Example #6
 def test_march_5(self):
     env = Environment("map_init.txt", "population_init.txt")
     try:
         env.march_troops(GamePlayId.P1, 1, 5, 1)
         self.fail()
     except Exception as error:
         self.assertEqual(
             str(error), "Can't march no route from ( " + str(1) +
             " ) to ( " + str(5) + " )")
Code Example #7
 def test_march_4(self):
     env = Environment("map_init.txt", "population_init.txt")
     try:
         env.march_troops(GamePlayId.P1, 1, 3, 13)
         self.fail()
     except Exception as error:
         self.assertEqual(
             str(error), "Not enough troops to march from ( " + str(1) +
             " ) to ( " + str(3) + " ) troops are in country are " + str(7))
Code Example #8
File: Shell.py Project: zuevmaxim/CLI
 def __init__(self,
              input_stream: TextIOBase,
              output_stream: TextIOBase,
              debug=False):
     self.env = Environment()
     extend_environment(self.env)
     self.handlers_net = HandlersNet(self.env, input_stream, output_stream)
     if debug:
         enable_debug_logging()
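A minimal usage sketch, assuming the class above is named Shell and is importable from this module (neither is confirmed by the snippet); io.StringIO satisfies the TextIOBase parameters:

import io
# hypothetical: drive the shell from in-memory streams with debug logging enabled
shell = Shell(io.StringIO("echo hello\n"), io.StringIO(), debug=True)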
Code Example #9
 def test_copy(self):
     env = Environment("map_init.txt", "population_init.txt")
     env_c = copy.deepcopy(env)
     env_c.country_list[0].owner_id = None
     env_c.border_list[0].country1 = None
     env_c.continent_list[0].owner_id = None
     self.assertNotEqual(env.country_list[0].owner_id, None)
     self.assertNotEqual(env.border_list[0].country1, None)
     self.assertNotEqual(env.continent_list[0].owner_id, None)
Code Example #10
 def test1_RTAstar(self):
     env = Environment("map_init.txt", "population_init.txt")
     state = EnvState(env, None, None, GamePlayId.P1)
     player1 = RTAStar(GamePlayId.P1)
     state = player1.search(state, MoveType.DEPLOY)
     print(state.env.change, state)
     state = player1.search(state, MoveType.MARCH)
     print(state.env.change, state)
     state = player1.search(state, MoveType.INVADE)
     print(state.env.change, state)
Code Example #11
 def test_march_3(self):
     env = Environment("map_init.txt", "population_init.txt")
     p1 = Human(GamePlayId.P1)
     try:
         p1.march_troops(env, 1, 4, 1)
         self.fail()
     except Exception as error:
         self.assertEqual(str(error),
                          "Can't march troops to unowned country : country owner ("
                          + str(GamePlayId.P2) + ") ,"
                          + "player (" + str(GamePlayId.P1) + ")")
Code Example #12
 def test_march_2(self):
     env = Environment("map_init.txt", "population_init.txt")
     try:
         env.march_troops(GamePlayId.P1, 2, 3, 1)
         self.fail()
     except Exception as error:
         self.assertEqual(
             str(error),
             "Can't march troops from unowned country : country owner (" +
             str(GamePlayId.P2) + ") ," + "player (" + str(GamePlayId.P1) +
             ")")
Code Example #13
 def test_deploy_1(self):
     env = Environment("map_init.txt", "population_init.txt")
     p1 = Human(GamePlayId.P1)
     p2 = Human(GamePlayId.P2)
     p1.deploy_reserve_troops(env, 1, 1)
     try:
         p2.deploy_reserve_troops(env, 1, 1)
         self.fail()
     except Exception as error:
         self.assertEqual(str(error), "Can't deploy troops to unowned country : country owner ("
                          + str(GamePlayId.P1) + ") ,"
                          + "player " + str(GamePlayId.P2) + ")")
Code Example #14
 def test_deploy_1(self):
     env = Environment("map_init.txt", "population_init.txt")
     env.reserve_1 = 1
     env.deploy_reserve_troops(GamePlayId.P1, 1)
     try:
         env.deploy_reserve_troops(GamePlayId.NONE, 1)
         self.fail()
     except Exception as error:
         self.assertEqual(
             str(error),
             "Can't deploy troops to unowned country : country owner (" +
             str(GamePlayId.P1) + ") ," + "player " + str(GamePlayId.NONE) +
             ")")
Code Example #15
 def __init__(self, player1_type, player2_type):
     self.player1 = self.get_player(player1_type, GamePlayId.P1)
     self.player2 = self.get_player(player2_type, GamePlayId.P2)
     self.env = Environment("/server_files/map_init.txt",
                            "/server_files/population_init.txt")
     self.env.reserve_1 = 2
     self.env.reserve_2 = 2
     self.turn = True
     self.state = EnvState(self.env, None, None, GamePlayId.P1)
     self.history = []
     self.history_counter = 0
     if player1_type == "astar":
         self.history = self.player1.search(self.state, self.player2)
     elif player1_type == "greedy":
         self.history = self.player1.search(self.state, self.player2)
     elif player2_type == "astar":
         self.history = self.player2.search(self.state, self.player1)
     elif player2_type == "greedy":
         self.history = self.player2.search(self.state, self.player1)
Code Example #16
class ShellTransformerTest(unittest.TestCase):
    command_factory = CommandFactory(Environment())

    def setUp(self) -> None:
        self.parser = ShellParser(ShellTransformerTest.command_factory)

    def parse(self, string):
        return self.parser.parse(string)

    def testEmpty(self):
        result = self.parse('')
        self.assertEqual(0, len(result))

    def testEcho(self):
        result = self.parse('echo 123 | echo "hey" | echo 7')
        self.assertEqual(3, len(result))
        args = ['123', 'hey', '7']
        for command, arg in zip(result, args):
            self.assertTrue(isinstance(command, EchoCommand))
            self.assertEqual([arg], command.args)

    def testExit(self):
        result = self.parse('exit')
        self.assertEqual(1, len(result))
        command = result[0]
        self.assertTrue(isinstance(command, ExitCommand))

    @parameterized.expand([
        ('echo', 'echo 123', EchoCommand, ['123']),
        ('exit', 'exit', ExitCommand, []),
        ('cat', 'cat main.py', CatCommand, ['main.py']),
        ('custom command', '/bin/sh main.sh', CustomCommand, ['/bin/sh', 'main.sh']),
        ('equality', 'x = 3', AssignmentCommand, ['x', '3']),
        ('pwd', 'pwd', PwdCommand, []),
        ('wc', 'wc main.py', WcCommand, ['main.py']),
    ])
    def test(self, _, string, command_type, args):
        result = self.parse(string)
        self.assertEqual(1, len(result))
        command = result[0]
        self.assertTrue(isinstance(command, command_type))
        self.assertEqual(args, command.args)
Code Example #17
 def test_deploy_2(self):
     env = Environment("map_init.txt", "population_init.txt")
     p1 = Human(GamePlayId.P1)
     p1.deploy_reserve_troops(env, 1, 1)
     self.assertEqual(env.country_list[0].troops_count, 4)
Code Example #18
 def test_init(self):
     env = Environment("map_init.txt", "population_init.txt")
     for i, continent in enumerate(env.continent_list):
         for country_id in continent.country_list:
             self.assertEqual(env.country_list[country_id - 1].continent_id,
                              i + 1)
Code Example #19
def optimize_joint(system_nn, pol_nn, log_writer, **kwargs):
    # unpack kwargs
    horizon = kwargs.get("horizon")
    nb_iterations = kwargs.get("nb_iterations")
    batch_size = kwargs.get("batch_size")

    policy_fit = kwargs.get("policy", False)
    system_fit = kwargs.get("system", False)

    mc_samples = kwargs.get("mc_samples", 128)

    env = Environment(system_nn)
    agent = Agent(pol_nn, env, horizon)

    # Optimizers
    parameters_list = []
    if policy_fit:
        parameters_list = parameters_list + list(pol_nn.parameters())

    if system_fit:
        parameters_list = parameters_list + list(system_nn.parameters())
    if parameters_list:
        lr = kwargs.get("learning_rate", .001)
        optimizer = Adam(parameters_list, lr=lr)

        for it in range(nb_iterations):
            loss = {}
            params = {}

            # set gradient to zero
            optimizer.zero_grad()

            # generate the batch
            _, states_batch, dist_batch, _, oha_batch, rew_batch = agent.sample_trajectory(
                batch_size)

            # Loss #
            system_loss = system_error(system_nn, pol_nn, states_batch,
                                       dist_batch, oha_batch, rew_batch)

            system_loss.backward(retain_graph=policy_fit)

            optimizer.step()
            system_nn.project_parameters()
            pol_nn.project_parameters()

            if system_fit and log_writer is not None:
                params['system'] = system_nn.unwrapped.named_parameters()
                log_writer.add_system_parameters(system_nn.parameters_dict(),
                                                 step=it)

            if policy_fit and log_writer is not None:
                params['policy'] = pol_nn.named_parameters()
                actions = pol_nn(
                    states_batch)  # (B, H, A), need to stack along the B dim
                log_writer.add_policy_histograms(actions.view(
                    -1, actions.shape[2]),
                                                 step=it)

            if log_writer is not None:
                loss['loss'] = system_loss.item()

                log_writer.add_grad_histograms(params, step=it)
                log_writer.add_loss(loss, step=it)

                # performance of the agent on the epoch
                ep_perf, return_estimate = agent.avg_performance(mc_samples)
                log_writer.add_expected_return(ep_perf, step=it)
                log_writer.add_return(return_estimate, step=it)

    return env, agent
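A hedged calling sketch for optimize_joint: the keyword names match the kwargs.get lookups above, but system_nn, pol_nn, and the writer are placeholders for the project's actual network objects, and the numeric values are illustrative only:

# assumes system_nn and pol_nn are already-constructed modules exposing the
# parameters()/project_parameters() methods used inside optimize_joint
env, agent = optimize_joint(
    system_nn, pol_nn, log_writer=None,
    horizon=50, nb_iterations=200, batch_size=32,
    policy=True, system=True, mc_samples=128, learning_rate=1e-3)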
Code Example #20
File: test_Substitution.py Project: zuevmaxim/CLI
 def setUp(self) -> None:
     self.parser = SubstitutionParser()
     self.env = Environment()
Code Example #21
File: test_Environment.py Project: zuevmaxim/CLI
 def setUp(self) -> None:
     self.env = Environment()
Code Example #22
File: CommandTest.py Project: zuevmaxim/CLI
 def setUp(self) -> None:
     self.environment = Environment()
     self.input_stream = io.StringIO()
     self.output_stream = io.StringIO()
Code Example #23
File: test_agent.py Project: doas3140/limbo-ai
def main():
    RECORD_GIF = True
    V_LEN = 500  # record last 500 frames
    V_N = 3  # record v of every n'th frame
    MEMORY_SIZE = 3
    SCREEN_REGION = (3, 33, 1024, 606)
    # RECORD_REGION = (3,33,1024,606) # record for gif
    RECORD_REGION = (3, 33, 1624, 606)
    try:
        OD_init = os.path.join(os.getcwd(), 'logdir', 'OD')
        AE_init = os.path.join(os.getcwd(), 'logdir', 'AE', '0.0001_64',
                               'saved_model', 'model.ckpt')
        SL_init = os.path.join(os.getcwd(), 'logdir', 'SL',
                               'EXPERT_0.001_512_3', 'saved_model',
                               'model.ckpt')
        OD = OD_network(init_folder_path=OD_init)
        AE = AE_network(init_model_path=AE_init, testing=True)
        SL = SL_network(init_model_path=SL_init,
                        testing=True,
                        memory_size=MEMORY_SIZE)

        GIF = []
        GIF_N = 4  # every N frames record
        GIF_K = 3  # times lower resolution
        GIF_counter = 0

        plot = LivePlot(MAXLEN=V_LEN, N=V_N)

        state_memory = deque(maxlen=MEMORY_SIZE)
        env = Environment(SCREEN_REGION)
        new_frame, done = env.reset()
        paused = True
        while True:
            if not paused:
                GIF_counter += 1
                start_time = time.time()
                if RECORD_GIF:
                    if GIF_counter > GIF_N - 1:
                        GIF_counter = 0
                        r_screen = grab_screen(RECORD_REGION)
                        new_h, new_w = int(r_screen.shape[0] / GIF_K), int(
                            r_screen.shape[1] / GIF_K)
                        GIF.append(cv2.resize(r_screen, (new_w, new_h)))
                        # GIF.append(r_screen)
                # run OD and AE nn's
                objects, features = OD.forward(new_frame)
                encoded_features = AE.forward(features)
                s = concat_outputs(objects, encoded_features, OD)
                state_memory.append(s[0])
                if len(state_memory) > MEMORY_SIZE - 1:
                    s_input = np.expand_dims(np.array(state_memory),
                                             axis=0)  # (1,m,261)
                    a_probs, v_normalized = SL.forward(s_input)  # (1,nA) (1,1)
                    v = v_normalized * (500 + 600) - 600
                    plot.emit(v[0][0])  # (1,1) -> (,)
                    a = probs_to_onehot_action(a_probs)
                else:
                    a = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0,
                                  1])  # 'nothing' action
                new_frame, r, done = env.step(a)
                if len(state_memory) > MEMORY_SIZE - 1:
                    total_time = time.time() - start_time
                    a_probs = np.array2string(
                        a_probs,
                        formatter={'float_kind': lambda x: "%.2f" % x})
                    print('time:{:>20} a:{}'.format(total_time, a_probs),
                          end='\r')
            else:
                time.sleep(0.3)
                print('paused')
            paused, GIF = check_pause(paused, GIF, RECORD_GIF)
    except KeyboardInterrupt:
        env.stop()
        sys.exit()
Code Example #24
 def test_march_1(self):
     env = Environment("map_init.txt", "population_init.txt")
     p1 = Human(GamePlayId.P1)
     p1.march_troops(env, 1, 3, 1)
     self.assertEqual(env.country_list[0].troops_count, 2)
     self.assertEqual(env.country_list[2].troops_count, 6)
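Read together with the deploy tests above, a single player turn could be sketched as follows; the calls are copied from those tests, but the argument order (country id, troop count) for deploy_reserve_troops is an assumption, since both values are 1 in the tests:

env = Environment("map_init.txt", "population_init.txt")
p1 = Human(GamePlayId.P1)
p1.deploy_reserve_troops(env, 1, 1)  # reinforce country 1 (argument order assumed)
p1.march_troops(env, 1, 3, 1)        # move 1 troop from country 1 to country 3, as in test_march_1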
Code Example #25
def main():
    SUMM_FOLDER_NAME = 'original'
    PLOT_LEN = 500
    PLOT_N = 3  # skip N frames
    MEMORY_SIZE = 3
    SCREEN_REGION = (3, 33, 1024, 606)
    BATCHES = 64
    EPISODE_LENGTH = 20
    REPLAY_MEMORY_SIZE = 100  # total=this*episode_length
    try:
        plot = LivePlot(MAXLEN=PLOT_LEN, N=PLOT_N)
        OD_init = os.path.join(os.getcwd(), 'logdir', 'OD')
        AE_init = os.path.join(os.getcwd(), 'logdir', 'AE', '0.0001_64',
                               'saved_model', 'model.ckpt')
        RL_init = None
        RL_LOGDIR = os.path.join(os.getcwd(), 'logdir', 'RL', SUMM_FOLDER_NAME)
        OD = OD_network(init_folder_path=OD_init)
        AE = AE_network(init_model_path=AE_init, testing=True)
        RL = RL_network(LOGDIR=RL_LOGDIR,
                        memory_size=MEMORY_SIZE,
                        init_model_path=RL_init)
        env = Environment(SCREEN_REGION)
        REPLAY_MEMORY = deque(maxlen=REPLAY_MEMORY_SIZE)

        while True:
            new_frame, done = env.reset()
            state_memory = deque(maxlen=MEMORY_SIZE)
            r_array = []  # (n,)
            v_array = []  # (n,)
            s_array = []  # (n,[5,261])
            a_array = []  # (n,[10,])
            start_time = time.time()
            for counter in range(EPISODE_LENGTH + MEMORY_SIZE +
                                 1):  # +1 for last v
                # calculate current s
                objects, features = OD.forward(new_frame)
                encoded_features = AE.forward(features)
                s = concat_outputs(objects, encoded_features, OD)  # (1,261)
                s = s[0]  # (261)
                state_memory.append(s)
                # calc next a
                if len(state_memory) > MEMORY_SIZE - 1:
                    s_input = np.expand_dims(np.array(state_memory),
                                             axis=0)  # (1,m,261)
                    a_probs, v_normalized = RL.forward(s_input)  # (1,nA) (1,1)
                    v_normalized = v_normalized[0][0]  # (1,1) -> ()
                    v = v_normalized * (500 + 600) - 600
                    plot.emit(v)
                else:
                    a_probs = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])
                a = probs_to_onehot_action(a_probs)
                # take step
                new_frame, r, done = env.step(a)
                # save
                if len(state_memory) > MEMORY_SIZE - 1:
                    r_array.append(r)
                    v_array.append(np.expand_dims(v_normalized, axis=0))
                    s_array.append(np.array(state_memory))  # copy the deque; appending it directly would alias every saved state
                    a_array.append(a)
                # check if done
                if done:
                    break
                # print time
                total_time = time.time() - start_time
                a_probs = np.array2string(
                    a_probs, formatter={'float_kind': lambda x: "%.2f" % x})
                print('time:{:>20} frame:{:>6} a:{}'.format(
                    total_time, counter, a_probs),
                      end='\r')
            if len(r_array) < 10:
                continue  # if insta done then skip to next frame
            print('')  # when finished
            env.stop()
            if len(r_array) < EPISODE_LENGTH:
                r_array[-2] = -50
            v_correct = calc_v(r_array, v_end=v_array[-1])
            # create advantage array
            adv_array = calc_advantage(v_correct, np.array(v_array[:-1]))
            # shuffle arrays
            random_index = np.arange(len(v_array[:-1]))
            np.random.shuffle(random_index)
            adv_array = np.array(adv_array)[random_index]
            v_array = np.array(v_array[:-1])[random_index]
            s_array = np.array(s_array[:-1])[random_index]
            a_array = np.array(a_array[:-1])[random_index]
            # split into batches
            adv_array = split(adv_array, BATCHES)
            v_array = split(v_array, BATCHES)
            s_array = split(s_array, BATCHES)
            a_array = split(a_array, BATCHES)
            # update weights
            for i in range(len(a_array)):
                REPLAY_MEMORY.append(
                    [s_array[i], a_array[i], v_array[i], adv_array[i]])

            indexes = np.random.choice(np.arange(len(REPLAY_MEMORY)),
                                       len(REPLAY_MEMORY))
            for i in indexes:
                s_array, a_array, v_array, adv_array = REPLAY_MEMORY[i]
                RL.update(s_array, a_array, v_array, adv_array)
                print('updating weights {}/{} adv_mean: {}'.format(
                    i, len(REPLAY_MEMORY), np.mean(adv_array)),
                      end='\r')
            print('')

    except KeyboardInterrupt:
        env.stop()
        sys.exit()
Code Example #26
 def test_reference(self):
     env = Environment("map_init.txt", "population_init.txt")
     p1 = Player(GamePlayId.P1)
     self.assertEqual(1, 1)
Code Example #27
import torch

from environment.Environment import Environment
from policy.Policy import Policy

env = Environment('LunarLander-v2')

policy: Policy = Policy(env.observation_space(), env.action_space())
policy.load_state_dict(torch.load('saved_policy/policy.pt'))
policy.eval()

for episode in range(500):
    state = env.reset()
    done = False

    for time in range(1000):
        action = policy.select_action(state)
        state, reward, done, _ = env.step(action)
        env.render()

        if done:
            break

    env.close()
Code Example #28
File: main.py Project: dorianb/RL_toolkit
parser.add_argument('--checkpoint-path', type=str, help="Path to checkpoint", default="")
parser.add_argument('--mode', type=str, help='Mode', default="train", choices=["train", "infer"])
args = parser.parse_args()

ray.init()

config = {}

with open(args.agent_config_path) as config_file:
    config.update(json.load(config_file))
    config["explore"] = True if args.mode == "train" else False

with open(args.environment_config_path) as config_file:
    config.update(json.load(config_file))

env = Environment(config)
agent = Agent(config)

if args.mode == "train":

    config.update({

        "num_gpus": 0,
        "num_workers": 1,

        "monitor": False
    })

    tune.run(
        agent.__class__,
        name=env.__class__.__name__+"_"+agent.__class__.__name__,
Code Example #29
File: main.py Project: NonameUntitled/rl-tic-tac-toe
from tqdm import trange

from agents.agents import *
from environment.Environment import Environment

if __name__ == '__main__':
    fst_agent = GreedyVFuncAgent(1, 0.7, 0.1)
    snd_agent = BasicVFuncAgent(2, 0.7)

    for _ in trange(1000):
        env = Environment()

        turn = 0
        while not env.is_finished(env.field):
            # First agents turn
            current_state = env.get_state_description(env.field)
            possible_actions = env.get_possible_actions(fst_agent.player_num)
            chosen_action = fst_agent.chose_action(current_state,
                                                   possible_actions)

            prev_field, action, new_field, is_done, rewards = env.step(
                chosen_action, fst_agent.player_num)
            fst_reward, snd_reward = rewards

            if is_done:
                fst_agent.learn(fst_reward)
                snd_agent.learn(snd_reward)
                break

            # Second agents turn
            current_state = env.get_state_description(env.field)
Code Example #30
 def setUp(self) -> None:
     self.parser = ShellParser(CommandFactory(Environment()))