Example #1
def run_async_torille(tick_counter, quit_flag, match_frames, turn_frames):
    # Runs Toribash and increments the shared multiprocessing.Value
    # tick_counter on every state update
    controller = ToribashControl()
    controller.settings.set("matchframes", match_frames)
    controller.settings.set("turnframes", turn_frames)
    controller.settings.set("engagement_distance", 1500)
    controller.init()
    while quit_flag.value == 0:
        s, terminal = controller.get_state()
        if terminal:
            s = controller.reset()
        # create_random_actions() is assumed to be defined elsewhere
        # in the source file
        actions = create_random_actions()
        controller.make_actions(actions)
        # Others are not writing here, so no need for locks
        tick_counter.value += 1
    controller.close()
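
A minimal sketch of how this helper might be driven from a parent process, assuming torille is installed: the multiprocessing.Value objects provide the shared tick_counter and quit_flag the function expects (the match/turn frame values 1000/2 are illustrative):

import multiprocessing as mp
from time import sleep

if __name__ == "__main__":
    tick_counter = mp.Value("i", 0)
    quit_flag = mp.Value("i", 0)
    worker = mp.Process(target=run_async_torille,
                        args=(tick_counter, quit_flag, 1000, 2))
    worker.start()
    sleep(10)                     # let the game run for a while
    print("Ticks so far:", tick_counter.value)
    quit_flag.value = 1           # signal the worker loop to exit
    worker.join()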
Example #2
#
#  controller_example.py
#  A minimalistic example running ToribashController
#
#  Author: Anssi "Miffyli" Kanervisto, 2018
from torille import ToribashControl
import random as r

# How many games will be played
NUM_EPISODES = 5

# Whether the game should be rendered or not
DRAW_GAME = True

# Create ToribashController. This won't launch the game yet
controller = ToribashControl(draw_game=DRAW_GAME)

# Set some settings. You can find more info on these on the Toribash forums / in the game
# How long one game is
controller.settings.set("matchframes", 1000)
# How many frames one action will be repeated for
# AKA "frame skip", "action repeat"
controller.settings.set("turnframes", 2)
# How far apart the two players spawn
controller.settings.set("engagement_distance", 1000)
# Record a replay file of the game for later playback by setting
# this to something other than "None"/None. The replay file is saved
# under [toribash directory]/replay at the end of the episode.
# Note: Remember to change this setting between episodes!
#       Otherwise your replays will be overwritten!
controller.settings.set("replay_file", None)  # replay recording disabled here
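
The excerpt ends before the main loop. Below is a minimal sketch of how the episode loop could continue, mirroring the control flow of Examples #1 and #4 (random joint states 1-4 for player 1, all-"hold" for player 2); the action format is inferred from those examples, not from an authoritative reference:

num_joints = controller.get_num_joints()

# This launches the game
controller.init()

episode = 0
while episode < NUM_EPISODES:
    state, terminal = controller.get_state()
    if terminal:
        state = controller.reset()
        episode += 1
    # Random joint states for player 1, "hold" (3) for player 2
    actions = [[r.randint(1, 4) for _ in range(num_joints)],
               [3] * num_joints]
    controller.make_actions(actions)

controller.close()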
Example #3
from time import sleep

from torille import ToribashControl
from torille import constants  # provides NUM_CONTROLLABLES


def watch_actions(actions, pause=1):
    """
    Watch the set of actions chosen by the builder. Note: this does not play
    the actions within any temporal context, so an action that is decent
    mid-match may be hard to judge when shown as the first action.

    Args:
        actions (dict): a dictionary of actions. See action_space for
            methods to produce these actions
        pause (float): how long to pause on each action, in seconds
    """

    try:
        controller = ToribashControl(draw_game=True)
        controller.init()
        _, t = controller.get_state()
        for i in actions.keys():
            act = actions[i]
            # Player 1 performs the chosen action; player 2 holds all
            # joints (3), with both hand grips set to 1
            action = [act, [3] * constants.NUM_CONTROLLABLES]
            action[1][-2] = 1
            action[1][-1] = 1
            controller.make_actions(action)
            _, t = controller.get_state()
            sleep(pause)
            # End the game after every action so that each action is
            # shown from the initial pose
            controller.finish_game()
            if controller.requires_reset:
                _ = controller.reset()
        controller.close()
    except RuntimeError as inst:
        print("Manual controller shutdown")
        controller.close()
        print("error: {}".format(inst))
Example #4
def train(batch_size, num_steps, report_every_episodes, reward_function,
          logfile, save_every_trains, save_file, num_frames):
    """
    Train A2C on Toribash tasks.
    """

    controller = ToribashControl()

    # Only one controlled player, even for destroy-uke, to keep results
    # comparable with previously run experiments
    num_players = 1

    controller.settings.set("matchframes", 1000)
    controller.settings.set("turnframes", 5)
    if reward_function != reward_destroy_uke:
        controller.settings.set("engagement_distance", 1500)

    num_joints = controller.get_num_joints()
    num_joint_states = controller.get_num_joint_states()
    num_inputs = controller.get_state_dim() * num_players

    a2c = ToribashA2C(
        num_inputs * num_frames,
        num_joints,
        num_joint_states,
    )

    train_op_ctr = 0
    step_ctr = 0
    episode_ctr = 0
    batch_states = []
    batch_stateprimes = []
    batch_actions = []
    batch_rewards = []

    last_s = None
    last_a = None
    last_r = None
    injury = None
    last_orig_s = None
    pi_losses = []
    v_losses = []
    h_losses = []
    vs = []
    # One total reward per finished episode
    last_episode_rewards = []
    episode_reward = 0
    stacker = deque([np.zeros(num_inputs) for i in range(num_frames)],
                    maxlen=num_frames)

    controller.init()

    print_and_log("--- Training starts ---", logfile)
    start_time = time()

    while step_ctr < num_steps:
        orig_s, terminal = controller.get_state()
        s = get_refined_state(orig_s, num_players)
        # Create "correct" state right away
        # (correct = numpy array and has successive frames stacked)
        stacker.append(s)
        s = np.concatenate(stacker)

        # Add to batch if have last states and actions etc
        if last_s is not None and last_a is not None and last_r is not None:
            batch_states.append(last_s)
            batch_stateprimes.append(s)
            batch_actions.append(last_a)
            batch_rewards.append(last_r)
            if len(batch_states) == batch_size:
                states = np.array(batch_states)
                stateprimes = np.array(batch_stateprimes)
                # Actions are 1-based, so subtract one for training
                actions = np.array(batch_actions)
                actions -= 1
                returns = np.array(batch_rewards)
                losses = a2c.train_on_batch(states, stateprimes, actions,
                                            returns)
                pi_losses.append(losses[0])
                v_losses.append(losses[1])
                h_losses.append(losses[2])
                train_op_ctr += 1
                # Save periodically, checked right after each train op so
                # we do not re-save the model on every environment step
                if (train_op_ctr % save_every_trains) == 0:
                    a2c.save(save_file)
                batch_states.clear()
                batch_stateprimes.clear()
                batch_actions.clear()
                batch_rewards.clear()
        step_ctr += 1

        if terminal:
            last_s = None
            last_a = None
            last_r = None
            last_orig_s = None
            # Reset stacker by putting zeros in
            stacker = deque(
                [np.zeros(num_inputs) for i in range(num_frames)],
                maxlen=num_frames)
            # Reset Toribash
            orig_s = controller.reset()
            episode_ctr += 1
            # Store episode reward
            last_episode_rewards.append(episode_reward)
            episode_reward = 0
            s = get_refined_state(orig_s, num_players)
            stacker.append(s)
            s = np.concatenate(stacker)

            # Print results every X episodes
            if (episode_ctr % report_every_episodes) == 0:
                print_and_log(
                    ("Steps: %d\tTime: %d\tPloss: %.4f\tVloss: %.4f\t" +
                     "Hloss: %.4f" +
                     "\tAvrgR: %.4f\t;MaxR: %.4f\tMinR: %.4f\tAvrgV: %.4f") %
                    (step_ctr, int(time() - start_time), sum(pi_losses) /
                     len(pi_losses), sum(v_losses) / len(v_losses),
                     sum(h_losses) / len(h_losses), sum(last_episode_rewards) /
                     len(last_episode_rewards), max(last_episode_rewards),
                     min(last_episode_rewards), sum(vs) / len(vs)), logfile)
                pi_losses.clear()
                v_losses.clear()
                h_losses.clear()
                last_episode_rewards.clear()
                vs.clear()

        # Get policy and value estimation for state
        pi, v = a2c.predict_pi_and_v(np.expand_dims(s, 0))
        pi = pi[0]
        v = v[0]
        vs.append(v)

        # Select joint states according to policy
        a = []
        for probs in pi:
            a.append(np.random.choice(num_joint_states, p=probs) + 1)

        # Add the second player's actions (required by the API):
        # set all of their controllables to "hold" (3)
        action = [a, [3 for i in range(num_joints)]]

        controller.make_actions(action)

        last_s = s
        last_a = action[0]

        if last_orig_s is not None:
            last_r = reward_function(last_orig_s, orig_s)
        else:
            last_r = 0
        episode_reward += last_r
        last_orig_s = orig_s

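
The reward_function argument receives the previous and current raw ToribashStates (the reward_function(last_orig_s, orig_s) call above) and returns a scalar. A sketch of that interface only, not the reward actually used in these experiments; the per-player injuries attribute is an assumption about ToribashState, so substitute whatever injury field your torille version exposes:

def reward_opponent_injury(old_state, new_state):
    # Reward the injury dealt to the opponent (player 2) this turn;
    # `injuries` as a two-element per-player field is an assumption
    return new_state.injuries[1] - old_state.injuries[1]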
Example #5
#  A minimalistic example of going through an
#  existing replay file
#
#  Author: Anssi "Miffyli" Kanervisto, 2019
from torille import ToribashControl

# Path to the replay file inside Toribash's "replay" folder.
# Make sure this file exists!
# A bunch of cool replays ship with the Toribash installation
REPLAY_FILE = "0headkick.rpl"

# Set this to True if you want to watch the replay as it plays
DRAW_GAME = False

# Create ToribashController. This won't launch the game yet
controller = ToribashControl(draw_game=DRAW_GAME)
controller.init()

# `init()` begins a new game, and we need to reach the end
# of the episode before we can load a replay
_ = controller.get_state()
controller.finish_game()

# Read the replay file and go through it.
# We end up with a list of ToribashStates
states = controller.read_replay(REPLAY_FILE)

print("Obtained %d ToribashStates" % len(states))

# Close the environment
controller.close()
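
To do more than count the states, you can iterate over them; a minimal sketch that relies only on repr(), since the exact ToribashState fields are outside the scope of this example:

# Inspect the first few recovered states
for i, state in enumerate(states[:3]):
    print("State %d: %r" % (i, state))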
Example #6
"""
Checks that replay files exist and can be played. Lists the number of
replays and plays a random replay found in the folder. To play specific
replays, see utils.viz_tools.visuals.watch_replay
"""
import os
import random

import torille
from torille import ToribashControl

if __name__ == "__main__":
    print("Checking for replays")
    path = os.path.dirname(torille.__file__)
    replays_path = os.path.join(path, 'toribash/replay')
    replays = [f for f in os.listdir(replays_path) if f.endswith(".rpl")]
    if len(replays) == 0:
        raise ValueError(
            "There do not seem to be any replays at {}.\n"
            "Make sure to download replays from data/ or from {}".format(
                replays_path,
                "https://forum.toribash.com/forumdisplay.php?f=10"))
    else:
        print("Found {} replays".format(len(replays)))
        print("Playing random replay")
        sample = random.choice(replays)
        try:
            controller = ToribashControl(draw_game=True)
            controller.init()
            controller.finish_game()
            states = controller.read_replay(sample)
            controller.close()
        except RuntimeError as inst:
            print("Manual controller shutdown. This could be caused by a variety " + 
            "of issues. Please check torille install.")
            controller.close()
            print("error: {}".format(inst))

Example #7
    def watch_replay(self):
        """
         Plays the currently selected match in a Toribash game window
        """
        print("Playing Match: {}".format(self.match))

        try:
            controller = ToribashControl(draw_game=True)
            controller.init()
            controller.finish_game()
            states = controller.read_replay(self.match[0])
            controller.close()
        except RuntimeError as inst:
            print("manual controller shutdown")
            controller.close()
            print("error: {}".format(inst))
Example #8
def _load_replay(REPLAY_FILE):
    """
     Loading single replay
    """

    # Close the controller even if there is an error somewhere in the
    # state recording process
    try:
        # Create the controller (game window hidden)
        controller = ToribashControl(draw_game=False)
        controller.init()
        # The controller can't start with a replay; at least one game
        # has to be finished before a replay can be loaded
        controller.finish_game()
        # Play through the replay file and record its states
        states = controller.read_replay(REPLAY_FILE)
        controller.close()
        return states
    except RuntimeError as err:
        print("There was a runtime error; the program stopped and may not "
              "have closed the game window")
        print("Closing game window")
        controller.close()
        print(err)
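
Hypothetical usage, loading the bundled replay from Example #5 above (the function returns None when a RuntimeError occurs):

states = _load_replay("0headkick.rpl")
if states is not None:
    print("Replay contained %d states" % len(states))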