Example #1
0
def main(argv):
    """Feed randomly chosen actions through the learning stack, forever.

    Builds the "env1" environment from the ``roms/`` directory together with
    the policy-gradient, agent-proxy, DQN and network objects, starts the
    environment and then loops without end: each iteration draws a random
    move index (0-8) and attack index (0-9), converts each one with
    ``action_space_down_sample`` followed by ``steps_action``, and passes
    both results to ``policy_gradient.learn``. The done flags returned by
    ``learn`` decide whether a new game, the next stage or the next round is
    requested from the environment.

    Args:
        argv: Command-line arguments; not used by this function.
    """
    environment = Environment("env1", "roms/")
    learner = PolicyGradientBuilder(100800, 100800, False)
    agent = AgentProxy(environment, 100800)
    # Never read afterwards; still constructed in case the constructor has
    # side effects on the agent.
    _flyweight = DQNFlyweight(agent=agent)
    network = ReinforcementLearning(agent)

    def random_steps(upper):
        # Draw an index in [0, upper], down-sample it, then expand to steps.
        choice = random.randint(0, upper)
        return network.steps_action(agent.action_space_down_sample(choice))

    environment.start()
    while True:
        move_steps = random_steps(8)
        attack_steps = random_steps(9)
        outcome = learner.learn(move_steps, attack_steps)
        _frames, _reward, round_done, stage_done, game_done = outcome

        # Highest-level "done" flag wins.
        if game_done:
            environment.new_game()
            continue
        if stage_done:
            environment.next_stage()
            continue
        if round_done:
            environment.next_round()
Example #2
0
 def __init__(self, index, monitor=None,
              roms_path="/home/zhangchao/Downloads/"):
     """Create and start the emulator environment for this instance.

     Args:
         index: Value appended to "env" to form the environment id.
         monitor: Optional monitor object; any falsy value is stored as
             ``None``.
         roms_path: Directory handed to ``Environment`` as the ROM location.
             The default is the path that used to be hard-coded here, so
             existing callers behave exactly as before; pass your own
             directory on any other machine.
     """
     self.env = Environment("env{}".format(index), roms_path)
     self.monitor = monitor if monitor else None
     self.env.start()
Example #3
0
 def __init__(self, index, monitor=None):
     """Build the "env<index>" environment from ``roms/`` and start it.

     Args:
         index: Value appended to "env" to form the environment id.
         monitor: Optional monitor object; a falsy value is stored as
             ``None``.
     """
     env_name = "env{}".format(index)
     self.env = Environment(env_name, "roms/")
     # Truthy monitors are kept as-is, anything falsy collapses to None.
     self.monitor = monitor or None
     self.env.start()
Example #4
0
def run_env(worker_id, roms_path):
    """Play random actions in the "env<worker_id>" environment, forever.

    Args:
        worker_id: Value appended to "env" to form the environment id.
        roms_path: ROM directory handed to ``Environment``.

    This function never returns: every iteration draws a move index in
    [0, 8] and an attack index in [0, 9], steps the environment with them and
    then asks for a new game, the next stage or the next round depending on
    which done flag the step reported.
    """
    emulator = Environment(f"env{worker_id}", roms_path)
    emulator.start()
    while True:
        move = random.randint(0, 8)
        attack = random.randint(0, 9)
        _, _, round_over, stage_over, game_over = emulator.step(move, attack)

        # Highest-level "done" flag wins.
        if game_over:
            emulator.new_game()
            continue
        if stage_over:
            emulator.next_stage()
            continue
        if round_over:
            emulator.next_round()
Example #5
0
    def __init__(self, index, difficulty, monitor=None):
        """Start the "env<index>" environment and declare the gym spaces.

        Args:
            index: Value appended to "env" to form the environment id.
            difficulty: Forwarded to ``Environment`` as ``difficulty``.
            monitor: Optional monitor object; a falsy value is stored as
                ``None``.
        """
        env_name = "env{}".format(index)
        self.env = Environment(env_name, "roms/", difficulty=difficulty)
        self.monitor = monitor if monitor else None
        self.env.start()

        self.action_space = gym.spaces.Discrete(90)
        # Leading dimension is 3 plus the size of the discrete action space;
        # the remaining two dimensions are fixed at 128 x 128.
        observation_shape = (3 + self.action_space.n, 128, 128)
        self.observation_space = gym.spaces.Box(
            low=0, high=1, shape=observation_shape, dtype=np.float32)
Example #6
0
    def run(self):
        """Worker loop: alternate between playing and training, forever.

        Creates the environment from this worker's settings (with
        ``throttle=False``), starts it, and then repeats: put the model in
        eval mode, generate a playthrough from the current frames, put the
        model in train mode, compile the playthrough into a dataset and
        train on it.

        Any ``Exception`` ends the loop; its message and the full traceback
        are written to the logger and the method returns normally.
        """
        try:
            logger.info("Starting Worker")
            self.env = Environment(
                self.env_id,
                self.roms_path,
                difficulty=self.difficulty,
                frame_ratio=self.frameRatio,
                frames_per_step=self.framesPerStep,
                throttle=False,
            )
            frames = self.env.start()
            while True:
                # Play with the model in evaluation mode ...
                self.model.eval()
                observations, histories, frames = self.generate_playthrough(
                    frames)

                # ... then learn from what was just played.
                self.model.train()
                training_set = wu.compileHistories(observations, histories)
                wu.train(self.model, self.optim, self.criterion, training_set)

        except Exception as error:
            logger.error(error)
            logger.error(traceback.format_exc())
Example #7
0
def _empty_history():
    """Return a fresh per-round history with an empty list for each field."""
    return {
        "observation": [],
        "move_action": [],
        "attack_action": [],
        "reward": []
    }


def run(worker_no, roms_path, learning_rate, cluster, data_bins, stats,
        saves_path, save_frequency):
    """Run one distributed training worker until it fails.

    Builds the "global" model under a replica device setter and a
    worker-local model named ``"worker<worker_no>"``, starts a TensorFlow
    server and monitored session for this worker, and then plays games in
    the emulator forever using the local model's outputs. Each finished
    round's history is stored in ``data_bins``; at the end of each game the
    local model is trained on the emptied bin, the local update ops are run
    again, ``stats`` is updated and a checkpoint is saved every
    ``save_frequency`` episodes.

    Args:
        worker_no: Index of this worker; used for the task index, the local
            model name and the data bin.
        roms_path: ROM directory handed to ``Environment``.
        learning_rate: Forwarded to ``Model``.
        cluster: Cluster description handed to the device setter and server.
        data_bins: Object providing ``empty_bin(worker_no)``; also passed to
            ``wu.store_history``.
        stats: Object providing ``get_episode()`` and ``update(dict)``.
        saves_path: Directory containing ``model.ckpt``.
        save_frequency: A checkpoint is written whenever the episode count
            is positive and a multiple of this value.

    Raises:
        SystemExit: With status 1 after any ``Exception`` inside the session
            has been printed and logged. ``KeyboardInterrupt`` and
            ``SystemExit`` themselves are no longer intercepted and
            propagate unchanged.
    """
    # Local import so this block does not rely on the file's import section.
    import sys

    name = "worker%d" % worker_no

    with tf.device(
            tf.train.replica_device_setter(
                worker_device="/job:worker/task:%d" % worker_no,
                cluster=cluster)):
        Model("global", learning_rate)

    # Only the trainable variables in the "global" scope are checkpointed.
    saver = tf.train.Saver(var_list=tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope="global"))

    local_model = Model(name, learning_rate)

    # NOTE(review): presumably copies the global variables into the local
    # model when run - confirm against update_target_graph.
    update_local_ops = update_target_graph('global', name)

    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(gpu_options=gpu_options)
    server = tf.train.Server(cluster,
                             job_name="worker",
                             task_index=worker_no,
                             config=config)
    with tf.train.MonitoredTrainingSession(master=server.target) as sess:
        try:
            if stats.get_episode() != 0:
                # The saver is given the innermost wrapped session, reached
                # through private attributes (fragile across TF versions).
                saver.restore(sess._sess._sess._sess._sess,
                              f'{saves_path}/model.ckpt')

            sess.run(update_local_ops)
            print("Started Worker Updates")
            env = Environment(name, roms_path, difficulty=3)
            frames = env.start()

            while True:
                history = _empty_history()
                game_done = False
                total_reward = 0

                while not game_done:

                    observation = wu.prepro(frames)

                    history["observation"].append(observation)

                    move_out, attack_out = sess.run(
                        [local_model.move_out_sym, local_model.attack_out_sym],
                        feed_dict={local_model.observation_sym: observation})

                    move_action_hot = wu.choose_action(move_out)
                    attack_action_hot = wu.choose_action(attack_out)

                    history["move_action"].append(move_action_hot)
                    history["attack_action"].append(attack_action_hot)

                    frames, r, round_done, stage_done, game_done = env.step(
                        np.argmax(move_action_hot),
                        np.argmax(attack_action_hot))
                    # Only the "P1" entry of the reward mapping is tracked.
                    total_reward += r["P1"]

                    history["reward"].append(r["P1"])

                    if round_done:
                        # Hand the finished round over and start a new one.
                        wu.store_history(data_bins, worker_no, history)
                        history = _empty_history()
                        if game_done:
                            wu.train(sess, local_model,
                                     *data_bins.empty_bin(worker_no))
                            sess.run(update_local_ops)
                            stats.update({
                                "score": total_reward,
                                "stage": env.stage
                            })
                            # Read the counter once so both halves of the
                            # check see the same episode number.
                            episode = stats.get_episode()
                            if episode > 0 and episode % save_frequency == 0:
                                saver.save(sess._sess._sess._sess._sess,
                                           f'{saves_path}/model.ckpt')
                        frames = env.reset()
        except Exception:
            # Worker boundary: report the failure on stdout and in the log,
            # then terminate this worker with a non-zero status.
            error = traceback.format_exc()
            print(error)
            logging.error(error)
            sys.exit(1)
import random
import time
from MAMEToolkit.sf_environment import Environment

# Installation smoke test: construct the sf_environment Environment, wait for
# start() to return, then close it again. No actions are ever sent.
roms_path = 'roms/'  # Replace this with the path to your ROMs
env = Environment('env1', roms_path)
print('[test] Loaded SF3 from ROM file')

# Reference for what start() does:
# https://github.com/M-J-Murray/MAMEToolkit/blob/3041734391292376aa909938ea5b51030e3c0240/MAMEToolkit/sf_environment/Environment.py#L88
print('[test] Wait until learnable gameplay starts...')
frames = env.start()  # return value is stored but not used below
print('[test] Start!')

# Shut the environment down; reaching the final print means every step above
# completed without raising.
env.close()
print('[test] Your installation is complete!')
Example #9
0
import random
from MAMEToolkit.sf_environment import Environment

# Random-play demo: start the "sfiii3n" environment from the rom/ directory
# and feed it randomly chosen move/attack indices forever.
roms_path = 'rom/'
env = Environment("sfiii3n", roms_path)
env.start()

while True:
    # Move index is drawn from [0, 8], attack index from [0, 9].
    move_action, attack_action = random.randint(0, 8), random.randint(0, 9)
    (frames, reward,
     round_done, stage_done, game_done) = env.step(move_action, attack_action)

    # Highest-level "done" flag wins.
    if game_done:
        env.new_game()
        continue
    if stage_done:
        env.next_stage()
        continue
    if round_done:
        env.next_round()
Example #10
0
        move_action = random.randint(0, 8)
        attack_action = random.randint(0, 9)
        frames, reward, round_done, stage_done, game_done = env.step(
            move_action, attack_action)
        if game_done:
            env.new_game()
        elif stage_done:
            env.next_stage()
        elif round_done:
            env.next_round()


if __name__ == '__main__':
    # NOTE: environments must be created outside of the threads. A variant
    # that launched one Process per worker running run_env was left disabled
    # here; only a single "env1" environment is driven below.
    env = Environment("env1", roms_path)
    env.start()

    while True:
        # Move index is drawn from [0, 8], attack index from [0, 9].
        move_action, attack_action = (random.randint(0, 8),
                                      random.randint(0, 9))
        (frames, reward,
         round_done, stage_done, game_done) = env.step(move_action,
                                                       attack_action)

        # Highest-level "done" flag wins.
        if game_done:
            env.new_game()
            continue
        if stage_done:
            env.next_stage()
            continue
        if round_done:
            env.next_round()
Example #11
0
    frame = frame[32:214, 12:372]  # crop
    frame = frame[::3, ::3]
    if isGrey:
        frame = 0.2989 * frame[:, :,
                               0] + 0.5870 * frame[:, :,
                                                   1] + 0.1140 * frame[:, :,
                                                                       2]  # greyscale
    return frame


# Passed to prepro() on every call and used to pick the colormap below.
isGrey = True

roms_path = "../roms/"  # Replace this with the path to your ROMs
# Environment is created with throttle disabled, one frame per step and a
# frame ratio of 3.
env = Environment("env1",
                  roms_path,
                  frames_per_step=1,
                  frame_ratio=3,
                  throttle=False)

# Display the preprocessed result of env.start() in a matplotlib figure with
# the axes hidden.
fig = plt.figure()
plt.ion()  # interactive mode; presumably so plt.show() below does not block
im: AxesImage = plt.imshow(prepro(env.start(), isGrey),
                           cmap="gray" if isGrey else None)
plt.axis("off")
plt.show()
# Step the environment with random move (0-8) and attack (0-9) indices and
# push each returned frame into the existing image object.
while True:
    move_action = random.randint(0, 8)
    attack_action = random.randint(0, 9)
    frame, reward, round_done, stage_done, game_done = env.step(
        move_action, attack_action)
    im.set_data(prepro(frame, isGrey))