Ejemplo n.º 1
0
def main(argv):
    """Run an endless random-action loop through the learning helpers.

    An ``Environment`` is created and started, then on every iteration a
    random move action (0-8) and a random attack action (0-9) are each
    passed through ``rl.action_space_down_sample`` and ``net.steps_action``
    before being handed to ``policy_gradient.learn``.  The three "done"
    flags returned by ``learn`` decide how the environment is advanced.

    Args:
        argv: Accepted for the caller's convenience; not read here.

    This function never returns on its own.
    """
    # The same literal is handed to the builder (twice) and to the proxy;
    # its meaning is not visible from this function.
    size = 100800

    env = Environment("env1", "roms/")
    policy_gradient = PolicyGradientBuilder(size, size, False)
    rl = AgentProxy(env, size)
    # Not referenced again below; still constructed and kept bound so the
    # object's lifetime matches the loop.
    dqn = DQNFlyweight(agent=rl)
    net = ReinforcementLearning(rl)

    def to_steps(raw_action):
        # Down-sample the raw action, then convert it via the network helper.
        return net.steps_action(rl.action_space_down_sample(raw_action))

    env.start()
    while True:
        steps_move = to_steps(random.randint(0, 8))
        steps_attack = to_steps(random.randint(0, 9))

        # env.step() is not called directly here; learn() supplies the same
        # five-element result.
        outcome = policy_gradient.learn(steps_move, steps_attack)
        frames, reward, round_done, stage_done, game_done = outcome

        # At most one transition per iteration, checked in priority order.
        if game_done:
            env.new_game()
            continue
        if stage_done:
            env.next_stage()
            continue
        if round_done:
            env.next_round()
Ejemplo n.º 2
0
def run_env(worker_id, roms_path):
    """Start an environment for one worker and play random actions forever.

    Args:
        worker_id: Value interpolated into the environment name
            (``env<worker_id>``).
        roms_path: Passed straight through to ``Environment``.

    Each iteration draws a move action in 0-8 and an attack action in 0-9,
    steps the environment, and then advances it according to whichever of
    the game/stage/round flags is set (checked in that order).  The function
    never returns on its own.
    """
    env_name = f"env{worker_id}"
    env = Environment(env_name, roms_path)
    env.start()

    while True:
        move = random.randint(0, 8)
        attack = random.randint(0, 9)
        result = env.step(move, attack)
        frames, reward, round_done, stage_done, game_done = result

        if game_done:
            env.new_game()
            continue
        if stage_done:
            env.next_stage()
            continue
        if round_done:
            env.next_round()
Ejemplo n.º 3
0
class StreetFighterEnv(object):
    """Thin wrapper around an emulator ``Environment``.

    The wrapper decodes a single integer action into a (move, attack) pair,
    optionally forwards raw frames to a monitor, converts the frames into a
    float32 array and reshapes the reward.
    """

    # Shape of the all-zero observation returned by ``reset`` and by ``step``
    # whenever a round, stage or game has just finished.
    BLANK_OBSERVATION_SHAPE = (1, 3, 168, 168)

    def __init__(self, index, monitor=None,
                 roms_path="/home/zhangchao/Downloads/"):
        """Create and start the wrapped environment.

        Args:
            index: Interpolated into the environment name (``env<index>``).
            monitor: Optional object with a ``record(frame)`` method.  Any
                falsy value is stored as ``None``.
            roms_path: Directory handed to ``Environment``.  The default is
                the previously hard-coded location, so existing callers are
                unaffected; pass another path to run on a different machine.
        """
        self.env = Environment("env{}".format(index), roms_path)
        self.monitor = monitor if monitor else None
        self.env.start()

    def _blank_observation(self):
        """Return a fresh all-zero float32 observation."""
        return np.zeros(self.BLANK_OBSERVATION_SHAPE, dtype=np.float32)

    # A single step returns: game frames, round finished, stage finished,
    # game finished (plus the shaped reward).
    def step(self, action):
        """Advance the environment by one combined action.

        Args:
            action: Combined action; the move action is ``action // 10`` and
                the attack action is ``action % 10``.

        Returns:
            A tuple ``(frames, reward, round_done, stage_done, game_done)``.
            ``frames`` is the concatenated, processed frame array (with a
            leading axis of size 1, cast to float32) while play continues,
            and an all-zero array of ``BLANK_OBSERVATION_SHAPE`` when any of
            the three flags is set.
        """
        move_action = action // 10
        attack_action = action % 10
        frames, reward, round_done, stage_done, game_done = self.env.step(
            move_action, attack_action)

        if self.monitor:
            for frame in frames:
                # Per the original note, record() writes the image data
                # through a pipe to a child process.
                self.monitor.record(frame)

        if round_done or stage_done or game_done:
            frames = self._blank_observation()
        else:
            # Concatenate the processed frames along axis 0, then add a
            # leading axis of size 1.
            frames = np.concatenate([process_frame(frame) for frame in frames],
                                    0)[None, :, :, :].astype(np.float32)

        # Reward shaping: start from player 1's reward, override it on a
        # finished stage (+25) or a finished game (-50), scale it up by 10%
        # per stage beyond the first, then divide by 10.
        reward = reward["P1"]
        if stage_done:
            reward = 25
        elif game_done:
            reward = -50
        reward *= (1 + (self.env.stage - 1) / 10)
        reward /= 10

        return frames, reward, round_done, stage_done, game_done

    # Restart
    def reset(self, round_done, stage_done, game_done):
        """Advance the environment past a finished round, stage or game.

        The flags are checked in the order game, stage, round and at most
        one transition is triggered.

        Returns:
            An all-zero float32 array of ``BLANK_OBSERVATION_SHAPE``.
        """
        if game_done:
            self.env.new_game()
        elif stage_done:
            self.env.next_stage()
        elif round_done:
            self.env.next_round()
        return self._blank_observation()
Ejemplo n.º 4
0
import random
from MAMEToolkit.sf_environment import Environment

roms_path = 'rom/'

# Create the environment (first argument "sfiii3n") and start it.
env = Environment("sfiii3n", roms_path)
env.start()

# Endless random-play loop: draw a move action in 0-8 and an attack action
# in 0-9, step the environment, then advance it according to the first of
# the game/stage/round flags that is set.
while True:
    move_action = random.randint(0, 8)
    attack_action = random.randint(0, 9)
    (frames, reward,
     round_done, stage_done, game_done) = env.step(move_action, attack_action)

    if game_done:
        env.new_game()
        continue
    if stage_done:
        env.next_stage()
        continue
    if round_done:
        env.next_round()