Example #1
File: model.py Project: cair/experiments
def __init__(self):
    self.env = Game({
        "game": {
            "width": 15,
            "height": 11,
            "tile_width": 32,
            "tile_height": 32
        },
        "mechanics": {
            "complexity": {
                "build_anywhere": False
            },
            "start_health": 50,
            "start_gold": 100,
            "start_lumber": 0,
            "start_income": 20,
            "income_frequency": 10,
            "ticks_per_second": 20,
            "fps": 10,
            "ups": 10,
            "income_ratio": 0.20,
            "kill_gold_ratio": 0.10
        },
        "gui": {
            "enabled": True,
            "draw_friendly": True,
            "minimal": True
        }
    })
    self.updates = 10  # Updates per action
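The configuration dict splits into three sections: "game" (map size in tiles and tile pixel dimensions), "mechanics" (economy and timing parameters such as start_gold, income_ratio, and ticks_per_second), and "gui" (rendering flags). The remaining examples reuse this same structure with minor variations.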
Example #2
    def __init__(self):
        self.frames = 3
        self.action_space_a = 13  # Linear
        self.action_space_m = 2  # Softmax

        self.DUMMY_ACTION, self.DUMMY_VALUE = np.zeros(
            (1, self.action_space_a)), np.zeros((1, 1))
        self.NUM_STATE = 84 * 84 * 3
        self.GAMMA = 0.99
        self.BATCH_SIZE = 32
        self.episode = 0
        self.EPISODES = 10000
        self.reward = []
        self.reward_over_time = []
        self.LOSS_CLIPPING = 0.2  # Only the clipped surrogate loss is implemented; the PPO paper found clipping works best
        self.EPOCHS = 10

        self.env = Game({
            "game": {
                "width": 15,
                "height": 11,
                "tile_width": 32,
                "tile_height": 32
            },
            "mechanics": {
                "complexity": {
                    "build_anywhere": False
                },
                "start_health": 50,
                "start_gold": 100,
                "start_lumber": 0,
                "start_income": 20,
                "income_frequency": 10,
                "ticks_per_second": 20,
                "fps": 10,
                "ups": 10,
                "income_ratio": 0.20,
                "kill_gold_ratio": 0.10
            },
            "gui": {
                "enabled": True,
                "draw_friendly": True,
                "minimal": True
            }
        })
        self.env.reset()
        self.env.render()
        self.observation = self.env.get_state("image")

        self.actor_discrete = self.build_actor(
            discrete=True, action_space=self.action_space_a, activation=linear)
        #self.actor_continous = self.build_actor(discrete=False, action_space=self.action_space_m, activation=softmax)
        self.critic = self.build_critic()
Example #3
File: game_1.py Project: cair/experiments
    def __init__(self, data_queue):
        self.id = uuid.uuid4()
        print("Game %s - Start" % self.id)
        self.data_queue = data_queue
        self.game = Game({
            "game": {
                "width": 11,
                "height": 11,
                "tile_width": 32,
                "tile_height": 32
            },
            "mechanics": {
                "complexity": {
                    "build_anywhere": False
                },
                "start_health": 50,
                "start_gold": 100,
                "start_lumber": 0,
                "start_income": 20,
                "income_frequency": 10,
                "ticks_per_second": 20,
                "fps": 10,
                "ups": 10008000,
                "income_ratio": 0.20,
                "kill_gold_ratio": 0.10
            },
            "gui": {
                "enabled": True,
                "draw_friendly": True,
                "minimal": True
            }
        })

        self.states = list()
        self.experience_replay = list()
        self.s0 = None
        self.player_1 = self.game.players[0]
        self.player_2 = self.game.players[1]
        self.episode = 1
        self.representation = "image_grayscaled"
        self.running = False
        self.stack = 4
        self.num_ticks = 10
        self.tick_limit = 30000
Example #4
File: model.py Project: cair/experiments
class Environment:

    def __init__(self):
        self.env = Game({
            "game": {
                "width": 15,
                "height": 11,
                "tile_width": 32,
                "tile_height": 32
            },
            "mechanics": {
                "complexity": {
                    "build_anywhere": False
                },
                "start_health": 50,
                "start_gold": 100,
                "start_lumber": 0,
                "start_income": 20,
                "income_frequency": 10,
                "ticks_per_second": 20,
                "fps": 10,
                "ups": 10,
                "income_ratio": 0.20,
                "kill_gold_ratio": 0.10
            },
            "gui": {
                "enabled": True,
                "draw_friendly": True,
                "minimal": True
            }
        })
        self.updates = 10  # Updates per action

    def step(self, a):
        # Apply the action, then advance the simulation a fixed number
        # of ticks so each agent action spans several game updates.
        data = self.env.step(a, representation="image")
        for i in range(self.updates):
            self.env.update()
        return data

    def reset(self):
        return self.env.reset()
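
A minimal usage sketch for the Environment wrapper above, assuming Game.step returns the usual (observation, reward, done, info) tuple as it does in Example #7; the action id 0 is an arbitrary placeholder:

env = Environment()
s = env.reset()
observation, reward, done, info = env.step(0)  # 0 is a placeholder action id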
Example #5
if __name__ == "__main__":

    game = Game({
        "game": {
            "width": 15,
            "height": 11,
            "tile_width": 32,
            "tile_height": 32
        },
        "mechanics": {
            "complexity": {
                "build_anywhere": False
            },
            "start_health": 50,
            "start_gold": 100,
            "start_lumber": 0,
            "start_income": 20,
            "income_frequency": 10,
            "ticks_per_second": 20,
            "fps": 10,
            "ups": 10,
            "income_ratio": 0.20,
            "kill_gold_ratio": 0.10
        },
        "gui": {
            "enabled": True,
            "draw_friendly": True,
            "minimal": True
        }
    })

    while True:
        # Loop body omitted in this snippet; a minimal tick/render loop
        # using the Game API shown in Example #8:
        game.update()
        game.render()
Example #6
File: ppo_1.py Project: cair/experiments
    env = Game({
        "game": {
            "width": 15,
            "height": 11,
            "tile_width": 32,
            "tile_height": 32
        },
        "mechanics": {
            "complexity": {
                "build_anywhere": False
            },
            "start_health": 50,
            "start_gold": 100,
            "start_lumber": 0,
            "start_income": 20,
            "income_frequency": 10,
            "ticks_per_second": 5,
            "fps": 1,
            "ups": 1,
            "income_ratio": 0.20,
            "kill_gold_ratio": 0.10
        },
        "gui": {
            "enabled": True,
            "draw_friendly": True,
            "minimal": True
        }
    })

    s = env.reset()
Example #7
class RLModel:
    def __init__(self):
        self.frames = 3
        self.action_space_a = 13  # Linear
        self.action_space_m = 2  # Softmax

        self.DUMMY_ACTION, self.DUMMY_VALUE = np.zeros(
            (1, self.action_space_a)), np.zeros((1, 1))
        self.NUM_STATE = 84 * 84 * 3
        self.GAMMA = 0.99
        self.BATCH_SIZE = 32
        self.episode = 0
        self.EPISODES = 10000
        self.reward = []
        self.reward_over_time = []
        self.LOSS_CLIPPING = 0.2  # Only the clipped surrogate loss is implemented; the PPO paper found clipping works best
        self.EPOCHS = 10

        self.env = Game({
            "game": {
                "width": 15,
                "height": 11,
                "tile_width": 32,
                "tile_height": 32
            },
            "mechanics": {
                "complexity": {
                    "build_anywhere": False
                },
                "start_health": 50,
                "start_gold": 100,
                "start_lumber": 0,
                "start_income": 20,
                "income_frequency": 10,
                "ticks_per_second": 20,
                "fps": 10,
                "ups": 10,
                "income_ratio": 0.20,
                "kill_gold_ratio": 0.10
            },
            "gui": {
                "enabled": True,
                "draw_friendly": True,
                "minimal": True
            }
        })
        self.env.reset()
        self.env.render()
        self.observation = self.env.get_state("image")

        self.actor_discrete = self.build_actor(
            discrete=True, action_space=self.action_space_a, activation=linear)
        #self.actor_continous = self.build_actor(discrete=False, action_space=self.action_space_m, activation=softmax)
        self.critic = self.build_critic()
        # Actor 1
        # -------
        # Send Unit 0
        # Send Unit 1
        # Send unit 2

        # Build 0
        # Build 1
        # Build 2

        # Mouse_on_off

        # Actor 2
        # -------
        # Mouse_vel_x
        # Mouse_vel_y

    def build_actor(self, discrete=True, action_space=None, activation=None):
        print(action_space)
        input_image = Input(shape=(84, 84, self.frames))
        # These extra inputs feed only the custom PPO loss; they are not
        # used by the network's forward pass.
        actual_value = Input(shape=(1, ))
        predicted_value = Input(shape=(1, ))
        old_prediction = Input(shape=(action_space, ))

        x = Conv2D(32, (8, 8), (2, 2), 'same', activation=relu)(input_image)
        x = Conv2D(64, (4, 4), (2, 2), 'same', activation=relu)(x)
        x = Conv2D(128, (2, 2), (2, 2), 'same', activation=relu)(x)
        x = Conv2D(256, (1, 1), (2, 2), 'same', activation=relu)(x)
        x = Flatten()(x)

        x = Dense(512, activation=relu)(x)

        out_actions = Dense(action_space, activation=softmax, name='output')(x)
        #out_actions = NoisyDense(action_space, activation=softmax, sigma_init=0.02, name='output')(x)

        model = Model(inputs=[
            input_image, actual_value, predicted_value, old_prediction
        ],
                      outputs=[out_actions])
        model.compile(optimizer=Adam(lr=10e-4),
                      loss=[
                          proximal_policy_optimization_loss(
                              actual_value=actual_value,
                              old_prediction=old_prediction,
                              predicted_value=predicted_value)
                      ])
        model.summary()
        return model

    def build_critic(self):
        input_image = Input(shape=(84, 84, self.frames))
        x = Conv2D(32, (8, 8), (2, 2), 'same', activation=relu)(input_image)
        x = Conv2D(64, (4, 4), (2, 2), 'same', activation=relu)(x)
        x = Conv2D(128, (2, 2), (2, 2), 'same', activation=relu)(x)
        x = Conv2D(256, (1, 1), (2, 2), 'same', activation=relu)(x)
        x = Flatten()(x)

        x = Dense(512, activation=relu)(x)

        out_value = Dense(1, activation=linear)(x)

        model = Model(inputs=[input_image], outputs=[out_value])
        model.compile(optimizer=Adam(lr=10e-4), loss='mse')
        model.summary()
        return model

    def get_action(self):
        # The dummy value/action arrays only satisfy the loss function's
        # extra inputs; they are ignored at prediction time.
        p = self.actor_discrete.predict([
            np.reshape(self.observation, (1, ) + self.observation.shape),
            self.DUMMY_VALUE, self.DUMMY_VALUE, self.DUMMY_ACTION
        ])
        action = np.random.choice(self.action_space_a, p=np.nan_to_num(p[0]))
        action_matrix = np.zeros(p[0].shape)
        action_matrix[action] = 1
        return action, action_matrix, p

    def get_batch(self):
        batch = [
            [],  # Observations
            [],  # Actions
            [],  # Predicted
            []  # Reward
        ]

        tmp_batch = [[], [], []]

        while len(batch[0]) < self.BATCH_SIZE:
            action, action_matrix, predicted_action = self.get_action()

            # Opponent plays a random action each step.
            self.env.primary_player.opponent.do_action(random.randint(0, 12))
            observation, reward, done, info = self.env.step(action)

            self.reward.append(reward)

            tmp_batch[0].append(self.observation)
            tmp_batch[1].append(action_matrix)
            tmp_batch[2].append(predicted_action)

            self.observation = observation

            if done:
                self.env.render_window()
                self.transform_reward()
                for i in range(len(tmp_batch[0])):
                    obs, action, pred = tmp_batch[0][i], tmp_batch[1][i], tmp_batch[2][i]
                    r = self.reward[i]
                    batch[0].append(obs)
                    batch[1].append(action)
                    batch[2].append(pred)
                    batch[3].append(r)
                tmp_batch = [[], [], []]
                self.reset_env()

        obs = np.array(batch[0])
        action = np.array(batch[1])
        pred = np.array(batch[2])
        reward = np.reshape(np.array(batch[3]), (len(batch[3]), 1))
        pred = np.reshape(pred, (pred.shape[0], pred.shape[2]))
        return obs, action, pred, reward

    def reset_env(self):
        self.episode += 1
        self.observation = self.env.reset()
        self.reward = []

    def run(self):

        while self.episode < self.EPISODES:
            obs, action, pred, reward = self.get_batch()

            old_prediction = pred
            pred_values = self.critic.predict(obs)

            # Train Actor
            for e in range(self.EPOCHS):
                self.actor_discrete.train_on_batch(
                    [obs, reward, pred_values, old_prediction], [action])

            # Train Critic
            for e in range(self.EPOCHS):
                self.critic.train_on_batch([obs], [reward])

    def transform_reward(self):

        print(
            'Episode #', self.episode, '\tfinished with reward',
            np.array(self.reward).sum(), '\tAverage Noisy Weights',
            np.mean(self.actor_discrete.get_layer('output').get_weights()[1]))
        self.reward_over_time.append(np.array(self.reward).sum())
        # Replace each immediate reward with the discounted return from
        # that step onward.
        for j in range(len(self.reward)):
            reward = self.reward[j]
            for k in range(j + 1, len(self.reward)):
                reward += self.reward[k] * self.GAMMA ** (k - j)
            self.reward[j] = reward
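
build_actor above compiles the model with a proximal_policy_optimization_loss helper that none of these snippets define. Below is a minimal sketch of a clipped PPO surrogate loss with the same call signature, assuming the Keras backend and the LOSS_CLIPPING = 0.2 constant from the class; the original project's exact formulation may differ:

import keras.backend as K

LOSS_CLIPPING = 0.2  # same clipping range as the class constant above

def proximal_policy_optimization_loss(actual_value, predicted_value, old_prediction):
    # Advantage estimate: discounted return minus the critic's prediction.
    advantage = actual_value - predicted_value

    def loss(y_true, y_pred):
        # y_true is the one-hot action matrix; y_pred is the new policy output.
        prob = y_true * y_pred
        old_prob = y_true * old_prediction
        ratio = prob / (old_prob + 1e-10)
        # Clipped surrogate objective from the PPO paper (Schulman et al., 2017).
        clipped = K.clip(ratio, min_value=1 - LOSS_CLIPPING, max_value=1 + LOSS_CLIPPING)
        return -K.mean(K.minimum(ratio * advantage, clipped * advantage))

    return loss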
Example #8
File: game_1.py Project: cair/experiments
class GameInstance:

    @staticmethod
    def start(data_queue):
        g = GameInstance(data_queue)
        g.loop()
        return True

    def get_stacked_state(self, swapaxes=False):
        if len(self.states) > self.stack:
            if swapaxes:
                return np.swapaxes(np.array(self.states[-1 * self.stack:]), 0, 2)
            else:
                return np.array(self.states[-1 * self.stack:])

        return None

    def __init__(self, data_queue):
        self.id = uuid.uuid4()
        print("Game %s - Start" % self.id)
        self.data_queue = data_queue
        self.game = Game({
            "game": {
                "width": 11,
                "height": 11,
                "tile_width": 32,
                "tile_height": 32
            },
            "mechanics": {
                "complexity": {
                    "build_anywhere": False
                },
                "start_health": 50,
                "start_gold": 100,
                "start_lumber": 0,
                "start_income": 20,
                "income_frequency": 10,
                "ticks_per_second": 20,
                "fps": 10,
                "ups": 10008000,
                "income_ratio": 0.20,
                "kill_gold_ratio": 0.10
            },
            "gui": {
                "enabled": True,
                "draw_friendly": True,
                "minimal": True
            }
        })

        self.states = list()
        self.experience_replay = list()
        self.s0 = None
        self.player_1 = self.game.players[0]
        self.player_2 = self.game.players[1]
        self.episode = 1
        self.representation = "image_grayscaled"
        self.running = False
        self.stack = 4
        self.num_ticks = 10
        self.tick_limit = 30000

    def loop(self):
        self.running = True
        t = 0
        while self.running:

            # Do action
            self.player_1.do_action(random.randint(0, 12))
            self.player_2.do_action(random.randint(0, 12))

            # Process game
            for i in range(self.num_ticks):
                self.game.update()
                t += 1

            # Update image state
            self.game.render()

            # Retrieve the state and append it to the state history
            s1 = self.game.get_state(representation=self.representation)
            self.states.append(s1)
            self.s0 = s1

            # Terminal State, Reset Game
            if self.game.is_terminal() or t >= self.tick_limit:
                self.game.reset()
                print("Game %s - %s#%s" % (self.id, self.episode, t))
                self.episode += 1

                if t < self.tick_limit:
                    self.data_queue.put(self.states)
                self.states.clear()
                t = 0
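
GameInstance.start takes a data_queue and blocks in loop, which suggests it is meant to run as a worker process. A hypothetical launcher under that assumption, using multiprocessing; the worker count of 4 is arbitrary:

from multiprocessing import Process, Queue

if __name__ == "__main__":
    data_queue = Queue()
    # Spawn independent game workers that feed the shared queue.
    workers = [Process(target=GameInstance.start, args=(data_queue,))
               for _ in range(4)]
    for w in workers:
        w.start()
    while True:
        # Blocks until a worker finishes an episode under the tick limit.
        states = data_queue.get()
        print("Received episode with %d states" % len(states))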