Example #1
0
File: PPO.py Project: war3gu/gykRL
def play():
    """Run the loaded actor in a rendered LunarLander-v2 environment.

    Creates the environment, builds an ``Actor`` from its action and
    observation spaces, calls ``actor.load()``, and then loops forever:
    render a frame, ask the actor for an action, step the environment, and
    reset the environment whenever a step reports ``done``.

    The loop has no exit condition, so this function never returns
    normally; it ends only when an exception (for example a
    ``KeyboardInterrupt``) propagates out of it. The environment is closed
    on the way out.
    """
    print("play")

    env = gym.make('LunarLander-v2')
    try:
        state = env.reset()

        actor = Actor(env.action_space, env.observation_space)
        actor.load()

        while True:
            env.render()

            # The actor is given the observation with a leading axis added.
            action, _action_matrix, _prob = actor.predict(state[np.newaxis, :])

            # Only the next observation and the done flag matter here; the
            # reward and info are ignored because nothing is being trained.
            state, _reward, done, _info = env.step(action)

            if done:
                state = env.reset()
    finally:
        # Release the environment's resources however the loop is left.
        env.close()
Example #2
0
    def get_actions(self, fuid=None):
        """Build and return a dictionary of the actions this player could
        perform right now.

        The player's own actions are stored under the ID produced by
        Action.make_id(). The held item, the current location and every
        actor at that location are then asked to add their own entries
        through external_actions().

        If fuid is given, it is split on '.', its second part is converted
        to an int, and the result is handed to Util.match_id() to decide
        which of those sources are consulted; its third part is passed on
        to external_actions() as the action name.

        An empty dictionary is returned when self.ap is zero or negative.
        """
        available = {}
        if self.ap <= 0:
            # Nothing can be done with zero or negative AP
            return available

        # Break the requested ID (if any) into its parts
        id_parts = None
        name = None
        if fuid is not None:
            id_parts = fuid.split('.')
            id_parts[1] = int(id_parts[1])
            name = id_parts[2]

        # Actions the player performs on their own.
        # Saying "Boo!" is a debug action.
        # FIXME: Remove this
        if Util.match_id(id_parts, self, "sayboo"):
            boo_uid = Action.make_id(self, "sayboo")
            available[boo_uid] = Action(boo_uid,
                                        self,
                                        caption="Say Boo",
                                        action=lambda d: self.say_boo(),
                                        group="player")

        # Swapping the held item.
        if Util.match_id(id_parts, self, "changeitem"):
            change_uid = Action.make_id(self, "changeitem")
            change = ActionChangeItem(
                change_uid,
                self,
                caption="Change",
                cost=Cost(),
                group="inventory",
                action=lambda d: self.change_item_action(d),
                html="Use item <input id='%s_id' size='3' />. " % change_uid,
                parameters=['id'])
            change.html += change.make_button_for()
            available[change_uid] = change

        # Actions offered by the held item; any action of the item matches
        held = self.held_item()
        if held is not None and Util.match_id(id_parts, held):
            held.external_actions(available, self, name)

        # Actions offered by worn items
        # FIXME: Fill in here

        # Actions offered by the current location
        here = self.loc()
        if Util.match_id(id_parts, here):
            here.external_actions(available, self, name)

        # Actions offered by the actors at this location
        for other_id in self.loc().actor_ids():
            other = Actor.load(other_id)
            if Util.match_id(id_parts, other):
                other.external_actions(available, self, name)

        # Actions offered by actors nearby
        # FIXME: Fill in here

        return available
Example #3
0
def actor_handler(req, player, target, components):
    """Serve a single resource of the actor loaded from the given target ID.

    components must contain exactly one element, otherwise
    apache.HTTP_NOT_FOUND is returned. That element selects what is served:

      'actions'   - GET writes every action of the actor, each followed by
                    a "-" line; POST performs the action named in the
                    parsed input (if there is one) and then saves the game
                    state
      'desc'      - the actor's description
      'inventory' - the actor's inventory, as a table
      'equipment' - the actor's equipment, as a table
      'log'       - the actor's messages since the value of the
                    X-WoR-Messages-Since request header or, when that
                    header is absent, since actor.last_action (0 if the
                    actor has no such attribute)

    Everything except 'actions' accepts GET only. Unsupported methods get
    apache.HTTP_METHOD_NOT_ALLOWED, unknown resources get
    apache.HTTP_NOT_FOUND, and anything served successfully gets apache.OK.
    """
    # The request URL must carry precisely one component
    if len(components) != 1:
        return apache.HTTP_NOT_FOUND

    req.content_type = "text/plain"
    actor = Actor.load(target)
    context = player.get_context()
    resource = components[0]

    # Actions come first: they are the only resource that takes a POST
    if resource == 'actions':
        if req.method == 'GET':
            # Write out the list of actions
            for _uid, act in actor.get_actions().iteritems():
                Util.render_info(act.context_get(context), req)
                req.write("-\n")
            return apache.OK

        if req.method == 'POST':
            data = Util.parse_input(req)
            if 'action' in data:
                actor.perform_action(data['action'], data)

            # Game state may have changed, so save it
            GameUtil.save()
            return apache.OK

        # Neither GET nor POST
        return apache.HTTP_METHOD_NOT_ALLOWED

    # Every remaining resource is read-only
    if req.method != 'GET':
        return apache.HTTP_METHOD_NOT_ALLOWED

    log.debug("Actor handler: requested " + str(components))

    if resource == 'desc':
        # Description
        Util.render_info(actor.context_get(context), req)
    elif resource in ('inventory', 'equipment'):
        # Both containers live on the actor under the resource's own name
        # and are rendered the same way
        container = getattr(actor, resource)
        Util.render_table(container.context_get_equip(context), req)
    elif resource == 'log':
        # Actor logs: the header, when present, overrides the actor's
        # own last_action marker
        since = (req.headers_in['X-WoR-Messages-Since']
                 if 'X-WoR-Messages-Since' in req.headers_in
                 else getattr(actor, 'last_action', 0))
        Util.render_table(actor.get_messages(since), req)
    else:
        return apache.HTTP_NOT_FOUND

    return apache.OK
Example #4
0
File: PPO.py Project: war3gu/gykRL
def train():
    """Train the actor and critic on LunarLander-v2 in an endless loop.

    Every environment step is added to a ``ReplayMemory``. Whenever the
    memory size is a multiple of 128, or the step ended an episode, the
    whole memory is fetched with ``miniAll()``, the critic and then the
    actor are each trained on it for 10 passes, and the memory is cleared.

    At the end of each episode the accumulated episode reward is written as
    a summary to ``./log``, tagged with the episode counter. The counter is
    then incremented, and both networks are saved each time it reaches a
    multiple of 25.

    The loop has no exit condition, so this function never returns
    normally; it ends only when an exception (for example a
    ``KeyboardInterrupt``) propagates out of it. The summary writer, the
    TensorFlow session and the environment are closed on the way out.
    """
    # Train whenever the memory holds a multiple of this many transitions.
    update_interval = 128
    # Weight of the critic's next-state prediction in the training target.
    discount = 0.7
    # Training passes per update, first for the critic and then the actor.
    passes_per_update = 10
    # Save both networks when the episode counter is a multiple of this.
    save_interval = 25

    env = gym.make('LunarLander-v2')
    session = None
    writer = None
    try:
        state = env.reset()

        actor = Actor(env.action_space, env.observation_space)
        critic = Critic(env.action_space, env.observation_space)

        actor.load()
        critic.load()

        replayMemory = ReplayMemory()

        summary_ops, summary_vars = build_summaries()

        # A single session serves both the graph dump and every summary
        # evaluation, instead of opening a new, never-closed session at the
        # end of each episode.
        session = tf.Session()
        writer = tf.summary.FileWriter("./log", session.graph)

        episode_reward = 0
        episode = 1

        while True:
            # The actor is given the observation with a leading axis added.
            action, action_matrix, prob = actor.predict(state[np.newaxis, :])

            next_state, reward, done, info = env.step(action)

            replayMemory.add(state, action_matrix, reward, done, next_state,
                             prob)

            state = next_state
            episode_reward += reward

            if replayMemory.size() % update_interval == 0 or done:
                (state_b, action_matrix_b, reward_b, done_b, next_state_b,
                 prob_b) = replayMemory.miniAll()

                # Add a trailing axis to the rewards before combining them
                # with the critic's output.
                reward_b = reward_b[:, np.newaxis]

                # Training target: reward plus the discounted prediction for
                # the next state.
                # NOTE(review): done_b is not used here, so the target also
                # includes the critic's prediction for transitions that
                # ended an episode -- confirm this is intended.
                state_pre_value = reward_b + critic.predict(next_state_b) * discount

                # Predictions for the current states are taken before the
                # critic is updated below.
                state_value = critic.predict(state_b)

                for _ in range(passes_per_update):
                    critic.train(state_b, state_pre_value)

                for _ in range(passes_per_update):
                    actor.train(state_b, state_value, state_pre_value,
                                action_matrix_b, prob_b)

                replayMemory.clear()

            if done:
                summary_str = session.run(
                    summary_ops, feed_dict={summary_vars[0]: episode_reward})
                writer.add_summary(summary_str, episode)
                writer.flush()

                state = env.reset()
                episode_reward = 0
                episode += 1

                if episode % save_interval == 0:
                    actor.save()
                    critic.save()
    finally:
        # Release whatever was opened, however the loop is left.
        if writer is not None:
            writer.close()
        if session is not None:
            session.close()
        env.close()