Python WindowAugmentation Examples

Programming Language: Python

Namespace/Package Name: agentnet.memory

Examples at hotexamples.com: 5

Python WindowAugmentation - 5 examples found. These are the top-rated real-world Python examples of agentnet.memory.WindowAugmentation, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

WindowAugmentation(5)

Frequently Used Methods

WindowAugmentation (5)

Example #1

Show file

File: RL_agents.py Project: Mariewelt/mariewelt-mdp-med

    def build_model(self):
        """Assemble the windowed Q-network and wrap it into an agent.

        Reads ``self.observation_layer`` and ``self.n_actions``.

        Returns a ``(resolver, agent)`` pair: the epsilon-greedy action
        picker and the ``Agent`` that ties observation, memory, Q-values
        and resolver together.
        """
        # Move the colour axis to position 1 ([batch, color, x, y]) so the
        # convolutional layers work, then apply 2x2 pooling.
        channels_first = DimshuffleLayer(self.observation_layer, (0, 3, 1, 2))
        pooled_frames = Pool2DLayer(channels_first, pool_size=(2, 2))

        # --- memory: a sliding window over the last few frames ---
        n_frames = 5
        frame_shape = tuple(pooled_frames.output_shape[1:])
        window_in = InputLayer((None, n_frames) + frame_shape,
                               name="previous window state")
        window_out = WindowAugmentation(pooled_frames,
                                        window_in,
                                        name="new window state")

        # new memory state -> input layer that receives its previous value
        memory_dict = {window_out: window_in}

        # Pixel-wise maximum over the temporal axis (to avoid flickering).
        def _max_over_window(a):
            return a.max(axis=1)

        collapsed_shape = (None, ) + window_out.output_shape[2:]
        window_peak = ExpressionLayer(window_out,
                                      _max_over_window,
                                      output_shape=collapsed_shape)

        # --- network body: two conv layers and a dense layer, each
        # wrapped in batch normalisation ---
        conv_a = lasagne.layers.Conv2DLayer(window_peak,
                                            num_filters=16,
                                            filter_size=(8, 8),
                                            stride=(4, 4))
        body = batch_norm(conv_a)
        conv_b = lasagne.layers.Conv2DLayer(body,
                                            num_filters=32,
                                            filter_size=(4, 4),
                                            stride=(2, 2))
        body = batch_norm(conv_b)
        body = batch_norm(lasagne.layers.DenseLayer(body, num_units=256))

        # --- Q-value head (linear output, one unit per action) ---
        policy_layer = DenseLayer(body,
                                  num_units=self.n_actions,
                                  nonlinearity=lasagne.nonlinearities.linear,
                                  name="QEvaluator")

        # --- action picker ---
        resolver = EpsilonGreedyResolver(policy_layer, name="resolver")

        # --- everything combined ---
        agent = Agent(self.observation_layer, memory_dict, policy_layer,
                      resolver)
        return resolver, agent

Example #2

Show file

File: atari.py Project: pshvechikov/tinyverse

    def make_agent(self,
                   observation_shape=(1, 64, 64), # same as env.observation_space.shape
                   n_actions = 6,  # same as env.action_space.n
        ):
        """Build the actor-critic agent network over a 4-tick frame window.

        :param observation_shape: per-sample observation shape; the first
            entry is treated as the channel axis when frames are stacked.
        :param n_actions: number of discrete actions (size of the policy head).
        :returns: an ``Agent`` whose policy estimators are
            ``(logits, state value)`` and whose only memory is the frame window.

        Side effect: stores the trainable parameters of both heads in
        ``self.weights``.
        """
        # Accept any sequence for the shape; tuple concatenation below needs a tuple.
        observation_shape = tuple(observation_shape)

        #observation
        inp = InputLayer((None,)+observation_shape,)

        #4-tick window over images
        from agentnet.memory import WindowAugmentation
        window_size = 4
        prev_wnd = InputLayer((None,window_size)+observation_shape)
        new_wnd = WindowAugmentation(inp,prev_wnd)

        #fold the window axis into the channel axis: (window*channels, h, w).
        #Deriving the count from observation_shape keeps this correct for
        #non-grayscale input (e.g. 4 ticks * 3 channels = 12) without edits.
        stacked_channels = window_size*observation_shape[0]
        wnd_reshape = reshape(new_wnd, (-1,stacked_channels)+observation_shape[1:])

        #network body
        conv0 = Conv2DLayer(wnd_reshape,32,5,stride=2,nonlinearity=elu)
        conv1 = Conv2DLayer(conv0,32,5,stride=2,nonlinearity=elu)
        conv2 = Conv2DLayer(conv1,64,5,stride=1,nonlinearity=elu)

        dense = DenseLayer(dropout(conv2,0.1),512,nonlinearity=tanh)

        #actor head
        logits_layer = DenseLayer(dense,n_actions,nonlinearity=None)
        #^^^ store policy logits to regularize them later
        policy_layer = NonlinearityLayer(logits_layer,T.nnet.softmax)

        #critic head
        V_layer = DenseLayer(dense,1,nonlinearity=None)

        #sample actions proportionally to policy_layer
        from agentnet.resolver import ProbabilisticResolver
        action_layer = ProbabilisticResolver(policy_layer)

        #get all weights (just like any lasagne network), covering both heads.
        self.weights = get_all_params([V_layer,policy_layer],trainable=True)

        return Agent(observation_layers=inp,
                     policy_estimators=(logits_layer,V_layer),
                     agent_states={new_wnd:prev_wnd},
                     action_layers=action_layer)

Example #3

Show file

File: test_learning.py Project: yanyankangkang/AgentNet

def test_space_invaders(
    game_title='SpaceInvaders-v0',
    n_parallel_games=3,
    replay_seq_len=2,
):
    """
    Smoke test: build a window-memory agent, play an Atari game, and train it
    for a few epochs through experience replay with several objectives at once
    (1-step Q-learning, SARSA, n-step Q-learning and n-step advantage
    actor-critic).

    :param game_title: name of atari game in Gym
    :param n_parallel_games: how many games we run in parallel
    :param replay_seq_len: how long is one replay session from a batch
    """

    atari = gym.make(game_title)
    atari.reset()

    # Game Parameters
    n_actions = atari.action_space.n
    observation_shape = (None, ) + atari.observation_space.shape
    action_names = atari.get_action_meanings()
    del atari
    # ##### Agent observations

    # image observation at current tick goes here
    observation_layer = InputLayer(observation_shape, name="images input")

    # reshape to [batch, color, x, y] to allow for convolutional layers to work correctly
    observation_reshape = DimshuffleLayer(observation_layer, (0, 3, 1, 2))

    # Agent memory states
    window_size = 3

    # prev state input
    prev_window = InputLayer(
        (None, window_size) + tuple(observation_reshape.output_shape[1:]),
        name="previous window state")

    # our window
    window = WindowAugmentation(observation_reshape,
                                prev_window,
                                name="new window state")

    # maps each new memory state to the input layer holding its previous value
    memory_dict = {window: prev_window}

    # ##### Neural network body
    # you may use any other lasagne layers, including convolutions, batch_norms, maxout, etc

    # pixel-wise maximum over the temporal window (to avoid flickering)
    window_max = ExpressionLayer(window,
                                 lambda a: a.max(axis=1),
                                 output_shape=(None, ) +
                                 window.output_shape[2:])

    # a simple lasagne network (try replacing with any other lasagne network and see what works best)
    nn = DenseLayer(window_max, num_units=50, name='dense0')

    # Agent policy and action picking
    q_eval = DenseLayer(nn,
                        num_units=n_actions,
                        nonlinearity=lasagne.nonlinearities.linear,
                        name="QEvaluator")

    # extra heads so the a2c objective can be exercised as well
    policy_eval = DenseLayer(nn,
                             num_units=n_actions,
                             nonlinearity=lasagne.nonlinearities.softmax,
                             name="a2c action probas")
    state_value_eval = DenseLayer(nn,
                                  num_units=1,
                                  nonlinearity=None,
                                  name="a2c state values")
    # resolver
    resolver = ProbabilisticResolver(policy_eval, name="resolver")

    # agent
    agent = Agent(observation_layer, memory_dict,
                  (q_eval, policy_eval, state_value_eval), resolver)

    # The resolver is built on policy_eval only, so q_eval and
    # state_value_eval are not reachable from it. Collect parameters from all
    # heads; otherwise the Q-learning / critic heads would never be updated
    # even though their losses are part of the objective.
    trainable_heads = [resolver, q_eval, state_value_eval]
    weights = lasagne.layers.get_all_params(trainable_heads, trainable=True)

    # Agent step function
    print('compiling react')
    applier_fun = agent.get_react_function()

    # a nice pythonic interface
    def step(observation, prev_memories='zeros', batch_size=n_parallel_games):
        """ returns actions and new states given observation and prev state
        Prev state in default setup should be [prev window,]"""
        # default to zeros; check the type first so that array-valued memories
        # are never compared element-wise against the sentinel string
        if isinstance(prev_memories, str) and prev_memories == 'zeros':
            prev_memories = [
                np.zeros((batch_size, ) + tuple(mem.output_shape[1:]),
                         dtype='float32') for mem in agent.agent_states
            ]
        res = applier_fun(np.array(observation), *prev_memories)
        action = res[0]
        memories = res[1:]
        return action, memories

    # # Create and manage a pool of atari sessions to play with

    pool = GamePool(game_title, n_parallel_games)

    # only the action log is inspected here
    _, action_log, _, _, _, _ = pool.interact(step, 50)

    print(np.array(action_names)[np.array(action_log)[:3, :5]])

    # # experience replay pool
    # Create an environment with all default parameters
    env = SessionPoolEnvironment(observations=observation_layer,
                                 actions=resolver,
                                 agent_memories=agent.agent_states)

    def update_pool(env, pool, n_steps=100):
        """ a function that creates new sessions and adds them into the pool
        throwing the old ones away entirely for simplicity"""

        # snapshot the memory state *before* interacting: it is the state
        # that precedes the sessions recorded below
        preceding_memory_states = list(pool.prev_memory_states)

        # get interaction sessions
        observation_tensor, action_tensor, reward_tensor, _, is_alive_tensor, _ = pool.interact(
            step, n_steps=n_steps)

        # load them into experience replay environment
        env.load_sessions(observation_tensor, action_tensor, reward_tensor,
                          is_alive_tensor, preceding_memory_states)

    # load first  sessions
    update_pool(env, pool, replay_seq_len)

    # A more sophisticated way of training is to store a large pool of sessions and train on random batches of them.
    # ### Training via experience replay

    # get agent's Q-values, policy, etc obtained via experience replay
    _env_states, _observations, _memories, _imagined_actions, estimators = agent.get_sessions(
        env,
        session_length=replay_seq_len,
        batch_size=env.batch_size,
        optimize_experience_replay=True,
    )
    # same order as the policy estimators passed to Agent above
    (q_values_sequence, policy_sequence, value_sequence) = estimators

    # Evaluating loss function

    scaled_reward_seq = env.rewards
    # For SpaceInvaders, however, not scaling rewards is at least working

    elwise_mse_loss = 0.

    #1-step algos
    for algo in qlearning, sarsa:
        elwise_mse_loss += algo.get_elementwise_objective(
            q_values_sequence,
            env.actions[0],
            scaled_reward_seq,
            env.is_alive,
            gamma_or_gammas=0.99,
        )
    #qlearning_n_step: try n below, equal to and above the session length,
    #as well as n=None
    for n in (1, 3, replay_seq_len - 1, replay_seq_len, replay_seq_len + 1,
              None):
        elwise_mse_loss += qlearning_n_step.get_elementwise_objective(
            q_values_sequence,
            env.actions[0],
            scaled_reward_seq,
            env.is_alive,
            gamma_or_gammas=0.99,
            n_steps=n)

    #a2c n_step

    elwise_mse_loss += a2c_n_step.get_elementwise_objective(
        policy_sequence,
        value_sequence[:, :, 0],  # drop the trailing unit axis of the value head
        env.actions[0],
        scaled_reward_seq,
        env.is_alive,
        gamma_or_gammas=0.99,
        n_steps=3)

    # compute mean over "alive" fragments
    mse_loss = elwise_mse_loss.sum() / env.is_alive.sum()

    # regularize network weights (all heads, matching the optimised parameters)
    reg_l2 = regularize_network_params(trainable_heads, l2) * 10**-4

    loss = mse_loss + reg_l2

    # Compute weight updates
    updates = lasagne.updates.adadelta(loss, weights, learning_rate=0.01)

    # mean session reward
    mean_session_reward = env.rewards.sum(axis=1).mean()

    # # Compile train and evaluation functions

    print('compiling')
    train_fun = theano.function([], [loss, mean_session_reward],
                                updates=updates)
    evaluation_fun = theano.function(
        [], [loss, mse_loss, reg_l2, mean_session_reward])
    print("I've compiled!")

    # # Training loop

    for epoch_counter in range(10):
        update_pool(env, pool, replay_seq_len)
        # outputs are discarded; binding them to `loss` would shadow the
        # symbolic loss expression defined above
        train_fun()
        full_loss, q_loss, l2_penalty, avg_reward_current = evaluation_fun()

        print("epoch %i,loss %.5f, rewards: %.5f " %
              (epoch_counter, full_loss, avg_reward_current))
        print("rec %.3f reg %.3f" % (q_loss, l2_penalty))

Example #4

Show file

def test_memory(
    game_title='SpaceInvaders-v0',
    n_parallel_games=3,
    replay_seq_len=2,
):
    """
    Smoke test: build an agent that carries several kinds of memory at once
    (frame window, stack, RNN cell, GRU cell, GRU memory layer, two LSTM
    cells), play an Atari game, and train it for a few epochs with Q-learning
    through experience replay.

    :param game_title: name of atari game in Gym
    :param n_parallel_games: how many games we run in parallel
    :param replay_seq_len: how long is one replay session from a batch
    """

    atari = gym.make(game_title)
    atari.reset()

    # Game Parameters
    n_actions = atari.action_space.n
    observation_shape = (None, ) + atari.observation_space.shape
    action_names = atari.get_action_meanings()
    del atari
    # ##### Agent observations

    # image observation at current tick goes here
    observation_layer = InputLayer(observation_shape, name="images input")

    # reshape to [batch, color, x, y] to allow for convolutional layers to work correctly
    observation_reshape = DimshuffleLayer(observation_layer, (0, 3, 1, 2))

    # Agent memory states
    # maps each new memory state to the input layer holding its previous
    # value; ordered so the states keep their insertion order
    memory_dict = OrderedDict([])

    ###Window
    window_size = 3

    # prev state input
    prev_window = InputLayer(
        (None, window_size) + tuple(observation_reshape.output_shape[1:]),
        name="previous window state")

    # our window
    window = WindowAugmentation(observation_reshape,
                                prev_window,
                                name="new window state")

    # pixel-wise maximum over the temporal window (to avoid flickering)
    window_max = ExpressionLayer(window,
                                 lambda a: a.max(axis=1),
                                 output_shape=(None, ) +
                                 window.output_shape[2:])

    memory_dict[window] = prev_window

    ###Stack
    #prev stack
    stack_w, stack_h = 4, 5
    # NOTE(review): both dense layers below share the name "prev_stack"
    # although they produce the stack inputs and the stack controls; the
    # names are left untouched here because they are runtime strings.
    stack_inputs = DenseLayer(observation_reshape, stack_w, name="prev_stack")
    stack_controls = DenseLayer(observation_reshape,
                                3,
                                nonlinearity=lasagne.nonlinearities.softmax,
                                name="prev_stack")
    prev_stack = InputLayer((None, stack_h, stack_w),
                            name="previous stack state")
    stack = StackAugmentation(stack_inputs, prev_stack, stack_controls)
    memory_dict[stack] = prev_stack

    # index 0 along axis 1 of the stack
    stack_top = lasagne.layers.SliceLayer(stack, 0, 1)

    ###RNN preset

    prev_rnn = InputLayer((None, 16), name="previous RNN state")
    new_rnn = RNNCell(prev_rnn, observation_reshape)
    memory_dict[new_rnn] = prev_rnn

    ###GRU preset
    prev_gru = InputLayer((None, 16), name="previous GRUcell state")
    new_gru = GRUCell(prev_gru, observation_reshape)
    memory_dict[new_gru] = prev_gru

    ###GRUmemorylayer
    # NOTE(review): this input reuses the name of the GRUCell input above.
    prev_gru1 = InputLayer((None, 15), name="previous GRUcell state")
    new_gru1 = GRUMemoryLayer(15, observation_reshape, prev_gru1)
    memory_dict[new_gru1] = prev_gru1

    #LSTM with peepholes
    prev_lstm0_cell = InputLayer(
        (None, 13), name="previous LSTMCell hidden state [with peepholes]")

    prev_lstm0_out = InputLayer(
        (None, 13), name="previous LSTMCell output state [with peepholes]")

    new_lstm0_cell, new_lstm0_out = LSTMCell(
        prev_lstm0_cell,
        prev_lstm0_out,
        input_or_inputs=observation_reshape,
        peepholes=True,
        name="newLSTM1 [with peepholes]")

    memory_dict[new_lstm0_cell] = prev_lstm0_cell
    memory_dict[new_lstm0_out] = prev_lstm0_out

    #LSTM without peepholes
    prev_lstm1_cell = InputLayer(
        (None, 14), name="previous LSTMCell hidden state [no peepholes]")

    prev_lstm1_out = InputLayer(
        (None, 14), name="previous LSTMCell output state [no peepholes]")

    new_lstm1_cell, new_lstm1_out = LSTMCell(
        prev_lstm1_cell,
        prev_lstm1_out,
        input_or_inputs=observation_reshape,
        peepholes=False,
        name="newLSTM1 [no peepholes]")

    memory_dict[new_lstm1_cell] = prev_lstm1_cell
    memory_dict[new_lstm1_out] = prev_lstm1_out

    ##concat everything

    # print the shapes that are about to be concatenated
    for i in [flatten(window_max), stack_top, new_rnn, new_gru, new_gru1]:
        print(i.output_shape)
    all_memory = concat([
        flatten(window_max),
        stack_top,
        new_rnn,
        new_gru,
        new_gru1,
        new_lstm0_out,
        new_lstm1_out,
    ])

    # ##### Neural network body
    # you may use any other lasagne layers, including convolutions, batch_norms, maxout, etc

    # a simple lasagne network (try replacing with any other lasagne network and see what works best)
    nn = DenseLayer(all_memory, num_units=50, name='dense0')

    # Agent policy and action picking
    q_eval = DenseLayer(nn,
                        num_units=n_actions,
                        nonlinearity=lasagne.nonlinearities.linear,
                        name="QEvaluator")

    # resolver
    resolver = EpsilonGreedyResolver(q_eval, epsilon=0.1, name="resolver")

    # agent
    agent = Agent(observation_layer, memory_dict, q_eval, resolver)

    # Since it's a single lasagne network, one can get its weights, output, etc
    weights = lasagne.layers.get_all_params(resolver, trainable=True)

    # Agent step function
    print('compiling react')
    applier_fun = agent.get_react_function()

    # a nice pythonic interface
    def step(observation, prev_memories='zeros', batch_size=n_parallel_games):
        """ returns actions and new states given observation and prev state
        Prev state is a sequence with one array per agent memory state"""
        # default to zeros; check the type first so that array-valued memories
        # are never compared element-wise against the sentinel string
        if isinstance(prev_memories, str) and prev_memories == 'zeros':
            prev_memories = [
                np.zeros((batch_size, ) + tuple(mem.output_shape[1:]),
                         dtype='float32') for mem in agent.agent_states
            ]
        res = applier_fun(np.array(observation), *prev_memories)
        action = res[0]
        memories = res[1:]
        return action, memories

    # # Create and manage a pool of atari sessions to play with

    pool = GamePool(game_title, n_parallel_games)

    # only the action log is inspected here
    _, action_log, _, _, _, _ = pool.interact(step, 50)

    print(np.array(action_names)[np.array(action_log)[:3, :5]])

    # # experience replay pool
    # Create an environment with all default parameters
    env = SessionPoolEnvironment(observations=observation_layer,
                                 actions=resolver,
                                 agent_memories=agent.agent_states)

    def update_pool(env, pool, n_steps=100):
        """ a function that creates new sessions and adds them into the pool
        throwing the old ones away entirely for simplicity"""

        # snapshot the memory state *before* interacting: it is the state
        # that precedes the sessions recorded below
        preceding_memory_states = list(pool.prev_memory_states)

        # get interaction sessions
        observation_tensor, action_tensor, reward_tensor, _, is_alive_tensor, _ = pool.interact(
            step, n_steps=n_steps)

        # load them into experience replay environment
        env.load_sessions(observation_tensor, action_tensor, reward_tensor,
                          is_alive_tensor, preceding_memory_states)

    # load first  sessions
    update_pool(env, pool, replay_seq_len)

    # A more sophisticated way of training is to store a large pool of sessions and train on random batches of them.
    # ### Training via experience replay

    # get agent's Q-values obtained via experience replay
    _env_states, _observations, _memories, _imagined_actions, q_values_sequence = agent.get_sessions(
        env,
        session_length=replay_seq_len,
        batch_size=env.batch_size,
        optimize_experience_replay=True,
    )

    # Evaluating loss function

    scaled_reward_seq = env.rewards
    # For SpaceInvaders, however, not scaling rewards is at least working

    elwise_mse_loss = qlearning.get_elementwise_objective(
        q_values_sequence,
        env.actions[0],
        scaled_reward_seq,
        env.is_alive,
        gamma_or_gammas=0.99,
    )

    # compute mean over "alive" fragments
    mse_loss = elwise_mse_loss.sum() / env.is_alive.sum()

    # regularize network weights
    reg_l2 = regularize_network_params(resolver, l2) * 10**-4

    loss = mse_loss + reg_l2

    # Compute weight updates
    updates = lasagne.updates.adadelta(loss, weights, learning_rate=0.01)

    # mean session reward
    mean_session_reward = env.rewards.sum(axis=1).mean()

    # # Compile train and evaluation functions

    print('compiling')
    train_fun = theano.function([], [loss, mean_session_reward],
                                updates=updates)
    evaluation_fun = theano.function(
        [], [loss, mse_loss, reg_l2, mean_session_reward])
    print("I've compiled!")

    # # Training loop

    for epoch_counter in range(10):
        update_pool(env, pool, replay_seq_len)
        # outputs are discarded; binding them to `loss` would shadow the
        # symbolic loss expression defined above
        train_fun()
        full_loss, q_loss, l2_penalty, avg_reward_current = evaluation_fun()

        print("epoch %i,loss %.5f, rewards: %.5f " %
              (epoch_counter, full_loss, avg_reward_current))
        print("rec %.3f reg %.3f" % (q_loss, l2_penalty))

Example #5

Show file

    def __init__(
        self,
        observation_shape,
        n_actions,
        n_goals=32,
        metacontroller_period=5,
        window_size=3,
        embedding_size=128,
    ):
        """Build a goal-conditioned Q-network agent with a frame-window memory.

        The network takes an image observation and an integer goal id. The
        image goes through a pooled, max-over-window convolutional stack; the
        goal id is embedded; both are concatenated and fed to a linear
        Q-value head with an epsilon-greedy resolver.

        All constructor arguments are stored on the instance under the same
        names. Also sets ``observation_layer``, ``goal_layer``,
        ``dnn_output``, ``resolver``, ``agent``, ``applier_fun`` and
        ``weights``.
        """
        # Image observation at the current tick goes here.
        self.observation_layer = InputLayer(observation_shape,
                                            name="images input")

        # Reorder to [batch, color, x, y] so convolutional layers work,
        # then downsample with 2x2 average pooling.
        frames = DimshuffleLayer(self.observation_layer, (0, 3, 1, 2))
        frames = lasagne.layers.Pool2DLayer(frames, (2, 2),
                                            mode='average_inc_pad')

        # Input carrying the previous window state.
        single_frame_shape = tuple(frames.output_shape[1:])
        window_in = InputLayer((None, window_size) + single_frame_shape,
                               name="previous window state")
        # Updated window state.
        window_out = WindowAugmentation(frames,
                                        window_in,
                                        name="new window state")

        # Pixel-wise maximum over the temporal window (to avoid flickering).
        def _temporal_max(a):
            return a.max(axis=1)

        frame_max = ExpressionLayer(window_out,
                                    _temporal_max,
                                    output_shape=(None, ) +
                                    window_out.output_shape[2:])

        # new memory state -> input layer that receives its previous value
        memory_dict = {window_out: window_in}

        # Convolutional body: three conv layers, each batch-normalised.
        features = batch_norm(
            Conv2DLayer(frame_max, 16, filter_size=8, stride=(4, 4),
                        name='cnn0'))
        features = batch_norm(
            Conv2DLayer(features, 32, filter_size=4, stride=(2, 2),
                        name='cnn1'))
        features = batch_norm(
            Conv2DLayer(features, 64, filter_size=4, stride=(2, 2),
                        name='cnn2'))

        # Dense features; also exposed as an extra agent output below.
        features = DenseLayer(features, num_units=256, name='dense1')
        self.dnn_output = features

        # Integer goal id, embedded into a dense vector.
        self.goal_layer = InputLayer((None, ), T.ivector(), name='boss goal')
        self.goal_layer.output_dtype = 'int32'
        goal_vector = EmbeddingLayer(self.goal_layer, n_goals, embedding_size)

        joint = lasagne.layers.ConcatLayer([goal_vector, features])

        # Q-value head: linear output, one unit per action.
        q_eval = DenseLayer(joint,
                            num_units=n_actions,
                            nonlinearity=lasagne.nonlinearities.linear,
                            name="QEvaluator")

        # Action picker.
        self.resolver = EpsilonGreedyResolver(q_eval, name="resolver")

        # Everything combined: two observation inputs, one memory,
        # one estimator and two action-side outputs.
        agent_inputs = [self.observation_layer, self.goal_layer]
        agent_outputs = [self.resolver, self.dnn_output]
        self.agent = Agent(agent_inputs, memory_dict, q_eval, agent_outputs)

        # Keep the construction settings on the instance.
        self.observation_shape = observation_shape
        self.n_actions = n_actions
        self.n_goals = n_goals
        self.metacontroller_period = metacontroller_period
        self.window_size = window_size
        self.embedding_size = embedding_size

        self.applier_fun = self.agent.get_react_function()

        self.weights = lasagne.layers.get_all_params(self.resolver,
                                                     trainable=True)