def test_readme(self):
        environment = UnittestEnvironment(states=dict(type='float',
                                                      shape=(10, )),
                                          actions=dict(type='int',
                                                       num_values=5))

        def get_current_state():
            return environment.reset()

        def execute_decision(x):
            return environment.execute(actions=x)[2]

        # Instantiate a Tensorforce agent
        agent = PPOAgent(states=dict(type='float', shape=(10, )),
                         actions=dict(type='int', num_values=5),
                         memory=10000,
                         network='auto',
                         update_mode=dict(unit='episodes', batch_size=10),
                         step_optimizer=dict(type='adam', learning_rate=1e-4))

        # Initialize the agent
        agent.initialize()

        # Retrieve the latest (observable) environment state
        state = get_current_state()  # (float array of shape [10])

        # Query the agent for its action decision
        action = agent.act(states=state)  # (scalar between 0 and 4)

        # Execute the decision and retrieve the current performance score
        reward = execute_decision(action)  # (any scalar float)

        # Pass feedback about performance (and termination) to the agent
        agent.observe(reward=reward, terminal=False)

        agent.close()
        environment.close()
        self.assertTrue(expr=True)
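
The test above drives only a single decision step. A minimal sketch (not part of the original test) of how the same act/observe calls extend to a full episode, assuming environment.execute() returns a (states, terminal, reward) tuple as the helper functions above imply:

states = environment.reset()
terminal = False
while not terminal:
    # Query the agent, apply the action, then feed the reward back
    action = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=action)
    agent.observe(reward=reward, terminal=terminal)
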
        actions={
            "up": dict(type="float", min_value=0.0, max_value=1.0),
            "down": dict(type="float", min_value=0.0, max_value=1.0),
            "left": dict(type="float", min_value=0.0, max_value=1.0),
            "right": dict(type="float", min_value=0.0, max_value=1.0),
        },
        network='auto',
        memory=10000,
    )

else:
    print("Available agents: vpg, ppo, dqn")
    exit()

print("agent ready", agent)
agent.initialize()  # Set up the agent's internal model before acting

try:  # Look for a saved model and load it if one is available
    lastEpoch = int(
        os.listdir(tmp + "/saved/player_pun/" + args.agent)[2].split("-")[0])

    agent.restore(directory=tmp + "/saved/player_pun/" + args.agent)
    print("restored")
except Exception:  # start fresh if no saved model is available
    print("DID NOT RESTORE")
    lastEpoch = 0

epochs = 2000000

for epoch in tqdm(range(lastEpoch, epochs + 1)):
    #print(epoch)
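
The fragment above resumes training from the newest checkpoint found in a directory. A hedged sketch of the saving side of that pattern, assuming agent.save() is the counterpart of the agent.restore() call used above and accepts the same directory argument (the checkpoint interval is illustrative only):

checkpoint_dir = tmp + "/saved/player_pun/" + args.agent  # same directory the restore code scans
for epoch in tqdm(range(lastEpoch, epochs + 1)):
    # ... run one training episode here ...
    if epoch % 100 == 0:  # periodic checkpoint so a restart can resume from lastEpoch
        agent.save(directory=checkpoint_dir)
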
            "user": dict(type="int", num_values=G.graph.shape[0]),
            "item": dict(type="int", num_values=G.graph.shape[1])
        },
        network=[
            dict(type='flatten'),
            dict(type="dense", size=32),
        ],
        memory=10000,
    )

print("agent ready", agent)

if args.process == "train":

    new_agent = copy.deepcopy(agent)
    agent.initialize()

    try:
        lastEpoch = int(os.listdir("saved/" + args.agent)[2].split("-")[0])

        agent.restore(directory="saved/" + args.agent + "/" + args.contrarian)
        print("restored")
    except Exception:  # no saved checkpoint; start from epoch 0
        lastEpoch = 0

    epochs = 100000
    cluster_vals = []
    for epoch in tqdm(range(lastEpoch, epochs)):
        G = Audience(20, 15)

        # 20 recommendations for every user
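
The two fragments above build agents with dictionary action spaces (up/down/left/right floats in one, user/item indices in the other). For such specs, agent.act() returns a dictionary keyed by action name rather than a single value. A hedged sketch using the user/item names from the fragment above (state stands for whatever observation the training loop supplies; score_recommendation is a hypothetical helper, not part of the original code):

actions = agent.act(states=state)          # dict of actions, one entry per named action
user_idx = actions["user"]                 # int index into G.graph rows
item_idx = actions["item"]                 # int index into G.graph columns
reward = score_recommendation(G, user_idx, item_idx)  # hypothetical scoring helper
agent.observe(reward=reward, terminal=False)
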
Example #4
import os

import numpy as np

from tensorforce.agents import PPOAgent
# Assumed import path: SerpentError is defined in the SerpentAI framework's utilities module
from serpent.utilities import SerpentError


class SerpentPPO:
    def __init__(self, frame_shape=None, game_inputs=None):

        if frame_shape is None:
            raise SerpentError("A 'frame_shape' tuple kwarg is required...")

        states_spec = {"type": "float", "shape": frame_shape}

        if game_inputs is None:
            raise SerpentError("A 'game_inputs' dict kwarg is required...")

        self.game_inputs = game_inputs
        self.game_inputs_mapping = self._generate_game_inputs_mapping()

        print('game inputs mapping:')
        print(self.game_inputs_mapping)
        actions_spec = {"type": "int", "num_values": len(self.game_inputs)}

        summary_spec = {
            "directory": "./board/",
            "steps": 50,
            "labels": [
                "configuration", "gradients_scalar", "regularization",
                "inputs", "losses", "variables"
            ]
        }

        network_spec = [
            {"type": "conv2d", "size": 16, "window": 8, "stride": 4},
            {"type": "conv2d", "size": 32, "window": 4, "stride": 2},
            {"type": "conv2d", "size": 32, "window": 3, "stride": 1},
            {"type": "flatten"},
            {"type": "dense", "size": 64}
        ]

        baseline_spec = {
            "type": "cnn",
            "conv_sizes": [32, 32],
            "dense_sizes": [32]
        }

        saver_spec = {
            "directory": os.path.join(os.getcwd(), "datasets",
                                      "t4androidmodel"),
            "seconds": 120
        }
        #         memory_spec = {'type':'latest', 'include_next_states':False, 'capacity':1000*1000}

        self.agent = PPOAgent(
            states=states_spec,
            actions=actions_spec,
            network=network_spec,
            #             baseline_mode='states',
            #             baseline=baseline_spec,
            summarizer=summary_spec,
            memory=10,
            update_mode=dict(unit='timesteps', batch_size=2),
            discount=0.97,
            saver=saver_spec)

        self.agent.initialize()
#
#             batched_observe=2560,
#             scope="ppo",
#             summarizer=summary_spec,
#             network=network_spec,
#             device=None,
#             session_config=None,
#             saver_spec=None,
#             distributed_spec=None,
#             discount=0.97,
#             variable_noise=None,
#             states_preprocessing_spec=None,
#             explorations_spec=None,
#             reward_preprocessing_spec=None,
#             distributions_spec=None,
#             entropy_regularization=0.01,
#             batch_size=2560,
#             keep_last_timestep=True,
#             baseline_mode=None,
#             baseline=None,
#             baseline_optimizer=None,
#             gae_lambda=None,
#             likelihood_ratio_clipping=None,
#             step_optimizer=None,
#             optimization_steps=10
#
#         )

    def generate_action(self, game_frame_buffer):
        states = np.stack(game_frame_buffer, axis=2)

        action = self.agent.act(states)
        label = self.game_inputs_mapping[action]

        return action, label, self.game_inputs[label]

    def observe(self, reward=0, terminal=False):
        self.agent.observe(reward=reward, terminal=terminal)

    def _generate_game_inputs_mapping(self):
        mapping = dict()

        for index, key in enumerate(self.game_inputs):
            mapping[index] = key

        return mapping
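
A short usage sketch of the class above. The frame shape, input names, and key values are illustrative placeholders, not part of the original example; the dictionary values are passed through untouched by generate_action():

game_inputs = {
    "JUMP": ["SPACE"],
    "NOOP": []
}
ppo = SerpentPPO(frame_shape=(100, 100, 4), game_inputs=game_inputs)

frame_buffer = [np.zeros((100, 100)) for _ in range(4)]  # stand-in for 4 stacked grayscale frames
action, label, game_input = ppo.generate_action(frame_buffer)
ppo.observe(reward=0.0, terminal=False)
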