def test_readme(self):
        environment = UnittestEnvironment(states=dict(type='float',
                                                      shape=(10, )),
                                          actions=dict(type='int',
                                                       num_values=5))

        def get_current_state():
            return environment.reset()

        def execute_decision(x):
            return environment.execute(actions=x)[2]

        # Instantiate a Tensorforce agent
        agent = PPOAgent(states=dict(type='float', shape=(10, )),
                         actions=dict(type='int', num_values=5),
                         memory=10000,
                         network='auto',
                         update_mode=dict(unit='episodes', batch_size=10),
                         step_optimizer=dict(type='adam', learning_rate=1e-4))

        # Initialize the agent
        agent.initialize()

        # Retrieve the latest (observable) environment state
        state = get_current_state()  # (float array of shape [10])

        # Query the agent for its action decision
        action = agent.act(states=state)  # (scalar between 0 and 4)

        # Execute the decision and retrieve the current performance score
        reward = execute_decision(action)  # (any scalar float)

        # Pass feedback about performance (and termination) to the agent
        agent.observe(reward=reward, terminal=False)

        agent.close()
        environment.close()
        self.assertTrue(expr=True)
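
The test above drives only a single decision step. A minimal sketch (not part of the original test) of how the same act/observe calls extend to a full episode, assuming environment.execute() returns a (states, terminal, reward) tuple as the helper functions above imply:

states = environment.reset()
terminal = False
while not terminal:
    # Query the agent, apply the action, then feed the reward back
    action = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=action)
    agent.observe(reward=reward, terminal=terminal)
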
        actions={
            "up": dict(type="float", min_value=0.0, max_value=1.0),
            "down": dict(type="float", min_value=0.0, max_value=1.0),
            "left": dict(type="float", min_value=0.0, max_value=1.0),
            "right": dict(type="float", min_value=0.0, max_value=1.0),
        },
        network='auto',
        memory=10000,
    )

else:
    print("Available agents: vpg, ppo, dqn")
    exit()

print("agent ready", agent)
agent.initialize()  # Set up the agent's internal model before acting

try:  # Look for a saved model and load it if one is available
    lastEpoch = int(
        os.listdir(tmp + "/saved/player_pun/" + args.agent)[2].split("-")[0])

    agent.restore(directory=tmp + "/saved/player_pun/" + args.agent)
    print("restored")
except Exception:  # start fresh if no saved model is available
    print("DID NOT RESTORE")
    lastEpoch = 0

epochs = 2000000

for epoch in tqdm(range(lastEpoch, epochs + 1)):
    #print(epoch)
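
The fragment above resumes training from the newest checkpoint found in a directory. A hedged sketch of the saving side of that pattern, assuming agent.save() is the counterpart of the agent.restore() call used above and accepts the same directory argument (the checkpoint interval is illustrative only):

checkpoint_dir = tmp + "/saved/player_pun/" + args.agent  # same directory the restore code scans
for epoch in tqdm(range(lastEpoch, epochs + 1)):
    # ... run one training episode here ...
    if epoch % 100 == 0:  # periodic checkpoint so a restart can resume from lastEpoch
        agent.save(directory=checkpoint_dir)
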
            "user": dict(type="int", num_values=G.graph.shape[0]),
            "item": dict(type="int", num_values=G.graph.shape[1])
        },
        network=[
            dict(type='flatten'),
            dict(type="dense", size=32),
        ],
        memory=10000,
    )

print("agent ready", agent)

if args.process == "train":

    new_agent = copy.deepcopy(agent)
    agent.initialize()

    try:
        lastEpoch = int(os.listdir("saved/" + args.agent)[2].split("-")[0])

        agent.restore(directory="saved/" + args.agent + "/" + args.contrarian)
        print("restored")
    except Exception:  # no saved checkpoint; start from epoch 0
        lastEpoch = 0

    epochs = 100000
    cluster_vals = []
    for epoch in tqdm(range(lastEpoch, epochs)):
        G = Audience(20, 15)

        # 20 recommendations for every user
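
The two fragments above build agents with dictionary action spaces (up/down/left/right floats in one, user/item indices in the other). For such specs, agent.act() returns a dictionary keyed by action name rather than a single value. A hedged sketch using the user/item names from the fragment above (state stands for whatever observation the training loop supplies; score_recommendation is a hypothetical helper, not part of the original code):

actions = agent.act(states=state)          # dict of actions, one entry per named action
user_idx = actions["user"]                 # int index into G.graph rows
item_idx = actions["item"]                 # int index into G.graph columns
reward = score_recommendation(G, user_idx, item_idx)  # hypothetical scoring helper
agent.observe(reward=reward, terminal=False)
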
Example #4
import os

import numpy as np

from tensorforce.agents import PPOAgent
# Assumed import path: SerpentError is defined in the SerpentAI framework's utilities module
from serpent.utilities import SerpentError


class SerpentPPO:
    def __init__(self, frame_shape=None, game_inputs=None):

        if frame_shape is None:
            raise SerpentError("A 'frame_shape' tuple kwarg is required...")

        states_spec = {"type": "float", "shape": frame_shape}

        if game_inputs is None:
            raise SerpentError("A 'game_inputs' dict kwarg is required...")

        self.game_inputs = game_inputs
        self.game_inputs_mapping = self._generate_game_inputs_mapping()

        print('game inputs mapping:')
        print(self.game_inputs_mapping)
        actions_spec = {"type": "int", "num_values": len(self.game_inputs)}

        summary_spec = {
            "directory": "./board/",
            "steps": 50,
            "labels": [
                "configuration", "gradients_scalar", "regularization",
                "inputs", "losses", "variables"
            ]
        }

        network_spec = [
            {"type": "conv2d", "size": 16, "window": 8, "stride": 4},
            {"type": "conv2d", "size": 32, "window": 4, "stride": 2},
            {"type": "conv2d", "size": 32, "window": 3, "stride": 1},
            {"type": "flatten"},
            {"type": "dense", "size": 64}
        ]

        baseline_spec = {
            "type": "cnn",
            "conv_sizes": [32, 32],
            "dense_sizes": [32]
        }

        saver_spec = {
            "directory": os.path.join(os.getcwd(), "datasets",
                                      "t4androidmodel"),
            "seconds": 120
        }
        #         memory_spec = {'type':'latest', 'include_next_states':False, 'capacity':1000*1000}

        self.agent = PPOAgent(
            states=states_spec,
            actions=actions_spec,
            network=network_spec,
            #             baseline_mode='states',
            #             baseline=baseline_spec,
            summarizer=summary_spec,
            memory=10,
            update_mode=dict(unit='timesteps', batch_size=2),
            discount=0.97,
            saver=saver_spec)

        self.agent.initialize()
#
#             batched_observe=2560,
#             scope="ppo",
#             summarizer=summary_spec,
#             network=network_spec,
#             device=None,
#             session_config=None,
#             saver_spec=None,
#             distributed_spec=None,
#             discount=0.97,
#             variable_noise=None,
#             states_preprocessing_spec=None,
#             explorations_spec=None,
#             reward_preprocessing_spec=None,
#             distributions_spec=None,
#             entropy_regularization=0.01,
#             batch_size=2560,
#             keep_last_timestep=True,
#             baseline_mode=None,
#             baseline=None,
#             baseline_optimizer=None,
#             gae_lambda=None,
#             likelihood_ratio_clipping=None,
#             step_optimizer=None,
#             optimization_steps=10
#
#         )

    def generate_action(self, game_frame_buffer):
        states = np.stack(game_frame_buffer, axis=2)

        action = self.agent.act(states)
        label = self.game_inputs_mapping[action]

        return action, label, self.game_inputs[label]

    def observe(self, reward=0, terminal=False):
        self.agent.observe(reward=reward, terminal=terminal)

    def _generate_game_inputs_mapping(self):
        mapping = dict()

        for index, key in enumerate(self.game_inputs):
            mapping[index] = key

        return mapping
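
A short usage sketch of the class above. The frame shape, input names, and key values are illustrative placeholders, not part of the original example; the dictionary values are passed through untouched by generate_action():

game_inputs = {
    "JUMP": ["SPACE"],
    "NOOP": []
}
ppo = SerpentPPO(frame_shape=(100, 100, 4), game_inputs=game_inputs)

frame_buffer = [np.zeros((100, 100)) for _ in range(4)]  # stand-in for 4 stacked grayscale frames
action, label, game_input = ppo.generate_action(frame_buffer)
ppo.observe(reward=0.0, terminal=False)
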