        network='auto',
        memory=10000,
    )

else:
    print("Available agents: vpg, ppo, dqn")
    exit()

print("agent ready", agent)
agent.initialize()  # set up the agent's internal model before training

try:  # load a previously saved model if one is available
    lastEpoch = int(
        os.listdir(tmp + "/saved/player_pun/" + args.agent)[2].split("-")[0])

    agent.restore(directory=tmp + "/saved/player_pun/" + args.agent)
    print("restored")
except Exception:  # start fresh if no saved model is available
    print("DID NOT RESTORE")
    lastEpoch = 0

epochs = 2000000

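# Training loop: each epoch sets up a fresh simulation on a randomly chosen map (1-20).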
for epoch in tqdm(range(lastEpoch, epochs + 1)):
    #print(epoch)
    #pdb.set_trace()
    map_num = random.randint(1, 20)
    simulation = GameSim(map_num)
    # penalize over-estimation
    saved_pos = None
        memory=10000,
    )

else:
    print("Available agents: vpg, ppo, dqn")
    exit()

print("agent ready", agent)
agent.initialize()  # set up the agent's internal model before training


try:  # load a previously saved model if one is available
    lastEpoch = int(os.listdir(tmp + "/saved/sim_rew_mult/" +
                               args.agent)[2].split("-")[0])

    agent.restore(directory=tmp + "/saved/sim_rew_mult/" + args.agent)
    print("restored")
except Exception as e:  # starts fresh if no saved model is available
    print(e)
    lastEpoch = 0


epochs = 100000
level_index = 1


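# Training loop: each epoch sets up a fresh simulation of the level given by level_index.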
for epoch in tqdm(range(lastEpoch, epochs)):
    # print(epoch)
    simulation = GameSim(level_index)
    # penalize over-estimation
    saved_pos = [None] * NUMBER_OF_PLAYERS
            dict(type="dense", size=32),
        ],
        memory=10000,
    )

print("agent ready", agent)

if args.process == "train":

    new_agent = copy.deepcopy(agent)
    agent.initialize()

    try:
        lastEpoch = int(os.listdir("saved/" + args.agent)[2].split("-")[0])

        agent.restore(directory="saved/" + args.agent + "/" + args.contrarian)
        print("restored")
    except Exception:
        lastEpoch = 0

    epochs = 100000
    cluster_vals = []
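    # Training loop: each epoch builds a fresh Audience graph and asks the agent for 20 recommendations per user.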
    for epoch in tqdm(range(lastEpoch, epochs)):
        G = Audience(20, 15)

        # 20 recommendations for every user
        training_size = G.graph.shape[0] * 20
        changes = []
        for step in range(training_size):
            action = agent.act(G.graph)
class TensorForceAgent(BaseAgent):
    """The TensorForceAgent. Acts through the algorith, not here."""

    def __init__(self, character=characters.Bomber, algorithm='ppo'):
        super(TensorForceAgent, self).__init__(character)
        self.algorithm = algorithm
        self.tf_agent = None
        self.agent_id = 0
        self.env = None
        
    def act(self, obs, action_space):
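        # Featurize the raw observation and delegate action selection to the
        # underlying Tensorforce agent.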
        ppo_state = self.envWrapper(obs)
        return self.tf_agent.act(ppo_state)

    def initialize(self, env, parallel_interactions=1, summarizer=None, saver=None):
        from gym import spaces
        from tensorforce.agents import PPOAgent

        self.env = env

        if self.algorithm == "ppo":
            if isinstance(env.action_space, spaces.Tuple):
                actions = {
                    str(num): {
                        'type': 'int',
                        'num_values': space.n
                    }
                    for num, space in enumerate(env.action_space.spaces)
                }
            else:
                actions = dict(type='int', num_values=env.action_space.n)

            self.tf_agent = PPOAgent(
                states=dict(type='float', shape=env.observation_space.shape),
                actions=actions,
                max_episode_timesteps=2000,
                network=[
                    dict(type='dense', size=64),
                    dict(type='dense', size=64)
                ],
                # critic_network=[
                #     dict(type='dense', size=64),
                #     dict(type='dense', size=64)
                # ],
                parallel_interactions=parallel_interactions,
                summarizer=summarizer,
                saver=saver,
                #execution={'num_parallel':64, 'type': 'single', 'session_config':None, 'distributed_spec':None},
                batch_size=10)
                # batching_capacity=1000,
                # step_optimizer=dict(type='adam', learning_rate=1e-4))

            return self.tf_agent
        return None

    def set_agent_id(self, agent_id):
        self.agent_id = agent_id

    def restore_model(self, directory=None, filename=None):
        self.tf_agent.restore(directory=directory, filename=filename)

    def envWrapper(self, state):
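        # Convert the raw observation into the flat feature vector the Tensorforce agent expects.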
        return self.env.featurize(state)
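

# Example wiring (illustrative sketch only, not part of the original module). The
# environment constructor below is hypothetical; any env exposing gym-style spaces
# and a featurize(obs) method fits the interface this wrapper assumes.
#
#   wrapper = TensorForceAgent(algorithm='ppo')
#   env = make_env(wrapper)   # hypothetical helper returning such an environment
#   wrapper.initialize(env)
#   obs = env.reset()
#   action = wrapper.act(obs, env.action_space)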