network='auto', memory=10000, ) else: print("Available agents: vpg, ppo, dqn") exit() print("agent ready", agent) agent.initialize() # Set up base of agent try: # Looks to see if a saved model is available and loads it lastEpoch = int( os.listdir(tmp + "/saved/player_pun/" + args.agent)[2].split("-")[0]) agent.restore(directory=tmp + "/saved/player_pun/" + args.agent) print("restored") except Exception as e: # starts fresh if no saved model is available print("DID NOT RESTORE") lastEpoch = 0 epochs = 2000000 for epoch in tqdm(range(lastEpoch, epochs + 1)): #print(epoch) #pdb.set_trace() map_num = random.randint(1, 20) simulation = GameSim(map_num) # penalize over-estimation #print('THIS IS A RESET OF THE SHIT') saved_pos = None
memory=10000, ) else: print("Available agents: vpg, ppo, dqn") exit() print("agent ready", agent) agent.initialize() # Set up base of agent try: # Looks to see if a saved model is available and loads it lastEpoch = int(os.listdir(tmp + "/saved/sim_rew_mult/" + args.agent)[2].split("-")[0]) agent.restore(directory=tmp + "/saved/sim_rew_mult/" + args.agent) print("restored") except Exception as e: # starts fresh if no saved model is available print(e) lastEpoch = 0 epochs = 100000 level_index = 1 for epoch in tqdm(range(lastEpoch, epochs)): # print(epoch) simulation = GameSim(level_index) # penalize over-estimation saved_pos = [None]*NUMBER_OF_PLAYERS
dict(type="dense", size=32), ], memory=10000, ) print("agent ready", agent) if args.process == "train": new_agent = copy.deepcopy(agent) agent.initialize() try: lastEpoch = int(os.listdir("saved/" + args.agent)[2].split("-")[0]) agent.restore(directory="saved/" + args.agent + "/" + args.contrarian) print("restored") except: lastEpoch = 0 epochs = 100000 cluster_vals = [] for epoch in tqdm(range(lastEpoch, epochs)): G = Audience(20, 15) #20 reccomendations for every user training_size = G.graph.shape[0] * 20 changes = [] for step in range(training_size): action = agent.act(G.graph)
class TensorForceAgent(BaseAgent):
    """Agent that delegates action selection to a TensorForce algorithm.

    The decision logic lives entirely in the wrapped TensorForce agent
    (``self.tf_agent``); this class only featurizes observations and
    forwards them.
    """

    def __init__(self, character=characters.Bomber, algorithm='ppo'):
        """Create the wrapper.

        Args:
            character: Pommerman character class to play as.
            algorithm: Name of the TensorForce algorithm to build in
                ``initialize`` (only ``'ppo'`` is currently supported).
        """
        super(TensorForceAgent, self).__init__(character)
        self.algorithm = algorithm
        self.tf_agent = None  # built lazily by initialize()
        self.agent_id = 0
        self.env = None       # set by initialize(); needed by envWrapper()

    def act(self, obs, action_space):
        """Featurize ``obs`` via the env and query the TensorForce agent.

        Requires ``initialize`` to have been called first, otherwise
        ``self.tf_agent`` is still None.
        """
        ppo_state = self.envWrapper(obs)
        return self.tf_agent.act(ppo_state)

    def initialize(self, env, parallel_interactions=1, summarizer=None,
                   saver=None):
        """Build the underlying TensorForce agent for ``env``.

        Args:
            env: Gym-style environment providing ``action_space``,
                ``observation_space`` and a ``featurize`` method.
            parallel_interactions: Number of parallel interaction slots
                passed through to the PPOAgent.
            summarizer: Optional TensorForce summarizer spec.
            saver: Optional TensorForce saver spec.

        Returns:
            The constructed PPOAgent, or None if ``self.algorithm`` is
            not ``'ppo'``.
        """
        from gym import spaces
        from tensorforce.agents import PPOAgent

        self.env = env
        if self.algorithm == "ppo":
            if isinstance(env.action_space, spaces.Tuple):
                # One named int action per sub-space. NOTE: the type must
                # be the string 'int' (TensorForce spec), not the Python
                # builtin int — the original passed the builtin, which is
                # inconsistent with the scalar branch below.
                actions = {
                    str(num): {
                        'type': 'int',
                        'num_values': space.n,
                    }
                    for num, space in enumerate(env.action_space.spaces)
                }
            else:
                actions = dict(type='int', num_values=env.action_space.n)
            self.tf_agent = PPOAgent(
                states=dict(type='float',
                            shape=env.observation_space.shape),
                actions=actions,
                max_episode_timesteps=2000,
                network=[
                    dict(type='dense', size=64),
                    dict(type='dense', size=64),
                ],
                parallel_interactions=parallel_interactions,
                summarizer=summarizer,
                saver=saver,
                batch_size=10)
            return self.tf_agent
        return None

    def set_agent_id(self, agent_id):
        """Record which player slot this agent occupies."""
        self.agent_id = agent_id

    def restore_model(self, directory=None, filename=None):
        """Restore the wrapped agent's weights from a checkpoint."""
        self.tf_agent.restore(directory=directory, filename=filename)

    def envWrapper(self, state):
        """Convert a raw observation into the env's feature representation."""
        return self.env.featurize(state)