import time

import numpy as np

# ALE_env, AOCAgent_THEANO, and the module-level folder_name used when saving
# come from the surrounding project.


class Training(object):
    def __init__(self, rng, id_num, arr, num_moves, args):
        self.args = args
        self.rng = rng
        self.num_moves = num_moves
        self.id_num = id_num
        self.env = ALE_env(args, rng=rng)
        self.agent = AOCAgent_THEANO(self.env.action_space, id_num, arr, num_moves, args)
        self.train()

    def train(self):
        total_reward = 0
        x = self.env.reset()  # returns the current observation
        self.agent.reset(x)
        timer = time.time()
        recent_fps = []
        total_games = 0
        frame_counter = 0
        done = False
        totalreward_k_games = 0
        reward_k_games = []
        while self.num_moves.value < self.args.max_num_frames:
            if done:
                total_games += 1
                secs = round(time.time() - timer, 1)
                frames = self.env.get_frame_count()
                fps = int(frames / secs)
                recent_fps = recent_fps[-9:] + [fps]  # moving window of the last 10 games
                eta = ((self.args.max_num_frames - self.num_moves.value) * self.args.frame_skip
                       / (self.args.num_threads * np.mean(recent_fps)))
                print "id: %d\treward: %d\ttime: %.1f\tframes: %d\t %dfps \tmoves: %d \t ETA: %dh %dm %ds \t%.2f%%" % \
                    (self.id_num, total_reward, secs, frames, fps, self.num_moves.value,
                     int(eta / 3600), int(eta / 60) % 60, int(eta % 60),
                     float(self.num_moves.value) / self.args.max_num_frames * 100)
                if self.args.testing:
                    if self.args.load_folder != "":
                        new_folder_name = self.args.load_folder
                    else:
                        print "no load folder given for testing"
                        exit(0)
                    if total_games <= self.args.kgames:
                        reward_k_games = np.append(reward_k_games, total_reward)
                        totalreward_k_games += total_reward
                    if total_games > self.args.kgames:
                        # save the per-game rewards for the k test games
                        rewards_saving = np.asarray(reward_k_games)
                        np.save(new_folder_name + "/test_result.npy", rewards_saving)
                        avgreward_k_games = totalreward_k_games / self.args.kgames
                        mean_reward = np.mean(reward_k_games)
                        std_reward = np.std(reward_k_games)
                        final_reward_stats = [mean_reward, std_reward]
                        np.savetxt(new_folder_name + "/test_result_stats.txt",
                                   np.asarray(final_reward_stats))
                        print "----------------------------------------------"
                        print "average reward for k games: ", avgreward_k_games
                        print "numpy average reward for k games: ", mean_reward
                        print "std deviation for k games: ", std_reward
                        exit(0)
                timer = time.time()
                frame_counter = 0
                # thread 1 checkpoints after every game (total_games % 1 is always 0)
                if total_games % 1 == 0 and self.id_num == 1 and not self.args.testing:
                    self.agent.save_values(folder_name)  # folder_name is a module-level global
                    print "saved model"
                total_reward = 0
                x = self.env.reset()
                self.agent.reset(x)
                done = False
            action = self.agent.get_action(x)
            new_x, reward, done, death = self.env.act(action)
            self.agent.store(x, new_x, action, reward, done, death)
            if self.args.testing:
                self.env.render()
            total_reward += reward
            x = np.copy(new_x)
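
# Hedged usage sketch, not part of the original file: one plausible way to
# launch several Training workers, inferred from the constructor signature.
# All names below are assumptions: num_moves as a shared multiprocessing.Value
# step counter, arr as a shared flat parameter array, NUM_PARAMS as its
# (hypothetical) size, and make_args() standing in for whatever argument
# parsing the project actually does. Training runs train() from __init__,
# so the class itself can serve as the process target.
if __name__ == "__main__":
    import multiprocessing as mp

    NUM_PARAMS = 10 ** 6                         # hypothetical shared-weight size
    args = make_args()                           # hypothetical; real parser lives elsewhere
    num_moves = mp.Value("l", 0)                 # environment steps shared by all workers
    arr = mp.Array("d", NUM_PARAMS, lock=False)  # shared parameters for all workers
    workers = [
        mp.Process(target=Training,
                   args=(np.random.RandomState(i), i + 1, arr, num_moves, args))
        for i in range(args.num_threads)
    ]
    for w in workers:
        w.start()
    for w in workers:
        w.join()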