from gym.envs.external_games.supertuxkart import SuperTuxKart
import sys
import random

# enable screenshot dumping when the script is launched with --screenshots
SCREENSHOTS = (len(sys.argv) > 1 and sys.argv[1] == '--screenshots')
FRAMEBUFFER_SIZE = 10

env = SuperTuxKart(spf=0.000887,
                   framebuffer_size=FRAMEBUFFER_SIZE,
                   width=800,
                   height=600,
                   level='scotland',
                   mode='time',
                   speedup=1,
                   observe_performance=False)

ACTIONS_WORDS = [
    'none', 'throttle', 'brake', 'use_powerup', 'drift', 'nitro', 'left', 'right'
]
ACTIONS_NUMBERS = list(range(len(ACTIONS_WORDS)))

screenshot_id = 0


def make_action(action):
    # apply a single action to the environment and log the outcome
    global screenshot_id
    print(action)
    ob, reward, done, scores = env.step(action)
    print(scores)
    print(env.fps)
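# A minimal driver sketch, not part of the original script: assuming the file
# is run directly, it resets the environment (env.reset() appears in the other
# scripts of this suite) and feeds make_action a few randomly sampled action
# words. The step count of 20 is arbitrary.
if __name__ == '__main__':
    env.reset()
    for _ in range(20):
        make_action(random.choice(ACTIONS_WORDS))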
import time

from gym import logger
from gym.envs.external_games.supertuxkart import SuperTuxKart

FRAMEBUFFER_SIZE = 4

env = SuperTuxKart(spf=0.000887,
                   framebuffer_size=FRAMEBUFFER_SIZE,
                   width=800,
                   height=600,
                   level='scotland',
                   mode='time')

logger.set_level(logger.DEBUG)

# measure how in-game time advances relative to wall-clock time over 100 steps
last_gt = None
last_st = None
delta_gts = []
delta_sts = []
step_times = []
tot_game_fps = []
tot_gym_fps = []

for i in range(100):
    before = time.time()
    ob, reward, done, scores = env.step('throttle')
    after = time.time()
    gt = scores['game_time']
    st = scores['system_time']
    if last_st is not None and last_gt is not None:
        delta_gt = gt - last_gt
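        # Plausible continuation (the original excerpt is truncated at this
        # point): the list names above strongly suggest that each step's game-
        # and system-time deltas are stored for later comparison; the exact
        # statements below are an assumption, not the original code.
        delta_st = st - last_st
        delta_gts.append(delta_gt)
        delta_sts.append(delta_st)
    last_gt = gt
    last_st = st
    step_times.append(after - before)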
    next_state_values = next_state_values.detach()  # detach so gradients do not flow through the target values
    expected_state_action_values = next_state_values * GAMMA + rewards_v
    return nn.MSELoss()(state_action_values, expected_state_action_values)


# **********************************************************************************************************************
# *                                                  TRAINING STARTS                                                   *
# **********************************************************************************************************************
if __name__ == "__main__":
    # set the device -> GPU or CPU
    device = "cuda"

    # create the wrapped environment
    env = SuperTuxKart(spf=0.000887,
                       framebuffer_size=FRAME_BUFFER_SIZE,
                       width=SCREEN_WIDTH,
                       height=SCREEN_HEIGHT,
                       level=GAME_LEVEL,
                       mode=GAME_MODE,
                       speedup=GAME_SPEED,
                       observe_performance=OBS_PERFORMANCE,
                       performance_window_size=WINDOW_ACTIONS,
                       performance_window_overlap=OVERLAP_ACTIONS,
                       laps=LAPS)

    # ******************************************************************************************************************
    # *                                                DEFINE THRESHOLD                                                *
    # ******************************************************************************************************************
    if NUM_PREGAMES > 0:
        print('******************************* STARTING PRE GAMES TO SET THE THRESHOLD *******************************')

        # create the net and the target net, both initialised from the best RL baseline checkpoint
        net = dqn_model.DQN((FRAME_BUFFER_SIZE, SCREEN_WIDTH, SCREEN_HEIGHT), len(POSSIBLE_ACTIONS)).to(device)
        tgt_net = dqn_model.DQN((FRAME_BUFFER_SIZE, SCREEN_WIDTH, SCREEN_HEIGHT), len(POSSIBLE_ACTIONS)).to(device)
        net.load_state_dict(torch.load(DEFAULT_ENV_NAME + "-best_RL-baseline.dat"))
        tgt_net.load_state_dict(torch.load(DEFAULT_ENV_NAME + "-best_RL-baseline.dat"))
        print(net)
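# Self-contained sketch of the loss computed above, with toy tensors standing
# in for a real batch (GAMMA_DEMO and the values are illustrative assumptions,
# not taken from the training script): the target is the one-step Bellman
# estimate r + GAMMA * max_a' Q(s', a'), with gradients blocked on the target
# side by detach().
import torch
import torch.nn as nn

GAMMA_DEMO = 0.99                                # discount factor
state_action_values = torch.tensor([1.0, 0.5])   # Q(s, a) from the online net
next_state_values = torch.tensor([2.0, 1.5])     # max_a' Q(s', a') from the target net
rewards_v = torch.tensor([0.1, -0.2])            # rewards observed for the transitions

expected = next_state_values.detach() * GAMMA_DEMO + rewards_v
loss = nn.MSELoss()(state_action_values, expected)
print(loss)  # mean squared TD error over the toy batch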
            return ['throttle', 'left']
        elif centering < -self.CENTERING_TOLERANCE:
            return ['throttle', 'right']
        else:
            return 'throttle'


if __name__ == '__main__':
    # You can set the level to logger.DEBUG or logger.WARN if you
    # want to change the amount of output.
    logger.set_level(logger.DEBUG)

    env = SuperTuxKart(spf=0.000887,
                       framebuffer_size=4,
                       width=800,
                       height=600,
                       level='scotland',
                       mode='time',
                       speedup=1,
                       observe_performance=False)

    # You provide the directory to write to (can be an existing
    # directory, including one with existing data -- all monitor files
    # will be namespaced). You can also dump to a tempdir if you'd
    # like: tempfile.mkdtemp().
    outdir = '/tmp/random-agent-results'

    agent = SuperTuxKartAgent(env.action_space)

    episode_count = 5
    reward = 0
    scores = None
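    # For reference, the steering heuristic whose tail appears at the top of
    # this excerpt plausibly reads in full as sketched below; the first
    # condition is not shown, so it is reconstructed here as an assumption:
    #
    #     if centering > self.CENTERING_TOLERANCE:
    #         return ['throttle', 'left']
    #     elif centering < -self.CENTERING_TOLERANCE:
    #         return ['throttle', 'right']
    #     else:
    #         return 'throttle'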
    [ACTIONS[5]],              # RIGHT
    [ACTIONS[1], ACTIONS[4]],  # UP + LEFT
    [ACTIONS[1], ACTIONS[5]],  # UP + RIGHT
    [ACTIONS[1], ACTIONS[2]],  # UP + POWER
    [ACTIONS[1], ACTIONS[3]]   # UP + NITRO
]

# set the device -> GPU or CPU
device = "cuda"

# create the wrapped environment
env = SuperTuxKart(spf=0.000887,
                   framebuffer_size=FRAME_BUFFER_SIZE,
                   width=SCREEN_WIDTH,
                   height=SCREEN_HEIGHT,
                   level=GAME_LEVEL,
                   mode=GAME_MODE,
                   speedup=GAME_SPEED,
                   observe_performance=True,
                   performance_window_size=2,
                   performance_window_overlap=1,
                   laps=1)

all_games_positions = []
all_consumptions = []

for game in range(1000):
    env.reset()

    # idle until the race actually starts: game_time is negative during the countdown
    ob, r, is_done, info = env.step(ACTIONS[0])
    while info['game_time'] < 0:
        ob, r, is_done, info = env.step(ACTIONS[0])
    starting_time = info['game_time']
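    # Hypothetical continuation (the excerpt is truncated at this point): one
    # plausible loop drives with randomly sampled combined actions until the
    # episode ends, then records the final race position and consumption. The
    # list name COMBINED_ACTIONS and the info keys are assumptions, since
    # neither is shown above.
    #
    #     while not is_done:
    #         ob, r, is_done, info = env.step(random.choice(COMBINED_ACTIONS))
    #     all_games_positions.append(info['position'])
    #     all_consumptions.append(info['consumption'])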