import numpy
import tensorflow as tf
import gym_gvgai
from keras import backend as K


def main():
    # `game` and the ActorCritic class are assumed to be defined elsewhere in the module.
    sess = tf.Session()
    K.set_session(sess)
    # env = gym.make("Pendulum-v0")
    env = gym_gvgai.make("gvgai-" + game + "-lvl1-v0")
    actor_critic = ActorCritic(env, sess)

    num_trials = 10000
    trial_len = 500

    # Flatten the pixel observation into a 1-D vector for the networks.
    cur_state = env.reset()
    cur_state = numpy.array(cur_state).flatten()
    action = env.action_space.sample()
    while True:
        env.render()
        # cur_state = cur_state.reshape((1, env.observation_space.shape[0]))
        action = actor_critic.act(cur_state)
        action = numpy.array(action)
        # action = action.reshape((1, env.action_space.shape[0]))
        new_state, reward, done, _ = env.step(action)
        new_state = numpy.array(new_state).flatten()
        # new_state = new_state.reshape((1, env.observation_space.shape[0]))
        # Store the transition, take one training step, then advance the state.
        actor_critic.remember(cur_state, action, reward, new_state, done)
        actor_critic.train()
        cur_state = new_state
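# The loop above assumes an ActorCritic class with roughly the interface
# sketched below; this stub is hypothetical, not the original implementation.
class ActorCritic:
    def __init__(self, env, sess):
        ...  # build actor and critic networks against the TF session

    def act(self, state):
        ...  # return an action (with exploration) for `state`

    def remember(self, state, action, reward, new_state, done):
        ...  # append the transition to a replay memory

    def train(self):
        ...  # sample from memory and update actor and critic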
def make_env(env_name):
    """Helper function to create a GVGAI environment."""
    env = gym_gvgai.make(env_name)
    # Overwrite the IMAGE SSO type so the framework returns BOTH image and object data.
    gym_gvgai.envs.gvgai_env.gvgai.LEARNING_SSO_TYPE.IMAGE = (
        gym_gvgai.envs.gvgai_env.gvgai.LEARNING_SSO_TYPE.BOTH
    )
    return env
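# A minimal usage sketch for make_env above; the game/level id is illustrative,
# any installed gvgai-*-lvl*-v0 environment would do.
env = make_env("gvgai-aliens-lvl0-v0")
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())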
def __init__(self, game, lvl):
    self.env = gvg.make('gvgai-' + game + '-' + lvl + '-v0')
    self.stateObs = self.env.reset()
    # Double the observation size, swapping the axes when the observation
    # is wider than it is tall.
    size = (len(self.stateObs), len(self.stateObs[0]))
    self.transpose = size[0] < size[1]
    if self.transpose:
        self.size = (size[1] * 2, size[0] * 2)
    else:
        self.size = (size[0] * 2, size[1] * 2)
    self.done = False
    self.score = 0
    self.frame = 0
    self.nAction = self.env.action_space.n
def __init__(self, game, play_length, shape, path=None, id=0,
             reward_mode='time', reward_scale=2, elite_prob=0):
    """Gym wrapper that returns the grid instead of pixels, sets the
    reward, and generates a new level on reset.

    PPO wants to maximize the reward; the generator wants a score of 0.
    """
    self.id = id
    self.name = game
    self.levels = path
    self.level_id = -1
    self.version = 1
    self.env = gym_gvgai.make('gvgai-{}-lvl0-v{}'.format(game, self.version))
    gym.Wrapper.__init__(self, self.env)
    self.compiles = False
    self.state = None
    self.steps = 0
    self.score = 0
    self.play_length = play_length
    self.shape = shape
    self.observation_space = gym.spaces.Box(low=0, high=1, shape=shape,
                                            dtype=np.float32)
    self.elitep = elite_prob
    self.rmode = reward_mode
    self.rscale = reward_scale
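# Hypothetical instantiation of the wrapper above; "GridGame" is an assumed
# class name and every argument value here is illustrative.
env = GridGame('zelda', play_length=1000, shape=(13, 9, 13),
               path='levels/', reward_mode='time', elite_prob=0.1)
obs = env.reset()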
import gym
import gym_gvgai
import Agent as Agent
import json
import datetime

# Load the game and reset the environment, just as testRLAgent.py does.
env = gym_gvgai.make('gvgai-aai-lvl0-v0')
# Build a trainAgent object.
train_agent = Agent.trainAgent()
print('Starting ' + env.env.game + " with Level " + str(env.env.lvl))
# Reset the environment.
stateObs = env.reset()
# Get the available actions.
actions = env.env.GVGAI.actions()
# Initialize the Q-table for the first time and initialize "lastState".
train_agent.inital(stateObs)
# Read the model from a JSON file.
with open("model.json", 'r', encoding='utf-8') as json_file:
    train_agent.Qtable = json.load(json_file)
print("start training...")
# Record the start time.
start = datetime.datetime.now()
for i in range(10):
import gym_gvgai
import Agent as Agent

games = ['gvgai-testgame1', 'gvgai-testgame2', 'gvgai-testgame3']
validateLevels = ['lvl1-v0', 'lvl2-v0', 'lvl3-v0']
totalTimes = 20
# Variables for recording the results.
results = {}
for game in games:
    levelRecord = {}
    for level in validateLevels:
        timeRecord = {}
        for t in range(totalTimes):
            env = gym_gvgai.make(game + '-' + level)
            agent = Agent.Agent()
            print('Starting ' + env.env.game + " with Level " + str(env.env.lvl))
            stateObs = env.reset()
            actions = env.unwrapped.get_action_meanings()
            totalScore = 0
            for tick in range(2000):
                action_id = agent.act(stateObs, actions)
                stateObs, diffScore, done, debug = env.step(action_id)
                totalScore += diffScore
                print("Action " + str(action_id) + " tick " + str(tick + 1) +
                      " reward " + str(diffScore) + " win " + debug["winner"])
                if done:
                    break
            timeRecord[t] = [tick, totalScore, debug["winner"]]
        levelRecord[level] = timeRecord
    results[game] = levelRecord
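# A short sketch (not part of the original script) of one way to persist the
# `results` dict collected above; the output filename is illustrative.
import json
with open('validation_results.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=2)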
    return action_id


user = sys.argv[1]
new_team = team(user, [], [])
testing_times = 20
# Predefined names referring to framework games.
games = ['golddigger', 'treasurekeeper', 'waterpuzzle']
test_levels = ['lvl2', 'lvl3', 'lvl4']
try:
    import Agent as Agent
    for game_name in games:
        for level in test_levels:
            env = gym_gvgai.make('gvgai-' + game_name + '-' + level + '-v0')
            agent = Agent.Agent()
            # print('Starting ' + env.env.game + " with Level " + str(env.env.lvl))
            total_score = []  # record every testing score
            win_num = 0
            # Reset the environment.
            actions = env.env.GVGAI.actions()
            state_obs = None
            INFO = game_name + level + "\n"
            print(game_name, level)
            for i in range(testing_times):  # testing 20 times
                current_score = 0  # record the current testing round's score
                state_obs = env.reset()
                for t in range(2000):
                    action_id = agent_action(state_obs, actions=actions)
                    state_obs, incre_score, done, debug = env.step(action_id)
#!/usr/bin/env python
import gym
import gym_gvgai
import Agent as Agent
import json

# Load the game.
env = gym_gvgai.make('gvgai-aai-lvl0-v0')
# Build the agent object.
agent = Agent.Agent()
print('Starting ' + env.env.game + " with Level " + str(env.env.lvl))
# Reset the environment.
stateObs = env.reset()
# Get the actions.
actions = env.env.GVGAI.actions()

######### this is new: load the model #########
with open("model.json", 'r', encoding='utf-8') as json_file:
    agent.Qtable = json.load(json_file)
######### this is new: load the model #########

######### this is new: compute the total score #########
Score = 0
######### this is new: compute the total score #########

for t in range(1000):
    # Choose an action based on the trained policy.
    action_id = agent.act(stateObs, actions)
    # Do the action and get the new state and its reward.
    stateObs, increScore, done, debug = env.step(action_id)
        # New best model, you could save the agent here
        if mean_reward > best_mean_reward:
            best_mean_reward = mean_reward
            # Example for saving the best model
            print("Saving new best model")
            _locals['self'].save(log_dir + 'best_model.pkl')
    n_steps += 1
    return True


# Create the log dir
log_dir = args.log_path
os.makedirs(log_dir, exist_ok=True)

env = gym_gvgai.make(args.env)
env = WarpFrame(env)
env = Monitor(env, log_dir, allow_early_resets=True)
if args.save_video_interval != 0:
    env = gym.wrappers.Monitor(
        env, os.path.join(log_dir, "videos"),
        video_callable=(lambda ep: ep % args.save_video_interval == 0),
        force=True)

model = DQN(CnnPolicy, env, verbose=1,
            exploration_fraction=args.exploration_fraction,
            exploration_final_eps=args.exploration_final_eps,
def gvgai_test_old():
    import gym_gvgai
    return gym_gvgai.make("gvgai-sokoban-lvl0-v0")
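# Illustrative smoke test for gvgai_test_old (not part of the original file).
env = gvgai_test_old()
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()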
def restart(self, e, path):
    # self.log(e)
    # Mark this level as failing to compile, then rebuild the environment.
    open(path + ".no_compile", 'w').close()
    self.env = gym_gvgai.make('gvgai-{}-lvl0-v{}'.format(
        self.name, self.version))