Example #1
 def step(self, actionWanted):
   current_lowest_dist = 190
   action = 0
   reward_modifier = 0
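   # Scan all 190 candidate actions and snap the requested one to the nearest
   # action that is legal according to the previous observation.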
   for i in range(0, 190):
     action_possible = False
     if len(self.prev_obs) == 0:
       action_possible = True
     else:
       newIndex = MAX_FOR_BLOCKS + i
       action_possible = self.prev_obs[int(newIndex / 10)][newIndex % 10] == 1
     action_probability = abs(actionWanted - i)
     if action_probability < current_lowest_dist and action_possible:
       current_lowest_dist = action_probability
       action = i
   reward_modifier = 0.5 if action == actionWanted else -0.5
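   # After too many consecutive invalid tries, reset the counter and, if
   # configured, force the game to progress or abort learning entirely.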
   if self.invalid_tries > self.invalid_try_limit:
     self.amount_limitsurpass += 1
     if self.max_invalid_tries != -1 and self.amount_limitsurpass >= self.max_invalid_tries:
       print("ABORTING LEARNING DUE TO TOO MANY WRONG TRIES", self.amount_limitsurpass)
       sys.exit(-1)
     print("ANOTHER 1k wrong tries", self.force_progression)
     self.invalid_tries = 0
     self.reward_finding_right = True
     if self.force_progression:
       print("Forcing game to progress")
       rustLib.field_counter_action(self.field, 1)
       is_over = rustLib.field_is_game_over(self.field) == 1
       if not is_over:
         rustLib.field_counter_action(self.field, 0)
       return field_to_array(self.field), -2.0, rustLib.field_is_game_over(self.field) == 1, {}
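   # Apply the chosen action through the Rust backend and unpack its answer.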
   answer = rustLib.field_do_action_with_answer(self.field, action, 2)
   placed = answer.placed
   reward = answer.reward
   done = answer.done == 0
   if placed == 0:
     self.invalid_tries = 0
     self.amount_limitsurpass = 0
     if self.reward_finding_right:
       self.reward_finding_right = False
       reward += 1
       print("Gave an extra bonus for finding the right combo after a lot of invalid tries", reward)
   else:
     self.invalid_tries += 1
     reward = -1
   if done:
     winner = answer.winner
     reward += 10.0 if winner == 0 else -10.0
     nprew = np.array(self.rewards)
     print("game is over average reward is", np.average(nprew), " median is", np.median(nprew), " high and low are ", np.min(nprew), np.max(nprew))
   new_observation = field_to_array(self.field)
   self.prev_obs = new_observation
   self.rewards.append(reward)
   return new_observation, reward, done, {}
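
For reference, a minimal driver loop for this step() method might look like the sketch below. It assumes the environment is registered as "rustydiscrete-v0" (the id used in the later examples) and simply feeds random action indices, which step() snaps to the nearest legal action.

import gym
import numpy as np

# Illustrative sketch only; the env id and the 190-action count come from the
# surrounding examples, everything else is a placeholder.
env = gym.make("rustydiscrete-v0")
obs = env.reset()
done = False
total_reward = 0.0
while not done:
    wanted = np.random.randint(190)   # any index works; step() snaps it to a legal action
    obs, reward, done, info = env.step(wanted)
    total_reward += reward
print("episode finished, total reward:", total_reward)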
Example #2
 def reset(self):
   rustLib.field_reset(self.field)
   self.invalid_tries = 0
   self.reward_finding_right = False
   self.rewards = []
   self.amount_limitsurpass = 0
   return field_to_array(self.field)
Example #3
 def render(self, mode='human'):
   outfile = StringIO() if mode == 'ansi' else sys.stdout
   outfile.write(str(field_to_array(self.field)))  # field_to_array returns an array, so convert to text before writing
   # No need to return anything for human
   if mode != 'human':
     with closing(outfile):
       return outfile.getvalue()
Example #4
 def get_obs(self):
     if self.use_cnn:
         return field_to_cnn_array(
             self.field, 1 if self.self_play_is_second_player else 0)
     else:
         return field_to_array(self.field,
                               1 if self.self_play_is_second_player else 0)
Example #5
dirname = "D:\\4-System\\rusty\\"
filename = "50000_heutistic_pretrain_"
filename += "box" if is_box_space else "discrete"
np.seterr(all='raise')

origEnv = gym.make("rustybox-v0" if is_box_space else "rustydiscrete-v0")

origEnv.max_invalid_tries = 7
env = VecCheckNan(DummyVecEnv([lambda: origEnv]))

# Instantiate the agent
model = PPO2.load("models/ppo2boxbestparam/2e4-30.pkl", env=env)
# model.load("models/pretrain/"+filename)

rustLib.field_restore_log(origEnv.field, oldLog.encode('utf-8'))
obs = field_to_array(origEnv.field)
actions, _states = model.predict(obs)
if is_box_space:
    action = 0
    current_max = -1
    for i in range(0, len(actions)):
        action_probability = actions[i]
        newIndex = MAX_FOR_BLOCKS + i
        action_possible = obs[int(newIndex / 10)][newIndex % 10] == 1
        if action_probability > current_max and action_possible:
            current_max = action_probability
            action = i
    # Apply the best valid action once, after the scan over all candidates.
    rustLib.field_do_action(origEnv.field, action)
else:
    rustLib.field_do_action(origEnv.field, actions)
print("1;" + field_to_log(origEnv.field))
Example #6
def generate_pretraindata_heuristics(save_interval, amount_of_games,
                                     is_box_space):
    env = gym.make("rustydiscrete-v0")
    # boxEnv supplies the Box action-space shape used for the box-format saves below.
    boxEnv = gym.make("rustybox-v0")
    actions = []
    boxActions = []
    observations = []
    rewards = []
    episode_returns = []
    episode_starts = []
    lastFile = ""
    startAt = 0
    directory = "D:\\4-System\\rusty\\"
    for f in os.listdir(directory):
        if f.endswith("npz"):
            lastFile = f
    # if len(lastFile) > 0:
    #   print("LOADING FILE", directory+lastFile, os.listdir(directory))
    #   loadedData = np.load(directory+lastFile)
    #   actions = loadedData["actions"].tolist()
    #   observations = loadedData["obs"].tolist()
    #   rewards = loadedData["rewards"].tolist()
    #   episode_returns = loadedData["episode_returns"].tolist()
    #   episode_starts = loadedData["episode_starts"].tolist()
    #   startAt = int(lastFile.split("_")[0]) + 1
    #   print("LOADED DATA IS", lastFile, len(episode_returns), startAt)
    #   loadedData = None
    gameField = rustLib.field_new()
    print("Beginning Games")
    for i in range(startAt, amount_of_games):
        game_over = False
        reward_sum = 0
        while not game_over:
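            # Let the Rust heuristic choose the move, then record it as an expert sample.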
            action = rustLib.field_counter_action_index(gameField)
            answer = rustLib.field_do_action_with_answer(gameField, action, 1)
            if answer.placed != 0:
                print("Error a block couldn't be placed", action, answer)
                sys.exit(-1)
            actionArr = np.zeros(190, dtype=float)
            actionArr[action] = 1
            boxActions.append(actionArr)
            actions.append(action)
            observations.append(field_to_array(gameField))
            game_over = answer.done == 0
            reward = answer.reward
            episode_starts.append(game_over)
            if game_over:
                winner = answer.winner
                reward += 3.0 if winner == 0 else -3.0
            rewards.append(reward)
            reward_sum += reward
            if game_over:
                episode_returns.append(reward_sum)
                print("DONE WITH GAME NUMBER", i, reward_sum)
        rustLib.field_reset(gameField)
        if i > 0 and i % save_interval == 0:
            numpy_dict = {
                'actions': np.array(actions).reshape((-1, 1)),
                'obs': np.array(observations),
                'rewards': np.array(rewards),
                'episode_returns': np.array(episode_returns),
                'episode_starts': np.array(episode_starts[:-1])
            }

            np.savez(directory + str(i) + "_heutistic_pretrain_discrete.npz",
                     **numpy_dict)

            numpy_dict = {
                'actions': np.concatenate(boxActions).reshape(
                    (-1, ) + boxEnv.action_space.shape),
                'obs': np.array(observations),
                'rewards': np.array(rewards),
                'episode_returns': np.array(episode_returns),
                'episode_starts': np.array(episode_starts[:-1])
            }

            np.savez(directory + str(i) + "_heutistic_pretrain_box.npz",
                     **numpy_dict)
            numpy_dict = None

    rustLib.field_free(gameField)

    numpy_dict = {
        'actions': (np.concatenate(boxActions).reshape(
                        (-1, ) + boxEnv.action_space.shape)
                    if is_box_space else
                    np.array(actions).reshape((-1, 1))),
        'obs': np.array(observations),
        'rewards': np.array(rewards),
        'episode_returns': np.array(episode_returns),
        'episode_starts': np.array(episode_starts[:-1])
    }
    return numpy_dict
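
The returned dictionary uses the same keys that stable-baselines expects for expert trajectories, so one way to consume it is behavioural-cloning pretraining via ExpertDataset. The sketch below assumes this; the file name, game counts and pretraining hyperparameters are placeholders, not values from the original project.

import gym
import numpy as np
from stable_baselines import PPO2
from stable_baselines.gail import ExpertDataset

# Generate heuristic expert data and save it in the npz format built above.
data = generate_pretraindata_heuristics(save_interval=1000,
                                        amount_of_games=5000,
                                        is_box_space=False)
np.savez("heuristic_expert.npz", **data)

# Pretrain a fresh PPO2 agent on the heuristic trajectories, then keep it for later fine-tuning.
dataset = ExpertDataset(expert_path="heuristic_expert.npz", batch_size=64)
model = PPO2("MlpPolicy", gym.make("rustydiscrete-v0"), verbose=1)
model.pretrain(dataset, n_epochs=10)
model.save("ppo2_heuristic_pretrained")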