def predict(self, state):
    """Enumerate joint next states of the two ghosts and their weights.

    ``state[2:4]`` and ``state[4:6]`` are converted to state numbers with
    ``utils.state_2_number``; each number selects a row of ``self.model``.
    Every pairing of a positive entry from the first row with a positive
    entry from the second row yields one outcome.

    Returns:
        ``(li, li_probs)``: ``li`` holds, per pairing, the concatenation of
        the two ``utils.number_2_state`` results; ``li_probs`` holds the
        product of the two matching row entries, in the same order.
    """

    def positive_entries(ghost_part):
        # (column index, value) for every strictly positive entry in the
        # model row selected by this ghost's state number.
        row = self.model[utils.state_2_number(ghost_part)]
        return [(column, value) for column, value in enumerate(row) if value > 0]

    ghost1_moves = positive_entries(state[2:4])
    ghost2_moves = positive_entries(state[4:6])

    # Ghost 1 varies slowest, ghost 2 fastest.
    pairings = [(first, second)
                for first in ghost1_moves
                for second in ghost2_moves]

    li = [utils.number_2_state(first[0]) + utils.number_2_state(second[0])
          for first, second in pairings]
    li_probs = [first[1] * second[1] for first, second in pairings]
    return li, li_probs
def run(run_ID):
    """Build a 49x49 tabular transition model from synthetic data and save it.

    Transition pairs come from ``load_synthetic_data(5 * 49)``. Each pair is
    mapped to (row, column) indices with ``utils.state_2_number`` and counted;
    every row is then divided by its own total. The matrix is written with
    ``numpy.savetxt`` to ``learn_tabular_models/tabular<run_ID>.h5``.
    """
    states, next_states = load_synthetic_data(5 * 49)

    counts = numpy.zeros((49, 49))
    for current, following in zip(states, next_states):
        source = utils.state_2_number(current)
        target = utils.state_2_number(following)
        counts[source, target] += 1

    # Divide each row by its own sum; keepdims makes the (49, 1) totals
    # broadcast across the columns of the matching row.
    counts /= counts.sum(axis=1, keepdims=True)

    destination = "learn_tabular_models/tabular" + str(run_ID) + ".h5"
    numpy.savetxt(destination, counts)
for y in range(7): s = [x, y] for _ in range(each): while True: case = numpy.random.randint(0, 4) if case == 0: s_p = [x + 1, y] elif case == 1: s_p = [x, y + 1] elif case == 2: s_p = [x - 1, y] elif case == 3: s_p = [x, y - 1] if numpy.min(s_p) >= 0 and numpy.max(s_p) <= 6: break li_s.append(s) li_sprime.append(s_p) return li_s, li_sprime states, next_states = load_synthetic_data(5 * 49) ghost_tabular_array = numpy.zeros(49 * 49).reshape((49, 49)) for x, y in zip(states, next_states): numberS = utils.state_2_number(x) numberSp = utils.state_2_number(y) ghost_tabular_array[numberS, numberSp] = ghost_tabular_array[numberS, numberSp] + 1 for index in range(ghost_tabular_array.shape[0]): ghost_tabular_array[index, :] = ghost_tabular_array[index, :] / (numpy.sum( ghost_tabular_array[index, :])) numpy.savetxt("tabular.h5", ghost_tabular_array)
def predict(self, state, action):
    """Enumerate candidate successors of ``state`` after taking ``action``.

    ``state[0:2]`` is used as the pacman part and ``state[2:]`` as the ghost
    part. ``action`` is the index set to 1 in a length-4 one-hot row that is
    passed, together with the pacman part, to ``pacman_model.predict``. How
    the ghost part is advanced depends on ``self.type``:

    * ``'stochastic'``: one successor per entry returned by
      ``self.em_model_object.predict``.
    * ``'deterministic'``: the single output of ``ghosts_model.predict``.
    * ``'tabular'``: one successor per column ``j`` whose entry in row
      ``state_number`` of ``ghosts_tabular_model`` is positive.
    * ``'random'``: nothing can be predicted; a message is written to
      stderr and the process exits with status 1.

    Returns:
        ``(li_next_states, li_rewards, li_dones)``: three parallel lists
        holding, per successor, the concatenated next state as a flat list,
        ``reward[0, 0]`` from the reward model, and whether the done model's
        output exceeded 0.5. Any other ``self.type`` falls through and
        returns ``None``.

    Raises:
        SystemExit: with code 1 when ``self.type == 'random'``.
    """
    # One-hot encoding of the action as a (1, 4) row.
    action_array = numpy.zeros((1, 4), dtype=int)
    action_array[0, action] = 1

    ghost_state = state[2:]
    ghost_state_array = numpy.array(ghost_state).reshape(1, len(ghost_state))
    pacman_state = state[0:2]
    pacman_state_array = numpy.array(pacman_state).reshape(
        1, len(pacman_state))

    # Called for every type, although only the 'stochastic' branch reads the
    # result. NOTE(review): kept unconditional in case the call has side
    # effects callers rely on -- confirm before moving it into the branch.
    li_ghosts = self.em_model_object.predict(ghost_state_array)

    li_next_states = []
    li_rewards = []
    li_dones = []

    def record(next_state):
        # Score one candidate successor and append it to the three lists.
        reward = self.other_models_object.reward_model.predict(next_state)
        done = self.other_models_object.done_model.predict(next_state)
        li_next_states.append(next_state[0].tolist())
        li_rewards.append(reward[0, 0])
        li_dones.append(bool(done > 0.5))

    if self.type == 'stochastic':
        pacman_next_state = self.other_models_object.pacman_model.predict(
            [pacman_state_array, action_array])
        for gh in li_ghosts:
            record(numpy.concatenate((pacman_next_state, gh), axis=1))
        return li_next_states, li_rewards, li_dones

    elif self.type == 'deterministic':
        pacman_next_state = self.other_models_object.pacman_model.predict(
            [pacman_state_array, action_array])
        ghost_next_state = self.other_models_object.ghosts_model.predict(
            ghost_state_array)
        record(numpy.concatenate(
            (pacman_next_state, ghost_next_state), axis=1))
        return li_next_states, li_rewards, li_dones

    elif self.type == 'tabular':
        state_number = utils.state_2_number(ghost_state)
        # The pacman prediction has the same inputs for every ghost
        # successor, so compute it once instead of once per positive column
        # (mirrors how the 'stochastic' branch shares a single prediction).
        pacman_next_state = self.other_models_object.pacman_model.predict(
            [pacman_state_array, action_array])
        for j in range(49 * 49):
            if self.other_models_object.ghosts_tabular_model[state_number, j] > 0:
                ghost_next_state = numpy.array(
                    utils.number_2_state(j)).reshape(1, 4)
                record(numpy.concatenate(
                    (pacman_next_state, ghost_next_state), axis=1))
        return li_next_states, li_rewards, li_dones

    elif self.type == 'random':
        # Say why we are bailing out; a bare string expression here would be
        # evaluated and discarded, leaving the exit unexplained.
        sys.stderr.write('planner is random\n')
        sys.exit(1)