class Bot4:
    """Greedy bot: walks toward the nearest gold pile (by squared Euclidean
    distance) and crafts when standing on a gold cell with enough energy.

    Relies on project classes ``State`` and ``PlayerInfo`` defined elsewhere.
    """

    ACTION_GO_LEFT = 0
    ACTION_GO_RIGHT = 1
    ACTION_GO_UP = 2
    ACTION_GO_DOWN = 3
    ACTION_FREE = 4
    ACTION_CRAFT = 5

    def __init__(self, id):
        self.state = State()
        self.info = PlayerInfo(id)

    def distance(self, posA, posB):
        """Return the squared Euclidean distance between two [x, y] points."""
        return (posA[0] - posB[0]) ** 2 + (posA[1] - posB[1]) ** 2

    def next_action(self):
        """Choose the next action.

        Priority: craft when on gold with energy >= 6; rest when energy < 10;
        otherwise step toward the nearest gold; fall back to a random move.
        """
        golds = self.state.mapInfo.golds
        mindis = 1000
        bot_posx, bot_posy = self.info.posx, self.info.posy
        # BUGFIX: default the target to the bot's own cell so the movement
        # checks below are well-defined even when no gold is left on the map
        # (previously target_x/target_y could be referenced while unbound).
        target_x, target_y = bot_posx, bot_posy
        for gold in golds:
            dist = self.distance([gold["posx"], gold["posy"]],
                                 [bot_posx, bot_posy])
            if dist < mindis:
                mindis = dist
                target_x = gold["posx"]
                target_y = gold["posy"]
        if self.state.mapInfo.gold_amount(bot_posx, bot_posy) > 0:
            if self.info.energy >= 6:
                return self.ACTION_CRAFT
        if self.info.energy < 10:
            # Too tired to move safely: rest to regain energy.
            return self.ACTION_FREE
        if (target_x - bot_posx) < 0:
            return self.ACTION_GO_LEFT
        if (target_x - bot_posx) > 0:
            return self.ACTION_GO_RIGHT
        if (target_y - bot_posy) < 0:
            return self.ACTION_GO_UP
        if (target_y - bot_posy) > 0:
            return self.ACTION_GO_DOWN
        # Already at the target (or no gold left): wander randomly.
        # BUGFIX: was `self.np.random.randrange(0, 4)` -> AttributeError
        # (`self.np` does not exist, and numpy.random has no `randrange`).
        return np.random.randint(0, 4)

    def new_game(self, data):
        """Initialise local state from the server's new-game message."""
        try:
            self.state.init_state(data)
        except Exception:
            import traceback
            traceback.print_exc()

    def new_state(self, data):
        """Refresh local state from a per-step server message."""
        try:
            self.state.update_state(data)
        except Exception:
            import traceback
            traceback.print_exc()
class Bot1:
    """Systematic sweep bot: crafts on gold, rests when tired, otherwise
    scans the board row by row (right on even rows, left on odd rows).
    """

    ACTION_GO_LEFT = 0
    ACTION_GO_RIGHT = 1
    ACTION_GO_UP = 2
    ACTION_GO_DOWN = 3
    ACTION_FREE = 4
    ACTION_CRAFT = 5

    def __init__(self, id):
        self.state = State()
        self.info = PlayerInfo(id)

    def next_action(self):
        """Return the next action following the sweep policy."""
        col, row = self.info.posx, self.info.posy
        # On a gold cell: mine if affordable (crafting costs energy), else rest.
        if self.state.mapInfo.gold_amount(col, row) > 0:
            return self.ACTION_CRAFT if self.info.energy >= 6 else self.ACTION_FREE
        # Not enough energy to move safely: rest.
        if self.info.energy < 5:
            return self.ACTION_FREE
        if row % 2 == 0:
            # Even row: march right until the wall, then default to UP.
            if col < self.state.mapInfo.max_x:
                return self.ACTION_GO_RIGHT
            return self.ACTION_GO_UP
        # Odd row: march left; at the left wall, drop DOWN a row.
        if col > 0:
            return self.ACTION_GO_LEFT
        return self.ACTION_GO_DOWN

    def new_game(self, data):
        """Initialise local state from the server's new-game message."""
        try:
            self.state.init_state(data)
        except Exception:
            import traceback
            traceback.print_exc()

    def new_state(self, data):
        """Refresh local state from a per-step server message."""
        try:
            self.state.update_state(data)
        except Exception:
            import traceback
            traceback.print_exc()
class Bot2:
    """Random-walk bot: crafts when standing on gold, rests below an energy
    threshold, and otherwise takes a uniformly random move that stays on the
    board.
    """

    ACTION_GO_LEFT = 0
    ACTION_GO_RIGHT = 1
    ACTION_GO_UP = 2
    ACTION_GO_DOWN = 3
    ACTION_FREE = 4
    ACTION_CRAFT = 5

    def __init__(self, id):
        self.state = State()
        self.info = PlayerInfo(id)

    def next_action(self):
        """Return the next action following the random-walk policy."""
        col, row = self.info.posx, self.info.posy
        if self.state.mapInfo.gold_amount(col, row) > 0:
            # On a gold cell: mine if affordable, otherwise rest.
            return self.ACTION_CRAFT if self.info.energy >= 6 else self.ACTION_FREE
        if self.info.energy < 21:
            return self.ACTION_FREE
        # Rejection-sample a move that does not leave the board.
        # NOTE(review): the board bound 28 is hard-coded here rather than
        # taken from mapInfo — confirm against the map size actually used.
        while True:
            move = np.random.randint(0, 4)
            off_board = ((col == 0 and move == 0)
                         or (col == 28 and move == 1)
                         or (row == 0 and move == 3)
                         or (row == 28 and move == 2))
            if not off_board:
                return move

    def new_game(self, data):
        """Initialise local state from the server's new-game message."""
        try:
            self.state.init_state(data)
        except Exception:
            import traceback
            traceback.print_exc()

    def new_state(self, data):
        """Refresh local state from a per-step server message."""
        try:
            self.state.update_state(data)
        except Exception:
            import traceback
            traceback.print_exc()
class MinerEnv:
    """Gym-style wrapper around the miner game server.

    Talks to the game through ``GameSocket``, mirrors the game in a local
    ``State``, and converts it into a flat observation vector for a DQN agent.
    """

    def __init__(self, host, port):
        self.socket = GameSocket(host, port)
        self.state = State()
        # define action space
        self.INPUTNUM = 198   # number of input values for the DQN model
        self.ACTIONNUM = 6    # number of actions output from the DQN model
        # define state space
        self.gameState = None
        self.reward = 0
        self.terminate = False
        self.score_pre = self.state.score    # last score, used by the reward function
        self.energy_pre = self.state.energy  # last energy, used by the reward function
        self.viewer = None
        self.steps_beyond_done = None

    def start(self):
        """Connect to the game server."""
        self.socket.connect()

    def end(self):
        """Disconnect from the game server."""
        self.socket.close()

    def send_map_info(self, request):
        """Tell the server which map/position/energy/step-budget to run."""
        self.socket.send(request)

    def reset(self):
        """Start a new episode and return the initial observation.

        Always uses map 1 with a random start position, 50 initial energy
        and a 100-step budget. (Use ``np.random.randint(1, 6)`` for the map
        ID to sample all five maps in the Maps folder.)
        """
        mapID = 1
        posID_x = np.random.randint(MAP_MAX_X)  # random initial X of the agent
        posID_y = np.random.randint(MAP_MAX_Y)  # random initial Y of the agent
        # Request format: "map<ID>,<x>,<y>,<energy>,<max_steps>"
        request = ("map" + str(mapID) + "," + str(posID_x) + ","
                   + str(posID_y) + ",50,100")
        self.send_map_info(request)
        try:
            message = self.socket.receive()  # receive game info from server
            self.state.init_state(message)   # init state
        except Exception:
            import traceback
            traceback.print_exc()
        self.gameState = self.get_state()
        self.reward = 0
        self.terminate = False
        self.steps_beyond_done = None
        return self.gameState

    def step(self, action):
        """Send ``action`` to the server and return (obs, reward, done, info)."""
        self.socket.send(str(action))
        try:
            message = self.socket.receive()    # receive new state from server
            self.state.update_state(message)   # update local state
        except Exception:
            import traceback
            traceback.print_exc()
        self.gameState = self.get_state()
        self.reward = self.get_reward()
        done = self.check_terminate()
        return self.gameState, self.reward, done, {}

    # Functions below are customized by the client.

    def get_state(self):
        """Build the observation: flattened map + agent (x, y, energy) + bot positions.

        Map encoding per cell: tree -20, trap -10, swamp its obstacle value,
        gold cells carry the gold amount (gold overrides any obstacle mark).
        """
        view = np.zeros([self.state.mapInfo.max_x + 1,
                         self.state.mapInfo.max_y + 1], dtype=int)
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                # Hoisted: get_obstacle was previously queried up to 3x per cell.
                obstacle = self.state.mapInfo.get_obstacle(i, j)
                if obstacle == TreeID:
                    view[i, j] = -20
                if obstacle == TrapID:
                    view[i, j] = -10
                if obstacle == SwampID:
                    view[i, j] = self.state.mapInfo.get_obstacle_value(i, j)
                gold = self.state.mapInfo.gold_amount(i, j)
                if gold > 0:
                    view[i, j] = gold
        DQNState = view.flatten().tolist()  # flatten the map matrix to a vector
        # Append position and energy of the agent.
        DQNState.append(self.state.x)
        DQNState.append(self.state.y)
        DQNState.append(self.state.energy)
        # (Removed a dead `me = {...}` dict that was built but never used.)
        # Append positions of the other bots.
        for player in self.state.players:
            if player["playerId"] != self.state.id:
                DQNState.append(player["posx"])
                DQNState.append(player["posy"])
        return np.array(DQNState)

    def get_reward(self):
        """Reward = score delta - 0.2 * energy consumed, minus elimination penalties."""
        score_action = self.state.score - self.score_pre
        energy_consume = self.energy_pre - self.state.energy
        self.score_pre = self.state.score
        self.energy_pre = self.state.energy
        reward = score_action - 0.2 * energy_consume
        # Leaving the map or running out of energy ends the episode badly:
        # punish with a larger negative reward.
        if self.state.status == State.STATUS_ELIMINATED_WENT_OUT_MAP:
            reward += -10
        if self.state.status == State.STATUS_ELIMINATED_OUT_OF_ENERGY:
            reward += -10
        return reward

    def check_terminate(self):
        """True when the game has ended (any status other than playing)."""
        return self.state.status != State.STATUS_PLAYING

    def updateObservation(self):
        return

    def render(self, mode='human', close=False):
        return

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        raise NotImplementedError()

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        # Returns
            Returns the list of seeds used in this env's random number generators
        """
        raise NotImplementedError()

    def configure(self, *args, **kwargs):
        """Provides runtime configuration to the environment.

        This configuration should consist of data that tells your environment
        how to run (such as an address of a remote server, or path to your
        ImageNet data). It should not affect the semantics of the environment.
        """
        raise NotImplementedError()
class MinerEnv:
    """Client-side environment variant that feeds a CNN-style multi-channel
    observation (5x-upscaled map, one channel per player) to the agent.

    NOTE(review): this file contains several classes named ``MinerEnv``;
    presumably each variant lives in its own module in the original repo.
    """

    def __init__(self, host, port):
        self.socket = GameSocket(host, port)
        self.state = State()
        # Previous-step position/energy, recorded in step() and used by the
        # reward shaping in get_reward().
        self.pre_x = 0
        self.pre_y = 0
        self.pre_energy = 0
        self.score_pre = self.state.score  # last score, used by the reward function

    def start(self):
        # Connect to the game server.
        self.socket.connect()

    def end(self):
        # Disconnect from the game server.
        self.socket.close()

    def send_map_info(self, request):
        # Tell the server which map to run.
        self.socket.send(request)

    def reset(self):
        # Start a new game: receive initial game info and initialise the state.
        try:
            message = self.socket.receive()
            self.state.init_state(message)
        except:
            import traceback
            traceback.print_exc()

    def step(self, action):
        # One game tick: remember the pre-step snapshot for reward shaping,
        # send the action, then refresh the local state from the server.
        self.pre_energy = self.state.energy
        self.pre_x, self.pre_y = self.state.x, self.state.y  # last coordinate
        self.socket.send(action)
        try:
            message = self.socket.receive()
            self.state.update_state(message)
        except:
            import traceback
            traceback.print_exc()

    # Functions below are customized by the client.

    def get_state(self):
        """Build a (5*W, 5*H, 6) int array: channel 0 holds terrain/gold,
        the remaining channels mark player positions (scaled energy).

        NOTE(review): the array dtype is int, so `gold_amount / 1000` and
        `energy / 50` truncate toward zero on assignment — values below the
        divisor become 0. Looks unintended; confirm before relying on it.
        """
        view = np.zeros((5 * (self.state.mapInfo.max_x + 1),
                         5 * (self.state.mapInfo.max_y + 1), 6), dtype=int)
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                # Channel 0: obstacles as negative IDs, each cell blown up 5x5.
                if self.state.mapInfo.get_obstacle(i, j) == TreeID:  # Tree
                    view[5*i:5*i+5, 5*j:5*j+5, 0] = -TreeID
                if self.state.mapInfo.get_obstacle(i, j) == TrapID:  # Trap
                    view[5*i:5*i+5, 5*j:5*j+5, 0] = -TrapID
                if self.state.mapInfo.get_obstacle(i, j) == SwampID:  # Swamp
                    view[5*i:5*i+5, 5*j:5*j+5, 0] = -SwampID
                if self.state.mapInfo.gold_amount(i, j) > 0:
                    # Gold overrides the obstacle mark on channel 0.
                    view[5*i:5*i+5, 5*j:5*j+5, 0] = self.state.mapInfo.gold_amount(i, j)/1000
        for stt, player in enumerate(self.state.players):
            if player["playerId"] != self.state.id:
                # Other players: channel stt+1 carries their scaled energy.
                # Statuses 1/2/3 are treated as eliminated and skipped —
                # TODO confirm against the status enum.
                try:
                    if player["status"] not in [1, 2, 3]:
                        try:
                            view[5*player["posx"]:5*player["posx"]+5,
                                 5*player["posy"]:5*player["posy"]+5,
                                 stt + 1] = player["energy"]/50
                        except:
                            # Position off the array: clamp marker to 1.
                            view[5*player["posx"]:5*player["posx"]+5,
                                 5*player["posy"]:5*player["posy"]+5,
                                 stt + 1] = 1
                except:
                    # "status" key missing from the player dict.
                    # NOTE(review): this path writes channel `stt`, not
                    # `stt + 1` as above — looks like an off-by-one; confirm.
                    view[5*player["posx"]:5*player["posx"]+5,
                         5*player["posy"]:5*player["posy"]+5, stt] = 1
            else:
                # Our own agent always goes on channel 2 (unscaled energy).
                # NOTE(review): channel 2 can collide with an enemy at
                # stt + 1 == 2; confirm the intended channel layout.
                try:
                    view[5*self.state.x:5*self.state.x+5,
                         5*self.state.y:5*self.state.y+5, 2] = self.state.energy
                except:
                    print('out of map')
        DQNState = np.array(view)
        return DQNState

    def get_reward(self, action):
        """Shaped reward for the last transition.

        ``action`` is unused; the method reads ``state.lastAction`` instead.
        """
        reward = 0
        score_action = self.state.score - self.score_pre
        self.score_pre = self.state.score
        pre_x, pre_y = self.pre_x, self.pre_y
        # Penalise resting (action 4) while energy is already nearly full.
        if self.state.energy >= 45 and self.state.lastAction == 4:
            reward += -0.2
        # Small bonus for standing on a cell that still has gold.
        if self.state.mapInfo.gold_amount(self.state.x, self.state.y) >= 50:
            reward += 0.2
        # Positive reward proportional to gold actually crafted.
        if score_action > 0:
            reward += score_action/50
        # Small bonus for staying alive.
        if self.state.status == State.STATUS_PLAYING:
            reward += 0.1
        # Crafting (action 5) on a cell with no gold is punished.
        # NOTE(review): `get_obstacle(...) < 4` is used as a "no gold here"
        # test — presumably gold cells report obstacle type >= 4; confirm.
        if self.state.mapInfo.get_obstacle(pre_x, pre_y) < 4 and int(self.state.lastAction) == 5:
            reward += -0.2
        # Standing on gold with enough energy but not crafting is punished.
        if (self.state.mapInfo.gold_amount(pre_x, pre_y) >= 50 and self.pre_energy > 15) and (int(self.state.lastAction) != 5):
            reward += -0.2
        return reward

    def check_terminate(self):
        # The game ends when the status is anything other than "playing".
        return self.state.status != State.STATUS_PLAYING
class Bot1:
    """Heuristic BFS bot ("G_BOT"): plans a path to a gold target via the
    project's ``Graph`` BFS helper, with randomised strength parameters.

    Depends on project helpers defined elsewhere: ``State``, ``PlayerInfo``,
    ``Graph``, ``valid`` and ``randrange`` (presumably from ``random``).
    """

    ACTION_GO_LEFT = 0
    ACTION_GO_RIGHT = 1
    ACTION_GO_UP = 2
    ACTION_GO_DOWN = 3
    ACTION_FREE = 4
    ACTION_CRAFT = 5

    def __init__(self, id, estWood=-1, pEnergyToStep=-1, pStepToGold=-1):
        self.state = State()
        self.info = PlayerInfo(id)
        if (estWood == -1):
            # Randomise the bot's "strength" when no parameters are given:
            # estWood in [5, 20], pEnergyToStep in {10..50}, pStepToGold in {50..300}.
            estWood = (5 + randrange(16))
            pEnergyToStep = (2 + randrange(9)) * 5
            pStepToGold = (1 + randrange(6)) * 50
        self.estWood = estWood              # assumed energy cost of a wood cell
        self.pEnergyToStep = pEnergyToStep  # energy-per-step weight for target scoring
        self.pStepToGold = pStepToGold      # step-to-gold weight for target scoring
        # NOTE(review): self.isKeepFree is only initialised in new_game();
        # calling next_action() before new_game() raises AttributeError.

    def next_action(self):
        """Choose the next action: craft on gold, otherwise follow the BFS
        path toward the best-scoring gold target; rest when energy is low."""
        if (self.info.status != 0 and self.state.stepCount < 100):
            # Unexpected: bot is not in "playing" status mid-game.
            print("WTF", self.info.status)
        countPlayerAtGoldMine = 0
        x, y = self.info.posx, self.info.posy
        r_Action = self.ACTION_FREE  # safe default
        if (self.isKeepFree):
            # Previous step scheduled a forced rest; consume it now.
            self.isKeepFree = False
            return r_Action
        # 1st rule, highest priority: craft & survive.
        if (valid(y, x)):
            goldOnGround = self.state.mapInfo.gold_amount(x, y)
            # Count every player (including us) sharing this cell; the gold
            # is split between them when crafting.
            countPlayerAtGoldMine = 0
            for player in self.state.players:
                px, py = player['posx'], player['posy']
                if (px == x and py == y):
                    countPlayerAtGoldMine += 1
            if (goldOnGround > 0):
                # Craft only if our share is non-zero and we can afford it.
                if (goldOnGround // countPlayerAtGoldMine > 0 and self.info.energy > 5):
                    r_Action = self.ACTION_CRAFT
            else:
                # No gold here: plan a path with BFS over a 9x21 board.
                g = Graph(9, 21)
                g.convertToMap(state=self.state, estWood=self.estWood,
                               botInfo=self.info, isBot=True)
                g.BFS()
                target = g.getBFSResult(self.pEnergyToStep, self.pStepToGold)
                if (target == -1):
                    print("NO TARGET")
                    return self.ACTION_FREE
                # Next cell on the path back from the target (row ny, col nx).
                ny, nx = g.traceBack(target)
                ny, nx = int(ny), int(nx)
                typeOb = self.state.mapInfo.get_obstacle(nx, ny)
                nextTrap = g.boardMap[ny, nx]
                if (typeOb == 1):  # wood: assume worst-case entry cost of 20
                    nextTrap = 20
                if (nextTrap >= self.info.energy):
                    # Entering the next cell would drain us: rest instead.
                    r_Action = self.ACTION_FREE
                else:
                    # Convert the (ny, nx) step into a move action.
                    if (ny == y):
                        if (nx > x):
                            r_Action = self.ACTION_GO_RIGHT
                        elif (nx < x):
                            r_Action = self.ACTION_GO_LEFT
                    else:  # nx == x
                        if (ny > y):
                            r_Action = self.ACTION_GO_DOWN
                        elif (ny < y):
                            r_Action = self.ACTION_GO_UP
                        else:
                            print("INVALID WTF")
        # If we are about to move while low on energy (and the game is not
        # nearly over), schedule a rest for the following step.
        if (r_Action < 4 and self.info.energy <= 13 and self.state.stepCount < 90):
            self.isKeepFree = True
        return r_Action

    def new_game(self, data):
        # Initialise per-game flags and local state from the server message.
        try:
            self.isKeepFree = False
            self.state.init_state(data)
        except Exception as e:
            import traceback
            traceback.print_exc()

    def new_state(self, data):
        # Refresh local state from a per-step server message.
        try:
            self.state.update_state(data)
        except Exception as e:
            import traceback
            traceback.print_exc()

    def printInfo(self):
        # Debug dump of this bot's parameters and current score/energy.
        print("G_BOT", self.info.playerId, self.estWood, self.pEnergyToStep,
              self.pStepToGold, self.info.score, self.info.energy)
class MinerEnv:
    """Environment variant with swamp tracking, two shaped-reward functions,
    a 7-channel torch observation, and a cropped-window numpy observation.

    NOTE(review): this file contains several classes named ``MinerEnv``;
    presumably each variant lives in its own module in the original repo.
    """

    def __init__(self, host, port):
        self.socket = GameSocket(host, port)
        self.state = State()
        self.score_pre = self.state.score  # last score, used by the reward function

    def start(self):
        # Connect to the game server.
        self.socket.connect()

    def end(self):
        # Disconnect from the game server.
        self.socket.close()

    def send_map_info(self, request):
        # Tell the server which map to run.
        self.socket.send(request)

    def reset(self):
        """Start a new game: reset per-episode bookkeeping, then receive and
        initialise the game state."""
        self.state_x_pre = self.state.x        # previous-step x
        self.state_y_pre = self.state.y        # previous-step y
        self.last3position = []                # sliding window of the last 3 positions
        self.Swamp_position = []               # swamp cells already visited by anyone
        self.craft_no_gold = 0                 # count of crafts on empty cells
        self.in_gold = 0                       # count of "on gold but didn't craft"
        self.premindist = 1000                 # previous min distance-to-gold score
        try:
            message = self.socket.receive()
            self.state.init_state(message)
        except Exception as e:
            import traceback
            traceback.print_exc()

    def step(self, action):
        """One game tick: snapshot the previous position, send the action,
        refresh the local state."""
        self.state_x_pre = self.state.x
        self.state_y_pre = self.state.y
        self.last3position.append([self.state.x, self.state.y])
        if len(self.last3position) > 3:
            self.last3position.pop(0)
        self.socket.send(action)
        try:
            message = self.socket.receive()
            self.state.update_state(message)
        except Exception as e:
            import traceback
            traceback.print_exc()

    # Functions below are customized by the client.

    def get_reward(self):
        """Shaped reward: crafting bonus, obstacle penalties (repeat-swamp
        visits punished harder), and penalties for wasted crafts."""
        reward = 0
        score_action = self.state.score - self.score_pre
        self.score_pre = self.state.score
        if score_action > 0:
            # Crafted gold: positive reward proportional to the amount.
            reward += score_action / 50 * 10
        # Obstacle penalties at the current cell.
        if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == TreeID:  # Tree
            reward -= 0.06 * 3
        if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == TrapID:  # Trap
            reward -= 0.03 * 3
        if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == SwampID:  # Swamp
            if [self.state.x, self.state.y] in self.Swamp_position:
                # Returning to a known swamp cell is punished harder.
                reward -= 0.5
            else:
                reward -= 0.05  # first visit to this swamp
        # Bonus for reaching a cell that still has gold.
        if self.state.mapInfo.gold_amount(self.state.x, self.state.y) >= 50:
            reward += 0.3
        # Was on gold but did not craft.
        if self.state.mapInfo.gold_amount(self.state_x_pre, self.state_y_pre) >= 50 and self.state.lastAction != 5:
            self.in_gold += 1
            reward -= 0.5
        # Crafted but gained nothing (no gold under the agent).
        if self.state.lastAction == 5 and score_action == 0:
            self.craft_no_gold += 1
            reward -= 0.5
        # Larger penalty for leaving the map; smaller for running dry.
        if self.state.status == State.STATUS_ELIMINATED_WENT_OUT_MAP:
            reward -= 20
        if self.state.status == State.STATUS_ELIMINATED_OUT_OF_ENERGY:
            reward -= 0.7
        if self.state.status == State.STATUS_PLAYING:
            reward += 0.1  # small bonus for staying alive
        return reward

    def get_reward_complex(self):
        """Alternative reward: adds a progress bonus when the agent gets
        closer (in a gold-value-weighted distance) to the best gold pile."""
        reward = 0
        score_action = self.state.score - self.score_pre
        self.score_pre = self.state.score
        # Progress term: weighted distance to the most attractive gold.
        # `distance` is presumably a module-level helper; confirm.
        golds = self.state.mapInfo.golds
        miner_posx, miner_posy = self.state.x, self.state.y
        target_x, target_y = miner_posx, miner_posy  # NOTE(review): unused
        mindist = 1000
        for gold in golds:
            dist = distance([gold["posx"], gold["posy"]],
                            [miner_posx, miner_posy]) - self.reward_gold(
                                [gold["posx"], gold["posy"]])
            if dist < mindist:
                mindist = dist
        if mindist < self.premindist:
            reward += 0.5  # got closer than ever before this episode
        self.premindist = mindist
        if score_action > 0:
            # Crafted gold: positive reward proportional to the amount.
            reward += score_action / 50 * 10
        if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == TreeID:  # Tree
            reward -= 0.06 * 3
        if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == TrapID:  # Trap
            reward -= 0.03 * 3
        if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == SwampID:  # Swamp
            if [self.state.x, self.state.y] in self.Swamp_position:
                reward -= 0.5  # repeat swamp visit
            else:
                reward -= 0.05  # first visit to this swamp
        # Was on gold but did not craft.
        if self.state.mapInfo.gold_amount(self.state_x_pre, self.state_y_pre) >= 50 and self.state.lastAction != 5:
            reward -= 0.5
        # Crafted but gained nothing.
        if self.state.lastAction == 5 and score_action == 0:
            reward -= 0.5
        # Resting while energy is nearly full is wasteful.
        if self.state.energy >= 45 and self.state.lastAction == 4:
            reward -= 1
        if self.state.status == State.STATUS_ELIMINATED_WENT_OUT_MAP:
            reward -= 20
        if self.state.status == State.STATUS_ELIMINATED_OUT_OF_ENERGY:
            reward -= 0.7
        if self.state.status == State.STATUS_PLAYING:
            reward += 0.1
        return reward

    def reward_gold(self, gold_pos):
        """Score the 8-neighbourhood of ``gold_pos`` by how much gold sits
        around it (richer neighbourhoods score higher)."""
        x, y = gold_pos[0], gold_pos[1]
        reward = 0
        # Offsets: 4-neighbours first, then diagonals.
        for stt, (i, j) in enumerate(
                zip([-1, 1, 0, 0, -1, 1, -1, 1], [0, 0, -1, 1, -1, -1, 1, 1])):
            xnew, ynew = x + i, y + j
            if xnew <= self.state.mapInfo.max_x and xnew >= 0 \
                    and ynew <= self.state.mapInfo.max_y and ynew >= 0:
                amount = self.state.mapInfo.gold_amount(xnew, ynew)
                # NOTE(review): the bands overlap (>= 1000 also matches > 500),
                # so a 1000+ pile adds 3 + 5 = 8. Confirm this is intended.
                if amount >= 100 and amount <= 200:
                    reward += 1
                if amount > 200 and amount <= 500:
                    reward += 2
                if amount > 500:
                    reward += 3
                if amount >= 1000:
                    reward += 5
        return reward

    def get_state_tensor(self, scale_map):
        """Build a (7, n*W, n*H) float tensor, each map cell blown up n x n.

        Channel layout (as written by the code): 0 combined terrain+gold,
        1 gold only, 2 terrain only, 3 our agent's scaled energy, 4+ other
        players' scaled energy.
        """
        n = scale_map
        view = torch.zeros((7, n * (self.state.mapInfo.max_x + 1),
                            n * (self.state.mapInfo.max_y + 1)),
                           dtype=torch.float)
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                if self.state.mapInfo.get_obstacle(i, j) == TreeID:  # Tree
                    view[2, n * i:n * i + n, n * j:n * j + n] = -TreeID
                    view[0, n * i:n * i + n, n * j:n * j + n] = -TreeID
                if self.state.mapInfo.get_obstacle(i, j) == TrapID:  # Trap
                    view[2, n * i:n * i + n, n * j:n * j + n] = -TrapID
                    view[0, n * i:n * i + n, n * j:n * j + n] = -TrapID
                if self.state.mapInfo.get_obstacle(i, j) == SwampID:  # Swamp
                    # Already-visited swamps are marked more negative (-6 vs -3).
                    if [i, j] not in self.Swamp_position:
                        view[2, n * i:n * i + n, n * j:n * j + n] = -SwampID
                        view[0, n * i:n * i + n, n * j:n * j + n] = -SwampID
                    else:
                        view[2, n * i:n * i + n, n * j:n * j + n] = -SwampID - 3
                        view[0, n * i:n * i + n, n * j:n * j + n] = -SwampID - 3
                gold_ = self.state.mapInfo.gold_amount(i, j)
                if gold_ > 0:
                    view[1, n * i:n * i + n, n * j:n * j + n] = gold_ / 1000
                    view[0, n * i:n * i + n, n * j:n * j + n] = gold_ / 1000
        index = 3
        playerid_list = []
        for stt, player in enumerate(self.state.players):
            playerid_list.append(player["playerId"])
            if player["playerId"] != self.state.id:
                # Other players occupy channels 4+ in encounter order.
                # Statuses 1/2/3 are treated as eliminated and skipped —
                # TODO confirm against the status enum.
                try:
                    if player["status"] not in [1, 2, 3]:
                        try:
                            view[index + 1,
                                 n * player["posx"]:n * player["posx"] + n,
                                 n * player["posy"]:n * player["posy"] + n] = player["energy"] / 50
                        except:
                            # Position off the tensor: clamp marker to 1.
                            view[index + 1,
                                 n * player["posx"]:n * player["posx"] + n,
                                 n * player["posy"]:n * player["posy"] + n] = 1
                        index += 1
                except:
                    # "status" key missing from the player dict.
                    view[index + 1,
                         n * player["posx"]:n * player["posx"] + n,
                         n * player["posy"]:n * player["posy"] + n] = 1
                    index += 1
            else:
                # Our own agent on channel 3.
                try:
                    view[3, n * self.state.x:n * self.state.x + n,
                         n * self.state.y:n * self.state.y + n] = self.state.energy / 50
                except:
                    print('out of map')
        # Fallback: if we never appeared in the players list, still mark us.
        if self.state.id not in playerid_list:
            view[3, n * self.state.x:n * self.state.x + n,
                 n * self.state.y:n * self.state.y + n] = self.state.energy / 50
        DQNState = view
        return DQNState

    def get_state2(self, limit):
        """Build a (2*limit+1)^2 window around the agent, shifted to stay
        inside the map, flattened with (x, y, energy) appended."""
        view = np.zeros([limit * 2 + 1, limit * 2 + 1], dtype=int)
        max_x, max_y = self.state.mapInfo.max_x, self.state.mapInfo.max_y
        # Window bounds clamped to the map; near an edge the window is
        # shifted (not shrunk) so it always spans 2*limit+1 cells.
        xlimit_below = np.clip(self.state.x - limit, 0, max_x) - np.clip(
            self.state.x + limit - max_x, 0, limit)
        xlimit_up = np.clip(self.state.x + limit, 0, max_x) + np.clip(
            0 - self.state.x + limit, 0, limit)
        ylimit_below = np.clip(self.state.y - limit, 0, max_y) - np.clip(
            self.state.y + limit - max_y, 0, limit)
        ylimit_up = np.clip(self.state.y + limit, 0, max_y) + np.clip(
            0 - self.state.y + limit, 0, limit)
        for i in range(xlimit_below, xlimit_up + 1):
            for j in range(ylimit_below, ylimit_up + 1):
                if self.state.mapInfo.get_obstacle(i, j) == TreeID:  # Tree
                    view[i - xlimit_below, j - ylimit_below] = -TreeID
                if self.state.mapInfo.get_obstacle(i, j) == TrapID:  # Trap
                    view[i - xlimit_below, j - ylimit_below] = -TrapID
                if self.state.mapInfo.get_obstacle(i, j) == SwampID:  # Swamp
                    view[i - xlimit_below, j - ylimit_below] = -SwampID
                if self.state.mapInfo.gold_amount(i, j) > 0:
                    # NOTE(review): int dtype truncates gold/10 on assignment.
                    view[i - xlimit_below, j - ylimit_below] = self.state.mapInfo.gold_amount(i, j) / 10
        DQNState = view.flatten().tolist()  # flatten the window to a vector
        # Append the agent's window-relative position and energy.
        DQNState.append(self.state.x - xlimit_below)
        DQNState.append(self.state.y - ylimit_below)
        DQNState.append(self.state.energy)
        DQNState = np.array(DQNState)
        return DQNState

    def update_swamp(self):
        """Record every swamp cell (obstacle type 3) currently occupied by
        any player, so repeat visits can be punished and marked."""
        for player in self.state.players:
            if self.state.mapInfo.get_obstacle(
                    player["posx"], player["posy"]) == 3 and [
                        player["posx"], player["posy"]
                    ] not in self.Swamp_position:
                self.Swamp_position.append([player["posx"], player["posy"]])

    def check_terminate(self):
        # The game ends when the status is anything other than "playing".
        return self.state.status != State.STATUS_PLAYING
class MinerEnv:
    """Socket-backed client environment that turns the miner-game state into a
    torch-ready featurized observation (multi-channel board + flat features).

    Relies on project-level names: GameSocket, State, constants, np, torch.
    """

    def __init__(self, host, port):
        self.socket = GameSocket(host, port)
        self.state = State()
        self.score_pre = self.state.score  # Storing the last score for designing the reward function

    def start(self):  # connect to server
        self.socket.connect()

    def end(self):  # disconnect server
        self.socket.close()

    def send_map_info(self, request):  # tell server which map to run
        self.socket.send(request)

    def reset(self):  # start new game
        try:
            message = self.socket.receive()  # receive game info from server
            print(message)
            self.state.init_state(message)  # init state
        except Exception as e:
            import traceback
            traceback.print_exc()

    def step(self, action):  # step process
        self.socket.send(action)  # send action to server
        try:
            message = self.socket.receive()  # receive new state from server
            # print("New state: ", message)
            self.state.update_state(message)  # update to local state
        except Exception as e:
            import traceback
            traceback.print_exc()

    # Functions are customized by client
    # NOTE: an earlier commented-out draft of get_state (scaled by
    # constants.MAX_EXTRACTABLE_GOLD and building tensors inline) was removed
    # here for readability; get_state below supersedes it.

    def get_state(self, last_3_actions):
        """Build the featurized observation.

        Returns (featurized_obs, self.state) where featurized_obs["obs"] holds:
          - "conv_features": 1 x C x H x W float tensor (4 player channels,
            11 board channels, 1 broadcast energy channel),
          - "fc_features": 1 x F float tensor (normalized position + one-hot of
            the last 3 actions).

        `last_3_actions` must be 3 action indices in [0, 6) — it indexes a
        (3, 6) one-hot matrix below.
        """
        obs = self.state
        # One presence channel for self (index 0) plus up to 3 opponents.
        player_channel = np.zeros(
            (4, obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1), dtype=float)
        # One binary channel per terrain cost class.
        obstacle_1 = np.zeros([obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1],
                              dtype=float)
        obstacle_random = np.zeros(
            [obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1], dtype=float)
        obstacle_5 = np.zeros([obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1],
                              dtype=float)
        obstacle_10 = np.zeros([obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1],
                               dtype=float)
        obstacle_20 = np.zeros([obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1],
                               dtype=float)
        obstacle_40 = np.zeros([obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1],
                               dtype=float)
        obstacle_100 = np.zeros([obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1],
                                dtype=float)
        # Per-cell min/max step cost, normalized by constants.MAX_ENERGY.
        obstacle_value_min = np.zeros(
            [obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1], dtype=float)
        obstacle_value_max = np.zeros(
            [obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1], dtype=float)
        gold = np.zeros([obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1],
                        dtype=float)
        gold_amount = np.zeros([obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1],
                               dtype=float)
        for i in range(obs.mapInfo.max_y + 1):  # i = row (y), j = column (x)
            for j in range(obs.mapInfo.max_x + 1):
                type, value = None, None  # NOTE: shadows builtin `type`
                for cell in obs.mapInfo.obstacles:
                    if j == cell["posx"] and i == cell["posy"]:
                        type, value = cell["type"], cell["value"]
                # Cell missing from obstacles and golds is treated as land (-1).
                if type is None and value is None:
                    has_gold = False
                    for cell in obs.mapInfo.golds:
                        if j == cell["posx"] and i == cell["posy"]:
                            has_gold = True
                    if not has_gold:
                        value = -1
                if value == 0:  # Forest
                    obstacle_random[i, j] = 1
                if value == -1:  # Land
                    obstacle_1[i, j] = 1
                if value == -5:  # Swamp 1
                    obstacle_5[i, j] = 1
                if value == -10:  # Trap
                    obstacle_10[i, j] = 1
                if value == -20:  # Swamp 2
                    obstacle_20[i, j] = 1
                if value == -40:  # Swamp 3
                    obstacle_40[i, j] = 1
                if value == -100:  # Swamp 4
                    obstacle_100[i, j] = 1
                if value is None:  # Gold spot
                    gold[i, j] = 1
                    value = -4  # digging cost stand-in for a gold cell
                # Forest (value == 0) has a random cost: assume 5..20 range.
                obstacle_value_min[i, j] = (-value if value != 0 else
                                            5) / constants.MAX_ENERGY
                obstacle_value_max[i, j] = (-value if value != 0 else
                                            20) / constants.MAX_ENERGY
                # NOTE(review): 3000 looks like an assumed max pile size here,
                # while get_state_v2 uses 1250 — confirm which is intended.
                gold_amount[i, j] = obs.mapInfo.gold_amount(j, i) / 3000
        player_channel[0][obs.y, obs.x] = 1  # self always in channel 0
        id = 1  # NOTE: shadows builtin `id`; next free opponent channel
        for player in obs.players:
            if player["playerId"] == obs.id:
                continue
            if "status" in player and player[
                    "status"] == constants.Status.STATUS_PLAYING.value:
                player_channel[id][player["posy"], player["posx"]] = 1
                id += 1
        board = np.stack([
            obstacle_random, obstacle_1, obstacle_5, obstacle_10, obstacle_20,
            obstacle_40, obstacle_100, obstacle_value_min, obstacle_value_max,
            gold, gold_amount
        ])
        # Position scaled to [-1, 1]; 8 / 20 are assumed map extents — TODO
        # confirm they match mapInfo.max_y / max_x.
        position = np.clip(np.array([obs.y / 8 * 2 - 1, obs.x / 20 * 2 - 1]),
                           -1, 1)
        one_hot_last_3_actions = np.zeros((3, 6), dtype=np.float32)
        one_hot_last_3_actions[np.arange(3), last_3_actions] = 1
        one_hot_last_3_actions = one_hot_last_3_actions.reshape(-1)
        featurized_obs = {
            "obs": {
                "conv_features":
                torch.unsqueeze(
                    torch.tensor(np.concatenate([
                        player_channel, board,
                        np.full(
                            (1, obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1),
                            fill_value=max(
                                0, obs.energy / (constants.MAX_ENERGY / 2)))
                    ]),
                                 dtype=torch.float), 0),
                "fc_features":
                torch.unsqueeze(
                    torch.tensor(np.concatenate(
                        [position, one_hot_last_3_actions]),
                                 dtype=torch.float), 0)
            }
        }
        return featurized_obs, self.state

    def get_state_v2(self, last_3_actions):
        """Like get_state, but additionally appends all players' scores and
        energies to "fc_features" and scales gold piles by 1250 instead of 3000.
        Returns (featurized_obs, self.state)."""
        obs = self.state
        player_channel = np.zeros(
            (4, obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1), dtype=float)
        obstacle_1 = np.zeros([obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1],
                              dtype=float)
        obstacle_random = np.zeros(
            [obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1], dtype=float)
        obstacle_5 = np.zeros([obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1],
                              dtype=float)
        obstacle_10 = np.zeros([obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1],
                               dtype=float)
        obstacle_20 = np.zeros([obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1],
                               dtype=float)
        obstacle_40 = np.zeros([obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1],
                               dtype=float)
        obstacle_100 = np.zeros([obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1],
                                dtype=float)
        obstacle_value_min = np.zeros(
            [obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1], dtype=float)
        obstacle_value_max = np.zeros(
            [obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1], dtype=float)
        gold = np.zeros([obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1],
                        dtype=float)
        gold_amount = np.zeros([obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1],
                               dtype=float)
        for i in range(obs.mapInfo.max_y + 1):
            for j in range(obs.mapInfo.max_x + 1):
                type, value = None, None  # NOTE: shadows builtin `type`
                for cell in obs.mapInfo.obstacles:
                    if j == cell["posx"] and i == cell["posy"]:
                        type, value = cell["type"], cell["value"]
                if type is None and value is None:
                    has_gold = False
                    for cell in obs.mapInfo.golds:
                        if j == cell["posx"] and i == cell["posy"]:
                            has_gold = True
                    if not has_gold:
                        value = -1
                if value == 0:  # Forest
                    obstacle_random[i, j] = 1
                if value == -1:  # Land
                    obstacle_1[i, j] = 1
                if value == -5:  # Swamp 1
                    obstacle_5[i, j] = 1
                if value == -10:  # Trap
                    obstacle_10[i, j] = 1
                if value == -20:  # Swamp 2
                    obstacle_20[i, j] = 1
                if value == -40:  # Swamp 3
                    obstacle_40[i, j] = 1
                if value == -100:  # Swamp 4
                    obstacle_100[i, j] = 1
                if value is None:  # Gold spot
                    gold[i, j] = 1
                    value = -4
                obstacle_value_min[i, j] = (-value if value != 0 else
                                            5) / constants.MAX_ENERGY
                obstacle_value_max[i, j] = (-value if value != 0 else
                                            20) / constants.MAX_ENERGY
                gold_amount[i, j] = obs.mapInfo.gold_amount(j, i) / 1250
        # Slot 0 is self; opponents fill slots 1..3 in enumeration order.
        scores = [obs.score, 0, 0, 0]
        energies = [obs.energy, 0, 0, 0]
        player_channel[0][obs.y, obs.x] = 1
        id = 1  # NOTE: shadows builtin `id`
        for player in obs.players:
            if player["playerId"] == obs.id:
                continue
            if "status" in player and player[
                    "status"] == constants.Status.STATUS_PLAYING.value:
                player_channel[id][player["posy"], player["posx"]] = 1
                # NOTE(review): key "scores" (plural) — differs from obs.score
                # used above for self; confirm against the server schema.
                scores[id] = player["scores"]
                energies[id] = player["energy"]
                id += 1
        board = np.stack([
            obstacle_random, obstacle_1, obstacle_5, obstacle_10, obstacle_20,
            obstacle_40, obstacle_100, obstacle_value_min, obstacle_value_max,
            gold, gold_amount
        ])
        # board = np.concatenate([players, board])
        position = np.clip(np.array([obs.y / 8 * 2 - 1, obs.x / 20 * 2 - 1]),
                           -1, 1)
        one_hot_last_3_actions = np.zeros((3, 6), dtype=np.float32)
        one_hot_last_3_actions[np.arange(3), last_3_actions] = 1
        one_hot_last_3_actions = one_hot_last_3_actions.reshape(-1)
        featurized_obs = {
            "obs": {
                "conv_features":
                torch.unsqueeze(
                    torch.tensor(np.concatenate([
                        player_channel,
                        np.copy(board),
                        np.full(
                            (1, obs.mapInfo.max_y + 1, obs.mapInfo.max_x + 1),
                            fill_value=max(
                                0, obs.energy / (constants.MAX_ENERGY / 2)))
                    ]),
                                 dtype=torch.float), 0),
                "fc_features":
                torch.unsqueeze(
                    torch.tensor(np.concatenate(
                        [position, one_hot_last_3_actions, scores, energies]),
                                 dtype=torch.float), 0)
            }
        }
        return featurized_obs, self.state

    def check_terminate(self):
        # True once the game has left the PLAYING status (episode over).
        return self.state.status != State.STATUS_PLAYING
class MinerEnv:
    """Socket-backed client environment with shaped rewards and several
    alternative state encodings (flat map, scaled tensor, windowed view).

    Relies on project-level names: GameSocket, State, TreeID, TrapID, SwampID,
    np, randrange.
    """

    def __init__(self, host, port):
        self.socket = GameSocket(host, port)
        self.state = State()
        self.score_pre = self.state.score  # Storing the last score for designing the reward function

    def start(self):  # connect to server
        self.socket.connect()

    def end(self):  # disconnect server
        self.socket.close()

    def send_map_info(self, request):  # tell server which map to run
        self.socket.send(request)

    def reset(self):  # start new game
        # Remember where the agent was, and clear the per-episode histories
        # used by the reward shaping (revisit / idle penalties).
        self.state_x_pre = self.state.x
        self.state_y_pre = self.state.y
        self.last3position = []
        self.Swamp_position = []
        try:
            message = self.socket.receive()  # receive game info from server
            self.state.init_state(message)  # init state
        except Exception as e:
            import traceback
            traceback.print_exc()

    def step(self, action):  # step process
        # Record the pre-action position (used by get_reward*) and keep a
        # rolling window of the last 3 positions.
        self.state_x_pre = self.state.x
        self.state_y_pre = self.state.y
        self.last3position.append([self.state.x, self.state.y])
        if len(self.last3position) > 3:
            self.last3position.pop(0)
        # print(self.last3position)
        self.socket.send(action)  # send action to server
        try:
            message = self.socket.receive()  # receive new state from server
            self.state.update_state(message)  # update to local state
        except Exception as e:
            import traceback
            traceback.print_exc()

    # Functions are customized by client
    def get_state(self):
        """Flatten the full map (obstacles negative, gold positive) and append
        the agent's position/energy plus every other player's position."""
        view = np.zeros(
            [self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1],
            dtype=int)
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                if self.state.mapInfo.get_obstacle(i, j) == TreeID:  # Tree
                    view[i, j] = -TreeID
                if self.state.mapInfo.get_obstacle(i, j) == TrapID:  # Trap
                    view[i, j] = -TrapID
                if self.state.mapInfo.get_obstacle(i, j) == SwampID:  # Swamp
                    view[i, j] = -SwampID
                if self.state.mapInfo.gold_amount(i, j) > 0:
                    view[i, j] = self.state.mapInfo.gold_amount(i, j)
        DQNState = view.flatten().tolist()  # Flattening the map matrix to a vector
        # Add position and energy of agent to the DQNState
        DQNState.append(self.state.x)
        DQNState.append(self.state.y)
        DQNState.append(self.state.energy)
        # Add position of bots
        for player in self.state.players:
            if player["playerId"] != self.state.id:
                DQNState.append(player["posx"])
                DQNState.append(player["posy"])
        # Convert the DQNState from list to array for training
        DQNState = np.array(DQNState)
        return DQNState

    def get_reward2(self):
        """Shaped reward (large-scale variant): +score gained, minus penalties
        for obstacles, wasted turns, oscillation, and terminal failures."""
        reward = 0
        score_action = self.state.score - self.score_pre
        self.score_pre = self.state.score
        if score_action > 0:
            # If the DQN agent crafts golds, then it should obtain a positive
            # reward (equal score_action)
            reward += score_action
        # If the DQN agent crashes into obstacles (Tree, Trap, Swamp), then it
        # should be punished by a negative reward
        if self.state.mapInfo.get_obstacle(self.state.x,
                                           self.state.y) == TreeID:  # Tree
            reward -= TreeID * 3 * randrange(1, 5)
        if self.state.mapInfo.get_obstacle(self.state.x,
                                           self.state.y) == TrapID:  # Trap
            reward -= TrapID * 3
        if self.state.mapInfo.get_obstacle(self.state.x,
                                           self.state.y) == SwampID:  # Swamp
            if [self.state.x, self.state.y] in self.Swamp_position:
                # go to Swamp again
                reward -= 15
            else:
                reward -= SwampID * 3  # first time go to swamp
                self.Swamp_position.append([self.state.x, self.state.y])
        if self.state.mapInfo.gold_amount(
                self.state_x_pre, self.state_y_pre
        ) >= 50 and self.state.lastAction != 5:  # in gold but don't craft
            reward -= 10
        # NOTE(review): crafting on an empty cell may yield score_action == 0
        # rather than < 0 — confirm the server's score semantics.
        if self.state.lastAction == 5 and score_action < 0:  # not in gold but craft
            reward -= 10
        if len(self.last3position
               ) == 3 and self.state.lastAction != 5:  # back to same position
            if self.last3position[0] == self.last3position[2]:
                reward -= 3
            if self.last3position[1] == self.last3position[2]:
                reward -= 3
        if self.state.energy >= 45 and self.state.lastAction == 4:
            reward -= 7  # resting while energy is already high is wasteful
        # If out of the map, then the DQN agent should be punished by a larger
        # negative reward.
        if self.state.status == State.STATUS_ELIMINATED_WENT_OUT_MAP:
            reward += -40
        # Run out of energy, then the DQN agent should be punished by a larger
        # negative reward.
        if self.state.status == State.STATUS_ELIMINATED_OUT_OF_ENERGY:
            reward += -20
        # print("reward", reward)
        return reward

    def get_reward(self):
        """Shaped reward (normalized variant of get_reward2, roughly /50)."""
        reward = 0
        score_action = self.state.score - self.score_pre
        self.score_pre = self.state.score
        if score_action > 0:
            # If the DQN agent crafts golds, then it should obtain a positive
            # reward (equal score_action)
            reward += score_action / 50
        # If the DQN agent crashes into obstacles (Tree, Trap, Swamp), then it
        # should be punished by a negative reward
        if self.state.mapInfo.get_obstacle(self.state.x,
                                           self.state.y) == TreeID:  # Tree
            reward -= 0.03 * randrange(1, 5)
        if self.state.mapInfo.get_obstacle(self.state.x,
                                           self.state.y) == TrapID:  # Trap
            reward -= 0.06 * 3
        if self.state.mapInfo.get_obstacle(self.state.x,
                                           self.state.y) == SwampID:  # Swamp
            if [self.state.x, self.state.y] in self.Swamp_position:
                # go to Swamp again
                reward -= 0.15
            else:
                reward -= 0.05  # first time go to swamp
                self.Swamp_position.append([self.state.x, self.state.y])
        if self.state.mapInfo.gold_amount(
                self.state_x_pre, self.state_y_pre
        ) >= 50 and self.state.lastAction != 5:  # in gold but don't craft
            reward -= 0.55
        if self.state.lastAction == 5 and score_action < 0:  # not in gold but craft
            reward -= 0.55
        if len(self.last3position
               ) == 3 and self.state.lastAction != 5:  # back to same position
            if self.last3position[0] == self.last3position[2]:
                reward -= 0.1
            if self.last3position[1] == self.last3position[2]:
                reward -= 0.1
        if self.state.energy >= 45 and self.state.lastAction == 4:
            reward -= 0.3
        # If out of the map, then the DQN agent should be punished by a larger
        # negative reward.
        if self.state.status == State.STATUS_ELIMINATED_WENT_OUT_MAP:
            reward += -10
        # Run out of energy, then the DQN agent should be punished by a larger
        # negative reward.
        if self.state.status == State.STATUS_ELIMINATED_OUT_OF_ENERGY:
            reward += -5
        # print("reward", reward)
        return reward

    def get_state_tensor(self, scale_map):
        """Build an upscaled (H*n, W*n, 6) tensor: channel 0 is terrain/gold,
        channels stt+1 mark other players (energy/50 or 1), channel 2 carries
        the agent's own energy."""
        n = scale_map
        view = np.zeros((n * (self.state.mapInfo.max_x + 1),
                         n * (self.state.mapInfo.max_y + 1), 6))
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                if self.state.mapInfo.get_obstacle(
                        i, j) == TreeID:  # Tree # trap map
                    view[n * i:n * i + n, n * j:n * j + n, 0] = -TreeID
                if self.state.mapInfo.get_obstacle(
                        i, j) == TrapID:  # Trap # trap map
                    view[n * i:n * i + n, n * j:n * j + n, 0] = -TrapID
                if self.state.mapInfo.get_obstacle(
                        i, j) == SwampID:  # Swamp # trap map
                    view[n * i:n * i + n, n * j:n * j + n, 0] = -SwampID
                if self.state.mapInfo.gold_amount(i, j) > 0:
                    view[n * i:n * i + n, n * j:n * j + n,
                         0] = self.state.mapInfo.gold_amount(
                             i, j) / 1000  ##/10 gold map
        for stt, player in enumerate(self.state.players):
            if player["playerId"] != self.state.id:
                try:
                    if player["status"] not in [1, 2, 3]:
                        try:
                            view[n * player["posx"]:n * player["posx"] + n,
                                 n * player["posy"]:n * player["posy"] + n,
                                 stt + 1] = player["energy"] / 50
                        except:
                            view[n * player["posx"]:n * player["posx"] + n,
                                 n * player["posy"]:n * player["posy"] + n,
                                 stt + 1] = 1
                except:
                    # FIX: was channel `stt`, which clobbered the terrain/gold
                    # channel 0 for the first player; use stt + 1 to match the
                    # player channel written in both branches above.
                    view[n * player["posx"]:n * player["posx"] + n,
                         n * player["posy"]:n * player["posy"] + n,
                         stt + 1] = 1
                    # print(self.state.players)
            else:
                try:
                    view[n * self.state.x:n * self.state.x + n,
                         n * self.state.y:n * self.state.y + n,
                         2] = self.state.energy / 50
                except:
                    print('out of map')
        DQNState = np.array(view)
        return DQNState

    def get_state3(self, limit):
        """Windowed (2*limit+1)^2 view around the agent, plus position, energy,
        and the offset/amount of the best nearby gold pile (distance-penalized
        score d = -5*dist^2 + 0.1*amount)."""
        # Building the map
        view = np.zeros([limit * 2 + 1, limit * 2 + 1], dtype=int)
        max_x, max_y = self.state.mapInfo.max_x, self.state.mapInfo.max_y
        # Window bounds, shifted so the window stays inside the map even when
        # the agent stands near an edge.
        xlimit_below = np.clip(self.state.x - limit, 0, max_x) - np.clip(
            self.state.x + limit - max_x, 0, limit)
        xlimit_up = np.clip(self.state.x + limit, 0, max_x) + np.clip(
            0 - self.state.x + limit, 0, limit)
        ylimit_below = np.clip(self.state.y - limit, 0, max_y) - np.clip(
            self.state.y + limit - max_y, 0, limit)
        ylimit_up = np.clip(self.state.y + limit, 0, max_y) + np.clip(
            0 - self.state.y + limit, 0, limit)
        dmax, m, n, exist_gold = -1000, -5, 0.1, False
        x_maxgold, y_maxgold = self.state.x, self.state.y
        for i in range(max_x + 1):
            for j in range(max_y + 1):
                if self.state.mapInfo.gold_amount(i, j) >= 50:
                    exist_gold = True
                    d = m * ((self.state.x - i)**2 + (self.state.y - j)**
                             2) + n * self.state.mapInfo.gold_amount(i, j)
                    if d > dmax:
                        dmax = d
                        # position of cell that is nearest and has much gold
                        x_maxgold, y_maxgold = i, j
                if i in range(xlimit_below, xlimit_up + 1) and j in range(
                        ylimit_below, ylimit_up + 1):
                    if self.state.mapInfo.get_obstacle(i, j) == TreeID:  # Tree
                        view[i - xlimit_below, j - ylimit_below] = -TreeID
                    if self.state.mapInfo.get_obstacle(i, j) == TrapID:  # Trap
                        view[i - xlimit_below, j - ylimit_below] = -TrapID
                    if self.state.mapInfo.get_obstacle(i,
                                                       j) == SwampID:  # Swamp
                        view[i - xlimit_below, j - ylimit_below] = -SwampID
                    if self.state.mapInfo.gold_amount(i, j) > 0:
                        view[i - xlimit_below, j -
                             ylimit_below] = self.state.mapInfo.gold_amount(
                                 i, j) / 10
        DQNState = view.flatten().tolist()  # Flattening the map matrix to a vector
        # Add position and energy of agent to the DQNState (window-relative).
        DQNState.append(self.state.x - xlimit_below)
        DQNState.append(self.state.y - ylimit_below)
        DQNState.append(self.state.energy)
        # Offset from the agent to the best gold pile, then its amount.
        DQNState.append(self.state.x - x_maxgold)
        DQNState.append(self.state.y - y_maxgold)
        if not exist_gold:
            DQNState.append(0)
        else:
            DQNState.append(
                self.state.mapInfo.gold_amount(x_maxgold, y_maxgold) / 10)
        # Convert the DQNState from list to array for training
        DQNState = np.array(DQNState)
        return DQNState

    def check_terminate(self):
        # Checking the status of the game
        # it indicates the game ends or is playing
        return self.state.status != State.STATUS_PLAYING
class Bot1:
    """Scripted bot that follows an A* path from its position to the best
    (Manhattan-nearest) gold cell on a 21x9 grid.

    Relies on project-level names: State, PlayerInfo, Node, aStar, manhattan,
    next_move.
    """

    # Action codes understood by the game server.
    ACTION_GO_LEFT = 0
    ACTION_GO_RIGHT = 1
    ACTION_GO_UP = 2
    ACTION_GO_DOWN = 3
    ACTION_FREE = 4
    ACTION_CRAFT = 5

    def __init__(self, id):
        self.state = State()
        self.info = PlayerInfo(id)
        self.path = None  # current list of path Nodes, or None before planning
        # 21x9 grid of cells (x-major) prepared by next_move().
        self.grid = next_move([[0 for i in range(9)] for j in range(21)])
        self.idx = 0  # index of the current node along self.path

    def path_generator(self, grid):
        """(Re)plan: refresh grid costs from the map, pick the closest gold
        cell, and run A* from the bot's current position to it."""
        self.idx = 0
        pos = (self.state.x, self.state.y)
        start = Node(1000, pos)
        self.grid = self.update(self.grid)
        goal = self.gold_sort(self.grid)
        self.path = aStar(start, goal, self.grid)
        for node in self.path:
            print(node.point)

    def next_action(self):
        """Return the next action code by comparing the current path node with
        the following one; replan (and rest) when the path is exhausted.

        NOTE(review): the original source's indentation was ambiguous here;
        this reconstruction nests the energy/craft/move logic under the
        "has a next node" branch, which keeps `self.idx += 1` reachable (it
        fires only at the final node, returning None for that tick) — confirm
        against the project's other bots.
        """
        if self.path is not None and self.idx < len(self.path):
            x = self.path[self.idx]
            if self.idx != len(self.path) - 1:
                nextx = self.path[self.idx + 1]
                if (self.info.energy <= 10):
                    # Not enough energy to act safely: rest first.
                    return self.ACTION_FREE
                else:
                    if self.state.mapInfo.gold_amount(self.info.posx,
                                                      self.info.posy) > 0:
                        # Standing on gold: mine it before moving on.
                        return self.ACTION_CRAFT
                    else:
                        # Step toward the next node; point = (x, y), so a +1
                        # change in point[1] means moving down the map.
                        if ((nextx.point[0] - x.point[0] == 0)
                                and (nextx.point[1] - x.point[1] == 1)):
                            return self.ACTION_GO_DOWN
                        else:
                            if ((nextx.point[0] - x.point[0] == 0)
                                    and (nextx.point[1] - x.point[1] == -1)):
                                return self.ACTION_GO_UP
                            else:
                                if ((nextx.point[0] - x.point[0] == 1)
                                        and (nextx.point[1] - x.point[1] == 0)):
                                    return self.ACTION_GO_RIGHT
                                else:
                                    return self.ACTION_GO_LEFT
            self.idx += 1
        else:
            # Path finished (or never built): replan and idle this turn.
            self.path_generator(self.grid)
            return 4

    def update(self, grid):
        """Copy current obstacle costs (abs value) and gold markers (value 0)
        from the map into the planning grid, then return it."""
        for i in range(0, 21):
            for j in range(0, 9):
                for m in self.state.mapInfo.obstacles:
                    if (i == m["posx"] and j == m["posy"]):
                        grid[i][j].value = abs(m["value"])
                for m in self.state.mapInfo.golds:
                    if (i == m["posx"] and j == m["posy"]):
                        grid[i][j].value = 0
        return grid

    def gold_sort(self, grid):
        """Return the gold cell (value == 0 in the grid) with the smallest
        Manhattan distance from the bot's current position.

        NOTE(review): raises IndexError if the grid holds no gold cells.
        """
        gold = []
        for i in range(0, 21):
            for j in range(0, 9):
                if (grid[i][j].value == 0):
                    gold.append(grid[i][j])
        pos = (self.state.x, self.state.y)
        val = 1000
        temp = Node(val, pos)
        for x in gold:
            # Reuse .value to hold the distance for sorting.
            x.value = manhattan(x, temp)
        gold.sort(key=lambda x: x.value)
        return gold[0]

    def new_game(self, data):
        # Initialize local state from the server's new-game message.
        try:
            self.state.init_state(data)
        except Exception as e:
            import traceback
            traceback.print_exc()

    def new_state(self, data):
        # action = self.next_action();
        # self.socket.send(action)
        # Fold a server state update into the local state.
        try:
            self.state.update_state(data)
        except Exception as e:
            import traceback
            traceback.print_exc()
class MinerEnv:
    """Thin client around the game socket that exposes the miner state as a
    flat DQN feature vector.

    Relies on project-level names: GameSocket, State, TreeID, TrapID,
    SwampID, np.
    """

    def __init__(self, host, port):
        self.socket = GameSocket(host, port)
        self.state = State()
        self.score_pre = self.state.score  # last known score, kept for reward shaping

    def start(self):
        # Open the connection to the game server.
        self.socket.connect()

    def end(self):
        # Close the connection to the game server.
        self.socket.close()

    def send_map_info(self, request):
        # Ask the server to run a particular map.
        self.socket.send(request)

    def reset(self):
        # Begin a new episode: pull the initial game description and seed state.
        try:
            message = self.socket.receive()
            print(message)
            self.state.init_state(message)
        except Exception as e:
            import traceback
            traceback.print_exc()

    def step(self, action):
        # Send one action, then fold the server's reply into local state.
        self.socket.send(action)
        try:
            message = self.socket.receive()
            self.state.update_state(message)
        except Exception as e:
            import traceback
            traceback.print_exc()

    # Functions are customized by client
    def get_state(self):
        """Flatten the map into a vector: obstacles as negative IDs, gold as
        its (positive) amount, followed by the agent's x/y/energy and every
        other player's x/y."""
        info = self.state.mapInfo
        width, height = info.max_x + 1, info.max_y + 1
        view = np.zeros([width, height], dtype=int)
        for col in range(width):
            for row in range(height):
                obstacle = info.get_obstacle(col, row)
                if obstacle == TreeID:  # Tree
                    view[col, row] = -TreeID
                if obstacle == TrapID:  # Trap
                    view[col, row] = -TrapID
                if obstacle == SwampID:  # Swamp
                    view[col, row] = -SwampID
                if info.gold_amount(col, row) > 0:
                    # Gold overrides any obstacle marker on the same cell.
                    view[col, row] = info.gold_amount(col, row)

        features = view.flatten().tolist()  # map matrix -> flat vector
        # Agent position and energy.
        features.append(self.state.x)
        features.append(self.state.y)
        features.append(self.state.energy)
        # Positions of the other players.
        for player in self.state.players:
            if player["playerId"] != self.state.id:
                features.append(player["posx"])
                features.append(player["posy"])
        return np.array(features)

    def check_terminate(self):
        # True once the game has left the PLAYING status.
        return self.state.status != State.STATUS_PLAYING
class MinerEnv:
    """Socket-backed client environment that produces, for every map cell, an
    optimistic estimate of the gold reachable from that cell (a per-gold-pile
    dynamic program discounted by travel cost and opponent proximity).

    Relies on project-level names: GameSocket, State, TreeID, TrapID,
    SwampID, np.
    """

    def __init__(self, host, port):
        self.socket = GameSocket(host, port)
        self.state = State()
        self.score_pre = self.state.score  # Storing the last score for designing the reward function
        self.decay = 27  # per-step travel cost subtracted in the estimate DP
        self.area_affect = 3  # Manhattan radius of opponent influence
        self.affect_eff = 0.92  # per-ring discount applied near an opponent
        self.view = None  # last terrain/gold map (set by get_state)
        self.energy_view = None  # last per-cell energy-cost map
        self.current_action = None

    def start(self):  # connect to server
        self.socket.connect()

    def end(self):  # disconnect server
        self.socket.close()

    def send_map_info(self, request):  # tell server which map to run
        self.socket.send(request)

    def reset(self):  # start new game
        try:
            message = self.socket.receive()  # receive game info from server
            print(message)
            self.state.init_state(message)  # init state
        except Exception as e:
            import traceback
            traceback.print_exc()

    def step(self, action):  # step process
        self.socket.send(action)  # send action to server
        try:
            message = self.socket.receive()  # receive new state from server
            # print("New state: ", message)
            self.state.update_state(message)  # update to local state
            print(self.state.score)
        except Exception as e:
            import traceback
            traceback.print_exc()

    # Functions are customized by client
    def get_state(self):
        """Return a (max_x+1, max_y+1) array whose entry [i][j] is the best
        gold estimate over all piles, each propagated from its cell with a
        per-step `decay` plus the terrain value of the crossed cell.
        Side effects: fills self.view, self.energy_view, self.gold_map.
        """
        # Building the map: terrain/gold values and per-cell energy costs.
        self.view = np.zeros(
            [self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1],
            dtype=int)
        self.energy_view = np.zeros(
            [self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1],
            dtype=int)
        self.gold_map = np.zeros(
            [self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1],
            dtype=int)
        gold_opt = np.zeros(
            [self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1],
            dtype=int)
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                if self.state.mapInfo.get_obstacle(i, j) == TreeID:  # Tree
                    self.view[i, j] = -20
                    self.energy_view[i, j] = -20
                elif self.state.mapInfo.get_obstacle(i, j) == TrapID:  # Trap
                    self.view[i, j] = -10
                    self.energy_view[i, j] = -10
                elif self.state.mapInfo.get_obstacle(i, j) == SwampID:  # Swamp
                    self.view[i, j] = self.state.mapInfo.get_obstacle_value(i, j)
                    self.energy_view[
                        i, j] = self.state.mapInfo.get_obstacle_value(i, j)
                elif self.state.mapInfo.gold_amount(i, j) > 0:
                    self.view[i, j] = self.state.mapInfo.gold_amount(i, j)
                    self.energy_view[i, j] = -4  # digging cost on a gold cell
                    self.gold_map[i, j] = self.state.mapInfo.gold_amount(i, j)
                else:
                    # plain land
                    self.view[i, j] = -1
                    self.energy_view[i, j] = -1
        # print(self.gold_map)
        # player update goldmap: discount gold that opponents are likely to
        # take — 0.63x if an opponent stands on the pile, otherwise a
        # distance-graded affect_eff factor over a Manhattan diamond.
        for player in self.state.players:
            if player["playerId"] != self.state.id:
                x = player["posx"]
                y = player["posy"]
                if 0 <= x <= self.state.mapInfo.max_x and 0 <= y <= self.state.mapInfo.max_y:
                    if self.gold_map[x][y] > 0:
                        if x != self.state.x or y != self.state.y:
                            self.gold_map[x][y] = self.gold_map[x][y] * 0.63
                            self.view[x][y] = self.gold_map[x][y]
                    else:
                        # Opponent not on gold: discount gold cells ring by
                        # ring (t = ring radius, k walks the diamond edge).
                        for t in range(1, self.area_affect + 1):
                            for k in range(-t, t):
                                if 0 <= x + k <= self.state.mapInfo.max_x and 0 <= y + t - abs(
                                        k) <= self.state.mapInfo.max_y:
                                    if self.gold_map[x + k][y + t - abs(k)] > 0:
                                        self.gold_map[x + k][
                                            y + t - abs(k)] = self.gold_map[
                                                x + k][y + t - abs(k)] * pow(
                                                    self.affect_eff,
                                                    self.area_affect + 1 - t)
                                        self.view[x + k][y + t - abs(k)] = self.gold_map[
                                            x + k][y + t - abs(k)]
                                if 0 <= x - k <= self.state.mapInfo.max_x and 0 <= y - t + abs(
                                        k) <= self.state.mapInfo.max_y:
                                    if self.gold_map[x - k][y - t + abs(k)] > 0:
                                        self.gold_map[x - k][
                                            y - t + abs(k)] = self.gold_map[
                                                x - k][y - t + abs(k)] * pow(
                                                    self.affect_eff,
                                                    self.area_affect + 1 - t)
                                        self.view[x - k][y - t + abs(k)] = self.gold_map[
                                            x - k][y - t + abs(k)]
        print(self.gold_map)
        # For each gold pile, propagate its (discounted) amount outward:
        # first along the pile's row/column, then into the four quadrants,
        # subtracting `decay` per step and adding the crossed cell's value.
        arr = []
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                if self.state.mapInfo.gold_amount(i, j) > 0:
                    gold_est = np.zeros([
                        self.state.mapInfo.max_x + 1,
                        self.state.mapInfo.max_y + 1
                    ],
                                        dtype=int)
                    gold_est[i][j] = self.gold_map[i][j]
                    # Column above / below the pile (clamped at 0).
                    for a in range(0, i):
                        gold_est[i - a - 1][j] = max(
                            gold_est[i - a][j] - self.decay +
                            self.view[i - a - 1][j], 0)
                    for b in range(i + 1, self.state.mapInfo.max_x + 1):
                        gold_est[b][j] = max(
                            gold_est[b - 1][j] - self.decay + self.view[b][j],
                            0)
                    # Row left / right of the pile (clamped at 0).
                    for c in range(0, j):
                        gold_est[i][j - c - 1] = max(
                            gold_est[i][j - c] - self.decay +
                            self.view[i][j - c - 1], 0)
                    for d in range(j + 1, self.state.mapInfo.max_y + 1):
                        gold_est[i][d] = max(
                            gold_est[i][d - 1] - self.decay + self.view[i][d],
                            0)
                    # Four quadrants: each cell takes the better of its two
                    # already-computed neighbours (NOTE: not clamped at 0,
                    # unlike the row/column sweeps above).
                    for x in range(0, i):
                        for y in range(0, j):
                            gold_est[i - x - 1][j - y - 1] = max(gold_est[i - x][j - y - 1], gold_est[i - x - 1][j - y]) - self.decay + \
                                                             self.view[i - x - 1][j - y - 1]
                    for x in range(0, i):
                        for y in range(j + 1, self.state.mapInfo.max_y + 1):
                            gold_est[i - x - 1][y] = max(gold_est[i - x][y], gold_est[i - x - 1][y - 1]) - self.decay + \
                                                     self.view[i - x - 1][y]
                    for x in range(i + 1, self.state.mapInfo.max_x + 1):
                        for y in range(0, j):
                            gold_est[x][j - y - 1] = max(gold_est[x][j - y], gold_est[x - 1][j - y - 1]) - self.decay + \
                                                     self.view[x][j - y - 1]
                    for x in range(i + 1, self.state.mapInfo.max_x + 1):
                        for y in range(j + 1, self.state.mapInfo.max_y + 1):
                            gold_est[x][y] = max(
                                gold_est[x - 1][y],
                                gold_est[x][y - 1]) - self.decay + self.view[x][y]
                    # print(i, j, self.state.mapInfo.gold_amount(i, j))
                    # print(gold_est)
                    arr.append(gold_est)
        # Element-wise maximum over all per-pile estimates.
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                for t in range(len(arr)):
                    if gold_opt[i][j] < arr[t][i][j]:
                        gold_opt[i][j] = arr[t][i][j]
        # print(gold_opt)
        return np.array(gold_opt)

    def check_terminate(self):
        # True once the game has left the PLAYING status.
        return self.state.status != State.STATUS_PLAYING
class MyBot:
    """Heuristic miner agent.

    Talks to the game server through ``GameSocket``, mirrors the game in a
    local ``State``, and picks one action per turn in :meth:`next_action`:
    every gold pile is scored (amount minus travel cost and rival-competition
    penalties), the best-scoring pile becomes the target, and the bot routes
    toward it with an energy-aware choice between candidate moves.
    """

    # Action codes understood by the game server.
    ACTION_GO_LEFT = 0
    ACTION_GO_RIGHT = 1
    ACTION_GO_UP = 2
    ACTION_GO_DOWN = 3
    ACTION_FREE = 4   # rest to regain energy
    ACTION_CRAFT = 5  # mine gold on the current cell

    def __init__(self, host, port):
        self.socket = GameSocket(host, port)
        self.state = State()
        self.is_moving_right = True  # default: go to right side
        self.steps = 0        # turns taken so far (a game lasts 100 turns)
        self.pre_action = 0   # action chosen on the previous turn
        self.pre_x = -1       # our position on the previous turn (-1 = none yet)
        self.pre_y = -1
        self.search_left_right = True  # re-evaluate the left/right split next turn
        self.largest_gold_x = -1       # current gold target (-1 = no target)
        self.largest_gold_y = -1
        self.left_or_right = 2         # 1 = search left half, 2 = both, 3 = right half

    def start(self):  # connect to server
        self.socket.connect()

    def end(self):  # disconnect server
        self.socket.close()

    def send_map_info(self, request):  # tell server which map to run
        self.socket.send(request)

    def reset(self):  # start new game
        try:
            message = self.socket.receive()  # receive game info from server
            self.state.init_state(message)  # init state
        except Exception as e:
            import traceback
            traceback.print_exc()

    def step(self, tmp_action):  # step process
        self.socket.send(tmp_action)  # send action to server
        try:
            message = self.socket.receive()  # receive new state from server
            self.state.update_state(message)  # update to local state
        except Exception as e:
            import traceback
            traceback.print_exc()

    def check_terminate(self):
        # Checking the status of the game
        # it indicates the game ends or is playing
        return self.state.status != State.STATUS_PLAYING

    def getTotalGoldBetweenTwoPoints(self, src_x, src_y, des_x, des_y, initial_flag=False):
        """Sum the discounted value of every gold pile inside the bounding box
        spanned by (src, des), excluding the two endpoint cells themselves.

        Each pile is discounted by the rivals standing on it and by our
        Manhattan distance to it (50 energy-equivalent per step per rival).
        ``initial_flag`` also counts players that have not reported a status
        yet (turn 0, before the "energy" key appears in their dict).
        """
        total_gold = 0
        # Normalize the box corners so start_* <= end_*.
        # NOTE(review): the next four assignments are redundant with the
        # if/elif normalization below; kept as-is (harmless).
        start_x = src_x
        end_x = des_x
        start_y = src_y
        end_y = des_y
        if src_x < des_x:
            start_x = src_x
            end_x = des_x
        elif src_x > des_x:
            start_x = des_x
            end_x = src_x
        if src_y < des_y:
            start_y = src_y
            end_y = des_y
        elif src_y > des_y:
            start_y = des_y
            end_y = src_y
        for gold in self.state.mapInfo.golds:
            x = gold["posx"]
            y = gold["posy"]
            if x != des_x or y != des_y:  # skip the destination pile
                if x != src_x or y != src_y:  # skip the source pile
                    if (start_x <= gold["posx"] <= end_x) and (start_y <= gold["posy"] <= end_y):
                        gold_on_ground = gold["amount"]
                        distance = abs(x - self.state.x) + abs(y - self.state.y)
                        count_players = 0
                        for player in self.state.players:
                            if player["posx"] == x and player["posy"] == y:
                                if "energy" in player:
                                    if player["status"] == self.state.STATUS_PLAYING:
                                        count_players += 1
                                elif initial_flag:  # 0 step, initial state
                                    count_players += 1
                        # Share the penalized pile between us and the rivals on it.
                        total_gold += (gold_on_ground - (count_players * distance * 50)) / (count_players + 1)
        return total_gold

    def get_num_of_gold_position(self):
        """Return how many cells on the map still hold gold."""
        num_of_gold_position = 0
        for gold in self.state.mapInfo.golds:
            if gold["amount"] > 0:
                num_of_gold_position += 1
        return num_of_gold_position

    def get_num_of_players_at_position(self, x, y, initial_flag=False):
        """Count active players standing on cell (x, y).

        ``initial_flag`` also counts players that have not reported a status
        yet (the very first turn, before "energy" appears in their dict).
        """
        num_of_players = 0
        for player in self.state.players:
            if "energy" in player:
                if player["status"] == self.state.STATUS_PLAYING:
                    if player["posx"] == x and player["posy"] == y:
                        num_of_players += 1
            elif initial_flag:  # 0 step, initial state
                if player["posx"] == x and player["posy"] == y:
                    num_of_players += 1
        return num_of_players

    def findNearestGold(self, my_bot_x, my_bot_y):
        """Return the Manhattan distance to the closest non-empty gold pile
        (100 acts as "infinity" when no pile is closer than that)."""
        min_distance = 100
        for gold in self.state.mapInfo.golds:
            if gold["amount"] > 0:
                i = gold["posx"]
                j = gold["posy"]
                distance = abs(i - my_bot_x) + abs(j - my_bot_y)
                min_distance = min(min_distance, distance)
        return min_distance

    def goLeftOrRight(self, my_bot_x, my_bot_y, initial_flag=False):
        """Decide which half of the map is richer in gold per competing player.

        Returns 1 to search only left of the bot, 3 only right, 2 both.
        Gold and players exactly on our column count toward both sides.
        """
        total_gold_left = 0
        total_gold_right = 0
        for gold in self.state.mapInfo.golds:
            gold_amount = gold["amount"]
            if gold_amount > 0:
                i = gold["posx"]
                j = gold["posy"]
                if i >= my_bot_x:
                    total_gold_right += gold_amount
                if i <= my_bot_x:
                    total_gold_left += gold_amount
        count_players_left = 0
        count_players_right = 0
        for player in self.state.players:
            if "energy" in player:
                if player["status"] == self.state.STATUS_PLAYING:
                    if player["posx"] >= my_bot_x:
                        count_players_right += 1
                    if player["posx"] <= my_bot_x:
                        count_players_left += 1
            elif initial_flag:  # 0 step, initial state
                if player["posx"] >= my_bot_x:
                    count_players_right += 1
                if player["posx"] <= my_bot_x:
                    count_players_left += 1
        # NOTE(review): no playerId filter, so this bot itself appears to be
        # counted on both sides, keeping both divisors >= 1 — confirm.
        total_gold_left = total_gold_left / count_players_left
        total_gold_right = total_gold_right / count_players_right
        # 1 ==> left; 2 ==> both; 3 ==> right
        if total_gold_left > total_gold_right:
            return 1
        elif total_gold_left == total_gold_right:
            return 2
        else:
            return 3

    def myGetGoldAmount(self, x, y, initial_flag=False, are_we_here=False):
        """Estimate the gold we could net from cell (x, y).

        With ``are_we_here`` the pile is simply split between the players
        standing on it; otherwise our travel cost (50 per step) and the
        rivals' head start are subtracted before splitting.
        """
        distance = abs(x - self.state.x) + abs(y - self.state.y)
        gold_on_ground = self.state.mapInfo.gold_amount(x, y)
        if gold_on_ground == 0:
            return 0
        count_players = 0
        for player in self.state.players:
            if player["posx"] == x and player["posy"] == y:
                if "energy" in player:
                    if player["status"] == self.state.STATUS_PLAYING:
                        count_players += 1
                elif initial_flag:  # 0 step, initial state
                    count_players += 1
        if are_we_here:
            # NOTE(review): assumes count_players >= 1 when we stand on the
            # pile (we should be in the players list) — otherwise this divides
            # by zero; confirm against the server's player list contents.
            return gold_on_ground / count_players
        else:
            return (gold_on_ground - (count_players * distance * 50)) / (count_players + 1) - (distance * 50)
            #return gold_on_ground / (count_players + 1) - (distance * 50) - (50 * count_players * distance)
            # +1 because assuming that we will come here

    def findLargestGold(self, steps, initial_flag=False, leftOrRight=2):
        """Pick the best-scoring gold pile as the next target.

        Score = discounted pile value (rival + travel penalties, as in
        :meth:`myGetGoldAmount`) plus the discounted gold lying inside the
        bounding box on the way there.  Our own cell and the previous turn's
        cell are excluded; near the end of the game only piles we can still
        reach (``100 - steps > distance``) are considered.  ``leftOrRight``
        restricts the search to one half of the map (1 = left, 3 = right,
        2 = everywhere).  Ties are broken by the larger raw amount.

        Returns (x, y) of the chosen pile, or (-1, -1) when nothing qualifies.
        """
        my_bot_x, my_bot_y = self.state.x, self.state.y
        largest_gold_x = -1
        largest_gold_y = -1
        pre_gold = 0        # raw amount of the current best pile (tie-break)
        max_gold = -100000  # best score so far
        for goal in self.state.mapInfo.golds:
            if goal["posx"] != my_bot_x or goal["posy"] != my_bot_y:
                if goal["posx"] != self.pre_x or goal["posy"] != self.pre_y:
                    i = goal["posx"]
                    j = goal["posy"]
                    distance = abs(i - self.state.x) + abs(j - self.state.y)
                    if steps < 80 or 100 - steps > distance:
                        if leftOrRight == 2:
                            if goal["amount"] > 0:
                                count_players = 0
                                for player in self.state.players:
                                    if player["posx"] == i and player["posy"] == j:
                                        if "energy" in player:
                                            if player["status"] == self.state.STATUS_PLAYING:
                                                count_players += 1
                                        elif initial_flag:  # 0 step, initial state
                                            count_players += 1
                                #gold_amount = (goal["amount"] / (count_players + 1)) - (distance * 50) - (50 * count_players * distance)
                                gold_amount = (goal["amount"] - (count_players * distance * 50)) / (count_players + 1) - (distance * 50)
                                gold_amount += self.getTotalGoldBetweenTwoPoints(my_bot_x, my_bot_y, i, j, initial_flag)
                                if gold_amount > max_gold:
                                    largest_gold_x = i
                                    largest_gold_y = j
                                    max_gold = gold_amount
                                    pre_gold = goal["amount"]
                                elif gold_amount == max_gold:
                                    if goal["amount"] > pre_gold:
                                        # prev_distance = (largest_gold_x - my_bot_x) * (largest_gold_x - my_bot_x) + \
                                        #     (largest_gold_y - my_bot_y) * (largest_gold_y - my_bot_y)
                                        # new_distance = (i - my_bot_x) * (i - my_bot_x) + (j - my_bot_y) * (j - my_bot_y)
                                        # if new_distance < prev_distance:
                                        largest_gold_x = i
                                        largest_gold_y = j
                                        max_gold = gold_amount
                                        pre_gold = goal["amount"]
                        # only search at left side
                        if leftOrRight == 1:
                            if goal["amount"] > 0:
                                if i <= my_bot_x:
                                    count_players = 0
                                    for player in self.state.players:
                                        if player["posx"] == i and player["posy"] == j:
                                            if "energy" in player:
                                                if player["status"] == self.state.STATUS_PLAYING:
                                                    count_players += 1
                                            elif initial_flag:  # 0 step, initial state
                                                count_players += 1
                                    #gold_amount = (goal["amount"] / (count_players + 1)) - (distance * 50) - (
                                    #    50 * count_players * distance)
                                    gold_amount = (goal["amount"] - (count_players * distance * 50)) / (count_players + 1) - (distance * 50)
                                    gold_amount += self.getTotalGoldBetweenTwoPoints(my_bot_x, my_bot_y, i, j, initial_flag)
                                    if gold_amount > max_gold:
                                        largest_gold_x = i
                                        largest_gold_y = j
                                        max_gold = gold_amount
                                        pre_gold = goal["amount"]
                                    elif gold_amount == max_gold:
                                        if goal["amount"] > pre_gold:
                                            #prev_distance = (largest_gold_x - my_bot_x) * (largest_gold_x - my_bot_x) + \
                                            #    (largest_gold_y - my_bot_y) * (largest_gold_y - my_bot_y)
                                            #new_distance = (i - my_bot_x) * (i - my_bot_x) + (j - my_bot_y) * (j - my_bot_y)
                                            #if new_distance < prev_distance:
                                            largest_gold_x = i
                                            largest_gold_y = j
                                            max_gold = gold_amount
                                            pre_gold = goal["amount"]
                        # only search at right side
                        if leftOrRight == 3:
                            if goal["amount"] > 0:
                                if i >= my_bot_x:
                                    count_players = 0
                                    for player in self.state.players:
                                        if player["posx"] == i and player["posy"] == j:
                                            if "energy" in player:
                                                if player["status"] == self.state.STATUS_PLAYING:
                                                    count_players += 1
                                            elif initial_flag:
                                                # 0 step, initial state
                                                count_players += 1
                                    #gold_amount = (goal["amount"] / (count_players + 1)) - (distance * 50) - (
                                    #    50 * count_players * distance)
                                    gold_amount = (goal["amount"] - (count_players * distance * 50)) / (count_players + 1) - (distance * 50)
                                    gold_amount += self.getTotalGoldBetweenTwoPoints(my_bot_x, my_bot_y, i, j, initial_flag)
                                    if gold_amount > max_gold:
                                        largest_gold_x = i
                                        largest_gold_y = j
                                        max_gold = gold_amount
                                        pre_gold = goal["amount"]
                                    elif gold_amount == max_gold:
                                        if goal["amount"] > pre_gold:
                                            #prev_distance = (largest_gold_x - my_bot_x) * (largest_gold_x - my_bot_x) + \
                                            #    (largest_gold_y - my_bot_y) * (largest_gold_y - my_bot_y)
                                            #new_distance = (i - my_bot_x) * (i - my_bot_x) + (j - my_bot_y) * (j - my_bot_y)
                                            #if new_distance < prev_distance:
                                            largest_gold_x = i
                                            largest_gold_y = j
                                            max_gold = gold_amount
                                            pre_gold = goal["amount"]
        return largest_gold_x, largest_gold_y

    def findLargestGoldInSmallMap(self, des_x, des_y):
        """Scan the rectangle between our position and (des_x, des_y) for a
        better intermediate pile (per :meth:`myGetGoldAmount`), excluding the
        destination cell and our own cell.

        Returns (x, y) of the best pile, or (None, None) when the rectangle
        holds no pile with a positive estimated value.
        """
        x, y = self.state.x, self.state.y
        largest_gold_x = None
        largest_gold_y = None
        next_step_x = 0
        next_step_y = 0
        pre_gold = 0  # raw amount of the current best pile (tie-break)
        if x < des_x:
            next_step_x = 1
        else:
            next_step_x = -1
        if y < des_y:
            next_step_y = 1
        else:
            next_step_y = -1
        max_gold = -100000
        # Walk column by column from our position toward the destination.
        while x != des_x + next_step_x:
            while y != des_y + next_step_y:
                if x != des_x or y != des_y:
                    if self.state.x != x or self.state.y != y:
                        gold_amount = self.myGetGoldAmount(x, y)
                        if gold_amount > 0:
                            if gold_amount > max_gold:
                                largest_gold_x = x
                                largest_gold_y = y
                                max_gold = gold_amount
                                pre_gold = self.state.mapInfo.gold_amount(x, y)
                            elif gold_amount == max_gold:
                                if self.state.mapInfo.gold_amount(x, y) > pre_gold:
                                    #prev_distance = (largest_gold_x - self.state.x) * (largest_gold_x - self.state.x) + \
                                    #    (largest_gold_y - self.state.y) * (largest_gold_y - self.state.y)
                                    #new_distance = (x - self.state.x) * (x - self.state.x) + (y - self.state.y) * (y - self.state.y)
                                    #if new_distance < prev_distance:
                                    largest_gold_x = x
                                    largest_gold_y = y
                                    max_gold = gold_amount
                                    pre_gold = self.state.mapInfo.gold_amount(x, y)
                y += next_step_y
            y = self.state.y  # rewind the row cursor before the next column
            x += next_step_x
        return largest_gold_x, largest_gold_y

    def getEnergyAtPosition(self, x, y):
        """Return (energy cost of stepping onto cell (x, y), obstacle type).

        Gold costs 4; tree 20; trap 10 (when its value is -10); swamp costs
        the negated obstacle value; plain ground costs 1 (type 0).
        """
        energy = 1
        tmp_type = 0
        gold = self.state.mapInfo.gold_amount(x, y)
        if gold > 0:
            energy = 4
        else:
            for obstacle in self.state.mapInfo.obstacles:
                i = obstacle["posx"]
                j = obstacle["posy"]
                if i == x and j == y:
                    tmp_type = obstacle["type"]
                    if tmp_type == 1:  # Tree
                        energy = 20
                    elif tmp_type == 2:  # Trap
                        if obstacle["value"] == -10:
                            energy = 10
                    elif tmp_type == 3:  # Swamp
                        energy = -obstacle["value"]
                    break
        return energy, tmp_type

    def getMinEnergyFromSrc2Des(self, x, y, des_x, des_y, action_option_1, action_option_2):
        """Recursively compute the minimum total energy of any monotone path
        from (x, y) to (des_x, des_y) moving only in the two given directions.

        The destination cell is charged a flat 4 (gold).  A cell costing 100
        (deep swamp) is treated as impassable via a 10000 penalty.  There is
        no memoization, so the cost grows combinatorially with the rectangle
        size — callers only use this when the distance is small (<= 14).
        """
        if x == des_x and y == des_y:
            return 4  # gold energy
        # get energy at (x,y)
        energy = 1
        energy, tmp_type = self.getEnergyAtPosition(x, y)
        if energy == 100:
            energy = 10000  # effectively forbid stepping into a -100 swamp
        energy_option_1 = 100000
        energy_option_2 = 100000
        if x != des_x:
            if action_option_1 == self.ACTION_GO_RIGHT:
                next_x = x + 1
            else:
                next_x = x - 1
            energy_option_1 = self.getMinEnergyFromSrc2Des(next_x, y, des_x, des_y, action_option_1, action_option_2)
        if y != des_y:
            if action_option_2 == self.ACTION_GO_DOWN:
                next_y = y + 1
            else:
                next_y = y - 1
            energy_option_2 = self.getMinEnergyFromSrc2Des(x, next_y, des_x, des_y, action_option_1, action_option_2)
        return energy + min(energy_option_1, energy_option_2)

    def getActionBaseOnEnergy(self, action_option_1, action_option_2):
        """Choose the cheaper of the two candidate moves by the immediate
        energy cost of the cell each would enter; rest when we cannot afford
        the chosen move.
        """
        my_bot_x, my_bot_y = self.state.x, self.state.y
        n_action = action_option_1
        require_energy = 100
        if action_option_1 == self.ACTION_GO_RIGHT:
            next_x = my_bot_x + 1
        else:
            next_x = my_bot_x - 1
        if action_option_2 == self.ACTION_GO_DOWN:
            next_y = my_bot_y + 1
        else:
            next_y = my_bot_y - 1
        energy_1 = 1
        energy_2 = 1
        gold = self.state.mapInfo.gold_amount(next_x, my_bot_y)
        if gold > 0:
            energy_1 = 4
        gold = self.state.mapInfo.gold_amount(my_bot_x, next_y)
        if gold > 0:
            energy_2 = 4
        for obstacle in self.state.mapInfo.obstacles:
            i = obstacle["posx"]
            j = obstacle["posy"]
            if i == next_x and j == my_bot_y:
                if obstacle["type"] == 1:  # Tree
                    energy_1 = 20
                elif obstacle["type"] == 2:  # Trap
                    if obstacle["value"] == -10:
                        energy_1 = 10
                elif obstacle["type"] == 3:  # Swamp
                    energy_1 = -obstacle["value"]
            if i == my_bot_x and j == next_y:
                if obstacle["type"] == 1:  # Tree
                    energy_2 = 20
                elif obstacle["type"] == 2:  # Trap
                    if obstacle["value"] == -10:
                        energy_2 = 10
                elif obstacle["type"] == 3:  # Swamp
                    energy_2 = -obstacle["value"]
        if energy_1 < energy_2:
            n_action = action_option_1
            require_energy = energy_1
        else:
            n_action = action_option_2
            require_energy = energy_2
        if self.state.energy <= require_energy:
            n_action = self.ACTION_FREE
        #print("require_energy = {0}".format(require_energy))
        #print("choose action = {0}".format(n_action))
        return n_action

    def goToTarget(self, des_x, des_y):
        """Return the next action that moves us toward (des_x, des_y).

        When we share a row/column with the target, move straight, sidestep
        a >= 50-energy cell via the cheaper perpendicular neighbor, and rest
        when the move is unaffordable.  On a diagonal, compare the two
        monotone path costs (cheap per-cell heuristic beyond distance 14,
        exact recursive search inside).  Resting is also repeated early in
        the game until energy reaches 38, unless we are leaving a swamp.
        """
        n_action = self.ACTION_FREE
        require_energy = 100
        my_bot_x, my_bot_y = self.state.x, self.state.y
        next_my_bot_x = my_bot_x
        next_my_bot_y = my_bot_y
        # Perpendicular sidestep candidates, used to dodge very expensive cells.
        tmp_my_bot_x_1 = my_bot_x
        tmp_my_bot_y_1 = my_bot_y
        tmp_my_bot_x_2 = my_bot_x
        tmp_my_bot_y_2 = my_bot_y
        tmp_action_1 = self.ACTION_FREE
        tmp_action_2 = self.ACTION_FREE
        if my_bot_x == des_x:
            if 0 <= (tmp_my_bot_x_1 + 1) <= self.state.mapInfo.max_x:
                tmp_my_bot_x_1 += 1
                tmp_action_1 = self.ACTION_GO_RIGHT
            if 0 <= (tmp_my_bot_x_2 - 1) <= self.state.mapInfo.max_x:
                tmp_my_bot_x_2 -= 1
                tmp_action_2 = self.ACTION_GO_LEFT
            if my_bot_y < des_y:
                n_action = self.ACTION_GO_DOWN
                next_my_bot_y += 1
            else:
                n_action = self.ACTION_GO_UP
                next_my_bot_y -= 1
        elif my_bot_y == des_y:
            if 0 <= (tmp_my_bot_y_1 + 1) <= self.state.mapInfo.max_y:
                tmp_my_bot_y_1 += 1
                tmp_action_1 = self.ACTION_GO_DOWN
            if 0 <= (tmp_my_bot_y_2 - 1) <= self.state.mapInfo.max_y:
                tmp_my_bot_y_2 -= 1
                tmp_action_2 = self.ACTION_GO_UP
            if my_bot_x < des_x:
                n_action = self.ACTION_GO_RIGHT
                next_my_bot_x += 1
            else:
                n_action = self.ACTION_GO_LEFT
                next_my_bot_x -= 1
        else:
            # Diagonal case: pick between the horizontal and vertical move.
            if my_bot_x < des_x:
                action_option_1 = self.ACTION_GO_RIGHT
                next_my_bot_x += 1
            else:
                action_option_1 = self.ACTION_GO_LEFT
                next_my_bot_x -= 1
            if my_bot_y < des_y:
                action_option_2 = self.ACTION_GO_DOWN
                next_my_bot_y += 1
            else:
                action_option_2 = self.ACTION_GO_UP
                next_my_bot_y -= 1
            distance = abs(my_bot_x - des_x) + abs(my_bot_y - des_y)
            if distance > 10 + 4:
                # Too far for the exact recursive search — greedy one-step cost.
                n_action = self.getActionBaseOnEnergy(action_option_1, action_option_2)
            else:
                energy_action_1 = self.getMinEnergyFromSrc2Des(next_my_bot_x, my_bot_y, des_x, des_y, action_option_1, action_option_2)
                energy_action_2 = self.getMinEnergyFromSrc2Des(my_bot_x, next_my_bot_y, des_x, des_y, action_option_1, action_option_2)
                if energy_action_1 <= energy_action_2:
                    n_action = action_option_1
                    require_energy, tmp_type = self.getEnergyAtPosition(next_my_bot_x, my_bot_y)
                else:
                    n_action = action_option_2
                    require_energy, tmp_type = self.getEnergyAtPosition(my_bot_x, next_my_bot_y)
                if self.state.energy <= require_energy:
                    n_action = self.ACTION_FREE
                elif tmp_type != 3 and self.pre_action == self.ACTION_FREE and self.state.energy < 38 and self.steps < 70:
                    return self.ACTION_FREE  # keep resting until energy reaches 38
            return n_action
        # Straight-line case: check the cost of the cell we are about to enter.
        require_energy = 1
        require_energy, tmp_type = self.getEnergyAtPosition(next_my_bot_x, next_my_bot_y)
        if require_energy >= 50:  # ==100
            # Sidestep the expensive cell via the cheaper perpendicular neighbor.
            tmp_require_energy_1, tmp_type_1 = self.getEnergyAtPosition(tmp_my_bot_x_1, tmp_my_bot_y_1)
            tmp_require_energy_2, tmp_type_2 = self.getEnergyAtPosition(tmp_my_bot_x_2, tmp_my_bot_y_2)
            tmp_require_energy = tmp_require_energy_1
            tmp_action = tmp_action_1
            if tmp_require_energy_2 < tmp_require_energy_1:
                tmp_require_energy = tmp_require_energy_2
                tmp_action = tmp_action_2
            if self.state.energy <= tmp_require_energy:
                return self.ACTION_FREE
            else:
                return tmp_action
        if self.state.energy <= require_energy:
            n_action = self.ACTION_FREE
        elif tmp_type != 3 and self.pre_action == self.ACTION_FREE and self.state.energy < 38 and self.steps < 70:
            return self.ACTION_FREE  # keep resting until energy reaches 38
        return n_action

    def next_action(self, initial_flag=False):
        """Decide this turn's action and update the bookkeeping
        (steps / pre_action / pre_x / pre_y) before returning it.

        Priority: (1) mine out the pile under us when the game is nearly over
        or it is the last pile; (2) mine/rest while standing on worthwhile
        gold; (3) otherwise pick a target pile (re-evaluating the left/right
        split when a rival reached our old target) and move toward it,
        retargeting through richer intermediate piles on the way.
        """
        my_bot_x, my_bot_y = self.state.x, self.state.y
        n_action = self.ACTION_FREE
        energy = self.state.energy
        gold_on_ground = self.myGetGoldAmount(my_bot_x, my_bot_y, initial_flag, are_we_here=True)
        remain_steps = 100 - self.steps
        if gold_on_ground > 0:
            # Endgame: too few turns left to go anywhere else, or last pile.
            if remain_steps <= (gold_on_ground // 50 + 1) or self.get_num_of_gold_position() == 1:
                #if remain_steps == 1:
                #    n_action = self.ACTION_CRAFT
                #else:
                if energy <= 5:
                    n_action = self.ACTION_FREE
                else:
                    n_action = self.ACTION_CRAFT
                self.steps += 1
                self.pre_action = n_action
                self.pre_x = my_bot_x
                self.pre_y = my_bot_y
                return n_action
        if gold_on_ground >= 50:
            if energy <= 5:
                n_action = self.ACTION_FREE
            elif energy >= (gold_on_ground / 50) * 5:  # enough to mine the whole pile
                n_action = self.ACTION_CRAFT
            elif self.pre_action == self.ACTION_FREE and energy < 38:
                n_action = self.ACTION_FREE  # keep resting until energy reaches 38
            else:
                n_action = self.ACTION_CRAFT
        elif gold_on_ground > 0 and self.steps > 80:
            if energy <= 5:
                n_action = self.ACTION_FREE
            elif energy >= (gold_on_ground / 50) * 5:
                n_action = self.ACTION_CRAFT
            elif self.pre_action == self.ACTION_FREE and energy < 38:
                n_action = self.ACTION_FREE
            else:
                n_action = self.ACTION_CRAFT
        else:
            # free if distance to nearest fold >= remain steps
            if self.findNearestGold(my_bot_x, my_bot_y) >= remain_steps:
                n_action = self.ACTION_FREE
            else:
                # A rival reached our target: rethink the left/right split.
                if self.largest_gold_x != -1 and self.largest_gold_y != -1:
                    if self.get_num_of_players_at_position(self.largest_gold_x, self.largest_gold_y, initial_flag) > 0:
                        self.search_left_right = True
                if self.steps < 80 and self.search_left_right == True:
                    #if self.steps < 50:
                    #if self.steps < 16:
                    self.left_or_right = self.goLeftOrRight(my_bot_x, my_bot_y, initial_flag)
                    self.search_left_right = False
                elif self.steps >= 80:
                    self.left_or_right = 2  # endgame: search the whole map
                largest_gold_x, largest_gold_y = self.findLargestGold(self.steps, initial_flag, self.left_or_right)
                if largest_gold_x < 0 or largest_gold_y < 0:
                    # Nothing worthwhile in the searched half: widen next turn.
                    self.left_or_right = 2
                    n_action = self.ACTION_FREE
                    self.steps += 1
                    self.pre_action = n_action
                    self.pre_x = my_bot_x
                    self.pre_y = my_bot_y
                    return n_action
                self.largest_gold_x = largest_gold_x
                self.largest_gold_y = largest_gold_y
                target_x = largest_gold_x
                target_y = largest_gold_y
                # Retarget through richer piles lying on the way, repeatedly.
                while True:
                    tmp_x, tmp_y = self.findLargestGoldInSmallMap(target_x, target_y)
                    if (tmp_x is None) or (tmp_y is None):
                        break
                    target_x = tmp_x
                    target_y = tmp_y
                n_action = self.goToTarget(target_x, target_y)
        self.steps += 1
        self.pre_action = n_action
        self.pre_x = my_bot_x
        self.pre_y = my_bot_y
        return n_action
class MinerEnv:
    """DQN training environment exposing the full map as the state vector.

    State = flattened obstacle/gold grid + predicted next-round energy + our
    (x, y, score, energy) + (pos, energy, score, free_count) of every rival.
    """

    def __init__(self, host, port):
        self.socket = GameSocket(host, port)
        self.state = State()
        self.score_pre = self.state.score  # Storing the last score for designing the reward function

    def start(self):  # connect to server
        self.socket.connect()

    def end(self):  # disconnect server
        self.socket.close()

    def send_map_info(self, request):  # tell server which map to run
        self.socket.send(request)

    def reset(self):  # start new game
        try:
            message = self.socket.receive()  # receive game info from server
            self.state.init_state(message)  # init state
        except Exception as e:
            import traceback
            traceback.print_exc()

    def step(self, action):  # step process
        self.socket.send(action)  # send action to server
        try:
            message = self.socket.receive()  # receive new state from server
            self.state.update_state(message)  # update to local state
        except Exception as e:
            import traceback
            traceback.print_exc()

    # Functions are customized by client
    def get_state(self):
        """Build and return the DQN state vector as a 1-D numpy array."""
        # Building the map: obstacles as negative IDs, gold as its amount.
        view = np.zeros(
            [self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1],
            dtype=int)
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                if self.state.mapInfo.get_obstacle(i, j) == TreeID:  # Tree
                    view[i, j] = -TreeID
                if self.state.mapInfo.get_obstacle(i, j) == TrapID:  # Trap
                    view[i, j] = -TrapID
                if self.state.mapInfo.get_obstacle(i, j) == SwampID:  # Swamp
                    view[i, j] = -SwampID
                if self.state.mapInfo.gold_amount(i, j) > 0:
                    view[i, j] = self.state.mapInfo.gold_amount(i, j)
        DQNState = view.flatten().tolist(
        )  # Flattening the map matrix to a vector
        # Add position and energy of agent to the DQNState
        next_round_energy = self.get_next_round_engergy()
        DQNState.append(next_round_energy)
        DQNState.append(self.state.x)
        DQNState.append(self.state.y)
        DQNState.append(self.state.score)
        DQNState.append(self.state.energy)
        # DQNState.append(self.state)
        # Add position of bots
        for player in self.state.players:
            if player["playerId"] != self.state.id:
                DQNState.append(player["posx"])
                DQNState.append(player["posy"])
                # Rival stats default to 0 until the server reports them.
                energy = 0
                score = 0
                free_count = 0
                if 'energy' in player:
                    energy = player["energy"]
                if 'score' in player:
                    score = player["score"]
                if 'free_count' in player:
                    free_count = player["free_count"]
                DQNState.append(energy)
                DQNState.append(score)
                DQNState.append(free_count)
        # Convert the DQNState from list to array for training
        DQNState = np.array(DQNState)
        return DQNState

    def get_next_round_engergy(self):
        """Predict our energy after the remaining rest rounds.

        NOTE(review): models energy regeneration while resting — confirm the
        formula against the game's actual recovery rule.  Also note it reads
        'freeCount' here while get_state reads 'free_count' for rivals;
        verify which key the server really sends.
        """
        free_count = 0
        for p in self.state.players:
            if p['playerId'] == self.state.id:
                free_count = p['freeCount']
        next_e = self.state.energy
        for i in range(4 - free_count):
            next_e += next_e / max(i, 1)
        return next_e

    def dig_score(self):
        """Placeholder — not implemented."""
        pass

    def get_reward(self):
        """Compute the shaped reward for the last transition, scaled by 1/100."""
        # Calculate reward
        reward = 0
        # NOTE(review): the delta term is commented out, so this rewards the
        # absolute score every step, not the score gained — confirm intent.
        score_action = self.state.score  # - self.score_pre
        self.score_pre = self.state.score
        if score_action > 0:
            # If the DQN agent crafts golds, then it should obtain a positive reward (equal score_action)
            reward += score_action
            # print('Craft gold : {}'.format(score_action))
        next_e = self.get_next_round_engergy()
        if next_e <= 0:
            reward -= 100  # about to run out of energy
        # Do not stand while you have full energy :(
        if next_e >= 50 and self.state.lastAction == 4:
            reward -= 100
        # If the DQN agent crashs into obstacels (Tree, Trap, Swamp), then it should be punished by a negative reward
        if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == TreeID:  # Tree
            reward -= TreeID
        if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == TrapID:  # Trap
            reward -= TrapID
        if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == SwampID:  # Swamp
            reward -= SwampID
        # If out of the map, then the DQN agent should be punished by a larger nagative reward.
        if self.state.status == State.STATUS_ELIMINATED_WENT_OUT_MAP:
            reward += -100
        # Run out of energy, then the DQN agent should be punished by a larger nagative reward.
        if self.state.status == State.STATUS_ELIMINATED_OUT_OF_ENERGY:
            reward += -100
        return reward / 100.

    def check_terminate(self):
        # Checking the status of the game
        # it indicates the game ends or is playing
        return self.state.status != State.STATUS_PLAYING
class MinerEnv:
    """Environment wrapper that turns the raw game state into a per-cell
    "reachable gold potential" map (:meth:`get_state`): piles are discounted
    when rivals stand on or near them, then each pile's value is propagated
    outward across the board with a per-step decay, and the cell-wise maximum
    over all piles is returned.
    """

    def __init__(self, host, port):
        self.socket = GameSocket(host, port)
        self.state = State()
        self.score_pre = self.state.score  #Storing the last score for designing the reward function
        self.decay = 27          # value lost per step of distance from a pile
        self.area_affect = 3     # Manhattan radius in which a rival devalues piles
        self.affect_eff = 0.92   # per-ring discount factor for that devaluation
        self.view = None         # map view: obstacles negative, gold positive
        self.energy_view = None  # per-cell energy penalty view of the same grid
        self.current_action = None
        self.gold_map = None         # rival-discounted gold amounts
        self.gold_map_origin = None  # undiscounted copy of gold_map

    def start(self):  #connect to server
        self.socket.connect()

    def end(self):  #disconnect server
        self.socket.close()

    def send_map_info(self, request):  #tell server which map to run
        self.socket.send(request)

    def reset(self):  #start new game
        # Choosing a map in the list
        # mapID = np.random.randint(1, 6)  # Choosing a map ID from 5 maps in Maps folder randomly
        mapID = 1
        posID_x = np.random.randint(
            MAP_MAX_X)  # Choosing a initial position of the DQN agent on
        # posID_x = 12
        # X-axes randomly
        posID_y = np.random.randint(
            MAP_MAX_Y
        )  # Choosing a initial position of the DQN agent on Y-axes randomly
        # posID_y = 1
        # Creating a request for initializing a map, initial position, the initial energy, and the maximum number of steps of the DQN agent
        request = ("map" + str(mapID) + "," + str(posID_x) + "," +
                   str(posID_y) + ",50,100")
        # Send the request to the game environment (GAME_SOCKET_DUMMY.py)
        self.send_map_info(request)
        try:
            message = self.socket.receive()  #receive game info from server
            print(message)
            self.state.init_state(message)  #init state
            print(self.state.score)
        except Exception as e:
            import traceback
            traceback.print_exc()

    def step(self, action):  #step process
        self.socket.send(action)  #send action to server
        try:
            message = self.socket.receive()  #receive new state from server
            #print("New state: ", message)
            self.state.update_state(message)  #update to local state
            print(self.state.score)
        except Exception as e:
            import traceback
            traceback.print_exc()

    # Functions are customized by client
    def get_state(self):
        """Return the per-cell gold-potential map as a 2-D numpy int array."""
        # Building the map
        self.view = np.zeros(
            [self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1],
            dtype=int)
        self.energy_view = np.zeros(
            [self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1],
            dtype=int)
        self.gold_map = np.zeros(
            [self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1],
            dtype=int)
        gold_opt = np.zeros(
            [self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1],
            dtype=int)
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                if self.state.mapInfo.get_obstacle(i, j) == TreeID:  # Tree
                    self.view[i, j] = -20
                    self.energy_view[i, j] = -20
                elif self.state.mapInfo.get_obstacle(i, j) == TrapID:  # Trap
                    self.view[i, j] = -10
                    self.energy_view[i, j] = -10
                elif self.state.mapInfo.get_obstacle(i, j) == SwampID:  # Swamp
                    self.view[i, j] = self.state.mapInfo.get_obstacle_value(i, j)
                    self.energy_view[
                        i, j] = self.state.mapInfo.get_obstacle_value(i, j)
                elif self.state.mapInfo.gold_amount(i, j) > 0:
                    self.view[i, j] = self.state.mapInfo.gold_amount(i, j)
                    self.energy_view[i, j] = -4
                    self.gold_map[i, j] = self.state.mapInfo.gold_amount(i, j)
                else:
                    self.view[i, j] = -1
                    self.energy_view[i, j] = -1
        self.gold_map_origin = copy.deepcopy(self.gold_map)
        # print(self.gold_map)
        # player update goldmap: discount piles that rivals occupy or approach.
        for player in self.state.players:
            if player["playerId"] != self.state.id:
                x = player["posx"]
                y = player["posy"]
                if 0 <= x <= self.state.mapInfo.max_x and 0 <= y <= self.state.mapInfo.max_y:
                    if self.gold_map[x][y] > 0:
                        if x != self.state.x or y != self.state.y:
                            # Rival stands on the pile: keep only 63% of it
                            # (int array assignment truncates the float).
                            self.gold_map[x][y] = self.gold_map[x][y] * 0.63
                            self.view[x][y] = self.gold_map[x][y]
                    else:
                        # Rival on empty ground: discount piles in the diamond
                        # of radius area_affect around them, stronger nearby.
                        for t in range(1, self.area_affect + 1):
                            # k = t is intentionally excluded; the mirrored
                            # second update covers the remaining ring cells.
                            for k in range(-t, t):
                                if 0 <= x + k <= self.state.mapInfo.max_x and 0 <= y + t - abs(
                                        k) <= self.state.mapInfo.max_y:
                                    if self.gold_map[x + k][y + t - abs(k)] > 0:
                                        self.gold_map[x + k][
                                            y + t - abs(k)] = self.gold_map[
                                                x + k][y + t - abs(k)] * pow(
                                                    self.affect_eff,
                                                    self.area_affect + 1 - t)
                                        self.view[x + k][y + t - abs(k)] = self.gold_map[
                                            x + k][y + t - abs(k)]
                                if 0 <= x - k <= self.state.mapInfo.max_x and 0 <= y - t + abs(
                                        k) <= self.state.mapInfo.max_y:
                                    if self.gold_map[x - k][y - t + abs(k)] > 0:
                                        self.gold_map[x - k][
                                            y - t + abs(k)] = self.gold_map[
                                                x - k][y - t + abs(k)] * pow(
                                                    self.affect_eff,
                                                    self.area_affect + 1 - t)
                                        self.view[x - k][y - t + abs(k)] = self.gold_map[
                                            x - k][y - t + abs(k)]
        print(self.gold_map)
        # Propagate each pile's value over the whole board: value decays by
        # `decay` per step and picks up the cell's view bonus/penalty.  The
        # four axis loops are clamped at 0; the quadrant fills are not.
        arr = []
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                if self.state.mapInfo.gold_amount(i, j) > 0:
                    gold_est = np.zeros([
                        self.state.mapInfo.max_x + 1,
                        self.state.mapInfo.max_y + 1
                    ], dtype=int)
                    gold_est[i][j] = self.gold_map[i][j]
                    for a in range(0, i):
                        gold_est[i - a - 1][j] = max(
                            gold_est[i - a][j] - self.decay +
                            self.view[i - a - 1][j], 0)
                    for b in range(i + 1, self.state.mapInfo.max_x + 1):
                        gold_est[b][j] = max(
                            gold_est[b - 1][j] - self.decay +
                            self.view[b][j], 0)
                    for c in range(0, j):
                        gold_est[i][j - c - 1] = max(
                            gold_est[i][j - c] - self.decay +
                            self.view[i][j - c - 1], 0)
                    for d in range(j + 1, self.state.mapInfo.max_y + 1):
                        gold_est[i][d] = max(
                            gold_est[i][d - 1] - self.decay +
                            self.view[i][d], 0)
                    # Four quadrants: take the better of the two already-filled
                    # neighbors closer to the pile, then decay + view bonus.
                    for x in range(0, i):
                        for y in range(0, j):
                            gold_est[i - x - 1][j - y - 1] = max(gold_est[i - x][j - y - 1], gold_est[i - x - 1][j - y]) - self.decay + \
                                self.view[i - x - 1][j - y - 1]
                    for x in range(0, i):
                        for y in range(j + 1, self.state.mapInfo.max_y + 1):
                            gold_est[i - x - 1][y] = max(gold_est[i - x][y], gold_est[i - x - 1][y - 1]) - self.decay + \
                                self.view[i - x - 1][y]
                    for x in range(i + 1, self.state.mapInfo.max_x + 1):
                        for y in range(0, j):
                            gold_est[x][j - y - 1] = max(gold_est[x][j - y], gold_est[x - 1][j - y - 1]) - self.decay + \
                                self.view[x][j - y - 1]
                    for x in range(i + 1, self.state.mapInfo.max_x + 1):
                        for y in range(j + 1, self.state.mapInfo.max_y + 1):
                            gold_est[x][y] = max(
                                gold_est[x - 1][y],
                                gold_est[x][y - 1]) - self.decay + self.view[x][y]
                    # print(i, j, self.state.mapInfo.gold_amount(i, j))
                    # print(gold_est)
                    arr.append(gold_est)
        # Cell-wise maximum over all piles' propagated maps.
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                for t in range(len(arr)):
                    if gold_opt[i][j] < arr[t][i][j]:
                        gold_opt[i][j] = arr[t][i][j]
        # print(gold_opt)
        return np.array(gold_opt)

    def check_terminate(self):
        # The game ends when the local state is no longer STATUS_PLAYING.
        return self.state.status != State.STATUS_PLAYING
class Bot_DDPG:
    """Miner bot driven by a pre-trained DDPG policy.

    Observes a (2*limit+1)^2 window centred on itself plus its in-window
    position and energy, and lets the loaded policy pick one of six actions.
    """

    # Action codes understood by the game server.
    ACTION_GO_LEFT = 0
    ACTION_GO_RIGHT = 1
    ACTION_GO_UP = 2
    ACTION_GO_DOWN = 3
    ACTION_FREE = 4
    ACTION_CRAFT = 5

    def __init__(self, id):
        self.state = State()
        self.info = PlayerInfo(id)
        self.limit = 2  # half-width of the square observation window
        # Obstacle type ids used when encoding the window.
        self.TreeID = 1
        self.TrapID = 2
        self.SwampID = 3
        window_cells = (2 * self.limit + 1) ** 2
        # load model: observation = window cells + (x, y, energy)
        kwargs = {
            "state_dim": window_cells + 3,
            "action_dim": 6,
            "max_action": 1.0,
        }
        policy_file = "DDPG_Miner_0_2"
        self.policy = DDPG.DDPG(**kwargs)
        self.policy.load(f"./models_DDPG/{policy_file}")

    def next_action(self):
        """Predict the next move for the current state via the DDPG policy."""
        observation = self.get_state2(self.limit)
        action, _ = self.policy.predict_action(observation)
        return int(action)

    def get_state2(self, limit):
        """Encode the local observation window as a flat numpy vector.

        The window is shifted back inside the map when the bot is near an
        edge, so it always spans exactly (2*limit+1) cells per axis.
        Obstacles are encoded as their negative type id, gold as amount/10
        (truncated by the int array), and the vector ends with the bot's
        in-window position and its energy.
        """
        span = 2 * limit + 1
        view = np.zeros([span, span], dtype=int)
        max_x, max_y = self.state.mapInfo.max_x, self.state.mapInfo.max_y
        px, py = self.info.posx, self.info.posy
        # Clamp-and-shift the window so it stays fully on the map.
        xlimit_below = np.clip(px - limit, 0, max_x) - np.clip(px + limit - max_x, 0, limit)
        xlimit_up = np.clip(px + limit, 0, max_x) + np.clip(0 - px + limit, 0, limit)
        ylimit_below = np.clip(py - limit, 0, max_y) - np.clip(py + limit - max_y, 0, limit)
        ylimit_up = np.clip(py + limit, 0, max_y) + np.clip(0 - py + limit, 0, limit)
        #print(xlimit_below, xlimit_up, ylimit_below, ylimit_up, px, py)
        for gx in range(xlimit_below, xlimit_up + 1):
            for gy in range(ylimit_below, ylimit_up + 1):
                row = gx - xlimit_below
                col = gy - ylimit_below
                obstacle = self.state.mapInfo.get_obstacle(gx, gy)
                if obstacle in (self.TreeID, self.TrapID, self.SwampID):
                    view[row, col] = -obstacle
                gold = self.state.mapInfo.gold_amount(gx, gy)
                if gold > 0:
                    # Gold overrides any obstacle encoding for the cell.
                    view[row, col] = gold / 10
        observation = view.flatten().tolist()
        # Bot position relative to the window origin, then energy.
        observation.append(self.info.posx - xlimit_below)
        observation.append(self.info.posy - ylimit_below)
        observation.append(self.info.energy)
        return np.array(observation)

    def new_game(self, data):
        """Initialise the local state from the server's new-game message."""
        try:
            self.state.init_state(data)
        except Exception as e:
            import traceback
            traceback.print_exc()

    def new_state(self, data):
        """Fold a server update into the local state."""
        try:
            self.state.update_state(data)
        except Exception as e:
            import traceback
            traceback.print_exc()
class MinerEnv:
    """Training environment with a 5x5 egocentric view and distance-to-gold
    reward shaping over the first gold pile on the map."""

    def __init__(self, host, port):
        self.socket = GameSocket(host, port)
        self.state = State()
        # Baselines used by get_reward(): last score and last position.
        self.score_pre = self.state.score  # Storing the last score for designing the reward function
        self.pos_x_pre = self.state.x
        self.pos_y_pre = self.state.y

    def start(self):  # connect to server
        self.socket.connect()

    def end(self):  # disconnect server
        self.socket.close()

    def send_map_info(self, request):  # tell server which map to run
        self.socket.send(request)

    def reset(self):  # start new game
        try:
            message = self.socket.receive()  # receive game info from server
            self.state.init_state(message)  # init state
            # Re-seed the shaping baselines for the new episode.
            self.score_pre = self.state.score
            self.pos_x_pre = self.state.x
            self.pos_y_pre = self.state.y
        except Exception:
            import traceback
            traceback.print_exc()

    def step(self, action):  # step process
        self.socket.send(action)  # send action to server
        try:
            # FIX: remember the position *before* the transition so the
            # distance shaping in get_reward() compares previous vs current
            # step. Previously pos_*_pre was only set at reset, so shaping
            # always compared against the episode's start position.
            self.pos_x_pre = self.state.x
            self.pos_y_pre = self.state.y
            message = self.socket.receive()  # receive new state from server
            self.state.update_state(message)  # update to local state
        except Exception:
            import traceback
            traceback.print_exc()

    # Functions are customized by client
    def get_state(self):
        """Return a 27-dim vector: flattened 5x5 local view (-1 = blocked or
        off-map, 0 = free) plus (dx, dy) to the first gold pile."""
        view = np.zeros([5, 5])
        max_x, max_y = self.state.mapInfo.max_x, self.state.mapInfo.max_y
        for i in range(-2, 3):
            for j in range(-2, 3):
                index_x = self.state.x + i
                index_y = self.state.y + j
                # FIX: max_x / max_y are valid coordinates (maps are indexed
                # 0..max inclusive elsewhere in this file), so only > max is
                # out of bounds; >= wrongly walled off the last row/column.
                if index_x < 0 or index_y < 0 or index_x > max_x or index_y > max_y:
                    view[2 + i, 2 + j] = -1
                elif self.state.mapInfo.get_obstacle(index_x, index_y) in (TreeID, TrapID, SwampID):
                    view[2 + i, 2 + j] = -1
        # Create the state
        DQNState = view.flatten().tolist()
        # Target the first gold pile; fall back to our own cell when none left.
        self.pos_x_gold_first = self.state.x
        self.pos_y_gold_first = self.state.y
        if len(self.state.mapInfo.golds) > 0:
            self.pos_x_gold_first = self.state.mapInfo.golds[0]["posx"]
            self.pos_y_gold_first = self.state.mapInfo.golds[0]["posy"]
        DQNState.append(self.pos_x_gold_first - self.state.x)
        DQNState.append(self.pos_y_gold_first - self.state.y)
        # Convert the DQNState from list to array for training
        return np.array(DQNState)

    def get_reward(self):
        """Shaped reward: +10 for standing on gold, -0.2 per obstacle,
        +/-0.1 for moving toward/away from the target gold, -10 for leaving the map."""
        reward = 0
        goldamount = self.state.mapInfo.gold_amount(self.state.x, self.state.y)
        if goldamount > 0:
            reward += 10  # goldamount
            # Remove the gold pile under the agent from the dummy server state.
            # FIX: rebuild the list instead of calling list.remove() inside a
            # `for` over the same list, which skips elements while mutating.
            self.socket.stepState.golds = [
                g for g in self.socket.stepState.golds
                if not (g.posx == self.state.x and g.posy == self.state.y)
            ]
        # If the DQN agent stands on an obstacle (Tree, Trap, Swamp), punish it.
        if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) in (TreeID, TrapID, SwampID):
            reward -= 0.2
        # Distance shaping toward the gold chosen in get_state().
        dis_pre = np.sqrt((self.pos_x_pre - self.pos_x_gold_first)**2 +
                          (self.pos_y_pre - self.pos_y_gold_first)**2)
        dis_curr = np.sqrt((self.state.x - self.pos_x_gold_first)**2 +
                           (self.state.y - self.pos_y_gold_first)**2)
        if (dis_curr - dis_pre) <= 0:
            # Reducing the distance, reward ++
            reward += 0.1
        else:
            reward -= 0.1
        # If out of the map, the agent is punished by a larger negative reward.
        if self.state.status == State.STATUS_ELIMINATED_WENT_OUT_MAP:
            reward += -10
        return reward

    def check_terminate(self):
        # Checking the status of the game: True once it is no longer PLAYING.
        return self.state.status != State.STATUS_PLAYING
class Bot2:
    """Rule-based bot: craft on shared gold piles, otherwise sweep toward the
    nearest gold in the current column, bouncing left/right across the map."""

    ACTION_GO_LEFT = 0
    ACTION_GO_RIGHT = 1
    ACTION_GO_UP = 2
    ACTION_GO_DOWN = 3
    ACTION_FREE = 4
    ACTION_CRAFT = 5

    def __init__(self, id):
        self.state = State()
        self.info = PlayerInfo(id)
        # Horizontal sweep direction: True = moving right (increasing x).
        self.isMovingInc = False
        # NOTE(review): self.isKeepFree is read in next_action() but only
        # initialized in new_game(); calling next_action() first would raise
        # AttributeError — confirm call order against the runner.

    def next_action(self):
        """Return the next action for the current (state, info) snapshot."""
        if (self.info.status != 0 and self.state.stepCount < 100):
            # Unexpected: the bot was eliminated before the step limit.
            print("WTF", self.info.status)
        countPlayerAtGoldMine = 0
        x, y = self.info.posx, self.info.posy
        r_Action = self.ACTION_FREE  # for safe
        # One-shot "rest" latch set elsewhere; consume it and rest this turn.
        if (self.isKeepFree):
            self.isKeepFree = False
            return r_Action
        # 1st rule. Highest priority: craft & survive.
        # valid() is an external helper; presumably checks (row, col) is on
        # the map — note the (y, x) argument order.
        if (valid(y, x)):
            goldOnGround = self.state.mapInfo.gold_amount(x, y)
            # Count ourselves plus any other player on our cell.
            countPlayerAtGoldMine = 1
            for player in self.state.players:
                px, py, pId = player['posx'], player['posy'], player['playerId']
                if (pId != self.info.playerId):
                    if (px == x and py == y):
                        countPlayerAtGoldMine += 1
            if (goldOnGround > 0 and countPlayerAtGoldMine > 0):
                # Standing on gold: dig if we have spare energy.
                if (goldOnGround > 0 and self.info.energy > 5):
                    r_Action = self.ACTION_CRAFT
            else:
                if (self.state.mapInfo.is_column_has_gold(x)):
                    # Move vertically toward the first gold cell in this column.
                    tx = x
                    ty = -1
                    for dy in range(0, 9, 1):  # rows are 0..8
                        if (self.state.mapInfo.gold_amount(x, dy) > 0):
                            ty = dy
                            break
                    if (ty > y):
                        r_Action = self.ACTION_GO_DOWN
                    else:
                        r_Action = self.ACTION_GO_UP
                else:
                    # No gold in this column: sweep horizontally, bouncing at
                    # the map edges (columns 0..20).
                    if (x == 20):
                        self.isMovingInc = False
                    if (x == 0):
                        self.isMovingInc = True
                    if (self.isMovingInc):
                        r_Action = self.ACTION_GO_RIGHT
                    else:
                        r_Action = self.ACTION_GO_LEFT
        else:
            print("INVALID WTF")
        # Never craft without the energy to pay for it.
        if (r_Action == self.ACTION_CRAFT and self.info.energy < 5):
            r_Action = self.ACTION_FREE
        # NOTE(review): randrange(0, 1) always returns 0, so safeEnergy is
        # always 20 — presumably meant to randomize; confirm intent.
        safeEnergy = 20*(1+randrange(0, 1))
        if (self.info.energy < safeEnergy):
            r_Action = self.ACTION_FREE
        return r_Action

    def new_game(self, data):
        """Initialize local state (and the rest latch) for a fresh game message."""
        try:
            self.isKeepFree = False
            self.state.init_state(data)
        except Exception as e:
            import traceback
            traceback.print_exc()

    def new_state(self, data):
        """Fold a per-step server message into the local state."""
        # action = self.next_action();
        # self.socket.send(action)
        try:
            self.state.update_state(data)
        except Exception as e:
            import traceback
            traceback.print_exc()

    def printInfo(self):
        # NOTE(review): estWood / pEnergyToStep / pStepToGold are never
        # assigned in this class — this method would raise AttributeError;
        # likely copied from another bot. Confirm before using.
        print("G_BOT", self.info.playerId, self.estWood, self.pEnergyToStep,
              self.pStepToGold, self.info.score, self.info.energy)
class MinerEnv:
    """DQN training environment over the full map: the observation is the whole
    grid (obstacles as negative type IDs, gold amounts positive) plus agent and
    opponent positions; the reward mirrors crafted score minus penalties."""

    def __init__(self, host, port):
        self.socket = GameSocket(host, port)
        self.state = State()
        # Score at the previous step, used to compute the per-step score delta.
        self.score_pre = (
            self.state.score
        )  # Storing the last score for designing the reward function

    def start(self):  # connect to server
        self.socket.connect()

    def end(self):  # disconnect server
        self.socket.close()

    def send_map_info(self, request):  # tell server which map to run
        self.socket.send(request)

    def reset(self):  # start new game
        try:
            message = self.socket.receive()  # receive game info from server
            self.state.init_state(message)  # init state
            # FIX: re-baseline the score for the new episode; otherwise the
            # first get_reward() of a game compares against the previous
            # game's final score (the sibling MinerEnv variant does this too).
            self.score_pre = self.state.score
        except Exception:
            import traceback
            traceback.print_exc()

    def step(self, action):  # step process
        self.socket.send(action)  # send action to server
        try:
            message = self.socket.receive()  # receive new state from server
            self.state.update_state(message)  # update to local state
        except Exception:
            import traceback
            traceback.print_exc()

    # Functions are customized by client
    def get_state(self):
        """Return the flattened full-map view + (x, y, energy) + opponents' (x, y)."""
        # Building the map: obstacles encoded as their negative type IDs,
        # gold cells overwritten with the (positive) gold amount.
        view = np.zeros(
            [self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1],
            dtype=int)
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                # Hoisted: get_obstacle() was called up to three times per cell.
                obstacle = self.state.mapInfo.get_obstacle(i, j)
                if obstacle == TreeID:  # Tree
                    view[i, j] = -TreeID
                elif obstacle == TrapID:  # Trap
                    view[i, j] = -TrapID
                elif obstacle == SwampID:  # Swamp
                    view[i, j] = -SwampID
                if self.state.mapInfo.gold_amount(i, j) > 0:
                    view[i, j] = self.state.mapInfo.gold_amount(i, j)
        DQNState = view.flatten().tolist()  # Flattening the map matrix to a vector
        # Add position and energy of agent to the DQNState
        DQNState.append(self.state.x)
        DQNState.append(self.state.y)
        DQNState.append(self.state.energy)
        # Add position of bots
        for player in self.state.players:
            if player["playerId"] != self.state.id:
                DQNState.append(player["posx"])
                DQNState.append(player["posy"])
        # Convert the DQNState from list to array for training
        return np.array(DQNState)

    def get_reward(self):
        """Reward = crafted score this step, minus per-obstacle penalties,
        minus 10 for dying (out of map or out of energy)."""
        reward = 0
        score_action = self.state.score - self.score_pre
        self.score_pre = self.state.score
        if score_action > 0:
            # If the DQN agent crafts golds, it obtains a positive reward (= score_action)
            reward += score_action
        # Punish standing on an obstacle; the penalty equals the obstacle's type ID.
        obstacle = self.state.mapInfo.get_obstacle(self.state.x, self.state.y)
        if obstacle == TreeID:  # Tree
            reward -= TreeID
        if obstacle == TrapID:  # Trap
            reward -= TrapID
        if obstacle == SwampID:  # Swamp
            reward -= SwampID
        # If out of the map, the agent is punished by a larger negative reward.
        if self.state.status == State.STATUS_ELIMINATED_WENT_OUT_MAP:
            reward += -10
        # Run out of energy: same large negative reward.
        if self.state.status == State.STATUS_ELIMINATED_OUT_OF_ENERGY:
            reward += -10
        # print("reward", reward)
        return reward

    def check_terminate(self):
        # Checking the status of the game: True once it is no longer PLAYING.
        return self.state.status != State.STATUS_PLAYING
class MinerEnv:
    """Lightweight environment variant: 5x5 egocentric view plus the offset to
    the first gold pile; no reward function (inference/debug use)."""

    def __init__(self, host, port):
        self.socket = GameSocket(host, port)
        self.state = State()
        self.score_pre = self.state.score  # Storing the last score for designing the reward function

    def start(self):  # connect to server
        self.socket.connect()

    def end(self):  # disconnect server
        self.socket.close()

    def send_map_info(self, request):  # tell server which map to run
        self.socket.send(request)

    def reset(self):  # start new game
        try:
            message = self.socket.receive()  # receive game info from server
            print(message)
            self.state.init_state(message)  # init state
        except Exception:
            import traceback
            traceback.print_exc()

    def step(self, action):  # step process
        self.socket.send(action)  # send action to server
        try:
            message = self.socket.receive()  # receive new state from server
            # print("New state: ", message)
            self.state.update_state(message)  # update to local state
        except Exception:
            import traceback
            traceback.print_exc()

    # Functions are customized by client
    def get_state(self):
        """Return a 27-dim vector: flattened 5x5 local view (-1 = blocked or
        off-map, 0 = free) plus (dx, dy) to the first gold pile."""
        view = np.zeros([5, 5])
        max_x, max_y = self.state.mapInfo.max_x, self.state.mapInfo.max_y
        for i in range(-2, 3):
            for j in range(-2, 3):
                index_x = self.state.x + i
                index_y = self.state.y + j
                # FIX: max_x / max_y are valid coordinates (the map is indexed
                # 0..max inclusive elsewhere in this file), so only > max is
                # out of bounds; >= wrongly walled off the last row/column.
                if index_x < 0 or index_y < 0 or index_x > max_x or index_y > max_y:
                    view[2 + i, 2 + j] = -1
                elif self.state.mapInfo.get_obstacle(index_x, index_y) in (TreeID, TrapID, SwampID):
                    view[2 + i, 2 + j] = -1
        # Create the state
        DQNState = view.flatten().tolist()
        # Target the first gold pile; fall back to our own cell when none left.
        self.pos_x_gold_first = self.state.x
        self.pos_y_gold_first = self.state.y
        if len(self.state.mapInfo.golds) > 0:
            self.pos_x_gold_first = self.state.mapInfo.golds[0]["posx"]
            self.pos_y_gold_first = self.state.mapInfo.golds[0]["posy"]
        DQNState.append(self.pos_x_gold_first - self.state.x)
        DQNState.append(self.pos_y_gold_first - self.state.y)
        # Convert the DQNState from list to array for training
        return np.array(DQNState)

    def check_terminate(self):
        # True once the game is no longer in the PLAYING state.
        return self.state.status != State.STATUS_PLAYING
class MinerEnv:
    """Environment producing a 2-channel float image of the whole map:
    channel 0 = normalized terrain/gold, channel 1 = agent energy at its cell."""

    def __init__(self, host, port):
        self.socket = GameSocket(host, port)
        self.state = State()
        self.score_pre = self.state.score  # Storing the last score for designing the reward function

    def start(self):  # connect to server
        self.socket.connect()

    def end(self):  # disconnect server
        self.socket.close()

    def send_map_info(self, request):  # tell server which map to run
        self.socket.send(request)

    def reset(self):  # start new game
        # Choosing a map in the list
        # mapID = np.random.randint(1, 6)  # Choosing a map ID from 5 maps in Maps folder randomly
        mapID = 1
        posID_x = np.random.randint(
            MAP_MAX_X)  # Choosing an initial position of the DQN agent on
        # X-axes randomly
        posID_y = np.random.randint(
            MAP_MAX_Y
        )  # Choosing an initial position of the DQN agent on Y-axes randomly
        # Creating a request for initializing a map, initial position, the
        # initial energy (50), and the maximum number of steps (100).
        request = ("map" + str(mapID) + "," + str(posID_x) + "," +
                   str(posID_y) + ",50,100")
        # Send the request to the game environment (GAME_SOCKET_DUMMY.py)
        self.send_map_info(request)
        try:
            message = self.socket.receive()  # receive game info from server
            print(message)
            self.state.init_state(message)  # init state
            print(self.state.score)
        except Exception as e:
            import traceback
            traceback.print_exc()

    def step(self, action):  # step process
        self.socket.send(action)  # send action to server
        try:
            message = self.socket.receive()  # receive new state from server
            # print("New state: ", message)
            self.state.update_state(message)  # update to local state
            print(self.state.score)
        except Exception as e:
            import traceback
            traceback.print_exc()

    # Functions are customized by client
    def get_state(self):
        """Return a float32 (max_x+1, max_y+1, 2) tensor.

        Channel 0: tree = -1.0, trap = -0.5, swamp = cell value / 20,
        gold = amount / 100. Channel 1: the agent's raw (unnormalized) energy
        at its own cell while still playing. Also refreshes self.gold_map
        (integer gold amounts // 50) as a side effect.
        """
        # Building the map
        view = np.zeros(
            [self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1, 2],
            dtype="float32")
        self.gold_map = np.zeros(
            [self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1],
            dtype=int)
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                if self.state.mapInfo.get_obstacle(i, j) == TreeID:  # Tree
                    view[i, j, 0] = -20 * 1.0 / 20
                    # view[i, j, 0] = -TreeID
                elif self.state.mapInfo.get_obstacle(i, j) == TrapID:  # Trap
                    view[i, j, 0] = -10 * 1.0 / 20
                    # view[i, j, 0] = -TrapID
                elif self.state.mapInfo.get_obstacle(i, j) == SwampID:  # Swamp
                    # Swamp cost varies per cell; use its value, scaled.
                    view[i, j, 0] = self.state.mapInfo.get_obstacle_value(
                        i, j) * 1.0 / 20
                    # view[i, j, 0] = -SwampID
                elif self.state.mapInfo.gold_amount(i, j) > 0:
                    view[i, j, 0] = self.state.mapInfo.gold_amount(i,
                                                                   j) * 1.0 / 100
                    self.gold_map[i, j] = self.state.mapInfo.gold_amount(
                        i, j) / 50
        # Mark the agent's own cell with its energy while alive (status 0 == playing).
        if self.state.status == 0:
            view[self.state.x, self.state.y, 1] = self.state.energy
        # for player in self.state.players:
        #     if player["playerId"] != self.state.id:
        #         view[player["posx"], player["posy"], 1] -= 1
        # Convert the DQNState from list to array for training
        DQNState = np.array(view)
        return DQNState

    def check_terminate(self):
        # True once the game is no longer in the PLAYING state.
        return self.state.status != State.STATUS_PLAYING
class MyBot:
    """Hand-written greedy bot: scores every gold pile by
    amount/(players+1) - 50*distance*(players+1), walks toward the best one
    (retargeting to better piles found on the way), and rests whenever the
    next cell costs more energy than it has."""

    ACTION_GO_LEFT = 0
    ACTION_GO_RIGHT = 1
    ACTION_GO_UP = 2
    ACTION_GO_DOWN = 3
    ACTION_FREE = 4
    ACTION_CRAFT = 5

    def __init__(self, host, port):
        self.socket = GameSocket(host, port)
        self.state = State()
        self.is_moving_right = True  # default: go to right side
        self.steps = 0       # number of next_action() calls so far
        self.pre_action = 0  # previous action, used by the rest-until-refreshed rule

    def start(self):  # connect to server
        self.socket.connect()

    def end(self):  # disconnect server
        self.socket.close()

    def send_map_info(self, request):  # tell server which map to run
        self.socket.send(request)

    def reset(self):  # start new game
        try:
            message = self.socket.receive()  # receive game info from server
            self.state.init_state(message)  # init state
        except Exception as e:
            import traceback
            traceback.print_exc()

    def step(self, tmp_action):  # step process
        self.socket.send(tmp_action)  # send action to server
        try:
            message = self.socket.receive()  # receive new state from server
            self.state.update_state(message)  # update to local state
        except Exception as e:
            import traceback
            traceback.print_exc()

    def check_terminate(self):
        # Checking the status of the game
        # it indicates the game ends or is playing
        return self.state.status != State.STATUS_PLAYING

    def goLeftOrRight(self, my_bot_x, my_bot_y, initial_flag=False):
        """Compare gold-per-player on each side of the bot's column.

        Returns 1 (search left), 2 (search both sides) or 3 (search right).
        """
        total_gold_left = 0
        total_gold_right = 0
        for gold in self.state.mapInfo.golds:
            gold_amount = gold["amount"]
            if gold_amount > 0:
                i = gold["posx"]
                j = gold["posy"]
                # Gold in our own column counts for both sides.
                if i >= my_bot_x:
                    total_gold_right += gold_amount
                if i <= my_bot_x:
                    total_gold_left += gold_amount
        count_players_left = 0
        count_players_right = 0
        for player in self.state.players:
            if "energy" in player:
                # Live update: only count players still in the game.
                if player["status"] == self.state.STATUS_PLAYING:
                    if player["posx"] >= my_bot_x:
                        count_players_right += 1
                    if player["posx"] <= my_bot_x:
                        count_players_left += 1
            elif initial_flag:  # 0 step, initial state: no "energy" key yet
                if player["posx"] >= my_bot_x:
                    count_players_right += 1
                if player["posx"] <= my_bot_x:
                    count_players_left += 1
        # NOTE(review): no zero guard on the divisions — presumably the bot
        # itself always appears in self.state.players so both counts are >= 1;
        # confirm against the server message format.
        total_gold_left = total_gold_left / count_players_left
        total_gold_right = total_gold_right / count_players_right
        # 1 ==> left; 2 ==> both; 3 ==> right
        if total_gold_left > total_gold_right:
            return 1
        elif total_gold_left == total_gold_right:
            return 2
        else:
            return 3

    def myGetGoldAmount(self, x, y, initial_flag=False, are_we_here=False):
        """Score the gold pile at (x, y): its amount shared among the players
        on it, discounted by 50 per step of Manhattan distance per competitor.
        Returns 0 when the cell holds no gold."""
        distance = abs(x - self.state.x) + abs(y - self.state.y)
        gold_on_ground = self.state.mapInfo.gold_amount(x, y)
        if gold_on_ground == 0:
            return 0
        count_players = 0
        for player in self.state.players:
            if player["posx"] == x and player["posy"] == y:
                if "energy" in player:
                    if player["status"] == self.state.STATUS_PLAYING:
                        count_players += 1
                elif initial_flag:  # 0 step, initial state
                    count_players += 1
        if are_we_here:
            # We are standing on the pile; we are already counted.
            return gold_on_ground / count_players
        else:
            return gold_on_ground / (count_players + 1) - (distance * 50) - (
                50 * count_players * distance
            )  # +1 because assuming that we will come here

    def findLargestGold(self, initial_flag=False, leftOrRight=2):
        """Return (x, y) of the best-scoring pile on the side selected by
        leftOrRight (1 = left of the bot, 3 = right, 2 = whole map);
        (-1, -1) when no pile qualifies. Ties break on squared distance."""
        my_bot_x, my_bot_y = self.state.x, self.state.y
        largest_gold_x = -1
        largest_gold_y = -1
        max_gold = -100000
        # NOTE(review): the three branches below are near-identical copies
        # differing only in the side filter — candidates for extraction.
        for goal in self.state.mapInfo.golds:
            if leftOrRight == 2:
                if goal["amount"] > 0:
                    i = goal["posx"]
                    j = goal["posy"]
                    distance = abs(i - self.state.x) + abs(j - self.state.y)
                    count_players = 0
                    for player in self.state.players:
                        if player["posx"] == i and player["posy"] == j:
                            if "energy" in player:
                                if player[
                                        "status"] == self.state.STATUS_PLAYING:
                                    count_players += 1
                            elif initial_flag:  # 0 step, initial state
                                count_players += 1
                    gold_amount = (goal["amount"] / (count_players + 1)) - (
                        distance * 50) - (50 * count_players * distance)
                    if gold_amount > max_gold:
                        largest_gold_x = i
                        largest_gold_y = j
                        max_gold = gold_amount
                    elif gold_amount == max_gold:
                        # Tie-break: keep the pile closest to the bot.
                        prev_distance = (largest_gold_x - my_bot_x) * (largest_gold_x - my_bot_x) + \
                            (largest_gold_y - my_bot_y) * (largest_gold_y - my_bot_y)
                        new_distance = (i - my_bot_x) * (i - my_bot_x) + (
                            j - my_bot_y) * (j - my_bot_y)
                        if new_distance < prev_distance:
                            largest_gold_x = i
                            largest_gold_y = j
                            max_gold = gold_amount
            # only search at left side
            if leftOrRight == 1:
                if goal["amount"] > 0:
                    i = goal["posx"]
                    j = goal["posy"]
                    if i <= my_bot_x:
                        distance = abs(i - self.state.x) + abs(j -
                                                               self.state.y)
                        count_players = 0
                        for player in self.state.players:
                            if player["posx"] == i and player["posy"] == j:
                                if "energy" in player:
                                    if player[
                                            "status"] == self.state.STATUS_PLAYING:
                                        count_players += 1
                                elif initial_flag:  # 0 step, initial state
                                    count_players += 1
                        gold_amount = (goal["amount"] /
                                       (count_players + 1)) - (
                                           distance * 50) - (
                                               50 * count_players * distance)
                        if gold_amount > max_gold:
                            largest_gold_x = i
                            largest_gold_y = j
                            max_gold = gold_amount
                        elif gold_amount == max_gold:
                            prev_distance = (largest_gold_x - my_bot_x) * (largest_gold_x - my_bot_x) + \
                                (largest_gold_y - my_bot_y) * (largest_gold_y - my_bot_y)
                            new_distance = (i - my_bot_x) * (i - my_bot_x) + (
                                j - my_bot_y) * (j - my_bot_y)
                            if new_distance < prev_distance:
                                largest_gold_x = i
                                largest_gold_y = j
                                max_gold = gold_amount
            # only search at right side
            if leftOrRight == 3:
                if goal["amount"] > 0:
                    i = goal["posx"]
                    j = goal["posy"]
                    if i >= my_bot_x:
                        distance = abs(i - self.state.x) + abs(j -
                                                               self.state.y)
                        count_players = 0
                        for player in self.state.players:
                            if player["posx"] == i and player["posy"] == j:
                                if "energy" in player:
                                    if player[
                                            "status"] == self.state.STATUS_PLAYING:
                                        count_players += 1
                                elif initial_flag:  # 0 step, initial state
                                    count_players += 1
                        gold_amount = (goal["amount"] /
                                       (count_players + 1)) - (
                                           distance * 50) - (
                                               50 * count_players * distance)
                        if gold_amount > max_gold:
                            largest_gold_x = i
                            largest_gold_y = j
                            max_gold = gold_amount
                        elif gold_amount == max_gold:
                            prev_distance = (largest_gold_x - my_bot_x) * (largest_gold_x - my_bot_x) + \
                                (largest_gold_y - my_bot_y) * (largest_gold_y - my_bot_y)
                            new_distance = (i - my_bot_x) * (i - my_bot_x) + (
                                j - my_bot_y) * (j - my_bot_y)
                            if new_distance < prev_distance:
                                largest_gold_x = i
                                largest_gold_y = j
                                max_gold = gold_amount
        return largest_gold_x, largest_gold_y

    def findLargestGoldInSmallMap(self, des_x, des_y):
        """Scan the rectangle between the bot and (des_x, des_y) — destination
        cell excluded — for the best-scoring pile on the way.
        Returns (None, None) when no pile scores above the floor."""
        x, y = self.state.x, self.state.y
        largest_gold_x = None
        largest_gold_y = None
        next_step_x = 0
        next_step_y = 0
        if x < des_x:
            next_step_x = 1
        else:
            next_step_x = -1
        if y < des_y:
            next_step_y = 1
        else:
            next_step_y = -1
        max_gold = -100000
        while x != des_x + next_step_x:
            while y != des_y + next_step_y:
                if x != des_x or y != des_y:
                    gold_amount = self.myGetGoldAmount(x, y)
                    if gold_amount > 0:
                        if gold_amount > max_gold:
                            largest_gold_x = x
                            largest_gold_y = y
                            max_gold = gold_amount
                        elif gold_amount == max_gold:
                            # Tie-break: keep the pile closest to the bot.
                            prev_distance = (largest_gold_x - self.state.x) * (largest_gold_x - self.state.x) + \
                                (largest_gold_y - self.state.y) * (largest_gold_y - self.state.y)
                            new_distance = (x - self.state.x) * (
                                x - self.state.x) + (y - self.state.y) * (
                                    y - self.state.y)
                            if new_distance < prev_distance:
                                largest_gold_x = x
                                largest_gold_y = y
                                max_gold = gold_amount
                y += next_step_y
            y = self.state.y  # rewind the row cursor for the next column
            x += next_step_x
        return largest_gold_x, largest_gold_y

    def getActionBaseOnEnergy(self, action_option_1, action_option_2):
        """Given a horizontal option (LEFT/RIGHT) and a vertical one (UP/DOWN),
        pick whichever destination cell costs less energy; rest (FREE) when we
        cannot afford even the cheaper one."""
        my_bot_x, my_bot_y = self.state.x, self.state.y
        n_action = action_option_1
        require_energy = 100
        if action_option_1 == self.ACTION_GO_RIGHT:
            next_x = my_bot_x + 1
        else:
            next_x = my_bot_x - 1
        if action_option_2 == self.ACTION_GO_DOWN:
            next_y = my_bot_y + 1
        else:
            next_y = my_bot_y - 1
        # Base step cost is 1; stepping onto a gold cell costs 4.
        energy_1 = 1
        energy_2 = 1
        gold = self.state.mapInfo.gold_amount(next_x, my_bot_y)
        if gold > 0:
            energy_1 = 4
        gold = self.state.mapInfo.gold_amount(my_bot_x, next_y)
        if gold > 0:
            energy_2 = 4
        # Obstacle costs: tree 20, trap 10, swamp = -value (value is negative).
        for obstacle in self.state.mapInfo.obstacles:
            i = obstacle["posx"]
            j = obstacle["posy"]
            if i == next_x and j == my_bot_y:
                if obstacle["type"] == 1:  # Tree
                    energy_1 = 20
                elif obstacle["type"] == 2:  # Trap
                    if obstacle["value"] == -10:
                        energy_1 = 10
                elif obstacle["type"] == 3:  # Swamp
                    energy_1 = -obstacle["value"]
            if i == my_bot_x and j == next_y:
                if obstacle["type"] == 1:  # Tree
                    energy_2 = 20
                elif obstacle["type"] == 2:  # Trap
                    if obstacle["value"] == -10:
                        energy_2 = 10
                elif obstacle["type"] == 3:  # Swamp
                    energy_2 = -obstacle["value"]
        if energy_1 < energy_2:
            n_action = action_option_1
            require_energy = energy_1
        else:
            n_action = action_option_2
            require_energy = energy_2
        if self.state.energy <= require_energy:
            n_action = self.ACTION_FREE
        # print("require_energy = {0}".format(require_energy))
        # print("choose action = {0}".format(n_action))
        return n_action

    def goToTarget(self, des_x, des_y):
        """One step toward (des_x, des_y): move along the only misaligned axis,
        or delegate to getActionBaseOnEnergy() when both axes differ; rest when
        the next cell costs more energy than we currently have."""
        n_action = self.ACTION_FREE
        require_energy = 100
        my_bot_x, my_bot_y = self.state.x, self.state.y
        next_my_bot_x = my_bot_x
        next_my_bot_y = my_bot_y
        if my_bot_x == des_x:
            if my_bot_y < des_y:
                n_action = self.ACTION_GO_DOWN
                next_my_bot_y += 1
            else:
                n_action = self.ACTION_GO_UP
                next_my_bot_y -= 1
        elif my_bot_y == des_y:
            if my_bot_x < des_x:
                n_action = self.ACTION_GO_RIGHT
                next_my_bot_x += 1
            else:
                n_action = self.ACTION_GO_LEFT
                next_my_bot_x -= 1
        else:
            # Both axes misaligned: the helper also performs the energy check,
            # so return its choice directly.
            if my_bot_x < des_x:
                action_option_1 = self.ACTION_GO_RIGHT
            else:
                action_option_1 = self.ACTION_GO_LEFT
            if my_bot_y < des_y:
                action_option_2 = self.ACTION_GO_DOWN
            else:
                action_option_2 = self.ACTION_GO_UP
            n_action = self.getActionBaseOnEnergy(action_option_1,
                                                  action_option_2)
            return n_action
        # Single-axis move: compute the destination cell's energy cost and
        # rest if we cannot afford it.
        require_energy = 1
        gold_amount = self.state.mapInfo.gold_amount(next_my_bot_x,
                                                     next_my_bot_y)
        if gold_amount > 0:
            require_energy = 4
        for obstacle in self.state.mapInfo.obstacles:
            i = obstacle["posx"]
            j = obstacle["posy"]
            if i == next_my_bot_x and j == next_my_bot_y:
                if obstacle["type"] == 1:  # Tree
                    require_energy = 20
                elif obstacle["type"] == 2:  # Trap
                    if obstacle["value"] == -10:
                        require_energy = 10
                elif obstacle["type"] == 3:  # Swamp
                    require_energy = -obstacle["value"]
        if self.state.energy <= require_energy:
            n_action = self.ACTION_FREE
        return n_action

    def next_action(self, initial_flag=False):
        """Top-level policy: craft when standing on gold (resting to recover
        energy as needed), otherwise walk toward the best-scoring pile."""
        my_bot_x, my_bot_y = self.state.x, self.state.y
        n_action = self.ACTION_FREE
        gold_on_ground = self.myGetGoldAmount(my_bot_x,
                                              my_bot_y,
                                              initial_flag,
                                              are_we_here=True)
        energy = self.state.energy
        if gold_on_ground > 0:
            if energy <= 5:
                n_action = self.ACTION_FREE
            elif energy >= (gold_on_ground / 50) * 5:
                # Enough energy to mine our whole share (5 energy per 50 gold).
                n_action = self.ACTION_CRAFT
            elif self.pre_action == self.ACTION_FREE and energy < 38:
                # Keep resting until close to full energy before mining again.
                n_action = self.ACTION_FREE
            else:
                n_action = self.ACTION_CRAFT
        else:
            # Early game: restrict the search to the richer half of the map.
            leftOrRight = 2
            if self.steps < 30:
                leftOrRight = self.goLeftOrRight(my_bot_x, my_bot_y,
                                                 initial_flag)
            largest_gold_x, largest_gold_y = self.findLargestGold(
                initial_flag, leftOrRight)
            target_x = largest_gold_x
            target_y = largest_gold_y
            # Repeatedly retarget to better piles found between us and the target.
            while True:
                tmp_x, tmp_y = self.findLargestGoldInSmallMap(
                    target_x, target_y)
                if (tmp_x is None) or (tmp_y is None):
                    break
                target_x = tmp_x
                target_y = tmp_y
            n_action = self.goToTarget(target_x, target_y)
        self.steps += 1
        self.pre_action = n_action
        return n_action
class Bot3:
    """Scripted bot that sweeps the map column by column, digging any gold it steps on."""

    ACTION_GO_LEFT = 0
    ACTION_GO_RIGHT = 1
    ACTION_GO_UP = 2
    ACTION_GO_DOWN = 3
    ACTION_FREE = 4
    ACTION_CRAFT = 5

    def __init__(self, id):
        self.state = State()
        self.info = PlayerInfo(id)

    def next_action(self):
        """Pick the next move from the live PlayerInfo/State pair."""
        col, row = self.info.posx, self.info.posy
        stamina = self.info.energy
        # Standing on gold: dig when we have the energy, otherwise rest.
        if self.state.mapInfo.gold_amount(col, row) > 0:
            return self.ACTION_CRAFT if stamina >= 6 else self.ACTION_FREE
        # Too tired to move safely.
        if stamina < 5:
            return self.ACTION_FREE
        # Serpentine sweep: down even columns, up odd ones, left as fallback.
        move = self.ACTION_GO_LEFT
        if col % 2 == 0:
            if row < self.state.mapInfo.max_y:
                move = self.ACTION_GO_DOWN
        elif row > 0:
            move = self.ACTION_GO_UP
        else:
            move = self.ACTION_GO_RIGHT
        return move

    def act_sample(self, mystate):
        """Same policy, but position/energy come from mystate[-3:] = (x, y, energy)."""
        sx = mystate[-3]
        sy = mystate[-2]
        stamina = mystate[-1]
        if self.state.mapInfo.gold_amount(sx, sy) > 0:
            return self.ACTION_CRAFT if stamina >= 6 else self.ACTION_FREE
        if stamina < 5:
            return self.ACTION_FREE
        move = self.ACTION_GO_LEFT
        # NOTE: column parity is read from the live info.posx, not from mystate.
        if self.info.posx % 2 == 0:
            if sy < self.state.mapInfo.max_y:
                move = self.ACTION_GO_DOWN
        elif sy > 0:
            move = self.ACTION_GO_UP
        else:
            move = self.ACTION_GO_RIGHT
        return move

    def new_game(self, data):
        """Initialize local state for a fresh game message."""
        try:
            self.state.init_state(data)
        except Exception:
            import traceback
            traceback.print_exc()

    def new_state(self, data):
        """Fold a per-step server message into the local state."""
        # action = self.next_action();
        # self.socket.send(action)
        try:
            self.state.update_state(data)
        except Exception:
            import traceback
            traceback.print_exc()
class Bot6:
    """Greedy bot (twin of MyBot, driven by PlayerInfo instead of State):
    scores every pile by amount/(players+1) - 50*distance*(players+1) and
    walks toward the best one, resting when the next cell is unaffordable."""

    def __init__(self, id):
        self.state = State()
        self.info = PlayerInfo(id)
        self.isMovingInc = False
        # True until the first action: player dicts lack the "energy" key at step 0.
        self.initial_flag = True

    def myGetGoldAmount(self, x, y, initial_flag=False, are_we_here=False):
        """Score the gold pile at (x, y): its amount shared among players on it,
        discounted by 50 per step of Manhattan distance per competitor.
        Returns 0 when the cell holds no gold."""
        distance = abs(x - self.info.posx) + abs(y - self.info.posy)
        gold_on_ground = self.state.mapInfo.gold_amount(x, y)
        if gold_on_ground == 0:
            return 0
        count_players = 0
        for player in self.state.players:
            if player["posx"] == x and player["posy"] == y:
                if "energy" in player:
                    if player["status"] == self.state.STATUS_PLAYING:
                        count_players += 1
                elif initial_flag:  # 0 step, initial state
                    count_players += 1
        if are_we_here:
            # We are standing on the pile; we are already counted.
            return gold_on_ground / count_players
        else:
            return gold_on_ground / (count_players + 1) - (distance * 50) - (
                50 * count_players * distance
            )  # +1 because assuming that we will come here

    def findLargestGold(self, initial_flag):
        """Return (x, y) of the best-scoring pile on the whole map, or (-1, -1)
        when none qualifies. Ties break on squared distance to the bot."""
        my_bot_x, my_bot_y = self.info.posx, self.info.posy
        largest_gold_x = -1
        largest_gold_y = -1
        max_gold = -100000
        for goal in self.state.mapInfo.golds:
            if goal["amount"] > 0:
                i = goal["posx"]
                j = goal["posy"]
                distance = abs(i - self.info.posx) + abs(j - self.info.posy)
                count_players = 0
                for player in self.state.players:
                    if player["posx"] == i and player["posy"] == j:
                        if "energy" in player:
                            if player["status"] == self.state.STATUS_PLAYING:
                                count_players += 1
                        elif initial_flag:  # 0 step, initial state
                            count_players += 1
                gold_amount = (goal["amount"] / (count_players + 1)) - (
                    distance * 50) - (50 * count_players * distance)
                if gold_amount > max_gold:
                    largest_gold_x = i
                    largest_gold_y = j
                    max_gold = gold_amount
                elif gold_amount == max_gold:
                    # Tie-break: keep the pile closest to the bot.
                    prev_distance = (largest_gold_x - my_bot_x) * (largest_gold_x - my_bot_x) + \
                        (largest_gold_y - my_bot_y) * (largest_gold_y - my_bot_y)
                    new_distance = (i - my_bot_x) * (i - my_bot_x) + (
                        j - my_bot_y) * (j - my_bot_y)
                    if new_distance < prev_distance:
                        largest_gold_x = i
                        largest_gold_y = j
                        max_gold = gold_amount
        return largest_gold_x, largest_gold_y

    def findLargestGoldInSmallMap(self, des_x, des_y):
        """Scan the rectangle between the bot and (des_x, des_y) — destination
        cell excluded — for the best-scoring pile on the way.
        Returns (None, None) when no pile scores above the floor."""
        x, y = self.info.posx, self.info.posy
        largest_gold_x = None
        largest_gold_y = None
        next_step_x = 0
        next_step_y = 0
        if x < des_x:
            next_step_x = 1
        else:
            next_step_x = -1
        if y < des_y:
            next_step_y = 1
        else:
            next_step_y = -1
        max_gold = -100000
        while x != des_x + next_step_x:
            while y != des_y + next_step_y:
                if x != des_x or y != des_y:
                    gold_amount = self.myGetGoldAmount(x, y)
                    if gold_amount > 0:
                        if gold_amount > max_gold:
                            largest_gold_x = x
                            largest_gold_y = y
                            max_gold = gold_amount
                        elif gold_amount == max_gold:
                            # Tie-break: keep the pile closest to the bot.
                            prev_distance = (largest_gold_x - self.info.posx) * (largest_gold_x - self.info.posx) + \
                                (largest_gold_y - self.info.posy) * (largest_gold_y - self.info.posy)
                            new_distance = (x - self.info.posx) * (
                                x - self.info.posx) + (y - self.info.posy) * (
                                    y - self.info.posy)
                            if new_distance < prev_distance:
                                largest_gold_x = x
                                largest_gold_y = y
                                max_gold = gold_amount
                y += next_step_y
            y = self.info.posy  # rewind the row cursor for the next column
            x += next_step_x
        return largest_gold_x, largest_gold_y

    def getActionBaseOnEnergy(self, action_option_1, action_option_2):
        """Given a horizontal option (LEFT/RIGHT) and a vertical one (UP/DOWN),
        pick whichever destination cell costs less energy; rest (FREE) when we
        cannot afford even the cheaper one."""
        my_bot_x, my_bot_y = self.info.posx, self.info.posy
        n_action = action_option_1
        require_energy = 100
        if action_option_1 == self.ACTION_GO_RIGHT:
            next_x = my_bot_x + 1
        else:
            next_x = my_bot_x - 1
        if action_option_2 == self.ACTION_GO_DOWN:
            next_y = my_bot_y + 1
        else:
            next_y = my_bot_y - 1
        # Base step cost is 1; stepping onto a gold cell costs 4.
        energy_1 = 1
        energy_2 = 1
        gold = self.state.mapInfo.gold_amount(next_x, my_bot_y)
        if gold > 0:
            energy_1 = 4
        gold = self.state.mapInfo.gold_amount(my_bot_x, next_y)
        if gold > 0:
            energy_2 = 4
        # Obstacle costs: tree 20, trap 10, swamp = -value (value is negative).
        for obstacle in self.state.mapInfo.obstacles:
            i = obstacle["posx"]
            j = obstacle["posy"]
            if i == next_x and j == my_bot_y:
                if obstacle["type"] == 1:  # Tree
                    energy_1 = 20
                elif obstacle["type"] == 2:  # Trap
                    if obstacle["value"] == -10:
                        energy_1 = 10
                elif obstacle["type"] == 3:  # Swamp
                    energy_1 = -obstacle["value"]
            if i == my_bot_x and j == next_y:
                if obstacle["type"] == 1:  # Tree
                    energy_2 = 20
                elif obstacle["type"] == 2:  # Trap
                    if obstacle["value"] == -10:
                        energy_2 = 10
                elif obstacle["type"] == 3:  # Swamp
                    energy_2 = -obstacle["value"]
        if energy_1 < energy_2:
            n_action = action_option_1
            require_energy = energy_1
        else:
            n_action = action_option_2
            require_energy = energy_2
        if self.info.energy <= require_energy:
            n_action = self.ACTION_FREE
        # print("require_energy = {0}".format(require_energy))
        # print("choose action = {0}".format(n_action))
        return n_action

    def goToTarget(self, des_x, des_y):
        """One step toward (des_x, des_y): move along the only misaligned axis,
        or delegate to getActionBaseOnEnergy() when both axes differ; rest when
        the next cell costs more energy than we currently have."""
        n_action = self.ACTION_FREE
        require_energy = 100
        my_bot_x, my_bot_y = self.info.posx, self.info.posy
        next_my_bot_x = my_bot_x
        next_my_bot_y = my_bot_y
        if my_bot_x == des_x:
            if my_bot_y < des_y:
                n_action = self.ACTION_GO_DOWN
                next_my_bot_y += 1
            else:
                n_action = self.ACTION_GO_UP
                next_my_bot_y -= 1
        elif my_bot_y == des_y:
            if my_bot_x < des_x:
                n_action = self.ACTION_GO_RIGHT
                next_my_bot_x += 1
            else:
                n_action = self.ACTION_GO_LEFT
                next_my_bot_x -= 1
        else:
            # Both axes misaligned: the helper also performs the energy check,
            # so return its choice directly.
            if my_bot_x < des_x:
                action_option_1 = self.ACTION_GO_RIGHT
            else:
                action_option_1 = self.ACTION_GO_LEFT
            if my_bot_y < des_y:
                action_option_2 = self.ACTION_GO_DOWN
            else:
                action_option_2 = self.ACTION_GO_UP
            n_action = self.getActionBaseOnEnergy(action_option_1,
                                                  action_option_2)
            return n_action
        # Single-axis move: compute the destination cell's energy cost and
        # rest if we cannot afford it.
        require_energy = 1
        gold_amount = self.state.mapInfo.gold_amount(next_my_bot_x,
                                                     next_my_bot_y)
        if gold_amount > 0:
            require_energy = 4
        for obstacle in self.state.mapInfo.obstacles:
            i = obstacle["posx"]
            j = obstacle["posy"]
            if i == next_my_bot_x and j == next_my_bot_y:
                if obstacle["type"] == 1:  # Tree
                    require_energy = 20
                elif obstacle["type"] == 2:  # Trap
                    if obstacle["value"] == -10:
                        require_energy = 10
                elif obstacle["type"] == 3:  # Swamp
                    require_energy = -obstacle["value"]
        if self.info.energy <= require_energy:
            n_action = self.ACTION_FREE
        return n_action

    def next_action(self, initial_flag=False):
        """Top-level policy: craft when standing on gold (resting to recover
        energy as needed), otherwise walk toward the best-scoring pile.
        Note: uses self.initial_flag, not the initial_flag parameter."""
        # my_bot_x, my_bot_y = self.state.x, self.state.y
        my_bot_x, my_bot_y = self.info.posx, self.info.posy
        n_action = self.ACTION_FREE
        gold_on_ground = self.myGetGoldAmount(my_bot_x,
                                              my_bot_y,
                                              self.initial_flag,
                                              are_we_here=True)
        energy = self.info.energy
        if gold_on_ground > 0:
            if energy <= 5:
                n_action = self.ACTION_FREE
            elif energy > 37.5:
                # Near-full energy: always worth digging.
                n_action = self.ACTION_CRAFT
            elif energy > (gold_on_ground / 50) * 5:
                # Enough energy to mine our whole share (5 energy per 50 gold).
                n_action = self.ACTION_CRAFT
            else:
                n_action = self.ACTION_FREE
        else:
            largest_gold_x, largest_gold_y = self.findLargestGold(
                self.initial_flag)
            target_x = largest_gold_x
            target_y = largest_gold_y
            # Repeatedly retarget to better piles found between us and the target.
            while True:
                tmp_x, tmp_y = self.findLargestGoldInSmallMap(
                    target_x, target_y)
                if (tmp_x is None) or (tmp_y is None):
                    break
                target_x = tmp_x
                target_y = tmp_y
            n_action = self.goToTarget(target_x, target_y)
        if self.initial_flag:
            self.initial_flag = False
        return n_action

    def new_game(self, data):
        """Initialize local state for a fresh game message."""
        try:
            self.isKeepFree = False  # vestigial: never read in this class
            self.state.init_state(data)
        except Exception as e:
            import traceback
            traceback.print_exc()

    def new_state(self, data):
        """Fold a per-step server message into the local state."""
        # action = self.next_action();
        # self.socket.send(action)
        try:
            self.state.update_state(data)
        except Exception as e:
            import traceback
            traceback.print_exc()

    def printInfo(self):
        # NOTE(review): estWood / pEnergyToStep / pStepToGold are never
        # assigned in this class — calling this would raise AttributeError;
        # likely copied from another bot. Confirm before using.
        print("G_BOT", self.info.playerId, self.estWood, self.pEnergyToStep,
              self.pStepToGold, self.info.score, self.info.energy)
class MinerEnv:
    """Socket-backed environment that converts the raw game state into a
    stacked 7-channel DQN observation and a shaped scalar reward."""

    def __init__(self, host, port):
        self.socket = GameSocket(host, port)
        self.state = State()
        self.energy_pre = self.state.energy
        self.score_pre = self.state.score  # Storing the last score for designing the reward function

    def start(self):  # connect to server
        self.socket.connect()

    def end(self):  # disconnect server
        self.socket.close()

    def send_map_info(self, request):  # tell server which map to run
        self.socket.send(request)

    def reset(self):  # start new game
        try:
            message = self.socket.receive()  # receive game info from server
            self.state.init_state(message)  # init state
        except Exception:
            import traceback
            traceback.print_exc()

    def step(self, action):  # step process
        self.socket.send(action)  # send action to server
        try:
            message = self.socket.receive()  # receive new state from server
            self.state.update_state(message)  # update to local state
        except Exception:
            import traceback
            traceback.print_exc()

    # Functions are customized by client
    def get_state(self):
        """Build the (7, W, H) observation tensor.

        Channels: 1 terrain/obstacles, 2 gold buckets, 3 own position,
        4-6 opponent positions, 7 normalized energy broadcast over the map.
        """
        width = self.state.mapInfo.max_x + 1
        height = self.state.mapInfo.max_y + 1

        # Channel 1: obstacle layout (0.05 = plain ground).
        channel_1 = np.full([width, height], 0.05)
        for i in range(width):
            for j in range(height):
                obs_id, val = self.state.mapInfo.get_obstacle(i, j)
                if obs_id == TreeID:  # Tree
                    channel_1[i, j] = 0.3
                if obs_id == TrapID:  # Trap
                    channel_1[i, j] = 0.6
                if obs_id == SwampID:  # Swamp
                    # BUGFIX: the original compared abs(val) to the negative
                    # constants -5 / -20, which can never be true, so swamps
                    # of depth 5 and 20 were encoded as plain ground.
                    if abs(val) == 5:
                        channel_1[i, j] = 0.2
                    if abs(val) == 20:
                        channel_1[i, j] = 0.4
                    if abs(val) > 20:
                        channel_1[i, j] = 0.8

        # Channel 2: gold amounts, bucketed into three magnitudes.
        channel_2 = np.full([width, height], 0.05)
        for i in range(width):
            for j in range(height):
                gold = self.state.mapInfo.gold_amount(i, j)
                if 0 < gold < 500:
                    channel_2[i, j] = 0.3
                if 900 > gold >= 500:
                    channel_2[i, j] = 0.6
                if gold >= 900:
                    channel_2[i, j] = 1

        # Channel 3: our agent's position, if inside the 21x9 board.
        # (Hoisted out of the original full-map double loop, which assigned
        # the same single cell W*H times.)
        channel_3 = np.full([width, height], 0.05)
        if self.state.x in range(21) and self.state.y in range(9):
            channel_3[self.state.x, self.state.y] = 1

        # Collect opponent positions.
        X = []
        Y = []
        for player in self.state.players:
            if player["playerId"] != self.state.id:
                X.append(player["posx"])
                Y.append(player["posy"])

        # Channels 4-6: one channel per opponent.  BUGFIX: guard the index so
        # games with fewer than three opponents no longer raise IndexError.
        opponent_channels = []
        for k in range(3):
            channel = np.full([width, height], 0.05)
            if k < len(X) and X[k] in range(21) and Y[k] in range(9):
                channel[X[k], Y[k]] = 1
            opponent_channels.append(channel)
        channel_4, channel_5, channel_6 = opponent_channels

        # Channel 7: normalized energy broadcast to every cell (equivalent to
        # the original per-cell assignment loop).
        channel_7 = np.full([width, height], float(self.state.energy / 50))

        DQNState = np.dstack([
            channel_1, channel_2, channel_3, channel_4, channel_5, channel_6,
            channel_7
        ])
        DQNState = np.rollaxis(DQNState, 2, 0)  # channels-first for the net
        return DQNState

    def get_reward(self):
        """Shaped reward for the last action, squashed with sign * log1p.

        Crafting is rewarded when it gained score and punished when it did
        not; standing on obstacles is punished per type; resting is mildly
        rewarded but punished when energy is already high; terminal
        eliminations add large penalties.
        """
        reward = 0
        score_action = self.state.score - self.score_pre
        self.score_pre = int(self.state.score)
        if score_action > 0 and self.state.lastAction == 5:
            reward += 6.25
        if score_action <= 0 and self.state.lastAction == 5:
            reward -= 2
        obs_id, value = self.state.mapInfo.get_obstacle(
            self.state.x, self.state.y)
        # Moving onto plain ground (not resting) earns a small bonus.
        if obs_id not in [1, 2, 3] and self.state.lastAction != 4:
            reward += 0.5
        if obs_id == TreeID:  # Tree
            reward -= 2
        if obs_id == TrapID:  # Trap
            reward -= 2
        if obs_id == SwampID:  # Swamp
            # BUGFIX: `abs(value) <= -5` could never be true; compare against
            # the positive magnitude so shallow swamps cost -0.5 as intended.
            if abs(value) <= 5:
                reward -= 0.5
            if 15 <= abs(value) <= 40:
                reward -= 4
            if abs(value) > 40:
                reward -= 6
        # Resting when energy is already high wastes a step.
        if self.state.lastAction == 4 and self.state.energy > 40:
            reward -= 4
        if self.state.lastAction == 4:
            reward += 1.75
        if self.state.status == State.STATUS_ELIMINATED_WENT_OUT_MAP:
            reward += -10
        if self.state.status == State.STATUS_ELIMINATED_OUT_OF_ENERGY:
            reward += -5
        # Compress the reward range while preserving its sign.
        return np.sign(reward) * np.log(1 + abs(reward))

    def check_terminate(self):
        # Checking the status of the game:
        # it indicates the game ends or is playing
        return self.state.status != State.STATUS_PLAYING
class MinerEnv:
    """Environment wrapper producing a 15-channel map tensor plus a flat
    per-player feature vector for the DQN agent.

    NOTE(review): this file defines `MinerEnv` more than once; when the
    module is executed the last definition wins and shadows earlier ones.
    """

    def __init__(self, host, port):
        self.socket = GameSocket(host, port)
        self.state = State()
        self.score_pre = self.state.score  # Storing the last score for designing the reward function
        self.energy_pre = self.state.energy
        #self.x_pre = self.state.x
        #self.y_pre = self.state.y

    def start(self):  # connect to server
        self.socket.connect()

    def end(self):  # disconnect server
        self.socket.close()

    def send_map_info(self, request):  # tell server which map to run
        self.socket.send(request)

    def reset(self):  # start new game
        try:
            message = self.socket.receive()  # receive game info from server
            self.state.init_state(message)  # init state
        except Exception as e:
            import traceback
            traceback.print_exc()

    def step(self, action):  # step process
        self.socket.send(action)  # send action to server
        try:
            message = self.socket.receive()  # receive new state from server
            self.state.update_state(message)  # update to local state
        except Exception as e:
            import traceback
            traceback.print_exc()

    # Functions are customized by client
    def get_state(self, remain_steps, initial_flag=False):
        """Build (map tensor, player feature vector) for the current step.

        remain_steps -- steps left in the episode (normalized by 100 below).
        initial_flag -- True for the very first observation, when opponents
        have no "energy"/"lastAction" fields yet and defaults are used.
        Also snapshots score/energy into *_pre for reward computation.
        """
        # update pre position, score, energy
        #self.x_pre = self.state.x
        #self.y_pre = self.state.y
        self.score_pre = self.state.score
        self.energy_pre = self.state.energy
        # depth = 3  # goal, min_energy, max_energy
        depth = 15  # goal, min_energy, max_energy, 4 player position
        # Channel indices of the (W, H, 15) map tensor.
        goal_depth = 0
        min_energy_depth = 1
        max_energy_depth = 2
        my_agent_depth = 3
        bot1_depth = 4
        bot2_depth = 5
        bot3_depth = 6
        goal_pos = 7
        tree_pos = 8
        trap_pos = 9
        swamp_pos_5 = 10
        swamp_pos_20 = 11
        swamp_pos_40 = 12
        swamp_pos_100 = 13
        ground_position = 14
        # len_player_infor = 6 * 4
        # Per-player feature width: energy+score (2), one-hot last action
        # including "none" (8), one-hot status (6).
        len_player_infor = 2 + 8 + 6
        # max_goal = 67 * 50 * 4  # assume 67 steps for mining and 33 steps for relaxing
        max_goal = 1250
        max_energy = 100
        # max_x = self.state.mapInfo.max_x
        # max_y = self.state.mapInfo.max_y
        max_player_energy = 50
        max_score = 3000
        # max_score = 67 * 50
        max_last_action = 6 + 1  # 1 because of None
        max_status = 5
        # Building the map
        view_1 = np.zeros([
            self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1, depth
        ],
                          dtype=float)
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                # ground
                view_1[i, j, min_energy_depth] = -1 / max_energy
                view_1[i, j, max_energy_depth] = -1 / max_energy
                view_1[i, j, ground_position] = 1
                goal = self.state.mapInfo.gold_amount(i, j)
                if goal > 0:
                    view_1[i, j, ground_position] = 0
                    view_1[i, j, goal_pos] = 1
                    view_1[i, j, min_energy_depth] = -4 / max_energy
                    view_1[i, j, max_energy_depth] = -4 / max_energy
                    view_1[i, j, goal_depth] = goal / max_goal
        # Obstacles overwrite the ground/gold defaults written above.
        for obstacle in self.state.mapInfo.obstacles:
            i = obstacle["posx"]
            j = obstacle["posy"]
            if obstacle["type"] == TreeID:  # Tree
                view_1[i, j, ground_position] = 0
                view_1[i, j, tree_pos] = 1
                view_1[i, j, min_energy_depth] = -5 / max_energy  # -5 ~ -20
                view_1[i, j, max_energy_depth] = -20 / max_energy  # -5 ~ -20
            elif obstacle["type"] == TrapID:  # Trap
                if obstacle["value"] != 0:
                    view_1[i, j, ground_position] = 0
                    view_1[i, j, trap_pos] = 1
                    view_1[i, j,
                           min_energy_depth] = obstacle["value"] / max_energy
                    view_1[i, j,
                           max_energy_depth] = obstacle["value"] / max_energy
            elif obstacle["type"] == SwampID:  # Swamp
                view_1[i, j, ground_position] = 0
                view_1[i, j, min_energy_depth] = obstacle[
                    "value"] / max_energy  # -5, -20, -40, -100
                view_1[i, j, max_energy_depth] = obstacle[
                    "value"] / max_energy  # -5, -20, -40, -100
                if obstacle["value"] == -5:
                    view_1[i, j, swamp_pos_5] = 1
                elif obstacle["value"] == -20:
                    view_1[i, j, swamp_pos_20] = 1
                elif obstacle["value"] == -40:
                    view_1[i, j, swamp_pos_40] = 1
                elif obstacle["value"] == -100:
                    view_1[i, j, swamp_pos_100] = 1
        """
        for goal in self.state.mapInfo.golds:
            i = goal["posx"]
            j = goal["posy"]
            view_1[i, j, min_energy_depth] = 4 / max_energy
            view_1[i, j, max_energy_depth] = 4 / max_energy
            view_1[i, j, goal_depth] = goal["amount"] / max_goal
        """
        # Add player's information
        view_2 = np.zeros([len_player_infor * 4 + 1],
                          dtype=float)  # +1 remaining steps
        index_player = 0
        # Slot 0 of view_2 is always our own agent.
        if (0 <= self.state.x <= self.state.mapInfo.max_x) and \
                (0 <= self.state.y <= self.state.mapInfo.max_y):
            view_1[self.state.x, self.state.y, my_agent_depth] = 1
        view_2[index_player * len_player_infor +
               0] = self.state.energy / max_player_energy
        view_2[index_player * len_player_infor +
               1] = self.state.score / max_score
        if self.state.lastAction is None:  # 0 step
            view_2[index_player * len_player_infor + 2 + max_last_action] = 1
        else:  # > 1 step
            view_2[index_player * len_player_infor + 2 +
                   self.state.lastAction] = 1
        view_2[index_player * len_player_infor + 2 + max_last_action + 1 +
               self.state.status] = 1
        # Slots 1..3 and channels 4..6 hold the opponents, in encounter order.
        bot_depth = my_agent_depth
        for player in self.state.players:
            if player["playerId"] != self.state.id:
                index_player += 1
                bot_depth += 1
                if (0 <= player["posx"] <= self.state.mapInfo.max_x) and \
                        (0 <= player["posy"] <= self.state.mapInfo.max_y):
                    if "energy" in player:  # > 1 step
                        if player["status"] == self.state.STATUS_PLAYING:
                            view_1[player["posx"], player["posy"],
                                   bot_depth] = 1
                        view_2[index_player * len_player_infor +
                               0] = player["energy"] / max_player_energy
                        view_2[index_player * len_player_infor +
                               1] = player["score"] / max_score
                        view_2[index_player * len_player_infor + 2 +
                               player["lastAction"]] = 1  # one hot
                        view_2[index_player * len_player_infor + 2 +
                               max_last_action + 1 + player["status"]] = 1
                    elif initial_flag:  # 0 step, initial state
                        view_1[player["posx"], player["posy"], bot_depth] = 1
                        view_2[index_player * len_player_infor +
                               0] = 50 / max_player_energy
                        view_2[index_player * len_player_infor +
                               1] = 0 / max_score
                        view_2[index_player * len_player_infor + 2 +
                               max_last_action] = 1  # one hot
                        view_2[index_player * len_player_infor + 2 +
                               max_last_action + 1 +
                               self.state.STATUS_PLAYING] = 1
        view_2[-1] = remain_steps / 100
        # Convert the DQNState from list to array for training
        DQNState_map = np.array(view_1)
        DQNState_users = np.array(view_2)
        return DQNState_map, DQNState_users

    def get_reward(self, num_of_wrong_relax, num_of_wrong_mining):
        """Sparse reward plus counters of wasted relax/mine actions.

        Returns (reward, num_of_wrong_relax, num_of_wrong_mining); the two
        counters are incremented (not rewarded) when the last action was a
        fruitless craft or an unnecessary rest.
        """
        # return -0.01 ~ 0.01
        # reward must target to mine goal
        max_reward = 50
        reward_died = -50  # ~ double max reward
        # reward_died = -25  # let a try
        reward_enter_goal = max_reward / 20  # 5
        # Calculate reward
        reward = 0
        # moving, because agent will die at the max step
        energy_action = self.state.energy - self.energy_pre  # < 0 if not relax
        score_action = self.state.score - self.score_pre  # >= 0
        if score_action > 0:
            reward = score_action / 2500  # max ~2500 / episode
        else:
            # moving
            #if int(self.state.lastAction) < 4:
            #    # enter gold
            #    if self.state.mapInfo.gold_amount(self.state.x, self.state.y) > 0:
            #        reward = reward_enter_goal / 2500
            # mining but cannot get gold
            if (int(self.state.lastAction) == 5) and (score_action == 0):
                # reward = reward_died / 10 / max_reward
                num_of_wrong_mining += 1
            # relax when energy > 40 or cannot get more energy
            elif int(self.state.lastAction) == 4:
                if self.energy_pre > 40 or energy_action == 0:
                    # reward = reward_died / 10 / max_reward
                    num_of_wrong_relax += 1
        # at gold but move to ground
        # if (int(self.state.lastAction) < 4) and (self.state.mapInfo.gold_amount(self.x_pre, self.y_pre) > 0) \
        #         and (self.state.mapInfo.gold_amount(self.state.x, self.state.y) == 0):
        #     reward = reward_died
        # relax when energy > 40
        #elif self.energy_pre > 40 and int(self.state.lastAction) == 4:
        #    reward = reward_died / 4
        # relax but cannot get more energy
        #elif int(self.state.lastAction) == 4 and energy_action == 0:
        #    reward = reward_died / 4
        # If out of the map, then the DQN agent should be punished by a larger negative reward.
        #if self.state.status == State.STATUS_ELIMINATED_WENT_OUT_MAP or self.state.status == State.STATUS_ELIMINATED_INVALID_ACTION:
        #    reward = reward_died / max_reward
        #elif self.state.status == State.STATUS_ELIMINATED_OUT_OF_ENERGY or self.state.status == State.STATUS_STOP_EMPTY_GOLD \
        #        or self.state.status == State.STATUS_STOP_END_STEP:
        if self.state.status != State.STATUS_PLAYING:
            if self.state.score == 0:
                reward = reward_died / max_reward  # -1
            if self.state.status == State.STATUS_ELIMINATED_WENT_OUT_MAP or self.state.status == State.STATUS_ELIMINATED_OUT_OF_ENERGY:
                reward = reward_died / max_reward  # -1
        # print ("reward",reward)
        #return reward / max_reward / self.state.mapInfo.maxStep  # 100 steps
        return reward, num_of_wrong_relax, num_of_wrong_mining

    def check_terminate(self):
        # Checking the status of the game
        # it indicates the game ends or is playing
        return self.state.status != State.STATUS_PLAYING
class Bot_newTD3:
    """Bot that picks actions with a pre-trained TD3 policy fed a local
    (2*limit+1)^2 map window plus position/energy/nearest-gold features."""

    ACTION_GO_LEFT = 0
    ACTION_GO_RIGHT = 1
    ACTION_GO_UP = 2
    ACTION_GO_DOWN = 3
    ACTION_FREE = 4
    ACTION_CRAFT = 5

    def __init__(self, id):
        self.state = State()
        self.info = PlayerInfo(id)
        self.limit = 2  # half-width of the square observation window
        # 25 window cells + (x, y, energy) + (dx, dy, amount) of best gold.
        state_dim = (2 * self.limit + 1)**2 + 3 + 3
        action_dim = 6
        max_action = 1.0
        # load model
        kwargs = {
            "state_dim": state_dim,
            "action_dim": action_dim,
            "max_action": max_action,
        }
        policy_file = "newTD3_Miner_0_2_get_state3"
        self.TreeID = 1
        self.TrapID = 2
        self.SwampID = 3
        self.policy = newTD3_bot.TD3(**kwargs)
        self.policy.load(f"./models_newTD3_2/{policy_file}")

    def next_action(self):
        """Query the TD3 policy on the current observation."""
        s = self.get_state2(self.limit)
        action, _ = self.policy.predict_action(s)
        return int(action)

    def get_state2(self, limit):
        """Build the policy input vector.

        Layout: flattened local window (obstacles as negative type ids, gold
        as amount/10), agent position relative to the window, energy, offset
        to the highest-scoring gold cell, and that cell's amount/10.
        """
        # Building the map
        view = np.zeros([limit * 2 + 1, limit * 2 + 1], dtype=int)
        max_x, max_y = self.state.mapInfo.max_x, self.state.mapInfo.max_y
        # Window bounds, shifted back inside the map near the borders so the
        # window always keeps its full size.
        xlimit_below = np.clip(self.info.posx - limit, 0, max_x) - np.clip(
            self.info.posx + limit - max_x, 0, limit)
        xlimit_up = np.clip(self.info.posx + limit, 0, max_x) + np.clip(
            0 - self.info.posx + limit, 0, limit)
        ylimit_below = np.clip(self.info.posy - limit, 0, max_y) - np.clip(
            self.info.posy + limit - max_y, 0, limit)
        ylimit_up = np.clip(self.info.posy + limit, 0, max_y) + np.clip(
            0 - self.info.posy + limit, 0, limit)
        #print(xlimit_below, xlimit_up, ylimit_below, ylimit_up, self.info.posx, self.info.posy)
        # Score every gold cell by -5*distance^2 + 0.1*amount; keep the best.
        dmax, m, n, exist_gold = -1000, -5, 0.1, False
        x_maxgold, y_maxgold = self.state.x, self.state.y
        for i in range(max_x + 1):
            for j in range(max_y + 1):
                if self.state.mapInfo.gold_amount(i, j) >= 50:
                    exist_gold = True
                    d = m * ((self.info.posx - i)**2 + (self.info.posy - j)**
                             2) + n * self.state.mapInfo.gold_amount(i, j)
                    if d > dmax:
                        dmax = d
                        x_maxgold, y_maxgold = i, j  # position of cell is nearest and much gold
                # Fill the local window while scanning the whole map.
                if i in range(xlimit_below, xlimit_up + 1) and j in range(
                        ylimit_below, ylimit_up + 1):
                    if self.state.mapInfo.get_obstacle(
                            i, j) == self.TreeID:  # Tree
                        view[i - xlimit_below, j - ylimit_below] = -self.TreeID
                    if self.state.mapInfo.get_obstacle(
                            i, j) == self.TrapID:  # Trap
                        view[i - xlimit_below, j - ylimit_below] = -self.TrapID
                    if self.state.mapInfo.get_obstacle(
                            i, j) == self.SwampID:  # Swamp
                        view[i - xlimit_below,
                             j - ylimit_below] = -self.SwampID
                    if self.state.mapInfo.gold_amount(i, j) > 0:
                        view[i - xlimit_below, j -
                             ylimit_below] = self.state.mapInfo.gold_amount(
                                 i, j) / 10
        DQNState = view.flatten().tolist(
        )  #Flattening the map matrix to a vector
        # Add position and energy of agent to the DQNState
        DQNState.append(self.info.posx - xlimit_below)
        DQNState.append(self.info.posy - ylimit_below)
        DQNState.append(self.info.energy)
        #Add position of bots
        # for player in self.state.players:
        #     if player["playerId"] != self.state.id:
        #         DQNState.append(player["posx"])
        #         DQNState.append(player["posy"])
        DQNState.append(self.info.posx - x_maxgold)
        DQNState.append(self.info.posy - y_maxgold)
        if exist_gold == False:
            DQNState.append(0)
        else:
            DQNState.append(
                self.state.mapInfo.gold_amount(x_maxgold, y_maxgold) / 10)
        #Convert the DQNState from list to array for training
        DQNState = np.array(DQNState)
        return DQNState

    def new_game(self, data):
        """Reset local state for a new game."""
        try:
            self.state.init_state(data)
        except Exception as e:
            import traceback
            traceback.print_exc()

    def new_state(self, data):
        """Apply a server state update to local state."""
        # action = self.next_action();
        # self.socket.send(action)
        try:
            self.state.update_state(data)
        except Exception as e:
            import traceback
            traceback.print_exc()
class MinerEnv:
    """Gym-style wrapper (reset/step -> state, reward, done, info) with a
    distance-to-gold shaped reward.

    NOTE(review): another `MinerEnv` definition in the same file; it shadows
    any earlier one when the module is executed.
    """

    def __init__(self, host, port):
        self.socket = GameSocket(host, port)
        self.state = State()
        # define action space
        # self.INPUT_DIM = (21, 9, 2)  # The number of input values for the DQN model
        self.INPUT_DIM = (21, 9)
        self.ACTIONNUM = 6  # The number of actions output from the DQN model
        # define state space
        self.gameState = None
        self.reward = 0
        self.terminate = False
        self.gold_map = None   # filled by get_state(); gold amount / 50 per cell
        self.dist_gold = None  # Manhattan distance to nearest gold, set in reset()
        self.score_pre = self.state.score  # Storing the last score for designing the reward function
        self.energy_pre = self.state.energy  # Storing the last energy for designing the reward function
        self.viewer = None
        self.steps_beyond_done = None

    def start(self):  # connect to server
        self.socket.connect()

    def end(self):  # disconnect server
        self.socket.close()

    def send_map_info(self, request):  # tell server which map to run
        self.socket.send(request)

    def reset(self):  # start new game
        # Choosing a map in the list
        # mapID = np.random.randint(1, 6)  # Choosing a map ID from 5 maps in Maps folder randomly
        mapID = 1
        posID_x = np.random.randint(
            MAP_MAX_X)  # Choosing a initial position of the DQN agent on
        # X-axes randomly
        posID_y = np.random.randint(
            MAP_MAX_Y
        )  # Choosing a initial position of the DQN agent on Y-axes randomly
        # Creating a request for initializing a map, initial position, the initial energy, and the maximum number of steps of the DQN agent
        request = ("map" + str(mapID) + "," + str(posID_x) + "," +
                   str(posID_y) + ",50,100")
        # Send the request to the game environment (GAME_SOCKET_DUMMY.py)
        self.send_map_info(request)
        # Initialize the game environment
        try:
            message = self.socket.receive()  #receive game info from server
            self.state.init_state(message)  #init state
        except Exception as e:
            import traceback
            traceback.print_exc()
        self.gameState = self.get_state()  # Get the state after resetting.
        # This function (get_state()) is an example of creating a state for the DQN model
        # Initial Manhattan distance from the agent to the nearest gold cell
        # (uses self.gold_map, which get_state() just refreshed).
        distance = 500
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                if self.gold_map[i][j] > 0:
                    distance_temp = abs(self.state.x - i) + abs(
                        self.state.y - j)  #- self.gold_map[i][j]
                    if distance > distance_temp:
                        distance = distance_temp
        self.dist_gold = distance
        self.score_pre = self.state.score  # Storing the last score for designing the reward function
        self.energy_pre = self.state.energy  # Storing the last energy for designing the reward function
        self.reward = 0  # The amount of rewards for the entire episode
        self.terminate = False  # The variable indicates that the episode ends
        self.steps_beyond_done = None
        return self.gameState

    def step(self, action):  # step process
        self.socket.send(str(action))  # send action to server
        try:
            message = self.socket.receive()  # receive new state from server
            self.state.update_state(message)  # update to local state
        except Exception as e:
            import traceback
            traceback.print_exc()
        self.gameState = self.get_state()
        self.reward = self.get_reward()
        done = self.check_terminate()
        # Gym-style 4-tuple; the info dict is unused.
        return self.gameState, self.reward, done, {}

    # Functions are customized by client
    def get_state(self):
        """Build a (W, H, 2) float32 view: layer 0 = terrain/gold, layer 1 =
        agent energy at its position.  Also refreshes self.gold_map."""
        # Building the map
        view = np.zeros(
            [self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1, 2],
            dtype="float32")
        self.gold_map = np.zeros(
            [self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1],
            dtype=int)
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                if self.state.mapInfo.get_obstacle(i, j) == TreeID:  # Tree
                    view[i, j, 0] = -20 * 1.0 / 20
                    # view[i, j, 0] = -TreeID
                elif self.state.mapInfo.get_obstacle(i, j) == TrapID:  # Trap
                    view[i, j, 0] = -10 * 1.0 / 20
                    # view[i, j, 0] = -TrapID
                elif self.state.mapInfo.get_obstacle(i,
                                                     j) == SwampID:  # Swamp
                    view[i, j, 0] = self.state.mapInfo.get_obstacle_value(
                        i, j) * 1.0 / 20
                    # view[i, j, 0] = -SwampID
                elif self.state.mapInfo.gold_amount(i, j) > 0:
                    view[i, j,
                         0] = self.state.mapInfo.gold_amount(i, j) * 1.0 / 100
                    self.gold_map[i, j] = self.state.mapInfo.gold_amount(
                        i, j) / 50
        # Only mark the agent while the game is still running (status 0).
        if self.state.status == 0:
            view[self.state.x, self.state.y, 1] = self.state.energy * 1.0 / 10
        # for player in self.state.players:
        #     if player["playerId"] != self.state.id:
        #         view[player["posx"], player["posy"], 1] -= 1
        #Convert the DQNState from list to array for training
        DQNState = np.array(view)
        return DQNState

    def get_reward(self):
        """Shaped reward: 2x score delta, plus progress toward the nearest
        gold cell, minus energy spent; elimination adds -50 each."""
        # Calculate reward
        # NOTE(review): a, e and e_pre are assigned but never used below.
        a = self.state.lastAction
        e = self.state.energy
        e_pre = self.energy_pre
        score_action = self.state.score - self.score_pre
        energy_consume = self.energy_pre - self.state.energy
        if energy_consume == 0:
            # No energy spent (e.g. blocked) is treated as a 20-point cost.
            energy_consume = 20
        elif energy_consume < 0:
            # Energy gained (resting) counts double as a negative cost.
            energy_consume = energy_consume * 2
        self.score_pre = self.state.score
        self.energy_pre = self.state.energy
        # calculate distance to gold
        distance = 500
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                if self.gold_map[i][j] > 0:
                    distance_temp = abs(self.state.x - i) + abs(
                        self.state.y - j)  # - self.gold_map[i][j]
                    if distance > distance_temp:
                        distance = distance_temp
        # Positive when the agent moved closer to gold since last step.
        move_distance = (self.dist_gold - distance)
        if move_distance > 0:
            score_distance = move_distance * 20
        elif move_distance < 0:
            score_distance = move_distance * 10
        else:
            score_distance = 0
        self.dist_gold = distance
        reward = score_action * 2 + score_distance - energy_consume
        # If out of the map, then the DQN agent should be punished by a larger nagative reward.
        if self.state.status == State.STATUS_ELIMINATED_WENT_OUT_MAP:
            reward += -50
        # Run out of energy, then the DQN agent should be punished by a larger nagative reward.
        if self.state.status == State.STATUS_ELIMINATED_OUT_OF_ENERGY:
            reward += -50
        # print ("reward",reward)
        return reward * 0.01

    def check_terminate(self):
        # Checking the status of the game
        # it indicates the game ends or is playing
        return self.state.status != State.STATUS_PLAYING

    def updateObservation(self):
        return

    def render(self, mode='human', close=False):
        return

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        raise NotImplementedError()

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        # Returns
            Returns the list of seeds used in this env's random number generators
        """
        raise NotImplementedError()

    def configure(self, *args, **kwargs):
        """Provides runtime configuration to the environment.

        This configuration should consist of data that tells your environment how to run (such as an address of
        a remote server, or path to your ImageNet data). It should not affect the semantics of the environment.
        """
        raise NotImplementedError()