# Creating a request for initializing a map, the initial position, the initial energy,
# and the maximum number of steps of the DQN agent
request = ("map" + str(mapID) + "," + str(posID_x) + "," + str(posID_y) + ",50,100")
# Send the request to the game environment (GAME_SOCKET_DUMMY.py)
minerEnv.send_map_info(request)

# Getting the initial state
minerEnv.reset()  # Initialize the game environment
s = minerEnv.get_state()  # Get the state after resetting.
                          # This function (get_state()) is an example of creating a state for the DQN model
total_reward = 0   # The total reward accumulated over the entire episode
terminate = False  # Indicates whether the episode has ended
maxStep = minerEnv.state.mapInfo.maxStep  # Get the maximum number of steps for each episode in training

# Start an episode for training
for step in range(0, maxStep):
    action = DQNAgent.act(s)  # Get an action from the DQN model for the state (s)
    minerEnv.step(str(action))  # Perform the action in order to obtain the new state
    s_next = minerEnv.get_state()  # Get the new state
    reward = minerEnv.get_reward()  # Get the reward
    terminate = minerEnv.check_terminate()  # Check whether the episode has ended

    # Add this transition to the replay memory
    memory.push(s, action, reward, terminate, s_next)

    # Sample from the replay memory to train the network
    if memory.length > INITIAL_REPLAY_SIZE:
        # If there are more than INITIAL_REPLAY_SIZE experiences in the replay memory,
        # then start replaying
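        # (Illustrative sketch only.) The replay code itself is not part of this
        # excerpt; a typical continuation would sample a mini-batch from the
        # replay memory and perform one training step. The names `memory.sample`,
        # `BATCH_SIZE`, and `DQNAgent.replay` below are assumptions, not
        # necessarily the project's actual MemoryBuffer/DQNModel API.
        batch = memory.sample(BATCH_SIZE)   # random mini-batch of stored transitions
        DQNAgent.replay(batch, BATCH_SIZE)  # one gradient update of the Q-network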
total_reward = 0   # The total reward accumulated over the entire episode
terminate = False  # Indicates whether the episode has ended
maxStep = minerEnv.state.mapInfo.maxStep  # Get the maximum number of steps for each episode in training
                                          # Here: maxStep = 100 (line 64 of GAME_SOCKET_DUMMY.py)
average_loss = 0

# Start an episode for training
for step in range(0, maxStep):
    # s.shape = (MAP_MAX_X, MAP_MAX_Y); each element is a score value, e.g. 0, -1, 100, 250, -3
    if args.load_model != "":
        action = np.argmax(DQNAgent.model.predict(s.reshape(1, len(s))))
    else:
        # _id = np.random.randint(1, 5)
        # if _id == 1:
        #     action = DQNAgent.act(s)  # Get an action from the DQN model for the state (s)
        # else:
        #     action = bots[_id - 2].act_sample(s)
        action = DQNAgent.act(s)  # action is an int, e.g. action = 3
    minerEnv.step(str(action))  # Perform the action in order to obtain the new state
    s_next = minerEnv.get_state2(limit)  # Get the new state; s_next has the same structure as s
    reward = minerEnv.get_reward()  # Get the reward (an int, e.g. 0)
    terminate = minerEnv.check_terminate()  # Check whether the episode has ended (True or False)

    # Add this transition to the replay memory
    memory.push(s, action, reward, terminate, s_next)

    # Sample from the replay memory to train the network
    if memory.length > INITIAL_REPLAY_SIZE:
        # If there are more than INITIAL_REPLAY_SIZE experiences in the replay memory,
        # then start replaying
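        # (Illustrative sketch only.) The replay step is cut off here as well.
        # Written out with the Bellman target, one update over a sampled
        # mini-batch could look roughly like the following; `memory.sample`,
        # `BATCH_SIZE`, `GAMMA`, and the Keras-style model calls are assumptions
        # about code that is not shown in this excerpt.
        s_b, a_b, r_b, done_b, s_next_b = memory.sample(BATCH_SIZE)  # same field order as memory.push
        q_next = DQNAgent.model.predict(s_next_b)   # Q(s', .) for every transition in the batch
        targets = DQNAgent.model.predict(s_b)       # start from the current predictions
        for i in range(BATCH_SIZE):
            # TD target: r if the episode ended, otherwise r + GAMMA * max_a' Q(s', a')
            targets[i, a_b[i]] = r_b[i] if done_b[i] else r_b[i] + GAMMA * np.max(q_next[i])
        history = DQNAgent.model.fit(s_b, targets, epochs=1, verbose=0)
        average_loss += history.history["loss"][0]  # accumulate the loss for this episode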