Example #1
        #Creating a request to initialize the map: map ID, the agent's initial position, its initial energy (50 here), and the maximum number of steps per episode (100 here)
        request = ("map" + str(mapID) + "," + str(posID_x) + "," +
                   str(posID_y) + ",50,100")
        #Send the request to the game environment (GAME_SOCKET_DUMMY.py)
        minerEnv.send_map_info(request)
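        # e.g. with mapID = 1, posID_x = 2, posID_y = 3 the request string is "map1,2,3,50,100"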

        # Getting the initial state
        minerEnv.reset()  #Initialize the game environment
        s = minerEnv.get_state()  #Get the state after resetting.
        #This function (get_state()) is an example of creating a state for the DQN model
        total_reward = 0  #The amount of rewards for the entire episode
        terminate = False  #The variable indicates that the episode ends
        maxStep = minerEnv.state.mapInfo.maxStep  #Get the maximum number of steps for each episode in training
        #Start an episode for training
        for step in range(0, maxStep):
            action = DQNAgent.act(s)  # Get an action from the DQN model for the state (s)
            minerEnv.step(str(action))  # Perform the action in order to obtain the new state
            s_next = minerEnv.get_state()  # Get the new state
            reward = minerEnv.get_reward()  # Get the reward
            terminate = minerEnv.check_terminate()  # Check the end status of the episode

            # Add this transition to the memory batch
            memory.push(s, action, reward, terminate, s_next)

            # Sample a batch from the memory to train the network
            if memory.length > INITIAL_REPLAY_SIZE:
                #If there are INITIAL_REPLAY_SIZE experiences in the memory batch,
                #start replaying (the sampling and replay calls below are assumed
                #from context)
                batch = memory.sample(BATCH_SIZE)
                DQNAgent.replay(batch, BATCH_SIZE)
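
The Memory object used above is defined elsewhere in the training client. As a
point of reference, a minimal replay buffer compatible with the push/length/sample
usage in this example could look like the sketch below; the class internals here
are an assumption, not the competition's actual implementation.

import random
from collections import deque

class Memory:
    """A bounded FIFO buffer of (s, action, reward, terminate, s_next) transitions."""

    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)  # oldest transitions drop off first

    @property
    def length(self):
        # Number of stored transitions, checked against INITIAL_REPLAY_SIZE above
        return len(self.buffer)

    def push(self, state, action, reward, terminate, next_state):
        # Store one transition from the environment step
        self.buffer.append((state, action, reward, terminate, next_state))

    def sample(self, batch_size):
        # Uniformly sample a batch of transitions for replay
        return random.sample(self.buffer, batch_size)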
Example #2

        total_reward = 0  #The amount of rewards for the entire episode
        terminate = False  #The variable indicates that the episode ends
        maxStep = minerEnv.state.mapInfo.maxStep  #Get the maximum number of steps for each episode in training
        average_loss = 0
        #Here maxStep = 100 (set at line 64 of GAME_SOCKET_DUMMY.py)
        #Start an episode for training
        for step in range(0, maxStep):
            #s.shape = (MAP_MAX_X, MAP_MAX_Y); each element is a score value, e.g. 0, -1, 100, 250, -3
            if args.load_model != "":
                # Greedy action: pick the index of the largest predicted Q-value
                action = np.argmax(DQNAgent.model.predict(s.reshape(1,len(s))))
            else:
                # _id = np.random.randint(1,5)
                # if _id == 1:
                #     action = DQNAgent.act(s)  # Getting an action from the DQN model from the state (s)
                # else: action = bots[_id-2].act_sample(s)
                action = DQNAgent.act(s)
            #action is an int, e.g. action = 3
            minerEnv.step(str(action))  # Performing the action in order to obtain the new state
            s_next = minerEnv.get_state2(limit)  # Getting a new state
            #s_next has the same form as s
            reward = minerEnv.get_reward()  # Getting a reward
            #reward is an int, e.g. 0
            terminate = minerEnv.check_terminate()  # Checking the end status of the episode
            #terminate is True or False
            # Add this transition to the memory batch
            memory.push(s, action, reward, terminate, s_next)

            # Sample a batch from the memory to train the network
            if memory.length > INITIAL_REPLAY_SIZE:
                #If there are INITIAL_REPLAY_SIZE experiences in the memory batch,
                #start replaying (the sampling and replay calls below are assumed
                #from context)
                batch = memory.sample(BATCH_SIZE)
                DQNAgent.replay(batch, BATCH_SIZE)
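
Both examples hand the sampled batch to DQNAgent.replay, which is defined
elsewhere. As a rough illustration of what such a replay step typically does,
assuming a Keras-style model with one Q-value output per action (this sketch is
not the competition's actual implementation), it could look like:

import numpy as np

def replay(model, batch, batch_size, gamma=0.99):
    # Unpack the list of (s, action, reward, terminate, s_next) tuples into arrays
    states, actions, rewards, terminates, next_states = map(np.array, zip(*batch))
    # Current Q-value estimates, and bootstrap values from the next states
    q_values = model.predict(states, verbose=0)
    q_next = model.predict(next_states, verbose=0)
    for i in range(batch_size):
        target = rewards[i]
        if not terminates[i]:
            # Standard Q-learning target: r + gamma * max_a' Q(s', a')
            target += gamma * np.max(q_next[i])
        q_values[i, int(actions[i])] = target
    # One gradient step toward the updated targets
    model.fit(states, q_values, epochs=1, verbose=0)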