Example no. 1
class Environment:
    def __init__(self):
        self.env = Tetris()
        # self.num_states = self.env.observation_space.shape[0]
        self.num_states = 211  # <= 20 * 11 + 1 (the currently falling block)
        # self.num_actions = self.env.action_space.n
        self.num_actions = 40  # <= [north, east, south, west] * 10

        self.agent = Agent(self.num_states, self.num_actions)
Example no. 2
def test(opt):
    '''Run the trained model in the Tetris environment and record the game as a video.'''
    if torch.cuda.is_available():
        # Seed CUDA's RNG so the test run is reproducible on the GPU.
        torch.cuda.manual_seed(125)
    else:
        # No GPU: seed PyTorch's CPU random number generator instead.
        torch.manual_seed(125)
    if torch.cuda.is_available():
        # Load the trained Tetris model from the saved-models folder.
        testing_model = torch.load("{}/tetris".format(opt.saved_path))
    else:
        # No GPU: remap tensors that were saved on the GPU onto CPU storage.
        testing_model = torch.load(
            "{}/tetris".format(opt.saved_path),
            map_location=lambda storage, loc: storage)
    testing_model.eval()  # switch the model to evaluation mode
    # Build the Tetris environment with the width, height and block size from the parser.
    environment = Tetris(
        width=opt.width, height=opt.height, block_size=opt.block_size)
    environment.reset()
    if torch.cuda.is_available():
        testing_model.cuda()  # move the model to the GPU
    # Record the run with OpenCV's VideoWriter: FMP4 codec, fps from the parser,
    # and a frame 1.5 times the board width in pixels (height is the board height).
    output_testing_video = cv2.VideoWriter(
        opt.result, cv2.VideoWriter_fourcc(*'FMP4'), opt.fps,
        (int(1.5 * opt.width * opt.block_size), opt.height * opt.block_size))
    while True:
        # Enumerate every placement the current piece allows.
        next_steps = environment.get_next_states()
        # Split the {action: state} mapping into parallel tuples.
        next_actions, next_states = zip(*next_steps.items())
        next_states = torch.stack(next_states)  # stack the candidate states into one tensor
        if torch.cuda.is_available():
            next_states = next_states.cuda()  # move the candidate states to the GPU
        # Score every candidate state with the trained network.
        preds = testing_model(next_states)[:, 0]
        idx = torch.argmax(preds).item()  # index of the highest-scoring placement
        a = next_actions[idx]  # the corresponding action
        # Apply the action, render it with OpenCV and append the frame to the video.
        _, done = environment.make_step(a, cv2_rend=True, video=output_testing_video)
        if done:
            output_testing_video.release()  # finalise the recorded video
            break
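
For completeness, test() expects an opt namespace carrying the attributes it reads above (width, height, block_size, fps, saved_path, result). A minimal sketch using argparse, with purely illustrative default values:

import argparse

def build_test_args():
    # Hypothetical defaults; only the attribute names come from test() above.
    parser = argparse.ArgumentParser("Test a trained Tetris deep Q-network")
    parser.add_argument("--width", type=int, default=10)
    parser.add_argument("--height", type=int, default=20)
    parser.add_argument("--block_size", type=int, default=30)
    parser.add_argument("--fps", type=int, default=300)
    parser.add_argument("--saved_path", type=str, default="trained_models")
    parser.add_argument("--result", type=str, default="output.mp4")
    return parser.parse_args()

if __name__ == "__main__":
    test(build_test_args())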
Example no. 3
class Environment:
    def __init__(self):
        self.env = Tetris()
        # self.num_states = self.env.observation_space.shape[0]
        self.num_states = 211  # <= 20 * 11 + 1 (the currently falling block)
        # self.num_actions = self.env.action_space.n
        self.num_actions = 40  # <= [north, east, south, west] * 10

        self.agent = Agent(self.num_states, self.num_actions)

    def run(self):
        episode_10_list = np.zeros(10)

        complete_episodes = 0  # number of trials whose score reached TARGET_SCORE or more
        episode_final = False
        # frames = []

        for episode in range(NUM_EPISODES):
            am, field = self.env.reset()
            observation = make_observation(am, field)
            state = torch.from_numpy(observation).type(torch.FloatTensor)
            state = torch.unsqueeze(state, 0)

            sum_reward = 0
            sum_density_reward = 0

            for step in range(MAX_STEPS):
                action = self.agent.get_action(state, episode)
                am, field, step_reward, done = self.env.step(action.item())
                observation_next = make_observation(am, field)

                # step_reward mixes two signals: its integer part is the real game score,
                # its fractional part is the density reward; floor division separates them.
                real_score = step_reward // 1
                sum_reward += real_score
                sum_density_reward += step_reward - real_score

                if done:
                    state_next = None

                    episode_10_list = np.hstack(
                        (episode_10_list[1:], sum_reward))

                    if step < (MAX_STEPS - 1):  # topped out before the step limit
                        reward = torch.FloatTensor([-10.0])
                    else:
                        reward = torch.FloatTensor(
                            [float(step_reward)])  # reward as usual

                    if sum_reward < TARGET_SCORE:
                        complete_episodes = 0
                    else:
                        complete_episodes += 1

                else:
                    reward = torch.FloatTensor([float(step_reward)])
                    state_next = torch.from_numpy(observation_next).type(
                        torch.FloatTensor)
                    state_next = torch.unsqueeze(state_next, 0)

                self.agent.memorize(state, action, state_next, reward)
                self.agent.update_q_function()
                state = state_next

                if done:
                    print(
                        f"{episode} Episode: Finished with score {sum_reward} ; density_score {sum_density_reward / 14} : average SCORE over the last 10 trials = {episode_10_list.mean():.1f}"
                    )
                    break

            if episode_final is True:
                print("episode final's score:", sum_reward)
                break

            if complete_episodes >= 10:
                print(f"10回連続{TARGET_SCORE}点越え")
                episode_final = True
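
Example no. 3 assumes that NUM_EPISODES, MAX_STEPS, TARGET_SCORE, Agent, Tetris and make_observation are defined or imported in the same module; under that assumption, launching it is simply:

if __name__ == "__main__":
    Environment().run()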
Example no. 4
def train(opt):
    '''Train the deep Q-network on the Tetris environment.'''
    if torch.cuda.is_available():
        # Seed CUDA's RNG so the training run is reproducible on the GPU.
        torch.cuda.manual_seed(125)
    else:
        # No GPU: seed PyTorch's CPU random number generator instead.
        torch.manual_seed(125)
    if os.path.isdir(opt.log_path):
        # Remove any previous logs from the log_path directory.
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)  # recreate an empty log directory
    new_writer2 = SummaryWriter(opt.log_path)  # TensorBoard summary writer on log_path
    # Build the Tetris environment with the width, height and block size from the parser.
    environment = Tetris(
        width=opt.width, height=opt.height, block_size=opt.block_size)
    deepQ_model = DeepQNetwork()  # the deep Q-network defined earlier
    # Adam optimizer over the network parameters, with the learning rate from the parser.
    my_optim = torch.optim.Adam(deepQ_model.parameters(), lr=opt.lr)
    cn = nn.MSELoss()  # mean squared error, i.e. ((input - target) ** 2).mean()
    state = environment.reset()  # initial state from a freshly reset environment
    if torch.cuda.is_available():
        deepQ_model.cuda()  # move the model to the GPU
        state = state.cuda()  # move the state to the GPU
    # Replay memory: a deque that drops its oldest transitions once mem_size is reached.
    r_memory = deque(maxlen=opt.mem_size)
    epoch = 0
    # Record training with OpenCV's VideoWriter: FMP4 codec, fps from the parser,
    # and a frame 1.5 times the board width in pixels (height is the board height).
    output_training_video = cv2.VideoWriter(
        opt.result, cv2.VideoWriter_fourcc(*'FMP4'), opt.fps,
        (int(1.5 * opt.width * opt.block_size), opt.height * opt.block_size))
    while epoch < opt.num_epochs:  # run until num_epochs learning updates have been made

        # Enumerate every placement the current piece allows.
        next_steps = environment.get_next_states()
        # Epsilon-greedy exploration: epsilon anneals linearly from initialEpsilon down to
        # finalEpsilon over decay_epochs epochs, then stays at finalEpsilon.
        epsilon = opt.finalEpsilon + (
            max(opt.decay_epochs - epoch, 0) *
            (opt.initialEpsilon - opt.finalEpsilon) / opt.decay_epochs)
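        # For example (hypothetical values, not taken from this code): with initialEpsilon = 1.0,
        # finalEpsilon = 0.001 and decay_epochs = 2000, epsilon is 1.0 at epoch 0, about 0.5 at
        # epoch 1000, and stays at 0.001 from epoch 2000 onwards.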
        pp = random()  # uniform random number in [0, 1)
        rand_action = pp <= epsilon  # explore with probability epsilon
        # Split the {action: state} mapping into parallel tuples.
        nextActions, next_states = zip(*next_steps.items())
        next_states = torch.stack(next_states)  # stack the candidate states into one tensor
        if torch.cuda.is_available():
            next_states = next_states.cuda()  # move the candidate states to the GPU
        deepQ_model.eval()  # evaluation mode while scoring the candidates
        with torch.no_grad():
            # no_grad() disables autograd here, which saves memory and speeds up inference.
            dqm_p = deepQ_model(next_states)[:, 0]  # predicted value of each candidate state
        deepQ_model.train()  # switch back to training mode
        if rand_action:
            # Exploration: pick a random placement.
            idx = randint(0, len(next_steps) - 1)
        else:
            # Exploitation: pick the placement with the highest predicted value.
            idx = torch.argmax(dqm_p).item()
        next_state = next_states[idx, :]  # the state reached by the chosen placement
        action = nextActions[idx]  # the corresponding action
        # Apply the action; cv2_rend=True renders the move with OpenCV for visualization.
        reward, done = environment.make_step(action, cv2_rend=True)
        if torch.cuda.is_available():
            next_state = next_state.cuda()  # move the next state to the GPU
        # Store the transition in the replay memory.
        r_memory.append([state, reward, next_state, done])
        if done:  # the game has ended
            output_training_video.release()
            episode_durations.append(epoch + 1)
            #plot_durations()
            final_total_score = environment.player_score  # final score of the finished game
            tot_reward.append(final_total_score)
            plot_reward()
            final_total_blocks = environment.tetris_blocks  # pieces placed in the finished game
            final_total_completed_lines = environment.completed_lines  # lines cleared in the finished game
            state = environment.reset()  # start a new game
            if torch.cuda.is_available():
                state = state.cuda()  # move the new state to the GPU
        else:
            state = next_state  # keep playing from the new state
            continue
        if len(r_memory) < opt.mem_size / 10:
            # Do not start learning until the replay memory holds at least a tenth of mem_size.
            continue
        epoch += 1
        # Sample a mini-batch of transitions from the replay memory.
        batch = sample(r_memory, min(len(r_memory), opt.mini_batch_size))
        # Unzip the transitions into parallel tuples of states, rewards, next states and done flags.
        stateBatch, batchReward, nextB_state, completed_batch = zip(*batch)
        stateBatch = torch.stack(tuple(state for state in stateBatch))
        batchReward = torch.from_numpy(
            np.array(batchReward, dtype=np.float32)[:, None])
        nextB_state = torch.stack(tuple(state for state in nextB_state))
        if torch.cuda.is_available():
            stateBatch = stateBatch.cuda()  # move the state batch to the GPU
            batchReward = batchReward.cuda()  # move the reward batch to the GPU
            nextB_state = nextB_state.cuda()  # move the next-state batch to the GPU
        q_values = deepQ_model(stateBatch)  # predicted values of the sampled states
        deepQ_model.eval()  # evaluation mode while computing the targets
        with torch.no_grad():
            # no_grad() disables autograd here, which saves memory and speeds up inference.
            nextPred_batch = deepQ_model(nextB_state)  # predicted values of the next states
        deepQ_model.train()  # switch back to training mode
        # One-step targets: the raw reward for terminal transitions, otherwise the reward
        # plus the discounted predicted value of the next state.
        batch_Y = torch.cat(
            tuple(reward if done else reward + opt.gamma * prediction
                  for reward, done, prediction in zip(
                      batchReward, completed_batch, nextPred_batch)))[:, None]
        my_optim.zero_grad()  # reset the gradients before this mini-batch update
        loss = cn(q_values, batch_Y)  # MSE between the predicted values and the targets
        loss.backward()  # backpropagate: compute dloss/dx for every parameter with requires_grad=True
        my_optim.step()  # update the parameters using the current gradients
        print(
            "Epoch Num: {}/{}, Action: {}, Score: {}, TPieces {}, Cleared lines: {}"
            .format(epoch, opt.num_epochs, action, final_total_score,
                    final_total_blocks, final_total_completed_lines))
        # Log the score, the number of pieces and the cleared lines to TensorBoard.
        new_writer2.add_scalar('Train/Score', final_total_score, epoch - 1)
        new_writer2.add_scalar('Train/TPieces', final_total_blocks, epoch - 1)
        new_writer2.add_scalar('Train/Cleared lines', final_total_completed_lines,
                               epoch - 1)
        if epoch > 0 and epoch % opt.store_interval == 0:
            # Periodically checkpoint the model to the trained-models folder.
            torch.save(deepQ_model, "{}/tetris_{}".format(opt.saved_path, epoch))
    # Save the final trained model to the trained-models folder from the parser.
    torch.save(deepQ_model, "{}/tetris".format(opt.saved_path))
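
As a reading aid, the target stored in batch_Y above is the standard one-step return for this value-based setup. A minimal, hypothetical helper (not part of the original code) expressing the same rule for a single transition:

def td_target(reward, done, prediction, gamma):
    # Terminal transitions keep the raw reward; non-terminal ones bootstrap with the
    # discounted value the network predicts for the next state.
    return reward if done else reward + gamma * prediction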