                # if done:
                #     print('Ep: ', i_episode,
                #           '| Ep_r: ', round(ep_r, 2))


            # ----------  important data ----------------------
            # ------------- r_d and r_p

            # print('r: {:5.1f}, steps: {:3}, r_d: {:5.2f}, r_p: {:5.2f}, d/p: {:-8.3f}'.format( r, steps, r_d, r_p, d_p))

            if done or i == MAX_STEP - 1:

                step = i + 1  # number of steps actually taken this episode
                model.Rewards.append(ep_r)

                f = round(t1.now() / step, 3)  # average time per step ("fresh_speed")
                print(
                    'episode: {:4d}, reward: {:5.2f}, r_d: {:5.2f}, r_p: {:5.2f}, steps: {:6d}, fresh_speed: {}'.format(
                        i_episode,
                        ep_r,  # the {:5.2f} spec already rounds for display
                        sum(env.rewards_d), sum(env.rewards_p),
                        steps,
                        f,
                    ))
                # r = arg.reward_done
                # position_ = (0, 0)
                # s_ = preprocess_state(s_, position_, env)
                # if (arg.show_pre_image and cv_img(s_[-1])):    break_flag = 1; break
                # tt.sleep(0.5)
                break
            s = s_
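
# --- Sketch: the same done-or-max-step bookkeeping as a standalone helper.
# Everything below is illustrative: it assumes a Gym-style env (reset/step)
# and a model exposing choose_action() and a Rewards list, which the
# fragment above only hints at.
import time

def run_episode_sketch(env, model, max_step=1000):
    s = env.reset()
    ep_r = 0.0
    t0 = time.perf_counter()
    for i in range(max_step):
        s_, r, done, _ = env.step(model.choose_action(s))
        ep_r += r
        if done or i == max_step - 1:
            step = i + 1
            model.Rewards.append(ep_r)
            # seconds per step -- the quantity logged as "fresh_speed" above
            print('steps: {:d}, reward: {:.2f}, s/step: {:.3f}'.format(
                step, ep_r, (time.perf_counter() - t0) / step))
            break
        s = s_
    return ep_r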
    screen = torch.from_numpy(screen)   # numpy image (C, H, W) -> tensor
    screen = screen.unsqueeze(0)        # add a batch dimension -> (1, C, H, W)
    # back to HWC numpy layout, the shape matplotlib's imshow expects
    screen_np = screen.cpu().squeeze(0).permute(1, 2, 0).numpy()
    print(screen.shape)                 # a bare `.shape` expression is a no-op in a script
    print(screen_np.shape)
    # import matplotlib.pyplot as plt
    #
    # plt.imshow(screen_np)
    # plt.show()
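
    # --- Sketch: the same CHW -> HWC round trip on a dummy image, to make
    # the unsqueeze/squeeze/permute shape changes concrete (the random array
    # and its 3x84x84 size are illustrative only):
    import numpy as np
    dummy = np.random.rand(3, 84, 84).astype(np.float32)  # C, H, W
    t = torch.from_numpy(dummy).unsqueeze(0)              # -> (1, 3, 84, 84)
    hwc = t.squeeze(0).permute(1, 2, 0).numpy()           # -> (84, 84, 3) for imshow
    assert hwc.shape == (84, 84, 3)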

    dqn = DQN(h, w, 2)  # input height, input width, number of actions

    tt = Time()
    for i in range(100):
        dqn(screen)  # call the module itself, not .forward(), so nn.Module hooks run
    print(tt.now())
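
    # --- Sketch: Time()/tt.now() appears to be a project-local stopwatch
    # helper (an assumption); the same measurement with the standard library:
    import time
    t0 = time.perf_counter()
    with torch.no_grad():  # inference only, skip autograd bookkeeping
        for _ in range(100):
            dqn(screen)
    print(time.perf_counter() - t0, 'seconds for 100 forward passes')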

    print(screen.shape)
    xx = torch.unsqueeze(screen, 0)               # functional form of .unsqueeze(0)
    print(torch.cat([screen, screen], -1).shape)  # concatenate along the last (width) dim

    l_screen = [screen for _ in range(100)]  # 100 copies standing in for a batch

    # Accumulate into a single batch tensor; starting from None avoids the
    # bare try/except that was only there to handle the first iteration.
    end = None
    for screen0 in l_screen:
        screen0 = screen0.unsqueeze(0)  # add a leading batch dimension
        end = screen0 if end is None else torch.cat([end, screen0])
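
    # --- Sketch: the accumulation loop above collapses to one torch.stack
    # call, which inserts the new leading batch dimension itself:
    batch = torch.stack(l_screen)  # (100, 1, C, H, W): each screen already carries a batch dim
    assert batch.shape[0] == len(l_screen)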