Esempio n. 1
0
            update_times += 1
            ### copy value net parameters to target net ###
            if update_times % NET_COPY_STEP == 0:
                target_net.load_state_dict(value_net.state_dict())

    torch.save(value_net.state_dict(), PATH)


if __name__ == '__main__':
    # Script entry point: prepare plotting, then start training via learn().

    # The IPython display helper is only imported when matplotlib reports an
    # "inline" backend; both names stay at module level, as before.
    backend_name = matplotlib.get_backend()
    is_ipython = 'inline' in backend_name
    if is_ipython:
        from IPython import display

    # Switch plotting to interactive mode before the training run begins.
    plt.ion()

    # Every argument handed to learn(), collected in one place. Keys are the
    # keyword names learn() is called with; insertion order matches the
    # original call so the environment is still constructed first.
    training_config = {
        'env': env.Env(),
        'MAX_EPISODE': 2000000,
        'EPS_START': 0.9,
        'EPS_END': 0.05,
        'EPS_DECAY': 200,
        'ACTION_NUM': 6,
        'REPLAY_MEMORY_CAPACITY': 10000,
        'BATCH_SIZE': 32,
        # Loss and optimizer are passed as classes, not instances.
        'LOSS_FUNCTION': nn.SmoothL1Loss,
        'OPTIM_METHOD': optim.Adam,
        'LEARNING_RATE': 1e-4,
        'GAMMA': 0.99,
        'NET_COPY_STEP': 1000,
        'OBSERVE': 10000,
        'TRAIN_FREQ': 4,
        'PATH': 'net_param.pt',
    }
    learn(**training_config)
Esempio n. 2
0
# -*- coding: utf-8 -*-

from data import env
import cv2

# NOTE(review): this rebinds the name `env` from the imported `data.env`
# module to an Env instance, so the module itself is no longer reachable
# under that name after this line. main() uses `env` as the instance.
env = env.Env()

# presumably the reward discount factor — not used in the visible code; TODO confirm
dis = 0.9
# presumably the replay-buffer capacity — not used in the visible code; TODO confirm
REPLAY_MEMORY = 50000


def main():

    max_episodes = 1500
    for episode in range(max_episodes):
        done = False
        step_count = 0
        env.reset()
        obs, _, _, _, _, _, _ = env.step(0)
        cv2.imshow('mario', obs)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        while not done:
            state, reward, done, s4, s5, s6, s7 = env.step(11)
            # 0 next_state:{ndarray} shape (90,90)
            # 1 reward:{int}
            # 2 done:{bool}
            # 3 state_clear:{bool}
            # 4 max_x:{int}
            # 5 time_out:{bool}
            # 6 now_x:{int}