# NOTE(review): the statements down to `torch.save(...)` are the tail of a
# function whose header lies outside this chunk. The original line was
# whitespace-collapsed, so their true nesting depth is not recoverable from
# here; re-indent them to match the enclosing body when merging.
update_times += 1

### copy value net parameters to target net ###
if update_times % NET_COPY_STEP == 0:
    target_net.load_state_dict(value_net.state_dict())

# NOTE(review): presumably this save runs after training rather than inside
# the `if` above -- confirm against the original layout.
torch.save(value_net.state_dict(), PATH)


if __name__ == '__main__':
    # set up matplotlib
    is_ipython = 'inline' in matplotlib.get_backend()
    if is_ipython:
        from IPython import display

    plt.ion()

    # Call learn() with the run configuration, one keyword per line so each
    # setting can be changed or diffed on its own.
    learn(
        env=env.Env(),
        MAX_EPISODE=2000000,
        EPS_START=0.9,
        EPS_END=0.05,
        EPS_DECAY=200,
        ACTION_NUM=6,
        REPLAY_MEMORY_CAPACITY=10000,
        BATCH_SIZE=32,
        LOSS_FUNCTION=nn.SmoothL1Loss,
        OPTIM_METHOD=optim.Adam,
        LEARNING_RATE=1e-4,
        GAMMA=0.99,
        NET_COPY_STEP=1000,
        OBSERVE=10000,
        TRAIN_FREQ=4,
        PATH='net_param.pt',
    )
# -*- coding: utf-8 -*-
from data import env
import cv2

# NOTE: this rebinds the name `env` from the imported module to an Env
# instance; main() below relies on the instance through this global.
env = env.Env()

dis = 0.9
REPLAY_MEMORY = 50000


def main():
    """Run the environment for a fixed number of episodes.

    For each episode: reset the environment, take one step with action 0,
    display the returned frame in a window titled 'mario' until a key is
    pressed, then keep stepping with action 11 until the environment
    reports the episode as done.
    """
    max_episodes = 1500

    for episode in range(max_episodes):
        done = False
        step_count = 0

        env.reset()

        # Only the first element of the 7-tuple (the frame) is needed here.
        obs, _, _, _, _, _, _ = env.step(0)

        # Show the frame; waitKey(0) blocks until a key is pressed.
        cv2.imshow('mario', obs)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

        while not done:
            state, reward, done, s4, s5, s6, s7 = env.step(11)
            # 0 next_state:{ndarray} shape (90,90)
            # 1 reward:{int}
            # 2 done:{bool}
            # 3 state_clear:{bool}
            # 4 max_x:{int}
            # 5 time_out:{bool}
            # 6 now_x:{int}