import csv
import os
import time
from collections import deque

import numpy as np
import torch
import torch.nn.functional as F
from torch.autograd import Variable

# ActorCritic, ACTIONS, create_mario_env and x_norm are project-specific and
# assumed to be defined or imported elsewhere in this module.


def test(rank, args, shared_model, counter):
    torch.manual_seed(args.seed + rank)

    FloatTensor = torch.cuda.FloatTensor if args.use_cuda else torch.FloatTensor
    DoubleTensor = torch.cuda.DoubleTensor if args.use_cuda else torch.DoubleTensor
    ByteTensor = torch.cuda.ByteTensor if args.use_cuda else torch.ByteTensor

    env = create_mario_env(args.env_name)
    # TODO: implement a Monitor wrapper that is compatible with env.change_level
    # expt_dir = 'video'
    # env = wrappers.Monitor(env, expt_dir, force=True, video_callable=lambda count: count % 10 == 0)
    env.seed(args.seed + rank)

    model = ActorCritic(env.observation_space.shape[0], len(ACTIONS))
    if args.use_cuda:
        model.cuda()
    model.eval()

    state = env.reset()
    state = torch.from_numpy(state)
    reward_sum = 0
    done = True

    savefile = os.getcwd() + '/save/mario_curves.csv'
    title = ['Time', 'No. Steps', 'Total Reward', 'Episode Length']
    with open(savefile, 'a', newline='') as sfile:
        writer = csv.writer(sfile)
        writer.writerow(title)

    start_time = time.time()

    # a quick hack to prevent the agent from getting stuck
    actions = deque(maxlen=4000)
    episode_length = 0
    while True:
        episode_length += 1
        ep_start_time = time.time()

        # Sync with the shared model at the start of every episode
        if done:
            model.load_state_dict(shared_model.state_dict())
            # volatile=True is the pre-0.4 PyTorch way of disabling autograd at inference time
            cx = Variable(torch.zeros(1, 512), volatile=True).type(FloatTensor)
            hx = Variable(torch.zeros(1, 512), volatile=True).type(FloatTensor)
        else:
            cx = Variable(cx.data, volatile=True).type(FloatTensor)
            hx = Variable(hx.data, volatile=True).type(FloatTensor)

        state_inp = Variable(state.unsqueeze(0), volatile=True).type(FloatTensor)
        value, logit, (hx, cx) = model((state_inp, (hx, cx)))
        prob = F.softmax(logit, dim=-1)
        action = prob.max(-1, keepdim=True)[1].data
        action_out = ACTIONS[action][0, 0]
        # print("Process: Test Action: {}".format(str(action_out)))

        state, reward, done, _ = env.step(action_out)
        env.render()
        done = done or episode_length >= args.max_episode_length
        reward_sum += reward

        # a quick hack to prevent the agent from getting stuck:
        # end the episode if the same action has been repeated for the whole window
        actions.append(action[0, 0])
        if actions.count(actions[0]) == actions.maxlen:
            done = True

        if done:
            print("Time {}, num steps {}, FPS {:.0f}, episode reward {}, episode length {}".format(
                time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - start_time)),
                counter.value, counter.value / (time.time() - start_time),
                reward_sum, episode_length))

            data = [time.time() - ep_start_time, counter.value, reward_sum, episode_length]
            with open(savefile, 'a', newline='') as sfile:
                writer = csv.writer(sfile)
                writer.writerows([data])

            reward_sum = 0
            episode_length = 0
            actions.clear()
            time.sleep(60)

            env.locked_levels = [False] + [True] * 31
            env.change_level(0)
            state = env.reset()

        state = torch.from_numpy(state)
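# --- Hedged usage sketch (an assumption, not part of the original file) ---
# In the usual A3C layout this test() worker runs in its own process next to
# the training workers, reading the shared model and the global step counter.
# ActorCritic, ACTIONS and create_mario_env come from the surrounding project;
# using rank == args.num_processes for the test process is only a convention.
def _launch_test_worker(args):
    import torch.multiprocessing as mp

    env = create_mario_env(args.env_name)
    shared_model = ActorCritic(env.observation_space.shape[0], len(ACTIONS))
    shared_model.share_memory()      # make parameters visible to all processes
    counter = mp.Value('i', 0)       # global step counter, incremented by trainers

    p = mp.Process(target=test, args=(args.num_processes, args, shared_model, counter))
    p.start()
    return p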
def test(rank, args, shared_model, counter):
    torch.manual_seed(args.seed + rank)

    FloatTensor = torch.FloatTensor  # torch.cuda.FloatTensor if args.use_cuda else torch.FloatTensor
    DoubleTensor = torch.DoubleTensor  # torch.cuda.DoubleTensor if args.use_cuda else torch.DoubleTensor
    ByteTensor = torch.ByteTensor  # torch.cuda.ByteTensor if args.use_cuda else torch.ByteTensor

    env = create_mario_env(args.env_name, args.reward_type)
    # TODO: implement a Monitor wrapper that is compatible with env.change_level
    # expt_dir = 'video'
    # env = wrappers.Monitor(env, expt_dir, force=True, video_callable=lambda count: count % 10 == 0)
    # env.seed(args.seed + rank)

    model = ActorCritic(env.observation_space.shape[0], len(ACTIONS))
    model.eval()

    state = env.reset()
    state = torch.from_numpy(state)
    reward_sum = 0
    done = True

    savefile = os.getcwd() + '/save/scmemi_' + args.reward_type + '/mario_curves.csv'
    title = ['Time', 'No. Steps', 'Total Reward', 'final_position', 'Episode Length']
    with open(savefile, 'a', newline='') as sfile:
        writer = csv.writer(sfile)
        writer.writerow(title)

    start_time = time.time()

    # quick hacks to prevent the agent from getting stuck
    actions = deque(maxlen=400)
    positions = deque(maxlen=400)
    episode_length = 0
    while True:
        episode_length += 1
        ep_start_time = time.time()

        # Sync with the shared model at the start of every episode
        if done:
            model.load_state_dict(shared_model.state_dict())
            cx = torch.zeros(1, 512).type(FloatTensor)
            hx = torch.zeros(1, 512).type(FloatTensor)
        else:
            cx = cx.detach()
            hx = hx.detach()

        # torch.no_grad() replaces the deprecated volatile=True / Variable API
        with torch.no_grad():
            state_inp = state.unsqueeze(0).type(FloatTensor)
            value, logit, (hx, cx) = model((state_inp, (hx, cx)))
        prob = F.softmax(logit, dim=-1)
        action = prob.max(-1, keepdim=True)[1].data
        action_out = int(action[0, 0].numpy())

        state, reward, done, info = env.step(action_out)
        # env.render()
        done = done or episode_length >= args.max_episode_length
        reward_sum += reward

        # a quick hack to prevent the agent from getting stuck:
        # end the episode if the same action has been repeated for the whole window
        actions.append(action[0, 0])
        if actions.count(actions[0]) == actions.maxlen:
            done = True
            print('action')  # debug: repeated-action stuck detector fired

        # optional second stuck detector: end the episode if the agent's x position
        # has stayed within +/- 20 of its latest value for at least 200 steps
        if args.pos_stuck:
            positions.append(info['x_pos'])
            pos_ar = np.array(positions)
            if (len(positions) >= 200) and (pos_ar < pos_ar[-1] + 20).all() and (pos_ar > pos_ar[-1] - 20).all():
                done = True

        if done:
            print("Time {}, num steps {}, FPS {:.0f}, episode reward {:.3f}, distance covered {:.3f}, episode length {}".format(
                time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - start_time)),
                counter.value, counter.value / (time.time() - start_time),
                reward_sum, info['x_pos'] / x_norm, episode_length))

            data = [time.time() - ep_start_time, counter.value, reward_sum,
                    info['x_pos'] / x_norm, episode_length]
            with open(savefile, 'a', newline='') as sfile:
                writer = csv.writer(sfile)
                writer.writerows([data])

            reward_sum = 0
            episode_length = 0
            actions.clear()
            positions.clear()
            time.sleep(60)

            # env.locked_levels = [False] + [True] * 31
            # env.change_level(0)
            state = env.reset()

        state = torch.from_numpy(state)
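# --- Hedged sketch (an assumption, not part of the original file) ---
# The curves file appended to above can be read back for quick inspection.
# The path mirrors `savefile` in test(); pandas and matplotlib are assumed to
# be available even though they are not used elsewhere in this module. Because
# test() re-appends the header row on every run, stale header rows are
# filtered out before plotting.
def _plot_curves(reward_type):
    import pandas as pd
    import matplotlib.pyplot as plt

    path = os.getcwd() + '/save/scmemi_' + reward_type + '/mario_curves.csv'
    df = pd.read_csv(path)
    df = df[df['No. Steps'] != 'No. Steps']  # drop repeated header rows
    steps = df['No. Steps'].astype(float)
    reward = df['Total Reward'].astype(float)

    plt.plot(steps, reward)
    plt.xlabel('No. Steps')
    plt.ylabel('Total Reward')
    plt.show()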