# Command-line options: the directory holding recorded demos, the environment
# they belong to, and the filename prefix for the screenshots written below.
parser = argparse.ArgumentParser()
parser.add_argument(
    "-d", "--demo", required=True,
    help="Dir name to scan for demos")
parser.add_argument(
    "-e", "--env", default=DEFAULT_ENV,
    help="Environment name to load, default=" + DEFAULT_ENV)
parser.add_argument(
    "-o", "--output", required=True,
    help="Output prefix to save images")
args = parser.parse_args()

demo = vnc_demo.load_demo(args.demo, args.env, read_text=True)
print("Loaded %d demo samples" % len(demo))

# In this fragment the wrapped environment is only used for its `_points`
# table, indexed by the demo's action (presumably the click coordinates of
# each discrete action -- confirm against SoftmaxClickMouse).
env = universe.wrappers.experimental.SoftmaxClickMouse(gym.make(args.env))

# Write one image per demo sample and echo its filename and text.
for sample_no, sample in enumerate(demo):
    (frame, caption), action_idx = sample
    out_name = "%s_%04d.png" % (args.output, sample_no)
    click_xy = env._points[action_idx]
    wob_vnc.save_obs(frame, out_name, click_xy)
    print(out_name, caption)
# NOTE: `env` and `args` are created earlier in the script, above this fragment.
wob_vnc.configure(env, REMOTE_ADDR)

net = model_vnc.ModelMultimodal(input_shape=wob_vnc.WOB_SHAPE,
                                n_actions=env.action_space.n)
if args.model:
    net.load_state_dict(torch.load(args.model))
    # The preprocessor file name is derived from the model path by replacing
    # its last four characters with ".pre".
    preprocessor = model_vnc.MultimodalPreprocessor.load(
        args.model[:-4] + ".pre")
else:
    preprocessor = model_vnc.MultimodalPreprocessor()

env.reset()

for round_idx in range(args.count):
    # The first action of every round is sampled uniformly from the action
    # space; later actions are drawn from the network's policy.
    action = env.action_space.sample()
    step_idx = 0
    while True:
        obs, reward, done, info, idle_count = step_env(env, action)
        print(step_idx, reward, done, idle_count)
        img_name = "%s_r%02d_s%04d_%.3f_i%02d_d%d.png" % (
            args.name, round_idx, step_idx, reward, idle_count, int(done))
        obs_v = preprocessor([obs])
        # Only the first element of the network output is used here
        # (presumably the policy logits -- confirm against ModelMultimodal).
        logits_v = net(obs_v)[0]
        # Pass `dim` explicitly: calling softmax without it relies on the
        # deprecated implicit-dimension choice and triggers a warning.
        # dim=1 normalises over the action axis of the batched logits.
        policy = F.softmax(logits_v, dim=1).data.numpy()[0]
        action = np.random.choice(len(policy), p=policy)
        # The screenshot is annotated with the action chosen for the NEXT step.
        wob_vnc.save_obs(obs[0], img_name, action=action)
        step_idx += 1
        # A round ends on episode termination or on any non-zero reward.
        if done or reward != 0:
            print("Round %d done" % round_idx)
            break
# Wrap the existing environment: the click-mouse wrapper goes on first, then
# the MiniWoB cropper (configured to keep the text part of the observation).
env = wob_vnc.MiniWoBCropper(
    universe.wrappers.experimental.SoftmaxClickMouse(env),
    keep_text=True)
wob_vnc.configure(env, REMOTE_ADDR)

net = model_vnc.ModelMultimodal(
    input_shape=wob_vnc.WOB_SHAPE, n_actions=env.action_space.n)
if not args.model:
    # No saved model given: run the untrained network with a fresh preprocessor.
    preprocessor = model_vnc.MultimodalPreprocessor()
else:
    net.load_state_dict(torch.load(args.model))
    # Preprocessor file: model path with its last four characters replaced
    # by ".pre".
    pre_path = args.model[:-4] + ".pre"
    preprocessor = model_vnc.MultimodalPreprocessor.load(pre_path)

env.reset()

for round_no in range(args.count):
    # Each round is kicked off with a randomly sampled action.
    action = env.action_space.sample()
    step_no = 0
    round_over = False
    while not round_over:
        obs, reward, done, _info, idle_count = step_env(env, action)
        print(step_no, reward, done, idle_count)
        name_fields = (
            args.name, round_no, step_no, reward, idle_count, int(done))
        shot_name = "%s_r%02d_s%04d_%.3f_i%02d_d%d.png" % name_fields

        # Sample the next action from the network's softmax policy.
        batch_v = preprocessor([obs])
        logits_v = net(batch_v)[0]
        probs_v = F.softmax(logits_v, dim=1)
        probs = probs_v.data.numpy()[0]
        action = np.random.choice(len(probs), p=probs)

        wob_vnc.save_obs(obs[0], shot_name, action=action)
        step_no += 1
        # The round finishes on termination or on any non-zero reward.
        round_over = bool(done or reward != 0)
    print("Round %d done" % round_no)
# NOTE(review): the next three statements are the end of a definition that
# begins above the visible part of the file; they are kept exactly as found
# and belong at that definition's original indentation.
discr2 = vnc_demo.mouse_to_action(x, y)
assert discr == discr2
pass


if __name__ == "__main__":
    # test_mouse_coords()

    # Command-line options: demo directory, environment name and the
    # filename prefix for the screenshots written below.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d", "--demo", required=True,
        help="Dir name to scan for demos")
    parser.add_argument(
        "-e", "--env", default=DEFAULT_ENV,
        help="Environment name to load, default=" + DEFAULT_ENV)
    parser.add_argument(
        "-o", "--output", required=True,
        help="Output prefix to save images")
    args = parser.parse_args()

    demo = vnc_demo.load_demo(args.demo, args.env, read_text=True)
    print("Loaded %d demo samples" % len(demo))

    # Here the wrapped environment is only consulted for its `_points`
    # table, which is indexed by the demo's action.
    env = universe.wrappers.experimental.SoftmaxClickMouse(
        gym.make(args.env))

    # Save one image per demo sample and print its filename and text.
    for sample_no, sample in enumerate(demo):
        (frame, caption), action_idx = sample
        out_name = "%s_%04d.png" % (args.output, sample_no)
        click_xy = env._points[action_idx]
        wob_vnc.save_obs(frame, out_name, click_xy)
        print(out_name, caption)