def main():
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity',
                        default=0, help='Set verbosity.')
    args = parser.parse_args()
    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    env = gym.make('wob.mini.ClickDialog-v0')
    env = jiminy.wrappers.experimental.SoftmaxClickMouse(env)
    env = wob_vnc.MiniWoBCropper(env)
    # automatically creates a local docker container
    wob_vnc.configure(env, wob_vnc.remotes_url(
        port_ofs=0, hostname='0.0.0.0', count=REMOTES_COUNT))

    observation_n = env.reset()
    idx = 0
    while True:
        # your agent here
        #
        # Try sending this instead of a random action:
        # ('KeyEvent', 'ArrowUp', True)
        action_n = [env.action_space.sample() for ob in observation_n]
        observation_n, reward_n, done_n, info = env.step(action_n)
        print("idx: {}, reward: {}".format(idx * REMOTES_COUNT, reward_n))
        idx += 1
    return 0
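The listing above is an excerpt: it assumes module-level imports, a logger, and a REMOTES_COUNT constant defined elsewhere in the script. A minimal sketch of that scaffolding, where the wob_vnc import path and the REMOTES_COUNT value are assumptions rather than part of the original:

# Hypothetical scaffolding for the excerpt above; the marked names are
# assumptions, not part of the original listing.
import argparse
import logging

import gym
import jiminy.wrappers.experimental

from lib import wob_vnc  # assumption: project-local helper module

logging.basicConfig()
logger = logging.getLogger(__name__)

REMOTES_COUNT = 4  # assumption: number of docker-backed env instances

if __name__ == "__main__":
    main()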
writer = SummaryWriter(comment="-wob_click_" + name)
saves_path = os.path.join(SAVES_DIR, name)
os.makedirs(saves_path, exist_ok=True)

# optionally load human demonstrations to mix into training
demo_samples = None
if args.demo:
    demo_samples = vnc_demo.load_demo(args.demo, env_name)
    if not demo_samples:
        demo_samples = None
    else:
        print("Loaded %d demo samples, will use them during training" %
              len(demo_samples))

env = gym.make(env_name)
env = universe.wrappers.experimental.SoftmaxClickMouse(env)
env = wob_vnc.MiniWoBCropper(env)
wob_vnc.configure(env, wob_vnc.remotes_url(
    port_ofs=args.port_ofs, hostname=args.host, count=REMOTES_COUNT))

net = model_vnc.Model(input_shape=wob_vnc.WOB_SHAPE,
                      n_actions=env.action_space.n)
if args.cuda:
    net.cuda()
print(net)

optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3)
agent = ptan.agent.PolicyAgent(lambda x: net(x)[0], cuda=args.cuda,
                               apply_softmax=True)
exp_source = ptan.experience.ExperienceSourceFirstLast(
    [env], agent, gamma=GAMMA, steps_count=REWARD_STEPS, vectorized=True)

best_reward = None
with common.RewardTracker(writer) as tracker:
    with ptan.common.utils.TBMeanTracker(writer, batch_size=10) as tb_tracker:
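For orientation: net(x) is assumed to return a (policy_logits, value) pair, so the lambda hands ptan's PolicyAgent only the policy head, and apply_softmax=True makes the agent convert those logits into probabilities before sampling one action per vectorized environment instance. A rough sketch of that selection step (conceptual, not ptan's actual code):

import numpy as np
import torch.nn.functional as F

def select_actions(policy_logits):
    # logits -> probabilities (what apply_softmax=True does), then
    # sample one action index per row, i.e. one per environment
    probs = F.softmax(policy_logits, dim=1).detach().cpu().numpy()
    return np.array([np.random.choice(len(p), p=p) for p in probs])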
# normalize the environment name and build a run name for artifacts
env_name = args.env
if not env_name.startswith('wob.mini.'):
    env_name = "wob.mini." + env_name
name = env_name.split('.')[-1] + "_" + args.name

writer = SummaryWriter(comment="-wob_click_mm_" + name)
saves_path = os.path.join(SAVES_DIR, name)
os.makedirs(saves_path, exist_ok=True)

env = gym.make(env_name)
env = universe.wrappers.experimental.SoftmaxClickMouse(env)
# keep_text=True: observations carry the instruction text alongside the image
env = wob_vnc.MiniWoBCropper(env, keep_text=True)
wob_vnc.configure(env, wob_vnc.remotes_url(
    port_ofs=args.port_ofs, hostname=REMOTES_HOST, count=REMOTES_COUNT))

net = model_vnc.ModelMultimodal(input_shape=wob_vnc.WOB_SHAPE,
                                n_actions=env.action_space.n)
if args.cuda:
    net.cuda()
print(net)

optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3)
preprocessor = model_vnc.MultimodalPreprocessor()
agent = ptan.agent.PolicyAgent(lambda x: net(x)[0], cuda=args.cuda,
                               apply_softmax=True,
                               preprocessor=preprocessor)
exp_source = ptan.experience.ExperienceSourceFirstLast(
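Because keep_text=True makes the cropper emit (image, text) observation pairs, they can no longer be stacked straight into a tensor; the preprocessor passed to PolicyAgent handles that conversion before the forward pass. A minimal sketch of the idea, assuming such pairs (the original MultimodalPreprocessor presumably also tokenizes and embeds the text):

import numpy as np
import torch

def preprocess(observations):
    # split the (image, text) pairs: stack images into a float tensor,
    # keep instruction strings aside for tokenization/embedding (elided)
    images = np.stack([obs[0] for obs in observations])
    texts = [obs[1] for obs in observations]
    return torch.tensor(images, dtype=torch.float32), texts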