Example #1
import argparse
import logging

import gym
import jiminy  # Universe fork; importing it registers the wob.* envs and wrappers
from lib import wob_vnc  # project helper module (assumed importable from this repo)

REMOTES_COUNT = 4  # number of parallel WoB containers (value assumed for this excerpt)

logging.basicConfig()
logger = logging.getLogger(__name__)


def main():
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        dest='verbosity',
                        default=0,
                        help='Set verbosity.')
    args = parser.parse_args()

    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    env = gym.make('wob.mini.ClickDialog-v0')
    env = jiminy.wrappers.experimental.SoftmaxClickMouse(env)
    env = wob_vnc.MiniWoBCropper(env)
    wob_vnc.configure(env,
                      wob_vnc.remotes_url(port_ofs=0,
                                          hostname='0.0.0.0',
                                          count=REMOTES_COUNT)
                      )  # automatically creates a local docker container

    observation_n = env.reset()
    idx = 0
    while True:
        # your agent here
        #
        # Try sending this instead of a random action: ('KeyEvent', 'ArrowUp', True)
        action_n = [env.action_space.sample() for _ in observation_n]
        observation_n, reward_n, done_n, info = env.step(action_n)
        # each iteration steps every remote once, hence the multiplier on idx
        print("steps: {}, reward: {}".format(idx * REMOTES_COUNT, reward_n))
        idx += 1
    return 0


if __name__ == "__main__":
    main()
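
For context, wob_vnc.remotes_url builds the Universe remotes spec that configure consumes: one host:vnc_port+rewarder_port entry per container. A minimal sketch of such a helper, assuming the conventional VNC/rewarder port bases of 5900/15900 (the real lib/wob_vnc.py may differ):

def remotes_url(port_ofs=0, hostname='localhost', count=4):
    # one remote per container: VNC on 5900+i, rewarder on 15900+i (assumed bases)
    hosts = ["%s:%d+%d" % (hostname, 5900 + ofs, 15900 + ofs)
             for ofs in range(port_ofs, port_ofs + count)]
    return "vnc://" + ",".join(hosts)

print(remotes_url(hostname='0.0.0.0', count=2))
# vnc://0.0.0.0:5900+15900,0.0.0.0:5901+15901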
Example #2
    writer = SummaryWriter(comment="-wob_click_" + name)
    saves_path = os.path.join(SAVES_DIR, name)
    os.makedirs(saves_path, exist_ok=True)

    # optionally pre-load human demonstrations; they are mixed into training
    demo_samples = None
    if args.demo:
        demo_samples = vnc_demo.load_demo(args.demo, env_name)
        if not demo_samples:
            demo_samples = None
        else:
            print("Loaded %d demo samples, will use them during training" % len(demo_samples))

    env = gym.make(env_name)
    env = universe.wrappers.experimental.SoftmaxClickMouse(env)
    env = wob_vnc.MiniWoBCropper(env)
    wob_vnc.configure(env, wob_vnc.remotes_url(port_ofs=args.port_ofs, hostname=args.host, count=REMOTES_COUNT))

    net = model_vnc.Model(input_shape=wob_vnc.WOB_SHAPE, n_actions=env.action_space.n)
    if args.cuda:
        net.cuda()
    print(net)
    optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3)

    # net(x) returns (policy_logits, value); the agent only needs the policy head
    agent = ptan.agent.PolicyAgent(lambda x: net(x)[0], cuda=args.cuda,
                                   apply_softmax=True)
    exp_source = ptan.experience.ExperienceSourceFirstLast(
        [env], agent, gamma=GAMMA, steps_count=REWARD_STEPS, vectorized=True)

    best_reward = None
    with common.RewardTracker(writer) as tracker:
        with ptan.common.utils.TBMeanTracker(writer, batch_size=10) as tb_tracker:
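
The listing is cut off inside the two tracker context managers. In a ptan-based A2C setup like this one, what typically follows is a batch-collection loop along these lines; BATCH_SIZE and unpack_batch are assumptions for the sketch, not part of the excerpt:

            batch = []
            for step_idx, exp in enumerate(exp_source):
                batch.append(exp)

                # report rewards of finished episodes to the tracker
                new_rewards = exp_source.pop_total_rewards()
                if new_rewards:
                    tracker.reward(new_rewards[0], step_idx)

                if len(batch) < BATCH_SIZE:
                    continue

                # turn the collected transitions into tensors and do one update
                states_v, actions_t, vals_ref_v = unpack_batch(batch, net, cuda=args.cuda)
                batch.clear()
                optimizer.zero_grad()
                # ... policy-gradient, value and entropy losses are computed here ...
                optimizer.step()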
Example #3
    # accept a short name like "ClickDialog-v0" and expand it to the full env id
    env_name = args.env
    if not env_name.startswith('wob.mini.'):
        env_name = "wob.mini." + env_name

    name = env_name.split('.')[-1] + "_" + args.name
    writer = SummaryWriter(comment="-wob_click_mm_" + name)
    saves_path = os.path.join(SAVES_DIR, name)
    os.makedirs(saves_path, exist_ok=True)

    env = gym.make(env_name)
    env = universe.wrappers.experimental.SoftmaxClickMouse(env)
    env = wob_vnc.MiniWoBCropper(env, keep_text=True)  # keep the instruction text, not just pixels
    wob_vnc.configure(
        env,
        wob_vnc.remotes_url(port_ofs=args.port_ofs,
                            hostname=REMOTES_HOST,
                            count=REMOTES_COUNT))

    net = model_vnc.ModelMultimodal(input_shape=wob_vnc.WOB_SHAPE,
                                    n_actions=env.action_space.n)
    if args.cuda:
        net.cuda()
    print(net)
    optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3)

    # the preprocessor converts raw (image, text) observations into model inputs
    preprocessor = model_vnc.MultimodalPreprocessor()
    agent = ptan.agent.PolicyAgent(lambda x: net(x)[0],
                                   cuda=args.cuda,
                                   apply_softmax=True,
                                   preprocessor=preprocessor)
    exp_source = ptan.experience.ExperienceSourceFirstLast(
        [env], agent, gamma=GAMMA, steps_count=REWARD_STEPS, vectorized=True)
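
The preprocessor argument is the main difference from the plain click model: PolicyAgent calls it on the raw observation batch before the forward pass. A minimal sketch of that contract, with illustrative names (the real MultimodalPreprocessor does more, e.g. tokenizing the text for the model's embedding layer):

import numpy as np
import torch

class ImageTextPreprocessor:
    """Illustrative stand-in: names and details are assumed, not from the excerpt."""
    def __init__(self, tokenizer):
        self.tokenizer = tokenizer  # callable mapping a string to a list of token ids

    def __call__(self, observations):
        # each observation is an (image, text) pair, as produced by
        # MiniWoBCropper(env, keep_text=True)
        images = np.array([obs[0] for obs in observations], dtype=np.float32)
        tokens = [self.tokenizer(obs[1]) for obs in observations]
        # the model is then expected to accept (image_batch, token_lists)
        return torch.tensor(images), tokens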