Example #1
def env(self):
    """Pytest fixture: set up a configured MiniWoB environment, tear it down after the test."""
    env = Environment.make('miniwob', self.TASK_NAME)
    base_url = os.environ.get('MINIWOB_BASE_URL')
    print('BASE URL:', base_url)
    env.configure(num_instances=3, seeds=[1, 2, 'hello'], base_url=base_url)
    yield env
    env.close()
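This is a yield-style pytest fixture: everything before the yield is setup, and env.close() after it is teardown. A minimal sketch of a test class that would consume it; the task name and assertion are illustrative assumptions, not from the original suite:

import pytest

class TestMiniWoBTask(object):
    TASK_NAME = 'click-button'  # hypothetical task name

    # ... the env fixture from Example #1 goes here,
    # decorated with @pytest.fixture ...

    def test_reset_returns_one_state_per_instance(self, env):
        states = env.reset()
        assert len(states) == 3  # matches num_instances=3 above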
Example #2
import os
import sys

from wge.environment import Environment

def test_environment():
    try:
        task_name = sys.argv[1]
    except IndexError:
        print('Usage: python {} TASK_NAME'.format(sys.argv[0]))
        sys.exit(1)
    env = Environment.make('miniwob', task_name)
    base_url = os.environ.get('MINIWOB_BASE_URL')
    env.configure(num_instances=1, seeds=[0], base_url=base_url)
    states = env.reset()
    print(states[0].dom.visualize())
    env.close()
Example #3
import os
import sys

def extract_utterances():
    try:
        task_name = sys.argv[1]
    except IndexError:
        print('Usage: {} task_name'.format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)
    from wge.environment import Environment
    # FIELD_EXTRACTORS and Fields come from wge's MiniWoB fields module;
    # registering an extractor that returns no fields lets any task load.
    FIELD_EXTRACTORS[task_name] = lambda utt: Fields({})
    env = Environment.make('miniwob', task_name)
    base_url = os.environ.get('MINIWOB_BASE_URL')
    env.configure(num_instances=4, seeds=list(range(4)), base_url=base_url)
    for _ in range(25):
        states = env.reset()
        for state in states:
            print('UTT:\t{}'.format(state.utterance.replace('\n', ' ')))
    env.close()
Example #4
def env(self):
    """Pytest fixture: configuration depends on how fragile the task is."""
    env = Environment.make('miniwob', self.TASK_NAME)
    base_url = os.environ.get('MINIWOB_BASE_URL')
    print('BASE URL:', base_url)
    if self.FRAGILE is True:
        env.configure(base_url=base_url,
                      num_instances=1,
                      seeds=[1],
                      wait_ms=300)
    elif self.FRAGILE == 'instance':
        env.configure(base_url=base_url, num_instances=1, seeds=[1])
    elif self.FRAGILE == 'delay':
        env.configure(base_url=base_url,
                      num_instances=3,
                      seeds=list(range(3)),
                      wait_ms=1000)
    else:
        env.configure(base_url=base_url, num_instances=3, seeds=list(range(3)))
    yield env
    env.close()
Example #5
    def step(self, actions):
        """Apply a batch of actions to the environment instances.

        Args:
            actions (list[list[VNCEvent]]): a batch of VNCEvent sequences.
                An empty sequence means no actions.

        Returns:
            states (list[dict]):
                dict['text'] (dict)
                dict['vision'] (np.ndarray)
            rewards (list[float])
            dones (list[bool]): once `done` is True, further actions on that
                instance will give undefined results.
            info (dict): additional debug information.
                Global debug information is directly in the root level
                Local information for instance i is in info['n'][i]
        """
        return self.env.step(actions)

    def close(self):
        self.env.close()

    @property
    def num_instances(self):
        return self._num_instances


if __name__ == '__main__':
    import time  # needed for the polling loop below

    env = Environment.make(domain='formwob', subdomain='Delta-v0')
    env.configure()
    while True:
        observation, _, done, info = env.step([])
        print(observation)
        time.sleep(1)
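Per the docstring above, step() returns one entry per instance in each of states, rewards, and dones, and once an instance's done flag is True its further results are undefined. A minimal polling loop under those assumptions; the empty action batch mirrors the __main__ block above:

states = env.reset()
dones = [False] * env.num_instances
while not all(dones):
    # An empty batch sends no actions; we just poll each instance.
    states, rewards, dones, info = env.step([])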
Example #6
    def __init__(self, config, save_dir):
        super(MiniWoBTrainingRun, self).__init__(config, save_dir)
        self.workspace.add_dir('traces_replay', join('traces', 'replay'))
        self.workspace.add_file('traces_demo', join('traces', 'demo-parse-log.txt'))

        # need to make sure that these times coincide
        assert config.log.trace_evaluate % config.log.evaluate == 0
        assert (config.log.explore % config.explore.program == 0 or
                config.log.explore % config.explore.neural == 0)
        assert config.log.replay % config.train.replay == 0
        assert config.log.trace_replay % config.log.replay == 0
        assert config.log.trace_explore % config.log.explore == 0

        # construct environment
        Episode.configure(config.discount_negative_reward)
        env = Environment.make(config.env.domain, config.env.subdomain)  # TODO: Refactor into a get_environment
        env.configure(
            num_instances=config.env.num_instances,
            seeds=range(config.env.num_instances),
            headless=config.env.headless,
            base_url=os.environ.get("MINIWOB_BASE_URL"),
            cache_state=False,  # never cache state
            reward_processor=get_reward_processor(config.env.reward_processor),
            wait_ms=config.env.wait_ms,
            block_on_reset=config.env.block_on_reset,
            refresh_freq=config.env.refresh_freq,
        )
        self._env = env

        # construct episode generators
        self._basic_episode_generator = BasicEpisodeGenerator(self._env,
                                        config.explore.max_steps_per_episode,
                                        config.log.visualize_attention)

        def state_equality_checker(s1, s2):
            """Compare two State objects."""
            r1 = s1.dom.visualize() if s1 else None
            r2 = s2.dom.visualize() if s2 else None
            # TODO(kelvin): better equality check
            return r1 == r2

        # construct episode logger
        trace_dir = join(self.workspace.root, 'traces')
        self._episode_logger = EpisodeLogger(trace_dir, self.tb_logger,
                                             self.metadata)

        # construct replay buffer

        # group episodes by query fields
        episode_grouper = lambda ep: frozenset(ep[0].state.fields.keys)
        episode_identifier = lambda ep: id(ep)

        # each has its own buffer
        group_buffer_factory = lambda: RewardPrioritizedReplayBuffer(
            max_size=config.replay_buffer.size,
            sampling_quantile=1.0,
            discount_factor=config.gamma)

        # buffers are combined into a single grouped buffer
        self._replay_buffer = GroupedReplayBuffer(
            episode_grouper, episode_identifier,
            group_buffer_factory, min_group_size=config.replay_buffer.min_size)

        self._replay_steps = config.train.replay_steps
        self._gamma = config.gamma

        # construct replay logger
        self._replay_logger = ReplayLogger(self.workspace.traces_replay,
                self.tb_logger, self.metadata)

        # load demonstrations
        with open(self.workspace.traces_demo, 'w', encoding='utf8') as fout:     # pylint: disable=no-member
            # NOTE: this may be an empty list for some tasks
            self._demonstrations = load_demonstrations(
                    config.env.subdomain, config.demonstrations.base_dir,
                    config.demonstrations.parser, logfile=fout,
                    min_raw_reward=config.demonstrations.min_raw_reward)

            # keep a random subset of demonstrations
            with random_seed(0):
                random.shuffle(self._demonstrations)
            self._demonstrations = self._demonstrations[:config.demonstrations.max_to_use]

        num_demonstrations = len(self._demonstrations)
        self.metadata['stats.num_demonstrations'] = num_demonstrations
        if num_demonstrations == 0:
            logging.warning('NO DEMONSTRATIONS AVAILABLE')

        # build neural policy
        neural_policy = try_gpu(MiniWoBPolicy.from_config(config.policy))
        optimizer = optim.Adam(neural_policy.parameters(),
                               lr=config.train.learning_rate)

        # TODO: reload replay buffer?
        self.train_state = self.checkpoints.load_latest(
                neural_policy, optimizer)

        # build program policy
        self._program_policy = self._build_program_policy()
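The replay buffer above groups episodes by the frozenset of query-field names in their first state, so episodes whose tasks expose the same fields share one RewardPrioritizedReplayBuffer. A toy illustration of that grouping rule, using plain dicts as stand-ins for the real State/Fields objects:

from collections import defaultdict

# Stand-ins for episodes; each episode's first state carries query fields.
toy_episodes = [
    [{'fields': {'name': 'Alice', 'date': '1/1'}}],
    [{'fields': {'name': 'Bob', 'date': '2/2'}}],
    [{'fields': {'color': 'red'}}],
]

episode_grouper = lambda ep: frozenset(ep[0]['fields'].keys())

groups = defaultdict(list)
for ep in toy_episodes:
    groups[episode_grouper(ep)].append(ep)

# Two groups: {'name', 'date'} with two episodes, {'color'} with one.
for key, eps in groups.items():
    print(sorted(key), len(eps))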