def env(self):
    env = Environment.make('miniwob', self.TASK_NAME)
    base_url = os.environ.get('MINIWOB_BASE_URL')
    print 'BASE URL:', base_url
    env.configure(num_instances=3, seeds=[1, 2, 'hello'], base_url=base_url)
    yield env
    env.close()
def test_environment():
    try:
        task_name = sys.argv[1]
    except IndexError:
        print 'Usage: python {} TASK_NAME'.format(sys.argv[0])
        exit(1)
    env = Environment.make('miniwob', task_name)
    base_url = os.environ.get('MINIWOB_BASE_URL')
    env.configure(num_instances=1, seeds=[0], base_url=base_url)
    states = env.reset()
    print states[0].dom.visualize()
    env.close()
def extract_utterances():
    try:
        task_name = sys.argv[1]
    except IndexError:
        print >> sys.stderr, 'Usage: {} task_name'.format(sys.argv[0])
        exit(1)
    from wge.environment import Environment
    # register a stub field extractor (empty Fields) so the task can be loaded
    FIELD_EXTRACTORS[task_name] = lambda utt: Fields({})
    env = Environment.make('miniwob', task_name)
    base_url = os.environ.get('MINIWOB_BASE_URL')
    env.configure(num_instances=4, seeds=range(4), base_url=base_url)
    # reset repeatedly and dump the utterances from every instance
    for i in xrange(25):
        states = env.reset()
        for state in states:
            print 'UTT:\t{}'.format(state.utterance.replace('\n', ' '))
    env.close()
def env(self):
    env = Environment.make('miniwob', self.TASK_NAME)
    base_url = os.environ.get('MINIWOB_BASE_URL')
    print 'BASE URL:', base_url
    if self.FRAGILE is True:
        env.configure(base_url=base_url, num_instances=1, seeds=[1], wait_ms=300)
    elif self.FRAGILE == 'instance':
        env.configure(base_url=base_url, num_instances=1, seeds=[1])
    elif self.FRAGILE == 'delay':
        env.configure(base_url=base_url, num_instances=3, seeds=range(3), wait_ms=1000)
    else:
        env.configure(base_url=base_url, num_instances=3, seeds=range(3))
    yield env
    env.close()
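

# A hypothetical, self-contained sketch (not part of the repo) of how a test
# class consumes an `env` fixture like the ones above: TASK_NAME (and FRAGILE)
# are class attributes read by the fixture, pytest injects the yielded
# environment into test methods, and env.close() runs as teardown after the
# yield. The task name, seed, and assertion below are illustrative.
import os

import pytest

from wge.environment import Environment


class TestEnvFixtureSketch(object):
    TASK_NAME = 'click-button'  # hypothetical task name

    @pytest.fixture
    def env(self):
        env = Environment.make('miniwob', self.TASK_NAME)
        env.configure(num_instances=1, seeds=[0],
                      base_url=os.environ.get('MINIWOB_BASE_URL'))
        yield env
        env.close()

    def test_reset(self, env):
        states = env.reset()
        assert len(states) == 1  # one State per configured instance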
        actions (list[list[VNCEvent]]): a batch of VNCEvent sequences.
            An empty sequence means no actions.

    Returns:
        states (list[dict]):
            dict['text'] (dict)
            dict['vision'] (np.ndarray)
        rewards (list[float])
        dones (list[bool]): once `done` is True, further actions on that
            instance will give undefined results.
        info (dict): additional debug information.
            Global debug information is directly at the root level.
            Local information for instance i is in info['n'][i].
    """
    return self.env.step(actions)

def close(self):
    self.env.close()

@property
def num_instances(self):
    return self._num_instances


if __name__ == '__main__':
    env = Environment.make(domain='formwob', subdomain='Delta-v0')
    env.configure()
    while True:
        observation, _, done, info = env.step([])
        print(observation)
        time.sleep(1)
def __init__(self, config, save_dir):
    super(MiniWoBTrainingRun, self).__init__(config, save_dir)
    self.workspace.add_dir('traces_replay', join('traces', 'replay'))
    self.workspace.add_file('traces_demo', join('traces', 'demo-parse-log.txt'))

    # need to make sure that these times coincide
    assert config.log.trace_evaluate % config.log.evaluate == 0
    assert (config.log.explore % config.explore.program == 0 or
            config.log.explore % config.explore.neural == 0)
    assert config.log.replay % config.train.replay == 0
    assert config.log.trace_replay % config.log.replay == 0
    assert config.log.trace_explore % config.log.explore == 0

    # construct environment
    Episode.configure(config.discount_negative_reward)
    env = Environment.make(config.env.domain, config.env.subdomain)  # TODO: Refactor into a get_environment
    env.configure(
        num_instances=config.env.num_instances,
        seeds=range(config.env.num_instances),
        headless=config.env.headless,
        base_url=os.environ.get("MINIWOB_BASE_URL"),
        cache_state=False,  # never cache state
        reward_processor=get_reward_processor(config.env.reward_processor),
        wait_ms=config.env.wait_ms,
        block_on_reset=config.env.block_on_reset,
        refresh_freq=config.env.refresh_freq,
    )
    self._env = env

    # construct episode generators
    self._basic_episode_generator = BasicEpisodeGenerator(
        self._env,
        config.explore.max_steps_per_episode,
        config.log.visualize_attention)

    def state_equality_checker(s1, s2):
        """Compare two State objects."""
        r1 = s1.dom.visualize() if s1 else None
        r2 = s2.dom.visualize() if s2 else None
        return r1 == r2  # TODO(kelvin): better equality check

    # construct episode logger
    trace_dir = join(self.workspace.root, 'traces')
    self._episode_logger = EpisodeLogger(trace_dir, self.tb_logger,
                                         self.metadata)

    # construct replay buffer
    # group episodes by query fields
    episode_grouper = lambda ep: frozenset(ep[0].state.fields.keys)
    episode_identifier = lambda ep: id(ep)
    # each has its own buffer
    group_buffer_factory = lambda: RewardPrioritizedReplayBuffer(
        max_size=config.replay_buffer.size,
        sampling_quantile=1.0,
        discount_factor=config.gamma)
    # buffers are combined into a single grouped buffer
    self._replay_buffer = GroupedReplayBuffer(
        episode_grouper, episode_identifier,
        group_buffer_factory, min_group_size=config.replay_buffer.min_size)

    self._replay_steps = config.train.replay_steps
    self._gamma = config.gamma

    # construct replay logger
    self._replay_logger = ReplayLogger(self.workspace.traces_replay,
                                       self.tb_logger, self.metadata)

    # load demonstrations
    with open(self.workspace.traces_demo, 'w', 'utf8') as fout:  # pylint: disable=no-member
        # NOTE: this may be an empty list for some tasks
        self._demonstrations = load_demonstrations(
            config.env.subdomain, config.demonstrations.base_dir,
            config.demonstrations.parser, logfile=fout,
            min_raw_reward=config.demonstrations.min_raw_reward)

        # keep a random subset of demonstrations
        with random_seed(0):
            random.shuffle(self._demonstrations)
        self._demonstrations = self._demonstrations[:config.demonstrations.max_to_use]

    num_demonstrations = len(self._demonstrations)
    self.metadata['stats.num_demonstrations'] = num_demonstrations

    if num_demonstrations == 0:
        logging.warn('NO DEMONSTRATIONS AVAILABLE')

    # build neural policy
    neural_policy = try_gpu(MiniWoBPolicy.from_config(config.policy))
    optimizer = optim.Adam(neural_policy.parameters(),
                           lr=config.train.learning_rate)
    # TODO: reload replay buffer?
    self.train_state = self.checkpoints.load_latest(neural_policy, optimizer)

    # build program policy
    self._program_policy = self._build_program_policy()
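

# Illustration only (not part of the training run above): the episode grouper
# buckets episodes by the *set* of query-field keys, so episodes whose
# utterances expose the same fields share one reward-prioritized buffer inside
# the grouped replay buffer. The namedtuples below are simplified, hypothetical
# stand-ins for wge's episode/state/fields objects, used just to show the key.
from collections import namedtuple

_Fields = namedtuple('_Fields', ['keys'])
_State = namedtuple('_State', ['fields'])
_Experience = namedtuple('_Experience', ['state'])

episode_a = [_Experience(_State(_Fields(keys=['target'])))]
episode_b = [_Experience(_State(_Fields(keys=['name', 'date'])))]

grouper = lambda ep: frozenset(ep[0].state.fields.keys)
assert grouper(episode_a) == frozenset(['target'])
assert grouper(episode_a) != grouper(episode_b)  # different fields -> different buffer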