def __init__(self, config, save_dir):
    super(ContextVAETrainingRun, self).__init__(config, save_dir)

    # extra dir for storing TrainStates where NaN was encountered
    self.workspace.add_dir('nan_checkpoints')
    self.workspace.add_dir('traces')

    # build model
    with random_seed(config.optim.seed):
        print('seed: ' + str(config.optim.seed))
        model, optimizer = self._build_model(config.model, config.optim,
                                             config.dataset)
    self.train_state = self.checkpoints.load_latest(model, optimizer)

    # load data
    data_dir = join(data.workspace.datasets, config.dataset.path)
    self._examples = EditDataSplits(data_dir, config.dataset)
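# `random_seed` above is used as a context manager so that seeding stays
# local to the model-building block. Its implementation is not shown here;
# the following is a minimal hypothetical sketch, assuming it seeds Python's
# `random` and NumPy and restores the previous RNG state on exit. The real
# helper presumably also seeds the deep learning framework used by the model.
import contextlib
import random as _random

import numpy as _np


@contextlib.contextmanager
def random_seed(seed):
    """Hypothetical sketch: temporarily fix RNG seeds, then restore state."""
    py_state = _random.getstate()
    np_state = _np.random.get_state()
    _random.seed(seed)
    _np.random.seed(seed)
    try:
        yield
    finally:
        _random.setstate(py_state)
        _np.random.set_state(np_state)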
def __init__(self, examples, scorer, negative_sampler, num_negatives_loss,
             num_negatives_ranking, verbose=True):
    """Create evaluation.

    Args:
        examples (list[Seq2SeqExample])
        scorer (Seq2SeqScorer)
        negative_sampler (NegativeSampler)
        num_negatives_loss (int): number of negatives used to compute the training loss
        num_negatives_ranking (int): number of negatives used to compute rank
        verbose (bool)
    """
    # compute stats
    with random_seed(0):
        # compute loss
        loss = scorer.loss(examples, num_negatives_loss)
        loss = loss.data[0]  # convert Variable into a Python float

        # compute mean rank:
        # generate a RankingExample for each example
        rank_examples = []
        for ex in examples:
            output_options = [ex.output_words]  # first option is the correct one
            output_options.extend(
                negative_sampler.sample(ex, num_negatives_ranking))  # the rest are negatives
            rank_example = RankingExample(ex.input_words, output_options)
            rank_examples.append(rank_example)

        scores = scorer.score_rank_examples(rank_examples)

        # compute the rank of the correct option for each example
        pos_ranks = []
        for i, score_group in enumerate(scores):
            pos_ranks.append(ranks(score_group, ascending=False)[0])
            if verbose and i < 5:
                print(RankTrace(rank_examples[i], score_group))
        mean_rank = np.mean(pos_ranks)

    self._stats = {'loss': loss, 'mean_rank': mean_rank}
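# The `ranks` helper above is assumed to map a group of scores to the
# 1-based rank of each option. A minimal sketch consistent with the call
# `ranks(score_group, ascending=False)[0]`, which reads off the rank of the
# correct option (always placed first in `output_options`):
def ranks(scores, ascending=True):
    """Hypothetical sketch: return the 1-based rank of each score.

    With ascending=False, the highest score gets rank 1.
    """
    order = sorted(range(len(scores)), key=lambda i: scores[i],
                   reverse=not ascending)
    result = [0] * len(scores)
    for rank, i in enumerate(order, start=1):
        result[i] = rank
    return result

# e.g. ranks([0.1, 0.9, 0.5], ascending=False) == [3, 1, 2]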
def __init__(self, config, save_dir):
    super(RetrieverTrainingRun, self).__init__(config, save_dir)

    # load data
    try:
        dataset_limit = config.dataset.limit
    except ConfigMissingException:
        dataset_limit = float('inf')

    data_dir = join(data.workspace.root, config.dataset.path)
    examples = RetrieverDataSplits(data_dir, dataset_limit)
    self.examples = examples

    # build network
    with random_seed(config.optim.seed):
        model, optimizer = self._build_model(config, examples.train)

    # reload state, if any
    self._train_state = self.checkpoints.load_latest(model, optimizer)
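# The try/except on `config.dataset.limit` treats the limit as an optional
# config key. If the pattern recurs, it could be factored into a small
# helper; `config_get` below is a hypothetical sketch, assuming missing keys
# raise ConfigMissingException on attribute access as they do above:
def config_get(getter, default):
    """Hypothetical sketch: evaluate a config accessor, falling back to a default."""
    try:
        return getter()
    except ConfigMissingException:
        return default

# usage: dataset_limit = config_get(lambda: config.dataset.limit, float('inf'))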
def _evaluate(self, data_splits, big_eval):
    """Evaluate.

    Args:
        data_splits (RetrieverDataSplits)
        big_eval (bool)
    """
    config = self.config.eval
    num_samples = config.big_num_examples if big_eval else config.num_examples
    format_name = lambda name: '{}_{}'.format('big' if big_eval else 'small', name)

    with random_seed(0):
        train_sample = sample_if_large(data_splits.train, num_samples)
        self._evaluate_split(train_sample, format_name('train'))

        valid_sample = sample_if_large(data_splits.valid, num_samples)
        self._evaluate_split(valid_sample, format_name('valid'))
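# `sample_if_large` is assumed to subsample a split only when it exceeds the
# requested size, so small splits are evaluated in full. A minimal sketch
# under that assumption (deterministic given the surrounding
# `random_seed(0)` block):
import random


def sample_if_large(examples, max_size):
    """Hypothetical sketch: return all examples, or a random sample of max_size."""
    if len(examples) <= max_size:
        return list(examples)
    return random.sample(examples, max_size)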
def __init__(self, config, save_dir):
    super(MiniWoBTrainingRun, self).__init__(config, save_dir)
    self.workspace.add_dir('traces_replay', join('traces', 'replay'))
    self.workspace.add_file('traces_demo', join('traces', 'demo-parse-log.txt'))

    # need to make sure that these logging periods coincide
    assert config.log.trace_evaluate % config.log.evaluate == 0
    assert (config.log.explore % config.explore.program == 0 or
            config.log.explore % config.explore.neural == 0)
    assert config.log.replay % config.train.replay == 0
    assert config.log.trace_replay % config.log.replay == 0
    assert config.log.trace_explore % config.log.explore == 0

    # construct environment
    Episode.configure(config.discount_negative_reward)
    env = Environment.make(config.env.domain, config.env.subdomain)  # TODO: Refactor into a get_environment
    env.configure(
        num_instances=config.env.num_instances,
        seeds=range(config.env.num_instances),
        headless=config.env.headless,
        base_url=os.environ.get("MINIWOB_BASE_URL"),
        cache_state=False,  # never cache state
        reward_processor=get_reward_processor(config.env.reward_processor),
        wait_ms=config.env.wait_ms,
        block_on_reset=config.env.block_on_reset,
        refresh_freq=config.env.refresh_freq,
    )
    self._env = env

    # construct episode generators
    self._basic_episode_generator = BasicEpisodeGenerator(
        self._env,
        config.explore.max_steps_per_episode,
        config.log.visualize_attention)

    def state_equality_checker(s1, s2):
        """Compare two State objects."""
        r1 = s1.dom.visualize() if s1 else None
        r2 = s2.dom.visualize() if s2 else None
        return r1 == r2  # TODO(kelvin): better equality check

    # construct episode logger
    trace_dir = join(self.workspace.root, 'traces')
    self._episode_logger = EpisodeLogger(trace_dir, self.tb_logger,
                                         self.metadata)

    # construct replay buffer
    # group episodes by query fields
    episode_grouper = lambda ep: frozenset(ep[0].state.fields.keys)
    episode_identifier = lambda ep: id(ep)

    # each group has its own buffer
    group_buffer_factory = lambda: RewardPrioritizedReplayBuffer(
        max_size=config.replay_buffer.size,
        sampling_quantile=1.0,
        discount_factor=config.gamma)

    # buffers are combined into a single grouped buffer
    self._replay_buffer = GroupedReplayBuffer(
        episode_grouper, episode_identifier,
        group_buffer_factory, min_group_size=config.replay_buffer.min_size)

    self._replay_steps = config.train.replay_steps
    self._gamma = config.gamma

    # construct replay logger
    self._replay_logger = ReplayLogger(self.workspace.traces_replay,
                                       self.tb_logger, self.metadata)

    # load demonstrations
    with open(self.workspace.traces_demo, 'w', 'utf8') as fout:  # pylint: disable=no-member
        # NOTE: this may be an empty list for some tasks
        self._demonstrations = load_demonstrations(
            config.env.subdomain, config.demonstrations.base_dir,
            config.demonstrations.parser, logfile=fout,
            min_raw_reward=config.demonstrations.min_raw_reward)

        # keep a random subset of demonstrations
        with random_seed(0):
            random.shuffle(self._demonstrations)
        self._demonstrations = self._demonstrations[:config.demonstrations.max_to_use]

    num_demonstrations = len(self._demonstrations)
    self.metadata['stats.num_demonstrations'] = num_demonstrations
    if num_demonstrations == 0:
        logging.warn('NO DEMONSTRATIONS AVAILABLE')

    # build neural policy
    neural_policy = try_gpu(MiniWoBPolicy.from_config(config.policy))
    optimizer = optim.Adam(neural_policy.parameters(),
                           lr=config.train.learning_rate)

    # TODO: reload replay buffer?
    self.train_state = self.checkpoints.load_latest(neural_policy, optimizer)

    # build program policy
    self._program_policy = self._build_program_policy()
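# `GroupedReplayBuffer` above maintains one buffer per episode group (here,
# episodes grouped by their query-field keys), deduplicates episodes via
# `episode_identifier`, and only uses groups that reach `min_group_size`.
# Its implementation is not shown; the class below is a hypothetical sketch
# of that routing logic, with plain lists and uniform sampling standing in
# for the per-group RewardPrioritizedReplayBuffer instances.
import random
from collections import defaultdict


class GroupedReplayBufferSketch(object):
    """Hypothetical sketch: route episodes into per-group buffers."""

    def __init__(self, episode_grouper, episode_identifier, min_group_size):
        self._grouper = episode_grouper
        self._identifier = episode_identifier
        self._min_group_size = min_group_size
        self._groups = defaultdict(list)   # group key -> stored episodes
        self._seen = set()                 # ids of episodes already stored

    def extend(self, episodes):
        for ep in episodes:
            ep_id = self._identifier(ep)
            if ep_id not in self._seen:    # never store an episode twice
                self._seen.add(ep_id)
                self._groups[self._grouper(ep)].append(ep)

    def sample(self, n):
        # draw only from groups large enough to be informative
        pool = [ep for eps in self._groups.values()
                if len(eps) >= self._min_group_size for ep in eps]
        return random.sample(pool, min(n, len(pool)))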