def __init__(self, config, save_dir):
        super(ContextVAETrainingRun, self).__init__(config, save_dir)

        # extra dir for storing TrainStates where NaN was encountered
        self.workspace.add_dir('nan_checkpoints')
        self.workspace.add_dir('traces')

        # build model
        with random_seed(config.optim.seed):
            print('seed: {}'.format(config.optim.seed))
            model, optimizer = self._build_model(config.model, config.optim, config.dataset)
            self.train_state = self.checkpoints.load_latest(model, optimizer)

        # load data
        data_dir = join(data.workspace.datasets, config.dataset.path)
        self._examples = EditDataSplits(data_dir, config.dataset)
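
# Hedged sketch: `random_seed` is not defined in these snippets. It is assumed
# to be a context manager that seeds the global RNGs for the enclosed block and
# restores their previous state afterwards, roughly like this (covering only
# Python's and NumPy's generators):
from contextlib import contextmanager
import random
import numpy as np


@contextmanager
def random_seed(seed):
    """Temporarily fix the global RNG state (assumed behavior)."""
    py_state = random.getstate()
    np_state = np.random.get_state()
    random.seed(seed)
    np.random.seed(seed)
    try:
        yield
    finally:
        random.setstate(py_state)
        np.random.set_state(np_state)
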
# Example 2
    def __init__(self,
                 examples,
                 scorer,
                 negative_sampler,
                 num_negatives_loss,
                 num_negatives_ranking,
                 verbose=True):
        """Create evaluation.
        
        Args:
            examples (list[Seq2SeqExample])
            scorer (Seq2SeqScorer)
            negative_sampler (NegativeSampler)
            num_negatives_loss: number of negatives used to compute the training loss
            num_negatives_ranking: number of negatives used to compute the mean rank
            verbose (bool)
        """
        # compute stats
        with random_seed(0):
            # compute loss
            loss = scorer.loss(examples, num_negatives_loss)
            loss = loss.data[0]  # convert Variable into a Python float (loss.item() in newer PyTorch)

            # compute mean rank
            # generate a RankingExample for each example
            rank_examples = []
            for ex in examples:
                # the first option is the correct one; the rest are negatives
                output_options = [ex.output_words]
                output_options.extend(
                    negative_sampler.sample(ex, num_negatives_ranking))
                rank_example = RankingExample(ex.input_words, output_options)
                rank_examples.append(rank_example)

            scores = scorer.score_rank_examples(rank_examples)

            # compute ranks
            pos_ranks = []
            for i, score_group in enumerate(scores):
                pos_ranks.append(ranks(score_group, ascending=False)[0])
                if verbose and i < 5:
                    print(RankTrace(rank_examples[i], score_group))
            mean_rank = np.mean(pos_ranks)

            self._stats = {'loss': loss, 'mean_rank': mean_rank}
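
# Hedged sketch: `ranks` is not shown here. It is assumed to map a list of
# scores to 1-based ranks; with ascending=False the highest score gets rank 1,
# so ranks(score_group, ascending=False)[0] is the rank of the correct option.
def ranks(scores, ascending=True):
    """Return the 1-based rank of each score (assumed semantics)."""
    order = sorted(range(len(scores)),
                   key=lambda i: scores[i],
                   reverse=not ascending)
    result = [0] * len(scores)
    for rank, idx in enumerate(order, start=1):
        result[idx] = rank
    return result
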
    def __init__(self, config, save_dir):
        super(RetrieverTrainingRun, self).__init__(config, save_dir)

        # load data
        try:
            dataset_limit = config.dataset.limit
        except ConfigMissingException:
            dataset_limit = float('inf')
        data_dir = join(data.workspace.root, config.dataset.path)
        examples = RetrieverDataSplits(data_dir, dataset_limit)
        self.examples = examples

        # build network
        with random_seed(config.optim.seed):
            model, optimizer = self._build_model(config, examples.train)

        # reload state, if any
        self._train_state = self.checkpoints.load_latest(model, optimizer)
    def _evaluate(self, data_splits, big_eval):
        """Evaluate.
        
        Args:
            data_splits (RetrieverDataSplits)
            big_eval (bool)
        """
        config = self.config.eval
        num_samples = config.big_num_examples if big_eval else config.num_examples

        format_name = lambda name: '{}_{}'.format('big' if big_eval else 'small', name)

        with random_seed(0):
            train_sample = sample_if_large(data_splits.train, num_samples)
            self._evaluate_split(train_sample, format_name('train'))

            valid_sample = sample_if_large(data_splits.valid, num_samples)
            self._evaluate_split(valid_sample, format_name('valid'))
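
# Hedged sketch: `sample_if_large` is assumed to subsample a split only when it
# exceeds the requested size, so small splits are evaluated in full.
import random


def sample_if_large(examples, max_size):
    """Return at most max_size randomly chosen examples (assumed behavior)."""
    if len(examples) <= max_size:
        return list(examples)
    return random.sample(examples, max_size)
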
# Example 5
    def __init__(self, config, save_dir):
        super(MiniWoBTrainingRun, self).__init__(config, save_dir)
        self.workspace.add_dir('traces_replay', join('traces', 'replay'))
        self.workspace.add_file('traces_demo', join('traces', 'demo-parse-log.txt'))

        # logging periods must be multiples of the corresponding event periods so they coincide
        assert config.log.trace_evaluate % config.log.evaluate == 0
        assert (config.log.explore % config.explore.program == 0 or
                config.log.explore % config.explore.neural == 0)
        assert config.log.replay % config.train.replay == 0
        assert config.log.trace_replay % config.log.replay == 0
        assert config.log.trace_explore % config.log.explore == 0

        # construct environment
        Episode.configure(config.discount_negative_reward)
        env = Environment.make(config.env.domain, config.env.subdomain)  # TODO: Refactor into a get_environment helper
        env.configure(
            num_instances=config.env.num_instances,
            seeds=range(config.env.num_instances),
            headless=config.env.headless,
            base_url=os.environ.get("MINIWOB_BASE_URL"),
            cache_state=False,  # never cache state
            reward_processor=get_reward_processor(config.env.reward_processor),
            wait_ms=config.env.wait_ms,
            block_on_reset=config.env.block_on_reset,
            refresh_freq=config.env.refresh_freq,
        )
        self._env = env

        # construct episode generators
        self._basic_episode_generator = BasicEpisodeGenerator(
            self._env,
            config.explore.max_steps_per_episode,
            config.log.visualize_attention)

        def state_equality_checker(s1, s2):
            """Compare two State objects by their rendered DOM trees."""
            # TODO(kelvin): better equality check
            r1 = s1.dom.visualize() if s1 else None
            r2 = s2.dom.visualize() if s2 else None
            return r1 == r2

        # construct episode logger
        trace_dir = join(self.workspace.root, 'traces')
        self._episode_logger = EpisodeLogger(trace_dir, self.tb_logger,
                                             self.metadata)

        # construct replay buffer

        # group episodes by query fields
        episode_grouper = lambda ep: frozenset(ep[0].state.fields.keys)
        episode_identifier = lambda ep: id(ep)

        # each group of episodes gets its own buffer
        group_buffer_factory = lambda: RewardPrioritizedReplayBuffer(
            max_size=config.replay_buffer.size,
            sampling_quantile=1.0,
            discount_factor=config.gamma)

        # buffers are combined into a single grouped buffer
        self._replay_buffer = GroupedReplayBuffer(
            episode_grouper, episode_identifier,
            group_buffer_factory, min_group_size=config.replay_buffer.min_size)

        self._replay_steps = config.train.replay_steps
        self._gamma = config.gamma

        # construct replay logger
        self._replay_logger = ReplayLogger(self.workspace.traces_replay,
                                           self.tb_logger, self.metadata)

        # load demonstrations
        with open(self.workspace.traces_demo, 'w', encoding='utf8') as fout:     # pylint: disable=no-member
            # NOTE: this may be an empty list for some tasks
            self._demonstrations = load_demonstrations(
                    config.env.subdomain, config.demonstrations.base_dir,
                    config.demonstrations.parser, logfile=fout,
                    min_raw_reward=config.demonstrations.min_raw_reward)

            # keep a random subset of demonstrations
            with random_seed(0):
                random.shuffle(self._demonstrations)
            self._demonstrations = self._demonstrations[:config.demonstrations.max_to_use]

        num_demonstrations = len(self._demonstrations)
        self.metadata['stats.num_demonstrations'] = num_demonstrations
        if num_demonstrations == 0:
            logging.warning('NO DEMONSTRATIONS AVAILABLE')

        # build neural policy
        neural_policy = try_gpu(MiniWoBPolicy.from_config(config.policy))
        optimizer = optim.Adam(neural_policy.parameters(),
                               lr=config.train.learning_rate)

        # TODO: reload replay buffer?
        self.train_state = self.checkpoints.load_latest(
                neural_policy, optimizer)

        # build program policy
        self._program_policy = self._build_program_policy()
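
# Hedged sketch: `try_gpu` above is assumed to move a module onto the GPU when
# CUDA is available and to leave it on the CPU otherwise.
import torch


def try_gpu(module):
    """Move module to the GPU if one is available (assumed behavior)."""
    return module.cuda() if torch.cuda.is_available() else module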