Example #1
    def predict_and_score(self, eval_instances, random=False, verbosity=0):
        predictions = []
        scores = []

        batches = iterators.iter_batches(eval_instances,
                                         self.options.listener_eval_batch_size)
        num_batches = (len(eval_instances) -
                       1) // self.options.listener_eval_batch_size + 1

        if self.options.verbosity + verbosity >= 2:
            print('Testing')
        progress.start_task('Eval batch', num_batches)
        for batch_num, batch in enumerate(batches):
            progress.progress(batch_num)
            batch = list(batch)

            xs, (y, ) = self._data_to_arrays(batch, test=True)

            probs = self.model.predict(xs)
            if random:
                indices = sample(probs)
                predictions.extend(indices)
            else:
                predictions.extend(probs.argmax(axis=1))
            scores_arr = np.log(probs[np.arange(len(batch)), y])
            scores.extend(scores_arr.tolist())
        progress.end_task()
        if self.options.verbosity >= 9:
            print('%s %ss:' % (self.id, 'sample' if random else 'prediction'))
            for inst, prediction in zip(eval_instances, predictions):
                print('%s -> %s' % (repr(inst.input), repr(prediction)))

        return predictions, scores
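The indexing trick above, probs[np.arange(len(batch)), y], pulls out each instance's predicted probability for its gold label in one vectorized step, and np.log turns those into the per-instance scores. A minimal, self-contained NumPy sketch with made-up numbers:

import numpy as np

# Hypothetical softmax outputs for a batch of 3 instances over 4 classes.
probs = np.array([[0.7, 0.1, 0.1, 0.1],
                  [0.2, 0.5, 0.2, 0.1],
                  [0.25, 0.25, 0.25, 0.25]])
y = np.array([0, 2, 3])  # gold label index for each instance

predictions = probs.argmax(axis=1)            # most probable label per instance
scores = np.log(probs[np.arange(len(y)), y])  # log P(gold label) per instance
print(predictions.tolist(), scores.tolist())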
Example #2
    def train(self, training_instances, validation_instances=None, metrics=None):
        id_tag = (self.id + ': ') if self.id else ''
        if self.options.verbosity >= 2:
            print(id_tag + 'Training priors')
        self.train_priors(training_instances, listener_data=self.options.listener)

        self.dataset = training_instances
        xs, ys = self._data_to_arrays(training_instances, init_vectorizer=True)
        self._build_model()

        if self.options.verbosity >= 2:
            print(id_tag + 'Training conditional model')
        summary_path = config.get_file_path('losses.tfevents')
        if summary_path:
            writer = summary.SummaryWriter(summary_path)
        else:
            writer = None
        progress.start_task('Iteration', self.options.train_iters)
        for iteration in range(self.options.train_iters):
            progress.progress(iteration)
            self.model.fit(xs, ys, batch_size=self.options.batch_size,
                           num_epochs=self.options.train_epochs,
                           summary_writer=writer, step=iteration * self.options.train_epochs)
            validation_results = self.validate(validation_instances, metrics, iteration=iteration)
            if writer is not None:
                step = (iteration + 1) * self.options.train_epochs
                self.on_iter_end(step, writer)
                for key, value in validation_results.items():
                    tag = 'val/' + key.split('.', 1)[1].replace('.', '/')
                    writer.log_scalar(step, tag, value)
        if writer is not None:
            writer.flush()
        progress.end_task()
Example #3
 def train(self, training_instances, validation_instances='ignored', metrics='ignored'):
     progress.start_task('Example', len(training_instances))
     for i, inst in enumerate(training_instances):
         progress.progress(i)
         self.seen.update([inst.output])
     progress.end_task()
     self.num_examples += len(training_instances)
Example #4
    def train(self,
              training_instances,
              validation_instances=None,
              metrics=None):
        if not hasattr(self, 'model'):
            self.model = self.build_model(
                self.init_vectorizer(training_instances))

        minibatches = iterators.gen_batches(training_instances,
                                            self.options.batch_size)
        progress.start_task('Epoch', self.options.train_epochs)
        for epoch in range(self.options.train_epochs):
            progress.progress(epoch)

            progress.start_task('Minibatch', len(minibatches))
            for b, batch in enumerate(minibatches):
                progress.progress(b)
                self.train_batch(batch)
            progress.end_task()

            self.validate_and_log(validation_instances,
                                  metrics,
                                  self.model.summary_writer,
                                  epoch=epoch)
        progress.end_task()
Example #5
    def predict_and_score(self,
                          eval_instances,
                          random=False,
                          split='default',
                          verbosity=4):
        predictions = []
        scores = []

        minibatches = iterators.gen_batches(eval_instances,
                                            self.options.batch_size)
        tokenize, detokenize = tokenizers.TOKENIZERS[self.options.tokenizer]

        if verbosity > 2:
            progress.start_task('Eval minibatch', len(minibatches))
        for b, batch in enumerate(minibatches):
            if verbosity > 2:
                progress.progress(b)
            outputs_batch, scores_batch = self.model.eval(
                [self.instance_to_tuple(inst) for inst in batch], split=split)
            preds_batch = outputs_batch['sample' if random else 'beam']
            detokenized = self.collate_preds(preds_batch, detokenize)
            predictions.extend(detokenized)
            scores.extend(self.collate_scores(scores_batch))
        if verbosity > 2:
            progress.end_task()
        return predictions, scores
Example #6
    def train(self, training_instances, validation_instances, metrics):
        self.init_vectorizers(training_instances)

        self.build_graph()
        self.init_params()

        batches = iterators.gen_batches(training_instances,
                                        batch_size=self.options.batch_size)

        if self.options.verbosity >= 1:
            progress.start_task('Epoch', self.options.train_epochs)

        for epoch in range(self.options.train_epochs):
            if self.options.verbosity >= 1:
                progress.progress(epoch)

            if self.options.verbosity >= 1:
                progress.start_task('Batch', len(batches))

            for i, batch in enumerate(batches):
                if self.options.verbosity >= 1:
                    progress.progress(i)

                batch = list(batch)
                feed_dict = self.vectorize_inputs(batch)
                feed_dict.update(self.vectorize_labels(batch))
                self.run_train(feed_dict)

            if self.options.verbosity >= 1:
                progress.end_task()

        if self.options.verbosity >= 1:
            progress.end_task()
Example #7
    def predict_and_score(self, eval_instances, random=False, verbosity=0):
        predictions = []
        scores = []

        batches = iterators.iter_batches(eval_instances, self.options.listener_eval_batch_size)
        num_batches = (len(eval_instances) - 1) // self.options.listener_eval_batch_size + 1

        if self.options.verbosity + verbosity >= 2:
            print('Testing')
        progress.start_task('Eval batch', num_batches)
        for batch_num, batch in enumerate(batches):
            progress.progress(batch_num)
            batch = list(batch)

            xs, (y,) = self._data_to_arrays(batch, test=True)

            probs = self.model.predict(xs)
            if random:
                indices = sample(probs)
                predictions.extend(indices)
            else:
                predictions.extend(probs.argmax(axis=1))
            scores_arr = np.log(probs[np.arange(len(batch)), y])
            scores.extend(scores_arr.tolist())
        progress.end_task()
        if self.options.verbosity >= 9:
            print('%s %ss:' % (self.id, 'sample' if random else 'prediction'))
            for inst, prediction in zip(eval_instances, predictions):
                print('%s -> %s' % (repr(inst.input), repr(prediction)))

        return predictions, scores
Example #8
    def predict_and_score(self, eval_instances, random=False, verbosity=0):
        predictions = []
        scores = []

        batches = iterators.gen_batches(eval_instances,
                                        batch_size=self.options.eval_batch_size)

        with gzip.open(config.get_file_path('dists.b64.gz'), 'w'):
            pass

        if self.options.verbosity + verbosity >= 1:
            progress.start_task('Eval batch', len(batches))

        for i, batch in enumerate(batches):
            if self.options.verbosity + verbosity >= 1:
                progress.progress(i)

            batch = list(batch)

            feed_dict = self.vectorize_inputs(batch)
            feed_dict.update(self.vectorize_labels(batch))
            output = self.run_predict(feed_dict)
            predictions_batch = self.output_to_preds(output, batch, sample=random)
            predictions.extend(predictions_batch)
            labels = self.vectorize_labels(batch)
            scores_batch = self.output_to_scores(output, labels)
            scores.extend(scores_batch)

        if self.options.verbosity + verbosity >= 1:
            progress.end_task()

        return predictions, scores
Example #9
    def score(self, eval_instances, verbosity=0):
        result = []
        batches = iterators.iter_batches(eval_instances,
                                         self.options.speaker_eval_batch_size)
        num_batches = (len(eval_instances) -
                       1) // self.options.speaker_eval_batch_size + 1

        if self.options.verbosity + verbosity >= 2:
            print('Scoring')
        if self.options.verbosity + verbosity >= 1:
            progress.start_task('Score batch', num_batches)
        for batch_num, batch in enumerate(batches):
            if self.options.verbosity + verbosity >= 1:
                progress.progress(batch_num)
            batch = list(batch)

            xs, (n, ) = self._data_to_arrays(batch, test=False)
            if self.use_color_mask:
                mask = xs[3]
            else:
                mask = xs[2]

            probs = self.model.predict(xs)
            token_probs = probs[np.arange(probs.shape[0])[:, np.newaxis],
                                np.arange(probs.shape[1]), n]
            scores_arr = np.sum(np.log(token_probs) * mask, axis=1)
            scores = scores_arr.tolist()
            result.extend(scores)
        if self.options.verbosity + verbosity >= 1:
            progress.end_task()

        return result
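The masked sum above turns per-token probabilities of the gold tokens into one sequence log-probability while ignoring padded positions. A tiny self-contained NumPy sketch with invented values:

import numpy as np

# Gold-token probabilities for 2 sequences padded to length 4, plus a mask
# marking the real (non-padding) positions.
token_probs = np.array([[0.5, 0.4, 0.9, 1.0],
                        [0.3, 0.8, 1.0, 1.0]])
mask = np.array([[1, 1, 1, 0],
                 [1, 1, 0, 0]])

# Sequence score = sum of gold-token log-probs over unmasked positions only.
scores_arr = np.sum(np.log(token_probs) * mask, axis=1)
print(scores_arr.tolist())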
Example #10
    def predict_and_score(self, eval_instances):
        num_instances = len(eval_instances)

        # make features for eval dataset
        print "making features for eval dataset..."
        self.X_eval = self.make_features(eval_instances)

        # find log probabilities using model trained above
        print "finding probabilities..."
        log_probs = self.model.predict_log_proba(self.X_eval)[:, 1]
        reshaped = np.reshape(log_probs, (num_instances, 3))
        final_probs = reshaped - logsumexp(reshaped, axis=1, keepdims=True)

        preds = []
        scores = []
        print "making predictions..."
        progress.start_task('Example', len(eval_instances))
        for i, inst in enumerate(eval_instances):
            progress.progress(i)
            pred = np.argmax(final_probs[i])
            score = final_probs[i][inst.output]
            preds.append(pred)
            scores.append(score)
        progress.end_task()
        return preds, scores
Example #11
    def predict_and_score(self, eval_instances):
        num_instances = len(eval_instances)

        # make features for eval dataset
        print "making features for eval dataset..."
        self.X_eval = self.make_features(eval_instances)

        # find log probabilities using model trained above
        print "finding probabilities..."
        log_probs = self.model.predict_log_proba(self.X_eval)[:,1]
        reshaped = np.reshape(log_probs,(num_instances,3))
        final_probs = reshaped - logsumexp(reshaped, axis=1, keepdims=True)

        preds = []
        scores = []
        print "making predictions..."
        progress.start_task('Example', len(eval_instances))
        for i, inst in enumerate(eval_instances):
            progress.progress(i)
            pred = np.argmax(final_probs[i])
            score = final_probs[i][inst.output]
            preds.append(pred)
            scores.append(score)
        progress.end_task()
        return preds, scores
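Subtracting the row-wise logsumexp is what renormalizes the three per-instance log-probabilities so that each row is a proper log-distribution. A self-contained sketch, assuming SciPy's logsumexp (the original may import it from a different module):

import numpy as np
from scipy.special import logsumexp

# Made-up unnormalized log-probabilities: 2 instances x 3 alternatives.
reshaped = np.array([[-1.2, -0.7, -2.3],
                     [-0.1, -3.0, -1.5]])

final_probs = reshaped - logsumexp(reshaped, axis=1, keepdims=True)
print(np.exp(final_probs).sum(axis=1))  # each row now sums to ~1.0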
Example #12
    def predict(self, eval_instances, random=False, verbosity=0):
        result = []
        batches = iterators.iter_batches(eval_instances,
                                         self.options.speaker_eval_batch_size)
        num_batches = (len(eval_instances) -
                       1) // self.options.speaker_eval_batch_size + 1

        eos_index = self.seq_vec.vectorize(['</s>'])[0]

        if self.options.verbosity + verbosity >= 2:
            print('Predicting')
        if self.options.verbosity + verbosity >= 1:
            progress.start_task('Predict batch', num_batches)
        for batch_num, batch in enumerate(batches):
            if self.options.verbosity + verbosity >= 1:
                progress.progress(batch_num)
            batch = list(batch)

            (c, _p, mask), (_y, ) = self._data_to_arrays(batch, test=True)
            assert mask.all()  # We shouldn't be masking anything in prediction

            beam_size = 1 if random else self.options.speaker_beam_size
            done = np.zeros((len(batch), beam_size), dtype=np.bool)
            beam = np.zeros((len(batch), beam_size, self.seq_vec.max_len),
                            dtype=np.int32)
            beam[:, :, 0] = self.seq_vec.vectorize(['<s>'])[0]
            beam_scores = np.log(np.zeros((len(batch), beam_size)))
            beam_scores[:, 0] = 0.0

            c = np.repeat(c, beam_size, axis=0)
            mask = np.repeat(mask, beam_size, axis=0)

            for length in range(1, self.seq_vec.max_len):
                if done.all():
                    break
                p = beam.reshape(
                    (beam.shape[0] * beam.shape[1], beam.shape[2]))[:, :-1]
                probs = self.model.predict([c, p, mask])
                if random:
                    indices = sample(probs[:, length - 1, :])
                    beam[:, 0, length] = indices
                    done = np.logical_or(done, indices == eos_index)
                else:
                    assert probs.shape[1] == p.shape[1], (probs.shape[1],
                                                          p.shape[1])
                    assert probs.shape[2] == len(
                        self.seq_vec.tokens), (probs.shape[2],
                                               len(self.seq_vec.tokens))
                    scores = np.log(probs)[:, length - 1, :].reshape(
                        (beam.shape[0], beam.shape[1], probs.shape[2]))
                    beam_search_step(scores, length, beam, beam_scores, done,
                                     eos_index)
            outputs = self.seq_vec.unvectorize_all(beam[:, 0, :])
            result.extend([' '.join(strip_invalid_tokens(o)) for o in outputs])
        if self.options.verbosity + verbosity >= 1:
            progress.end_task()

        return result
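Initializing the beam with np.log(np.zeros(...)) sets every hypothesis score to -inf, and beam_scores[:, 0] = 0.0 then makes only the first hypothesis "live" before the first expansion, so duplicate copies of the start token cannot crowd out real candidates. A minimal NumPy illustration of that initialization:

import numpy as np

batch_size, beam_size = 2, 3

# np.log(0) is -inf; errstate just silences the expected divide-by-zero warning.
with np.errstate(divide='ignore'):
    beam_scores = np.log(np.zeros((batch_size, beam_size)))
beam_scores[:, 0] = 0.0  # log(1): only the first hypothesis starts out live
print(beam_scores)
# [[  0. -inf -inf]
#  [  0. -inf -inf]]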
Example #13
 def predict_and_score(self, eval_instances):
     predict = [''] * len(eval_instances)
     score = []
     progress.start_task('Example', len(eval_instances))
     for i, inst in enumerate(eval_instances):
         progress.progress(i)
         score.append(self._get_log_prob(inst.output))
     progress.end_task()
     return predict, score
Example #14
 def predict_and_score(self, eval_instances):
     predict = [''] * len(eval_instances)
     score = []
     progress.start_task('Example', len(eval_instances))
     for i, inst in enumerate(eval_instances):
         progress.progress(i)
         score.append(self._get_log_prob(inst.output))
     progress.end_task()
     return predict, score
Example #15
    def train_one_batch(self, insts, env, t):
        env.configure([inst.input for inst in insts], verbosity=self.options.verbosity)
        observation = env._get_obs()
        info = None
        self.init_belief(env, observation)

        if self.options.verbosity >= 1:
            progress.start_task('Step', self.options.max_steps)

        for step in range(self.options.max_steps):
            if self.options.verbosity >= 1:
                progress.progress(step)

            if self.options.render:
                env.render()
            actions = self.action(env, observation, info, testing=False)
            prev_obs = observation
            observation, reward, done, info = env.step(actions)
            self.update_belief(env, prev_obs, actions, observation, reward, done, info)
            if all(done):
                break

        '''
        from tensorflow.python.client import timeline
        trace = timeline.Timeline(step_stats=self.run_metadata.step_stats)

        with config.open('timeline.ctf.json', 'w') as trace_file:
            trace_file.write(trace.generate_chrome_trace_format())
        '''

        rewards = np.array(self.rewards)  # max_steps x batch_size
        done = np.array(self.done, dtype=np.int32)  # max_steps x batch_size
        actions = np.array(self.actions).reshape(rewards.shape)
        # force actions on steps where reward is zero (already done) to nop
        actions[1:, :] *= (1 - done)[:-1, :]
        for game in range(rewards.shape[1]):
            action_hist = np.bincount(actions[:, game],
                                      minlength=len(cards_env.ACTIONS)).tolist()
            if self.options.verbosity >= 7:
                print('Total reward: {}  {}'.format(rewards[:, game].sum(), action_hist))
        total_rewards = np.repeat(rewards.sum(axis=0), rewards.shape[0])
        assert total_rewards.shape == (rewards.shape[0] * rewards.shape[1],), \
            (total_rewards.shape, rewards.shape)
        credit = np.ones(done.shape)
        credit[1:, :] *= 1.0 - done[:-1, :]
        credit = credit.ravel()  # (credit / credit.sum(axis=0)).ravel()
        assert credit.shape == total_rewards.shape, (credit.shape, total_rewards.shape)

        if self.options.verbosity >= 1:
            progress.end_task()

        feed_dict = self.batch_inputs(self.inputs[:-cards_env.MAX_BATCH_SIZE])
        for label, value in zip(self.label_vars, [np.array(self.actions),
                                                  total_rewards,
                                                  credit]):
            feed_dict[label] = value
        self.run_train(feed_dict)
Example #16
    def train(self, training_instances, validation_instances=None, metrics=None,
              keep_params=False):
        id_tag = (self.id + ': ') if self.id else ''
        if self.options.verbosity >= 2:
            print(id_tag + 'Training priors')
        self.train_priors(training_instances, listener_data=self.options.listener)

        self.dataset = training_instances
        xs, ys = self._data_to_arrays(training_instances,
                                      init_vectorizer=not hasattr(self, 'model'))
        if not hasattr(self, 'model') or not keep_params:
            if self.options.verbosity >= 2:
                print(id_tag + 'Building model')
            if keep_params:
                warnings.warn("keep_params was passed, but the model hasn't been built; "
                              "initializing all parameters.")
            self._build_model()
        else:
            if not hasattr(self.options, 'reset_optimizer_vars') or \
                    self.options.reset_optimizer_vars:
                if self.options.verbosity >= 2:
                    print(id_tag + 'Resetting optimizer')
                self.model.reset_optimizer()

        if self.options.verbosity >= 2:
            print(id_tag + 'Training conditional model')
        if hasattr(self, 'writer'):
            writer = self.writer
        else:
            summary_path = config.get_file_path('losses.tfevents')
            if summary_path:
                writer = summary.SummaryWriter(summary_path)
            else:
                writer = None
            self.writer = writer

        if not hasattr(self, 'step_base'):
            self.step_base = 0

        progress.start_task('Iteration', self.options.train_iters)
        for iteration in range(self.options.train_iters):
            progress.progress(iteration)
            self.model.fit(xs, ys, batch_size=self.options.batch_size,
                           num_epochs=self.options.train_epochs,
                           summary_writer=writer,
                           step=self.step_base + iteration * self.options.train_epochs)
            validation_results = self.validate(validation_instances, metrics, iteration=iteration)
            if writer is not None:
                step = self.step_base + (iteration + 1) * self.options.train_epochs
                self.on_iter_end(step, writer)
                for key, value in validation_results.items():
                    tag = 'val/' + key.split('.', 1)[1].replace('.', '/')
                    writer.log_scalar(step, tag, value)

        self.step_base += self.options.train_iters * self.options.train_epochs
        if writer is not None:
            writer.flush()
        progress.end_task()
Example #17
    def fit(self, Xs, ys, batch_size, num_epochs, summary_writer=None, step=0):
        if not isinstance(Xs, Sequence):
            raise ValueError('Xs should be a sequence, instead got %s' % (Xs,))
        if not isinstance(ys, Sequence):
            raise ValueError('ys should be a sequence, instead got %s' % (ys,))
        history = OrderedDict((tag, []) for tag in self.monitored_tags)
        id_tag = (self.id + '/') if self.id else ''
        params = self.params()

        progress.start_task('Epoch', num_epochs)
        epoch_start = time.time()
        for epoch in range(num_epochs):
            progress.progress(epoch)
            history_epoch = OrderedDict((tag, []) for tag in self.monitored_tags)
            num_minibatches_approx = len(ys[0]) // batch_size + 1

            progress.start_task('Minibatch', num_minibatches_approx)
            for i, batch in enumerate(self.minibatches(Xs, ys, batch_size, shuffle=True)):
                progress.progress(i)
                if self.options.verbosity >= 8:
                    print('types: %s' % ([type(v) for t in batch for v in t],))
                    print('shapes: %s' % ([v.shape for t in batch for v in t],))
                inputs, targets, synth = batch
                monitored = self.train_fn(*inputs + targets + synth)
                for tag, value in zip(self.monitored_tags, monitored):
                    if self.options.verbosity >= 10:
                        print('%s: %s' % (tag, value))
                    history_epoch[tag].append(value)
            progress.end_task()

            for tag, values in history_epoch.items():
                values_array = np.array([np.asarray(v) for v in values])
                history[tag].append(values_array)
                mean_values = np.mean(values_array, axis=0)
                if summary_writer is not None:
                    if len(mean_values.shape) == 0:
                        summary_writer.log_scalar(step + epoch, tag, mean_values)
                    else:
                        summary_writer.log_histogram(step + epoch, tag, mean_values)

            if self.options.monitor_params and summary_writer is not None:
                for param in params:
                    val = param.get_value()
                    tag = 'param/' + param.name
                    if len(val.shape) == 0:
                        summary_writer.log_scalar(step + epoch, tag, val)
                    else:
                        summary_writer.log_histogram(step + epoch, tag, val)

            epoch_end = time.time()
            examples_per_sec = len(ys[0]) / (epoch_end - epoch_start)
            if summary_writer is not None:
                summary_writer.log_scalar(step + epoch,
                                          id_tag + 'examples_per_sec', examples_per_sec)
            epoch_start = epoch_end
        progress.end_task()

        return history
Example #18
    def predict_and_score(self, eval_instances, random='ignored', verbosity=0):
        self.get_options()

        eval_instances = list(eval_instances)
        predictions = []
        scores = []

        env = gym.make(cards_env.register())

        batches = iterators.gen_batches(eval_instances, batch_size=cards_env.MAX_BATCH_SIZE)

        if self.options.verbosity + verbosity >= 1:
            progress.start_task('Eval batch', len(batches))

        for i, batch in enumerate(batches):
            batch = list(batch)
            if self.options.verbosity + verbosity >= 1:
                progress.progress(i)

            total_reward = np.zeros((len(batch),))
            done = np.zeros((len(batch),), dtype=np.bool)

            env.configure([inst.input for inst in batch], verbosity=verbosity)
            observation = env._get_obs()
            info = None
            self.init_belief(env, observation)

            if self.options.verbosity + verbosity >= 1:
                progress.start_task('Step', self.options.max_steps)

            for step in range(self.options.max_steps):
                if self.options.verbosity + verbosity >= 1:
                    progress.progress(step)
                if self.options.render:
                    env.render()
                action = self.action(env, observation, info)
                prev_obs = [np.copy(a) for a in observation]
                observation, reward, done_step, info = env.step(action)
                self.update_belief(env, prev_obs, action, observation, reward, done, info)
                done = np.bitwise_or(done, done_step[:len(batch)])
                total_reward += np.array(reward[:len(batch)])
                if done.all():
                    break

            if self.options.verbosity + verbosity >= 1:
                progress.end_task()

            predictions.extend([''] * len(batch))
            scores.extend(total_reward.tolist())

        env.close()

        if self.options.verbosity + verbosity >= 1:
            progress.end_task()

        return predictions, scores
Example #19
 def predict_and_score(self, eval_instances):
     most_common = self.seen.most_common(1)[0][0]
     predict = [most_common] * len(eval_instances)
     score = []
     progress.start_task('Example', len(eval_instances))
     for i, inst in enumerate(eval_instances):
         progress.progress(i)
         score.append(np.log(self._get_smoothed_prob(inst.output)))
     progress.end_task()
     return predict, score
Example #20
 def predict_and_score(self, eval_instances):
     most_common = self.seen.most_common(1)[0][0]
     predict = [most_common] * len(eval_instances)
     score = []
     progress.start_task('Example', len(eval_instances))
     for i, inst in enumerate(eval_instances):
         progress.progress(i)
         score.append(np.log(self._get_smoothed_prob(inst.output)))
     progress.end_task()
     return predict, score
Example #21
 def train(self,
           training_instances,
           validation_instances='ignored',
           metrics='ignored'):
     progress.start_task('Example', len(training_instances))
     for i, inst in enumerate(training_instances):
         progress.progress(i)
         self.seen.update([inst.output])
     progress.end_task()
     self.num_examples += len(training_instances)
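Examples 3 and 21 are the training half of a simple most-frequent-output baseline: a counter tallies every training output, and prediction (as in Examples 19 and 20) returns the single most common one. A minimal sketch with made-up data, assuming self.seen is a collections.Counter:

from collections import Counter

seen = Counter()
for output in ['red', 'blue', 'red', 'green', 'red']:  # stand-in for inst.output values
    seen.update([output])

most_common = seen.most_common(1)[0][0]
print(most_common)  # 'red'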
Example #22
    def init_vectorizer(self, training_instances):
        vec = vectorizers.Seq2SeqVectorizer()
        vec.add((['<s>', '</s>'], ['<s>', '</s>']))

        progress.start_task('Vectorizer instance', len(training_instances))
        for i, inst in enumerate(training_instances):
            progress.progress(i)
            vec.add(self.instance_to_tuple(inst))
        progress.end_task()

        return vec
Example #23
    def predict_and_score(self, eval_instances, random=False, verbosity=0):
        from fields import build_instance

        options = self.get_options()
        predictions = []
        scores = []
        base_is_listener = self.override_listener()
        assert options.listener, 'Eval data should be listener data for DirectRefGameLearner'

        true_batch_size = options.listener_eval_batch_size // options.num_distractors
        batches = iterators.iter_batches(eval_instances, true_batch_size)
        num_batches = (len(eval_instances) - 1) // true_batch_size + 1

        if options.verbosity + verbosity >= 2:
            print('Testing')
        progress.start_task('Eval batch', num_batches)
        for batch_num, batch in enumerate(batches):
            progress.progress(batch_num)
            batch = list(batch)
            assert batch[
                0].alt_outputs, 'No context given for direct listener testing'
            context = len(batch[0].alt_outputs)
            if self.options.direct_base_uses_context:
                output_grid = [
                    build_instance(inst.input, target, inst.alt_outputs,
                                   base_is_listener) for inst in batch
                    for target in range(len(inst.alt_outputs))
                ]
            else:
                output_grid = [
                    build_instance(inst.input, color, None, base_is_listener)
                    for inst in batch for color in inst.alt_outputs
                ]
            assert len(output_grid) == context * len(batch), \
                'Context must be the same number of colors for all examples'
            true_indices = np.array([inst.output for inst in batch])
            grid_scores = self.base.score(output_grid, verbosity=verbosity)
            log_probs = np.array(grid_scores).reshape((len(batch), context))
            # Renormalize over only the context colors
            log_probs -= logsumexp(log_probs, axis=1)[:, np.newaxis]
            # Cap confidences to reasonable values
            if options.direct_min_score is not None and options.direct_min_score <= 0.0:
                log_probs = np.maximum(options.direct_min_score, log_probs)
                # Normalize again (so we always return log probabilities)
                log_probs -= logsumexp(log_probs, axis=1)[:, np.newaxis]
            assert log_probs.shape == (len(batch), context)
            pred_indices = np.argmax(log_probs, axis=1)
            predictions.extend(pred_indices.tolist())
            # Extract the score of the true color
            scores.extend(log_probs[np.arange(len(batch)),
                                    true_indices].tolist())
        progress.end_task()

        return predictions, scores
Example #24
    def init_vectorizer(self, training_instances):
        vec = self.vectorizer_class()
        vec.add((['<input>', '</input>'],
                 ['<dialogue>', '</dialogue>', '<eos>', 'YOU:', 'THEM:'],
                 ['<output>', '</output>']))

        progress.start_task('Vectorizer instance', len(training_instances))
        for i, inst in enumerate(training_instances):
            progress.progress(i)
            vec.add(self.instance_to_tuple(inst))
        progress.end_task()

        return vec
Example #25
    def train(self, training_instances, validation_instances='ignored', metrics='ignored'):
        self.names = sorted(set(inst.output for inst in training_instances)) + ['<unk>']
        self.name_to_index = defaultdict(lambda: -1,
                                         {n: i for i, n in enumerate(self.names)})
        self.hists = []
        progress.start_task('Histogram', len(self.GRANULARITY))
        for i, g in enumerate(self.GRANULARITY):
            progress.progress(i)
            self.hists.append(Histogram(training_instances, self.names,
                                        granularity=g, use_progress=True))
        progress.end_task()

        self.num_params = sum(h.num_params for h in self.hists)
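The defaultdict(lambda: -1) above sends any name not seen in training to index -1, which (because '<unk>' is appended as the last entry of self.names) selects the '<unk>' bin when used to index a probability array. A small self-contained illustration with invented names:

from collections import defaultdict

names = sorted({'alice', 'bob', 'carol'}) + ['<unk>']
name_to_index = defaultdict(lambda: -1, {n: i for i, n in enumerate(names)})

print(name_to_index['bob'])      # 1
print(name_to_index['mallory'])  # -1, i.e. the final '<unk>' position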
Example #26
 def predict_and_score(self, eval_instances):
     predictions = []
     scores = []
     progress.start_task('Example', len(eval_instances))
     for i, inst in enumerate(eval_instances):
         progress.progress(i)
         hist_probs = self.hist_probs(inst.input)
         name = self.names[hist_probs.argmax()]
         prob = hist_probs[self.name_to_index[inst.output]]
         predictions.append(name)
         scores.append(np.log(prob))
     progress.end_task()
     return predictions, scores
Example #27
 def predict_and_score(self, eval_instances):
     predictions = []
     scores = []
     progress.start_task('Example', len(eval_instances))
     for i, inst in enumerate(eval_instances):
         progress.progress(i)
         hist_probs = self.hist_probs(inst.input)
         name = self.names[hist_probs.argmax()]
         prob = hist_probs[self.name_to_index[inst.output]]
         predictions.append(name)
         scores.append(np.log(prob))
     progress.end_task()
     return predictions, scores
Example #28
    def train(self, training_instances, validation_instances='ignored', metrics='ignored'):
        tokenize = TOKENIZERS[self.tokenizer]

        tokenized = [tokenize(inst.output) + ['</s>'] for inst in training_instances]
        self.seq_vec.add_all(tokenized)
        unk_replaced = self.seq_vec.unk_replace_all(tokenized)

        progress.start_task('Example', len(training_instances))
        for i, utt in enumerate(unk_replaced):
            progress.progress(i)
            self.token_counts.update(utt)
            self.num_tokens += len(utt)
        progress.end_task()
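The loop above just accumulates unigram statistics: each utterance is tokenized, an end-of-sentence marker '</s>' is appended, and a Counter plus a running token total are updated. A minimal sketch with a whitespace tokenizer standing in for TOKENIZERS[self.tokenizer] and a made-up corpus (the seq_vec unknown-word replacement step is omitted):

from collections import Counter

def tokenize(s):  # stand-in tokenizer
    return s.split()

token_counts = Counter()
num_tokens = 0
for utt in ['the red one', 'the blue one']:
    tokens = tokenize(utt) + ['</s>']
    token_counts.update(tokens)
    num_tokens += len(tokens)

print(token_counts['the'], num_tokens)  # 2 8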
Example #29
    def add_data(self, training_instances):
        if self.use_progress:
            progress.start_task('Example', len(training_instances))

        for i, inst in enumerate(training_instances):
            if self.use_progress:
                progress.progress(i)

            bucket = self.get_bucket(inst.input)
            self.buckets[bucket][inst.output] += 1
            self.bucket_counts[bucket] += 1

        if self.use_progress:
            progress.end_task()
Example #30
    def predict_and_score(self, eval_instances, random=False, verbosity=0):
        predictions = []
        scores = []

        progress.start_task('Instance', len(eval_instances))
        for inst_num, inst in enumerate(eval_instances):
            progress.progress(inst_num)

            pred, score = self.predict_one_inst(inst)
            predictions.append(pred)
            scores.append(score)
        progress.end_task()

        return predictions, scores
Example #31
    def predict_and_score(self, eval_instances, random=False, verbosity=0):
        predictions = []
        scores = []

        progress.start_task('Instance', len(eval_instances))
        for inst_num, inst in enumerate(eval_instances):
            progress.progress(inst_num)

            pred, score = self.predict_one_inst(inst)
            predictions.append(pred)
            scores.append(score)
        progress.end_task()

        return predictions, scores
Example #32
    def add_data(self, training_instances):
        if self.use_progress:
            progress.start_task('Example', len(training_instances))

        for i, inst in enumerate(training_instances):
            if self.use_progress:
                progress.progress(i)

            bucket = self.get_bucket(inst.input)
            self.buckets[bucket][inst.output] += 1
            self.bucket_counts[bucket] += 1

        if self.use_progress:
            progress.end_task()
Example #33
    def predict_and_score(self, eval_instances, random=False, verbosity=0):
        from fields import build_instance

        options = self.get_options()
        predictions = []
        scores = []
        base_is_listener = self.override_listener()
        assert options.listener, 'Eval data should be listener data for DirectRefGameLearner'

        true_batch_size = options.listener_eval_batch_size // options.num_distractors
        batches = iterators.iter_batches(eval_instances, true_batch_size)
        num_batches = (len(eval_instances) - 1) // true_batch_size + 1

        if options.verbosity + verbosity >= 2:
            print('Testing')
        progress.start_task('Eval batch', num_batches)
        for batch_num, batch in enumerate(batches):
            progress.progress(batch_num)
            batch = list(batch)
            assert batch[0].alt_outputs, 'No context given for direct listener testing'
            context = len(batch[0].alt_outputs)
            if self.options.direct_base_uses_context:
                output_grid = [build_instance(inst.input, target, inst.alt_outputs,
                                              base_is_listener)
                               for inst in batch for target in range(len(inst.alt_outputs))]
            else:
                output_grid = [build_instance(inst.input, color, None, base_is_listener)
                               for inst in batch for color in inst.alt_outputs]
            assert len(output_grid) == context * len(batch), \
                'Context must be the same number of colors for all examples'
            true_indices = np.array([inst.output for inst in batch])
            grid_scores = self.base.score(output_grid, verbosity=verbosity)
            log_probs = np.array(grid_scores).reshape((len(batch), context))
            # Renormalize over only the context colors
            log_probs -= logsumexp(log_probs, axis=1)[:, np.newaxis]
            # Cap confidences to reasonable values
            if options.direct_min_score is not None and options.direct_min_score <= 0.0:
                log_probs = np.maximum(options.direct_min_score, log_probs)
                # Normalize again (so we always return log probabilities)
                log_probs -= logsumexp(log_probs, axis=1)[:, np.newaxis]
            assert log_probs.shape == (len(batch), context)
            pred_indices = np.argmax(log_probs, axis=1)
            predictions.extend(pred_indices.tolist())
            # Extract the score of the true color
            scores.extend(log_probs[np.arange(len(batch)), true_indices].tolist())
        progress.end_task()

        return predictions, scores
Example #34
    def train(self,
              training_instances,
              validation_instances='ignored',
              metrics='ignored'):
        tokenize = TOKENIZERS[self.tokenizer]

        tokenized = [
            tokenize(inst.output) + ['</s>'] for inst in training_instances
        ]
        self.seq_vec.add_all(tokenized)
        unk_replaced = self.seq_vec.unk_replace_all(tokenized)

        progress.start_task('Example', len(training_instances))
        for i, utt in enumerate(unk_replaced):
            progress.progress(i)
            self.token_counts.update(utt)
            self.num_tokens += len(utt)
        progress.end_task()
Example #35
    def predict_and_score(self, eval_instances, random='ignored', verbosity='ignored'):
        options = config.options()
        predictions = []
        scores = []
        pool = multiprocessing.Pool(options.lux_threads)
        batch_size = options.lux_batch_size

        progress.start_task('Example', len(eval_instances))
        for start in range(0, len(eval_instances), batch_size):
            progress.progress(start)
            batch_output = pool.map(lux_predict_and_score,
                                    eval_instances[start:start + batch_size])
            batch_preds, batch_scores = zip(*batch_output)
            predictions.extend(batch_preds)
            scores.extend(batch_scores)
        progress.end_task()

        return predictions, scores
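The pattern above fans slices of the evaluation set out to a worker pool and stitches the returned (prediction, score) pairs back together with zip(*...). A runnable sketch with a stand-in worker in place of lux_predict_and_score (which must be a top-level function so it can be pickled):

import multiprocessing

def fake_predict_and_score(inst):  # stand-in worker: returns (prediction, score)
    return inst, -float(inst)

if __name__ == '__main__':
    eval_instances = list(range(10))
    batch_size = 4
    pool = multiprocessing.Pool(2)

    predictions, scores = [], []
    for start in range(0, len(eval_instances), batch_size):
        batch_output = pool.map(fake_predict_and_score,
                                eval_instances[start:start + batch_size])
        batch_preds, batch_scores = zip(*batch_output)
        predictions.extend(batch_preds)
        scores.extend(batch_scores)
    pool.close()
    print(predictions, scores)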
Example #36
def output_grids(model, input_filename):
    with gzip.open(input_filename, 'rb') as infile:
        grids = [json.loads(line.strip()) for line in infile]

    dirname, filename = os.path.split(input_filename)

    data_filename = os.path.join(dirname, 'data.eval.jsons')
    with open(data_filename, 'r') as infile:
        insts = [json.loads(line.strip()) for line in infile]

    output_filename = os.path.join(dirname, 's0_' + filename)
    with gzip.open(output_filename, 'w') as outfile:
        progress.start_task('Example', len(insts))
        for i, (inst, grid) in enumerate(zip(insts, grids)):
            progress.progress(i)
            insts, shape = build_insts(inst, grid)
            scores = model.score(insts, verbosity=-4)
            substitute_grid(scores, grid, shape)
            json.dump(grid, outfile)
            outfile.write('\n')
        progress.end_task()
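output_grids reads and writes gzipped JSON-lines files: one JSON object per line, written through gzip.open. A minimal round-trip of that format with demo filenames (text mode is used here so json can write strings directly):

import gzip
import json

with gzip.open('demo.jsons.gz', 'wt') as outfile:
    for grid in [{'id': 1, 'scores': [0.1, 0.9]}, {'id': 2, 'scores': [0.5, 0.5]}]:
        json.dump(grid, outfile)
        outfile.write('\n')

with gzip.open('demo.jsons.gz', 'rt') as infile:
    grids = [json.loads(line.strip()) for line in infile]
print(grids)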
Example #37
    def predict_and_score(self, eval_instances, random='ignored', verbosity=4):
        eval_instances = list(eval_instances)
        predictions = []
        scores = []

        if verbosity >= 1:
            progress.start_task('Eval instance', len(eval_instances))

        for i, inst in enumerate(eval_instances):
            if verbosity >= 1:
                progress.progress(i)

            pred = ''  # TODO: make prediction
            score = -float('inf')  # TODO: score gold output
            predictions.append(pred)
            scores.append(score)

        if verbosity >= 1:
            progress.end_task()

        return predictions, scores
Example #38
    def train(self,
              training_instances,
              validation_instances='ignored',
              metrics='ignored'):
        self.names = sorted(set(inst.output
                                for inst in training_instances)) + ['<unk>']
        self.name_to_index = defaultdict(
            lambda: -1, {n: i
                         for i, n in enumerate(self.names)})
        self.hists = []
        progress.start_task('Histogram', len(self.GRANULARITY))
        for i, g in enumerate(self.GRANULARITY):
            progress.progress(i)
            self.hists.append(
                Histogram(training_instances,
                          self.names,
                          granularity=g,
                          use_progress=True))
        progress.end_task()

        self.num_params = sum(h.num_params for h in self.hists)
Example #39
def output_grids(model, input_filename):
    with gzip.open(input_filename, 'rb') as infile:
        grids = [json.loads(line.strip()) for line in infile]

    dirname, filename = os.path.split(input_filename)

    data_filename = os.path.join(dirname, 'data.eval.jsons')
    with open(data_filename, 'r') as infile:
        insts = [json.loads(line.strip()) for line in infile]

    output_filename = os.path.join(dirname, 's0_' + filename)
    with gzip.open(output_filename, 'w') as outfile:
        progress.start_task('Example', len(insts))
        for i, (inst, grid) in enumerate(zip(insts, grids)):
            progress.progress(i)
            insts, shape = build_insts(inst, grid)
            scores = model.score(insts, verbosity=-4)
            substitute_grid(scores, grid, shape)
            json.dump(grid, outfile)
            outfile.write('\n')
        progress.end_task()
Example #40
    def predict_and_score(self, eval_instances, random="ignored", verbosity=4):
        eval_instances = list(eval_instances)
        predictions = []
        scores = []

        if verbosity >= 1:
            progress.start_task("Eval instance", len(eval_instances))

        for i, inst in enumerate(eval_instances):
            if verbosity >= 1:
                progress.progress(i)

            pred = ""  # TODO: make prediction
            score = -float("inf")  # TODO: score gold output
            predictions.append(pred)
            scores.append(score)

        if verbosity >= 1:
            progress.end_task()

        return predictions, scores
Example #41
    def predict_and_score(self,
                          eval_instances,
                          random='ignored',
                          verbosity='ignored'):
        options = config.options()
        predictions = []
        scores = []
        pool = multiprocessing.Pool(options.lux_threads)
        batch_size = options.lux_batch_size

        progress.start_task('Example', len(eval_instances))
        for start in range(0, len(eval_instances), batch_size):
            progress.progress(start)
            batch_output = pool.map(lux_predict_and_score,
                                    eval_instances[start:start + batch_size])
            batch_preds, batch_scores = zip(*batch_output)
            predictions.extend(batch_preds)
            scores.extend(batch_scores)
        progress.end_task()

        return predictions, scores
Example #42
    def predict_and_score(self, eval_instances, random=False, split='default', verbosity=4):
        predictions = []
        scores = []

        if verbosity > 2:
            progress.start_task('Eval instances', len(eval_instances))
        for i, inst in enumerate(eval_instances):
            if verbosity > 2:
                progress.progress(i)
            game = get_game(inst.input)
            num_turns = count_dialogue_turns(inst.input)
            if (game, num_turns) in self.lookup:
                pred = self.lookup[(game, num_turns)]
            elif game in self.lookup:
                pred = self.lookup[game]
            else:
                pred = 'NEVER BEEN HERE BEFORE'
            predictions.append(pred)
            scores.append(0.0)
        if verbosity > 2:
            progress.end_task()
        return predictions, scores
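The prediction above falls back through progressively coarser keys: first the exact (game, num_turns) pair, then the game alone, then a fixed default string. A small self-contained sketch of that lookup with a made-up table:

lookup = {('game1', 2): 'take the red card', 'game1': 'hello'}

def predict(game, num_turns, default='NEVER BEEN HERE BEFORE'):
    if (game, num_turns) in lookup:
        return lookup[(game, num_turns)]
    elif game in lookup:
        return lookup[game]
    return default

print(predict('game1', 2))  # 'take the red card'
print(predict('game1', 5))  # 'hello'
print(predict('game2', 1))  # default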
Example #43
    def train(self, training_instances, validation_instances='ignored', metrics='ignored'):
        self.build_graph()
        env = gym.make(cards_env.register())

        self.init_params()

        if self.options.verbosity >= 1:
            progress.start_task('Epoch', self.options.pg_train_epochs)

        for epoch in range(self.options.pg_train_epochs):
            if self.options.verbosity >= 1:
                progress.progress(epoch)

            batches = iterators.iter_batches(training_instances,
                                             self.options.pg_batch_size)
            num_batches = (len(training_instances) - 1) // self.options.pg_batch_size + 1

            if self.options.verbosity >= 1:
                progress.start_task('Batch', num_batches)

            try:
                for batch_num, batch in enumerate(batches):
                    if self.options.verbosity >= 1:
                        progress.progress(batch_num)
                    step = epoch * num_batches + batch_num
                    self.train_one_batch(list(batch), env, t=step)
                    if step % 10 == 0:
                        check_prefix = config.get_file_path('checkpoint')
                        self.saver.save(self.session, check_prefix, global_step=step)
            except KeyboardInterrupt:
                self.summary_writer.flush()
                raise

            if self.options.verbosity >= 1:
                progress.end_task()

        if self.options.verbosity >= 1:
            progress.end_task()
Example #44
    def predict_and_score(self, eval_instances, random=False, verbosity=0):
        predictions = []
        scores = []

        if self.options.verbosity + verbosity >= 1:
            progress.start_task('Instance', len(eval_instances))

        all_cards = [r + s for r in cards_env.RANKS for s in cards_env.SUITS]
        cards_to_loc = {k: (1, 1) for k in all_cards}

        for i, inst in enumerate(eval_instances):
            if self.options.verbosity + verbosity >= 1:
                progress.progress(i)

            walls = inst.input['walls']
            num_possible_locs = np.ones(walls.shape).sum() - walls.sum()
            predictions.append(world.build_world(walls, dict(cards_to_loc)).__dict__)
            score = -len(all_cards) * np.log(num_possible_locs + 3.0) - np.log(num_possible_locs)
            scores.append(score)

        if self.options.verbosity + verbosity >= 1:
            progress.end_task()

        return predictions, scores
Example #45
    def predict_and_score(self, eval_instances, random=False, verbosity=0):
        options = config.options()
        predictions = []
        scores = []

        all_utts = self.base.seq_vec.tokens
        sym_vec = vectorizers.SymbolVectorizer()
        sym_vec.add_all(all_utts)
        prior_scores = self.prior_scores(all_utts)

        base_is_listener = (type(self.base) in listener.LISTENERS.values())

        true_batch_size = options.listener_eval_batch_size // len(all_utts)
        batches = iterators.iter_batches(eval_instances, true_batch_size)
        num_batches = (len(eval_instances) - 1) // true_batch_size + 1

        if options.verbosity + verbosity >= 2:
            print('Testing')
        progress.start_task('Eval batch', num_batches)
        for batch_num, batch in enumerate(batches):
            progress.progress(batch_num)
            batch = list(batch)
            context = len(
                batch[0].alt_inputs) if batch[0].alt_inputs is not None else 0
            if context:
                output_grid = [
                    (instance.Instance(utt, color)
                     if base_is_listener else instance.Instance(color, utt))
                    for inst in batch for color in inst.alt_inputs
                    for utt in sym_vec.tokens
                ]
                assert len(output_grid) == context * len(batch) * len(all_utts), \
                    'Context must be the same number of colors for all examples'
                true_indices = np.array([inst.input for inst in batch])
            else:
                output_grid = [
                    (instance.Instance(utt, inst.input) if base_is_listener
                     else instance.Instance(inst.input, utt)) for inst in batch
                    for utt in sym_vec.tokens
                ]
                true_indices = sym_vec.vectorize_all(
                    [inst.input for inst in batch])
                if len(true_indices.shape) == 2:
                    # Sequence vectorizer; we're only using single tokens for now.
                    true_indices = true_indices[:, 0]
            grid_scores = self.base.score(output_grid, verbosity=verbosity)
            if context:
                log_probs = np.array(grid_scores).reshape(
                    (len(batch), context, len(all_utts)))
                orig_log_probs = log_probs[np.arange(len(batch)),
                                           true_indices, :]
                # Renormalize over only the context colors, and extract the score of
                # the true color.
                log_probs -= logsumexp(log_probs, axis=1)[:, np.newaxis, :]
                log_probs = log_probs[np.arange(len(batch)), true_indices, :]
            else:
                log_probs = np.array(grid_scores).reshape(
                    (len(batch), len(all_utts)))
                orig_log_probs = log_probs
            assert log_probs.shape == (len(batch), len(all_utts))
            # Add in the prior scores, if used (S1 \propto L0 * P)
            if prior_scores is not None:
                log_probs = log_probs + 0.5 * prior_scores
            if options.exhaustive_base_weight:
                w = options.exhaustive_base_weight
                log_probs = w * orig_log_probs + (1.0 - w) * log_probs
            # Normalize across utterances. Note that the listener returns probability
            # densities over colors.
            log_probs -= logsumexp(log_probs, axis=1)[:, np.newaxis]
            if random:
                pred_indices = sample(np.exp(log_probs))
            else:
                pred_indices = np.argmax(log_probs, axis=1)
            predictions.extend(sym_vec.unvectorize_all(pred_indices))
            scores.extend(log_probs[np.arange(len(batch)),
                                    true_indices].tolist())
        progress.end_task()

        return predictions, scores
Example #46
 def train(self, training_instances, validation_instances=None, metrics=None):
     progress.start_task('Instance', len(training_instances))
     for i, inst in enumerate(training_instances):
         progress.progress(i)
         self.train_inst(inst)
     progress.end_task()
Example #47
    def predict_and_score(self, eval_instances, random=False, verbosity=0):
        options = config.options()
        predictions = []
        scores = []

        all_utts = self.base.seq_vec.tokens
        sym_vec = vectorizers.SymbolVectorizer()
        sym_vec.add_all(all_utts)
        prior_scores = self.prior_scores(all_utts)

        base_is_listener = (type(self.base) in listener.LISTENERS.values())

        true_batch_size = options.listener_eval_batch_size // len(all_utts)
        batches = iterators.iter_batches(eval_instances, true_batch_size)
        num_batches = (len(eval_instances) - 1) // true_batch_size + 1

        if options.verbosity + verbosity >= 2:
            print('Testing')
        progress.start_task('Eval batch', num_batches)
        for batch_num, batch in enumerate(batches):
            progress.progress(batch_num)
            batch = list(batch)
            context = len(batch[0].alt_inputs) if batch[0].alt_inputs is not None else 0
            if context:
                output_grid = [(instance.Instance(utt, color)
                                if base_is_listener else
                                instance.Instance(color, utt))
                               for inst in batch for color in inst.alt_inputs
                               for utt in sym_vec.tokens]
                assert len(output_grid) == context * len(batch) * len(all_utts), \
                    'Context must be the same number of colors for all examples'
                true_indices = np.array([inst.input for inst in batch])
            else:
                output_grid = [(instance.Instance(utt, inst.input)
                                if base_is_listener else
                                instance.Instance(inst.input, utt))
                               for inst in batch for utt in sym_vec.tokens]
                true_indices = sym_vec.vectorize_all([inst.input for inst in batch])
                if len(true_indices.shape) == 2:
                    # Sequence vectorizer; we're only using single tokens for now.
                    true_indices = true_indices[:, 0]
            grid_scores = self.base.score(output_grid, verbosity=verbosity)
            if context:
                log_probs = np.array(grid_scores).reshape((len(batch), context, len(all_utts)))
                orig_log_probs = log_probs[np.arange(len(batch)), true_indices, :]
                # Renormalize over only the context colors, and extract the score of
                # the true color.
                log_probs -= logsumexp(log_probs, axis=1)[:, np.newaxis, :]
                log_probs = log_probs[np.arange(len(batch)), true_indices, :]
            else:
                log_probs = np.array(grid_scores).reshape((len(batch), len(all_utts)))
                orig_log_probs = log_probs
            assert log_probs.shape == (len(batch), len(all_utts))
            # Add in the prior scores, if used (S1 \propto L0 * P)
            if prior_scores is not None:
                log_probs = log_probs + 0.5 * prior_scores
            if options.exhaustive_base_weight:
                w = options.exhaustive_base_weight
                log_probs = w * orig_log_probs + (1.0 - w) * log_probs
            # Normalize across utterances. Note that the listener returns probability
            # densities over colors.
            log_probs -= logsumexp(log_probs, axis=1)[:, np.newaxis]
            if random:
                pred_indices = sample(np.exp(log_probs))
            else:
                pred_indices = np.argmax(log_probs, axis=1)
            predictions.extend(sym_vec.unvectorize_all(pred_indices))
            scores.extend(log_probs[np.arange(len(batch)), true_indices].tolist())
        progress.end_task()

        return predictions, scores
Example #48
    def predict_and_score(self, eval_instances, random=False, verbosity=0):
        options = self.get_options()
        predictions = []
        scores = []

        if options.verbosity + verbosity >= 2:
            print('Building alternative utterance list')
        sym_vec = vectorizers.SymbolVectorizer()
        sym_vec.add_all([inst.input for inst in self.get_dataset(self.base)])

        assert eval_instances[0].alt_outputs, \
            'Context required for L(S(L)): %s' % eval_instances[0].__dict__
        context_len = len(eval_instances[0].alt_outputs)
        if options.exhaustive_num_samples > 0:
            num_alt_utts = options.exhaustive_num_samples * context_len + 1
            num_sample_sets = options.exhaustive_num_sample_sets
        else:
            num_alt_utts = len(sym_vec.tokens) + 1
            num_sample_sets = 1
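        # Shrink the instance batch size so that the expanded grid
        # (instances x sample sets x colors x utterances) stays close to the
        # configured listener eval batch size.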
        true_batch_size = max(options.listener_eval_batch_size //
                              (num_alt_utts * num_sample_sets * context_len), 1)
        batches = iterators.iter_batches(eval_instances, true_batch_size)
        num_batches = (len(eval_instances) - 1) // true_batch_size + 1

        if options.exhaustive_output_speaker_samples:
            self.truncate_utterances_files('s1_samples.%s.jsons', num_sample_sets)
        if options.exhaustive_output_speaker_predictions:
            self.truncate_utterances_files('s1_predictions.%s.jsons', num_sample_sets)
        if options.exhaustive_output_all_grids:
            self.truncate_utterances_files('grids.%s.jsons.gz', 1)

        if options.verbosity + verbosity >= 2:
            print('Testing')
        progress.start_task('Eval batch', num_batches)
        for batch_num, batch in enumerate(batches):
            progress.progress(batch_num)
            batch = list(batch)
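            # One grid entry per (example, sample set, context color, alternative
            # utterance) combination; see the assert below.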
            output_grid = self.build_grid(batch, sym_vec.tokens)
            assert len(output_grid) == len(batch) * num_sample_sets * context_len * num_alt_utts, \
                'Context must be the same number of colors for all examples %s' % \
                ((len(output_grid), len(batch), num_sample_sets, context_len, num_alt_utts),)
            true_indices = np.array([inst.output for inst in batch])
            grid_scores = self.base.score(output_grid, verbosity=verbosity)
            l0_log_probs = np.array(grid_scores).reshape((len(batch), num_sample_sets,
                                                          context_len, num_alt_utts))
            # Renormalize over only the context colors, and extract the score of
            # the true color according to the base model.
            l0_log_probs -= logsumexp(l0_log_probs, axis=2)[:, :, np.newaxis, :]
            assert l0_log_probs.shape == (len(batch), num_sample_sets,
                                          context_len, num_alt_utts), l0_log_probs.shape
            orig_log_probs = l0_log_probs[np.arange(len(batch)), 0, :, 0]
            assert orig_log_probs.shape == (len(batch), context_len), orig_log_probs.shape
            # Apply temperature parameter before speaker.
            utilities = options.exhaustive_inv_temperature * l0_log_probs
            # Normalize across utterances. Note that the listener returns probability
            # densities over colors.
            s1_log_probs = utilities - logsumexp(utilities, axis=3)[:, :, :, np.newaxis]
            assert s1_log_probs.shape == (len(batch), num_sample_sets,
                                          context_len, num_alt_utts), s1_log_probs.shape
            if options.exhaustive_output_speaker_samples or \
                    options.exhaustive_output_speaker_predictions:
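                # S1 distribution over the alternative utterances for each example's
                # true target color (last-axis index 0 is the input utterance itself,
                # so it is sliced off).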
                speaker_dist = s1_log_probs[np.arange(len(batch)), :, true_indices, 1:]
                if options.exhaustive_output_speaker_samples:
                    speaker_sample_indices = sample(np.exp(speaker_dist))
                    self.write_speaker_utterances('s1_samples.%s.jsons', output_grid,
                                                  speaker_sample_indices, l0_log_probs.shape)
                if options.exhaustive_output_speaker_predictions:
                    speaker_pred_indices = np.argmax(speaker_dist, axis=2)
                    self.write_speaker_utterances('s1_predictions.%s.jsons', output_grid,
                                                  speaker_pred_indices, l0_log_probs.shape)
            # Normalize again across context colors.
            l2_log_probs = s1_log_probs - logsumexp(s1_log_probs, axis=2)[:, :, np.newaxis, :]
            assert l2_log_probs.shape == (len(batch), num_sample_sets,
                                          context_len, num_alt_utts), l2_log_probs.shape
            # Extract the score of each color for the input utterance according to the L2 model.
            log_probs = l2_log_probs[np.arange(len(batch)), :, :, 0]
            assert log_probs.shape == (len(batch), num_sample_sets, context_len), log_probs.shape
            # Blend L0 and L2 (if enabled) to produce final score.
            if options.exhaustive_base_weight:
                w = options.exhaustive_base_weight
                # Bump zero probabilities up to epsilon ~= 3e-23, because previously we would
                # only have -inf log probs, but now if w < 0 we could get NaNs.
                log_probs = (w * np.maximum(orig_log_probs[:, np.newaxis, :], -52.0) +
                             (1.0 - w) * np.maximum(log_probs, -52.0))
            # Normalize across context one more time to prevent cheating when
            # blending.
            log_probs -= logsumexp(log_probs, axis=2)[:, :, np.newaxis]
            # Average (in probability space) over sample sets
            log_probs = logsumexp(log_probs, axis=1) - np.log(log_probs.shape[1])
            if options.exhaustive_output_all_grids:
                self.write_grids(output_grid,
                                 l0_log_probs, s1_log_probs, l2_log_probs, log_probs)
            if random:
                pred_indices = sample(np.exp(log_probs))
            else:
                pred_indices = np.argmax(log_probs, axis=1)
            predictions.extend(pred_indices)
            # Extract the score of the true color according to the combined model.
            scores.extend(log_probs[np.arange(len(batch)), true_indices].tolist())
        progress.end_task()

        return predictions, scores
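
The chain of normalizations above, L0 over context colors, S1 over utterances, L2 over context colors again, followed by a probability-space average over sample sets, can be reproduced on a toy grid. A minimal sketch, illustration only; the shapes, the inverse temperature, and the SciPy import path are assumptions, and the base-weight blending step is omitted:

import numpy as np
from scipy.special import logsumexp  # assumed import path

batch, num_sample_sets, context_len, num_alt_utts = 2, 3, 3, 5
inv_temperature = 1.0  # stand-in for options.exhaustive_inv_temperature
rng = np.random.RandomState(0)

# Raw base-listener log-scores for every (example, sample set, color, utterance) cell.
grid = rng.randn(batch, num_sample_sets, context_len, num_alt_utts)

# L0: renormalize over the context colors.
l0 = grid - logsumexp(grid, axis=2)[:, :, np.newaxis, :]
# S1: scale by the inverse temperature and normalize over utterances.
utilities = inv_temperature * l0
s1 = utilities - logsumexp(utilities, axis=3)[:, :, :, np.newaxis]
# L2: normalize over the context colors again.
l2 = s1 - logsumexp(s1, axis=2)[:, :, np.newaxis, :]

# Score of each color for the input utterance (index 0 on the utterance axis):
# shape (batch, num_sample_sets, context_len).
log_probs = l2[np.arange(batch), :, :, 0]
# Average over sample sets in probability space: log(mean(exp(x))).
log_probs = logsumexp(log_probs, axis=1) - np.log(num_sample_sets)
assert np.allclose(np.exp(log_probs).sum(axis=1), 1.0)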