Beispiel #1
0
 def __init__(self, id=None):
     '''
     Initialize the parent learner, then create an empty description
     counter, a sequence vectorizer and a bucketed color vectorizer.

     :param id: identifier forwarded unchanged to the parent constructor.
     '''
     super(ListenerLearner, self).__init__(id=id)
     self.word_counts = Counter()
     self.seq_vec = SequenceVectorizer()
     # Both color settings are read from the listener_* options.
     opts = self.options
     self.color_vec = BucketsVectorizer(opts.listener_color_resolution,
                                        hsv=opts.listener_hsv)
Beispiel #2
0
 def init_vectorizer(self):
     '''
     Install the `vectorize` / `unvectorize` callables and the
     `score_adjustment` constant on this object.

     If `self.res` is empty or its first entry is falsy, colors are passed
     through unchanged and the adjustment is 0.0. Otherwise a
     `BucketsVectorizer` is built from `self.res` (a one-element resolution
     is repeated three times) and the adjustment is the negative log of
     the bucket volume, `256**3 / num_types`.
     '''
     if not (self.res and self.res[0]):
         # No usable resolution: identity mapping, no score correction.
         self.vectorize = lambda c: c
         self.unvectorize = lambda c: c
         self.score_adjustment = 0.0
         return

     if len(self.res) == 1:
         self.res = self.res * 3
     self.color_vec = BucketsVectorizer(self.res, hsv=self.hsv)

     # The closures look up self.color_vec at call time, as the original
     # lambdas did.
     def to_bucket(c):
         return self.color_vec.vectorize(c, hsv=True)

     def from_bucket(c):
         return self.color_vec.unvectorize(c, hsv=True)

     self.vectorize = to_bucket
     self.unvectorize = from_bucket
     bucket_volume = (256.0**3) / self.color_vec.num_types
     self.score_adjustment = -np.log(bucket_volume)
Beispiel #3
0
class UniformPrior(object):
    '''A uniform color prior in RGB space.'''
    def __init__(self, recurrent=False):
        '''
        :param recurrent: selects which input ranks `apply` accepts
                          (2 or 3 when true, 1 or 2 otherwise).
        '''
        # A single-bucket vectorizer; `sample` draws random colors from it.
        self.sampler = BucketsVectorizer([1], hsv=False)
        self.recurrent = recurrent

    def train(self, training_instances, listener_data='ignored'):
        '''Do nothing: both arguments are accepted and ignored.'''
        pass

    def apply(self, input_vars):
        '''
        Build the constant log-density `-3 * log(256)` (i.e. `log(256**-3)`)
        with one entry per element along the first axis of the input.

        :param input_vars: sequence whose first element is the color
                           variable; only its `ndim` and leading axis are
                           used.
        :return: `-3.0 * np.log(256.0)` times a ones-tensor shaped like the
                 first axis of the color variable.
        :raises AssertionError: if the rank of the color variable is not
                                one of the ranks handled for the current
                                `recurrent` setting.
        '''
        c = input_vars[0]
        # Raise explicitly instead of `assert False`: an assert is stripped
        # under `python -O`, which would fall through to the return below
        # with no template defined.
        if self.recurrent:
            if c.ndim == 2:
                template = c[:, 0]
            elif c.ndim == 3:
                template = c[:, 0, 0]
            else:
                raise AssertionError(
                    'need handling for higher rank color vectors (recurrent): %d' % c.ndim)
        else:
            if c.ndim == 1:
                template = c
            elif c.ndim == 2:
                template = c[:, 0]
            else:
                raise AssertionError(
                    'need handling for higher rank color vectors (atomic): %d' % c.ndim)
        ones = T.ones_like(template)
        return -3.0 * np.log(256.0) * ones

    def sample(self, num_samples):
        '''
        :return: a list of `num_samples` colors sampled uniformly in RGB space,
                 but expressed as HSV triples.
        '''
        # Every sample uses bucket index 0 (the sampler has one bucket);
        # random=True asks the vectorizer for a random color in that bucket.
        bucket_indices = np.zeros(num_samples, dtype=np.int32)
        colors = self.sampler.unvectorize_all(bucket_indices,
                                              random=True, hsv=True)
        return [instance.Instance(c) for c in colors]
Beispiel #4
0
    def __init__(self, id=None):
        '''
        Load options and submodels, initialize the parent learner, then
        create the sequence and color vectorizers.

        :param id: identifier passed to `init_submodels` and to the parent
                   constructor.
        '''
        self.get_options()
        self.init_submodels(id)
        super(RSALearner, self).__init__(id=id)

        opts = self.options
        if opts.listener:
            color_resolution = opts.listener_color_resolution
        else:
            color_resolution = opts.speaker_color_resolution
        self.seq_vec = SequenceVectorizer()
        # NOTE(review): hsv is always taken from speaker_hsv, even when the
        # listener resolution was selected above -- confirm this is intended.
        self.color_vec = BucketsVectorizer(color_resolution,
                                           hsv=opts.speaker_hsv)
 def init_vectorizer(self):
     '''
     Set `vectorize`, `unvectorize` and `score_adjustment` on this object.

     With a usable resolution (`self.res` non-empty and its first entry
     truthy) colors are mapped through a `BucketsVectorizer` and
     `score_adjustment` is `-log(256**3 / num_types)`. Without one, both
     callables are identities and the adjustment is 0.0.
     '''
     has_resolution = bool(self.res and self.res[0])
     if not has_resolution:
         self.vectorize = lambda c: c
         self.unvectorize = lambda c: c
         self.score_adjustment = 0.0
         return

     # A single resolution value is repeated three times.
     if len(self.res) == 1:
         self.res = self.res * 3
     self.color_vec = BucketsVectorizer(self.res, hsv=self.hsv)

     def bucket_of(c):
         return self.color_vec.vectorize(c, hsv=True)

     def color_of(c):
         return self.color_vec.unvectorize(c, hsv=True)

     self.vectorize = bucket_of
     self.unvectorize = color_of
     volume_per_bucket = (256.0 ** 3) / self.color_vec.num_types
     self.score_adjustment = -np.log(volume_per_bucket)
Beispiel #6
0
class LookupLearner(Learner):
    '''
    A count-based learner: for every input seen in training it stores a
    Counter of the outputs that accompanied it, and predicts the most
    frequent one.

    In listener mode (`options.listener` true) the output is passed through
    `vectorize`; otherwise the input is.
    '''
    def __init__(self):
        '''Read resolution/hsv from the global options and build the vectorizer.'''
        options = config.options()
        # counters[input][output] -> number of times the pair was seen.
        self.counters = defaultdict(Counter)
        if options.listener:
            res = options.listener_color_resolution
            hsv = options.listener_hsv
        else:
            res = options.speaker_color_resolution
            hsv = options.speaker_hsv
        self.res = res
        self.hsv = hsv
        self.init_vectorizer()

    def init_vectorizer(self):
        '''
        Install `vectorize`, `unvectorize` and `score_adjustment`.

        With a usable resolution (`self.res` non-empty and its first entry
        truthy) colors go through a `BucketsVectorizer` and the adjustment
        is `-log(256**3 / num_types)`; otherwise the callables are
        identities and the adjustment is 0.0.
        '''
        if self.res and self.res[0]:
            # A single resolution value is repeated three times.
            if len(self.res) == 1:
                self.res = self.res * 3
            self.color_vec = BucketsVectorizer(self.res, hsv=self.hsv)
            self.vectorize = lambda c: self.color_vec.vectorize(c, hsv=True)
            self.unvectorize = lambda c: self.color_vec.unvectorize(c,
                                                                    hsv=True)
            self.score_adjustment = -np.log(
                (256.0**3) / self.color_vec.num_types)
        else:
            self.vectorize = lambda c: c
            self.unvectorize = lambda c: c
            self.score_adjustment = 0.0

    @property
    def num_params(self):
        '''Total number of distinct (input, output) pairs stored.'''
        return sum(len(c) for c in self.counters.values())

    def train(self,
              training_instances,
              validation_instances='ignored',
              metrics='ignored'):
        '''
        Count every (input, output) pair in `training_instances`.

        :param training_instances: iterable of objects with `input` and
                                   `output` attributes.
        :param validation_instances: ignored.
        :param metrics: ignored.
        '''
        options = config.options()
        for inst in training_instances:
            inp, out = inst.input, inst.output
            if options.listener:
                out = self.vectorize(out)
            else:
                inp = self.vectorize(inp)
            self.counters[inp][out] += 1

    def predict_and_score(self, eval_instances, random='ignored', verbosity=0):
        '''
        Predict the most frequent output for each instance and score the
        instance's actual output.

        :param eval_instances: iterable of objects with `input` and
                               `output` attributes.
        :param random: ignored.
        :param verbosity: added to `options.verbosity` when deciding what
                          to print.
        :return: `(predictions, scores)`, two parallel lists. Each score is
                 the log of the relative frequency of the actual output
                 among the outputs counted for that input (plus
                 `score_adjustment` in listener mode). A frequency of zero
                 yields `-inf` from `np.log`.
        '''
        options = config.options()
        if options.verbosity + verbosity >= 2:
            print('Testing')
        predictions = []
        scores = []
        for inst in eval_instances:
            inp, out = inst.input, inst.output
            if options.listener:
                out = self.vectorize(out)
            else:
                inp = self.vectorize(inp)

            # Use .get() rather than indexing: indexing the defaultdict
            # would insert an empty Counter for every unseen input and so
            # mutate the trained model during evaluation.
            counter = self.counters.get(inp, Counter())
            highest = counter.most_common(1)
            if highest:
                if options.listener:
                    prediction = self.unvectorize(highest[0][0])
                else:
                    prediction = highest[0][0]
            elif options.listener:
                # Fallback prediction for inputs never seen in training.
                prediction = (0, 0, 0)
            else:
                prediction = '<unk>'

            total = sum(counter.values())
            if total:
                if options.verbosity + verbosity >= 9:
                    print('%s -> %s: %s of %s [%s]' %
                          (repr(inp), repr(out), counter[out], total,
                           inst.input))
                prob = counter[out] * 1.0 / total
            else:
                if options.verbosity + verbosity >= 9:
                    print('%s -> %s: no data [%s]' %
                          (repr(inp), repr(out), inst.input))
                # No data: probability 1 if the fallback prediction equals
                # the actual output, else 0.
                prob = 1.0 * (inst.output == prediction)
            score = np.log(prob)
            if options.listener:
                score += self.score_adjustment

            predictions.append(prediction)
            scores.append(score)

        return predictions, scores

    def __getstate__(self):
        '''
        Return a picklable state: the counters as plain dicts plus the
        resolution settings. The vectorizer callables are not stored and
        are rebuilt in `__setstate__`.
        '''
        # .items() (not the Python-2-only .iteritems()) works on both
        # Python 2 and Python 3.
        return {
            'counters': {k: dict(v)
                         for k, v in self.counters.items()},
            'res': self.res,
            'hsv': self.hsv,
        }

    def __setstate__(self, state):
        '''Restore from the dict produced by `__getstate__`.'''
        self.res = state['res']
        self.hsv = state['hsv']
        self.init_vectorizer()
        # Iterate over .items(): iterating the dict itself yields only the
        # keys, so unpacking into (k, v) was wrong.
        self.counters = defaultdict(
            Counter, {k: Counter(v)
                      for k, v in state['counters'].items()})
Beispiel #7
0
 def __init__(self, recurrent=False):
     '''
     Store the `recurrent` flag and create the sampling vectorizer.

     :param recurrent: stored unchanged as `self.recurrent`.
     '''
     self.recurrent = recurrent
     # Vectorizer built with resolution [1] and hsv disabled.
     self.sampler = BucketsVectorizer([1], hsv=False)
Beispiel #8
0
class ListenerLearner(NeuralLearner):
    '''
    An LSTM-based listener (guesses colors from descriptions).

    The recurrent cell is actually looked up from `CELLS` using the
    `listener_cell` option; the output is a softmax over color buckets.
    '''
    def __init__(self, id=None):
        '''
        Initialize the parent learner, then create the description counter
        and the sequence and color vectorizers.

        :param id: identifier forwarded to the parent constructor.
        '''
        super(ListenerLearner, self).__init__(id=id)
        # Counts of whole descriptions; filled in _data_to_arrays when
        # init_vectorizer is true and read in on_iter_end.
        self.word_counts = Counter()
        self.seq_vec = SequenceVectorizer(
            unk_threshold=self.options.listener_unk_threshold)
        self.color_vec = BucketsVectorizer(
            self.options.listener_color_resolution,
            hsv=self.options.listener_hsv)

    def predict_and_score(self, eval_instances, random=False, verbosity=0):
        '''
        Predict a color for every instance and score its actual color.

        :param eval_instances: sized collection of instances (its `len` is
                               used to compute the number of batches).
        :param random: if true, sample a bucket from the predicted
                       distribution instead of taking the argmax.
        :param verbosity: added to `options.verbosity` for the 'Testing'
                          message.
        :return: `(predictions, scores)`; each score is the log-probability
                 assigned to the bucket of the actual color plus
                 `bucket_adjustment()`.
        '''
        predictions = []
        scores = []
        batches = iterators.iter_batches(eval_instances,
                                         self.options.listener_eval_batch_size)
        # Ceiling division: number of batches needed to cover all instances.
        num_batches = (len(eval_instances) -
                       1) // self.options.listener_eval_batch_size + 1

        if self.options.verbosity + verbosity >= 2:
            print('Testing')
        progress.start_task('Eval batch', num_batches)
        for batch_num, batch in enumerate(batches):
            progress.progress(batch_num)
            batch = list(batch)

            xs, (y, ) = self._data_to_arrays(batch, test=True)

            probs = self.model.predict(xs)
            if random:
                indices = sample(probs)
                predictions.extend(self.unvectorize(indices, random=True))
            else:
                predictions.extend(self.unvectorize(probs.argmax(axis=1)))
            # Pick, for each row, the probability of the true bucket.
            scores_arr = np.log(probs[np.arange(len(batch)),
                                      y]) + self.bucket_adjustment()
            scores.extend(scores_arr.tolist())
        progress.end_task()
        if self.options.verbosity >= 9:
            # The formatting must happen inside the call: applying `%` to
            # the result of print() raises TypeError when print is a
            # function.
            print('%s %ss:' % (self.id, 'sample' if random else 'prediction'))
            for inst, prediction in zip(eval_instances, predictions):
                print('%s -> %s' % (repr(inst.input), repr(prediction)))

        return predictions, scores

    def unvectorize(self, indices, random=False):
        '''Convert bucket indices back to colors via the color vectorizer.'''
        return self.color_vec.unvectorize_all(indices, random=random, hsv=True)

    def bucket_adjustment(self):
        '''Return `-log(256**3 / num_types)`, the negative log bucket volume.'''
        bucket_volume = (256.0**3) / self.color_vec.num_types
        return -np.log(bucket_volume)

    def on_iter_end(self, step, writer):
        '''
        Log images of the predicted distribution for the ten most common
        descriptions, then delegate to the parent's `on_iter_end`.

        :param step: passed to `writer.log_image` and to the parent.
        :param writer: object providing `log_image(step, tag, image)`.
        '''
        most_common = [
            desc for desc, count in self.word_counts.most_common(10)
        ]
        insts = [instance.Instance(input=desc) for desc in most_common]
        xs, (y, ) = self._data_to_arrays(insts, test=True)
        probs = self.model.predict(xs)
        for i, desc in enumerate(most_common):
            dist = probs[i, :]
            # zip with '012' tags each image with a channel label and uses
            # at most three images.
            for image, channel in zip(
                    self.color_vec.visualize_distribution(dist), '012'):
                writer.log_image(step, '%s/%s/%s' % (self.id, desc, channel),
                                 image)
        super(ListenerLearner, self).on_iter_end(step, writer)

    def _data_to_arrays(self,
                        training_instances,
                        init_vectorizer=False,
                        test=False,
                        inverted=False):
        '''
        Convert instances into index arrays for the model.

        :param training_instances: instances with `input`/`output` (and,
                                   for indexed colors, `alt_inputs` or
                                   `alt_outputs`).
        :param init_vectorizer: if true, first add the tokenized
                                descriptions to the sequence vectorizer,
                                update `word_counts` and dump the
                                unk-replaced training sentences.
        :param test: must be true for instances without a color (checked
                     with an assert); such instances get (0.0, 0.0, 0.0).
        :param inverted: if true, the description is read from `output`
                         and the color from `input`.
        :return: `([x], [y])` where `x` is an int32 array of shape
                 (num instances, `seq_vec.max_len`) of token indices and
                 `y` is an int32 array of color bucket indices.
        '''
        def get_multi(val):
            # Unwrap a one-element tuple; pass anything else through.
            if isinstance(val, tuple):
                assert len(val) == 1
                return val[0]
            else:
                return val

        get_i, get_o = (lambda inst: inst.input), (lambda inst: inst.output)
        get_desc, get_color = (get_o, get_i) if inverted else (get_i, get_o)

        get_i_ind, get_o_ind = (
            (lambda inst: inst.alt_inputs[get_multi(inst.input)]),
            (lambda inst: inst.alt_outputs[get_multi(inst.output)]))
        get_color_indexed = get_i_ind if inverted else get_o_ind

        if hasattr(self.options, 'listener_tokenizer'):
            tokenize = TOKENIZERS[self.options.listener_tokenizer]
        else:
            tokenize = TOKENIZERS['whitespace']

        if init_vectorizer:
            tokenized = [['<s>'] + tokenize(get_desc(inst)) + ['</s>']
                         for inst in training_instances]
            self.seq_vec.add_all(tokenized)
            unk_replaced = self.seq_vec.unk_replace_all(tokenized)
            self.word_counts.update(
                [get_desc(inst) for inst in training_instances])
            config.dump(unk_replaced, 'unk_replaced.train.jsons', lines=True)

        sentences = []
        colors = []
        if self.options.verbosity >= 9:
            print('%s _data_to_arrays:' % self.id)
        for inst in training_instances:
            desc = tokenize(get_desc(inst))
            color = get_color(inst)
            # A numeric color is an index into the instance's alternatives.
            if isinstance(color, numbers.Number):
                color = get_color_indexed(inst)
            if not color:
                assert test
                color = (0.0, 0.0, 0.0)
            # Left-pad with '<s>' so that, with the closing '</s>', the
            # sentence reaches max_len (no padding if already too long).
            s = ['<s>'] * (self.seq_vec.max_len - 1 - len(desc)) + desc
            s.append('</s>')
            if self.options.verbosity >= 9:
                print('%s -> %s' % (repr(s), repr(color)))
            sentences.append(s)
            colors.append(color)

        x = np.zeros((len(sentences), self.seq_vec.max_len), dtype=np.int32)
        y = np.zeros((len(sentences), ), dtype=np.int32)
        for i, sentence in enumerate(sentences):
            # Over-long sentences are cut to max_len tokens.
            if len(sentence) > x.shape[1]:
                sentence = sentence[:x.shape[1]]
            x[i, :] = self.seq_vec.vectorize(sentence)
            y[i] = self.color_vec.vectorize(colors[i], hsv=True)

        return [x], [y]

    def _build_model(self, model_class=SimpleLasagneModel):
        '''
        Create the symbolic input/target variables, build the network and
        wrap it in `model_class`, storing the result in `self.model`.

        :param model_class: callable used to construct the model wrapper.
        '''
        id_tag = (self.id + '/') if self.id else ''

        input_var = T.imatrix(id_tag + 'inputs')
        target_var = T.ivector(id_tag + 'targets')

        self.l_out, self.input_layers = self._get_l_out([input_var])
        self.loss = categorical_crossentropy

        self.model = model_class(
            [input_var], [target_var],
            self.l_out,
            loss=self.loss,
            optimizer=OPTIMIZERS[self.options.listener_optimizer],
            learning_rate=self.options.listener_learning_rate,
            id=self.id)

    def train_priors(self, training_instances, listener_data=False):
        '''
        Instantiate and train the two priors (`prior_emp`, `prior_smooth`),
        both of the class selected by the `listener_prior` option.
        '''
        prior_class = PRIORS[self.options.listener_prior]
        self.prior_emp = prior_class(
        )  # TODO: accurate values for empirical prior
        self.prior_smooth = prior_class()

        self.prior_emp.train(training_instances, listener_data=listener_data)
        self.prior_smooth.train(training_instances,
                                listener_data=listener_data)

    def _get_l_out(self, input_vars):
        '''
        Build the network: embedding -> two recurrent layers -> dense
        hidden layer -> dense scores over color buckets -> softmax. A
        dropout layer follows each recurrent and hidden layer when
        `listener_dropout` is positive.

        :param input_vars: list whose first element is the symbolic
                           description input.
        :return: `(l_out, [l_in])`, the output layer and the input layers.
        '''
        check_options(self.options)
        id_tag = (self.id + '/') if self.id else ''

        input_var = input_vars[0]

        l_in = InputLayer(shape=(None, self.seq_vec.max_len),
                          input_var=input_var,
                          name=id_tag + 'desc_input')
        l_in_embed = EmbeddingLayer(
            l_in,
            input_size=len(self.seq_vec.tokens),
            output_size=self.options.listener_cell_size,
            name=id_tag + 'desc_embed')

        cell = CELLS[self.options.listener_cell]
        cell_kwargs = {
            'grad_clipping': self.options.listener_grad_clipping,
            'num_units': self.options.listener_cell_size,
        }
        # The forget-gate bias is only passed to LSTM cells; the
        # nonlinearity is passed to every cell type except GRU.
        if self.options.listener_cell == 'LSTM':
            cell_kwargs['forgetgate'] = Gate(
                b=Constant(self.options.listener_forget_bias))
        if self.options.listener_cell != 'GRU':
            cell_kwargs['nonlinearity'] = NONLINEARITIES[
                self.options.listener_nonlinearity]

        l_rec1 = cell(l_in_embed, name=id_tag + 'rec1', **cell_kwargs)
        if self.options.listener_dropout > 0.0:
            l_rec1_drop = DropoutLayer(l_rec1,
                                       p=self.options.listener_dropout,
                                       name=id_tag + 'rec1_drop')
        else:
            l_rec1_drop = l_rec1
        l_rec2 = cell(l_rec1_drop, name=id_tag + 'rec2', **cell_kwargs)
        if self.options.listener_dropout > 0.0:
            l_rec2_drop = DropoutLayer(l_rec2,
                                       p=self.options.listener_dropout,
                                       name=id_tag + 'rec2_drop')
        else:
            l_rec2_drop = l_rec2

        l_hidden = DenseLayer(
            l_rec2_drop,
            num_units=self.options.listener_cell_size,
            nonlinearity=NONLINEARITIES[self.options.listener_nonlinearity],
            name=id_tag + 'hidden')
        if self.options.listener_dropout > 0.0:
            l_hidden_drop = DropoutLayer(l_hidden,
                                         p=self.options.listener_dropout,
                                         name=id_tag + 'hidden_drop')
        else:
            l_hidden_drop = l_hidden
        # One unnormalized score per color bucket, normalized by softmax.
        l_scores = DenseLayer(l_hidden_drop,
                              num_units=self.color_vec.num_types,
                              nonlinearity=None,
                              name=id_tag + 'scores')
        l_out = NonlinearityLayer(l_scores,
                                  nonlinearity=softmax,
                                  name=id_tag + 'out')

        return l_out, [l_in]

    def sample_prior_smooth(self, num_samples):
        '''Draw `num_samples` samples from `prior_smooth`.'''
        return self.prior_smooth.sample(num_samples)
 def __init__(self, id=None):
     '''
     Initialize the parent learner, then create an empty description
     counter, a sequence vectorizer (with the listener unk threshold) and
     a bucketed color vectorizer.

     :param id: identifier forwarded unchanged to the parent constructor.
     '''
     super(ListenerLearner, self).__init__(id=id)
     self.word_counts = Counter()
     unk_threshold = self.options.listener_unk_threshold
     self.seq_vec = SequenceVectorizer(unk_threshold=unk_threshold)
     # Both color settings are read from the listener_* options.
     opts = self.options
     self.color_vec = BucketsVectorizer(opts.listener_color_resolution,
                                        hsv=opts.listener_hsv)
Beispiel #10
0
class ListenerLearner(NeuralLearner):
    '''
    An LSTM-based listener (guesses colors from descriptions).

    The recurrent cell is actually looked up from `CELLS` using the
    `listener_cell` option; the output is a softmax over color buckets.
    '''
    def __init__(self, id=None):
        '''
        Initialize the parent learner, then create the description counter
        and the sequence and color vectorizers.

        :param id: identifier forwarded to the parent constructor.
        '''
        super(ListenerLearner, self).__init__(id=id)
        # Counts of whole descriptions; filled in _data_to_arrays when
        # init_vectorizer is true and read in on_iter_end.
        self.word_counts = Counter()
        self.seq_vec = SequenceVectorizer(unk_threshold=self.options.listener_unk_threshold)
        self.color_vec = BucketsVectorizer(self.options.listener_color_resolution,
                                           hsv=self.options.listener_hsv)

    def predict_and_score(self, eval_instances, random=False, verbosity=0):
        '''
        Predict a color for every instance and score its actual color.

        :param eval_instances: sized collection of instances (its `len` is
                               used to compute the number of batches).
        :param random: if true, sample a bucket from the predicted
                       distribution instead of taking the argmax.
        :param verbosity: added to `options.verbosity` for the 'Testing'
                          message.
        :return: `(predictions, scores)`; each score is the log-probability
                 assigned to the bucket of the actual color plus
                 `bucket_adjustment()`.
        '''
        predictions = []
        scores = []
        batches = iterators.iter_batches(eval_instances, self.options.listener_eval_batch_size)
        # Ceiling division: number of batches needed to cover all instances.
        num_batches = (len(eval_instances) - 1) // self.options.listener_eval_batch_size + 1

        if self.options.verbosity + verbosity >= 2:
            print('Testing')
        progress.start_task('Eval batch', num_batches)
        for batch_num, batch in enumerate(batches):
            progress.progress(batch_num)
            batch = list(batch)

            xs, (y,) = self._data_to_arrays(batch, test=True)

            probs = self.model.predict(xs)
            # Hook called with each batch's inputs after prediction.
            self.on_predict(xs)
            if random:
                indices = sample(probs)
                predictions.extend(self.unvectorize(indices, random=True))
            else:
                predictions.extend(self.unvectorize(probs.argmax(axis=1)))
            # Pick, for each row, the probability of the true bucket.
            scores_arr = np.log(probs[np.arange(len(batch)), y]) + self.bucket_adjustment()
            scores.extend(scores_arr.tolist())
        progress.end_task()
        if self.options.verbosity >= 9:
            # The formatting must happen inside the call: applying `%` to
            # the result of print() raises TypeError when print is a
            # function.
            print('%s %ss:' % (self.id, 'sample' if random else 'prediction'))
            for inst, prediction in zip(eval_instances, predictions):
                print('%s -> %s' % (repr(inst.input), repr(prediction)))

        return predictions, scores

    def unvectorize(self, indices, random=False):
        '''Convert bucket indices back to colors via the color vectorizer.'''
        return self.color_vec.unvectorize_all(indices, random=random, hsv=True)

    def bucket_adjustment(self):
        '''Return `-log(256**3 / num_types)`, the negative log bucket volume.'''
        bucket_volume = (256.0 ** 3) / self.color_vec.num_types
        return -np.log(bucket_volume)

    def on_predict(self, xs):
        '''Hook invoked by `predict_and_score` for each batch; does nothing here.'''
        pass

    def on_iter_end(self, step, writer):
        '''
        Log images of the predicted distribution for the ten most common
        descriptions, then delegate to the parent's `on_iter_end`.

        :param step: passed to `writer.log_image` and to the parent.
        :param writer: object providing `log_image(step, tag, image)`.
        '''
        most_common = [desc for desc, count in self.word_counts.most_common(10)]
        insts = [instance.Instance(input=desc) for desc in most_common]
        xs, (y,) = self._data_to_arrays(insts, test=True)
        probs = self.model.predict(xs)
        for i, desc in enumerate(most_common):
            dist = probs[i, :]
            # zip with '012' tags each image with a channel label and uses
            # at most three images.
            for image, channel in zip(self.color_vec.visualize_distribution(dist), '012'):
                writer.log_image(step, '%s/%s/%s' % (self.id, desc, channel), image)
        super(ListenerLearner, self).on_iter_end(step, writer)

    def _data_to_arrays(self, training_instances,
                        init_vectorizer=False, test=False, inverted=False):
        '''
        Convert instances into index arrays for the model.

        :param training_instances: instances with `input`/`output` (and,
                                   for indexed colors, `alt_inputs` or
                                   `alt_outputs`).
        :param init_vectorizer: if true, first add the tokenized
                                descriptions to the sequence vectorizer,
                                update `word_counts` and dump the
                                unk-replaced training sentences.
        :param test: must be true for instances without a color (checked
                     with an assert); such instances get (0.0, 0.0, 0.0).
        :param inverted: if true, the description is read from `output`
                         and the color from `input`.
        :return: `([x], [y])` where `x` is an int32 array of shape
                 (num instances, `seq_vec.max_len`) of token indices and
                 `y` is an int32 array of color bucket indices.
        '''
        def get_multi(val):
            # Unwrap a one-element tuple; pass anything else through.
            if isinstance(val, tuple):
                assert len(val) == 1
                return val[0]
            else:
                return val

        get_i, get_o = (lambda inst: inst.input), (lambda inst: inst.output)
        get_desc, get_color = (get_o, get_i) if inverted else (get_i, get_o)

        get_i_ind, get_o_ind = ((lambda inst: inst.alt_inputs[get_multi(inst.input)]),
                                (lambda inst: inst.alt_outputs[get_multi(inst.output)]))
        get_color_indexed = get_i_ind if inverted else get_o_ind

        if hasattr(self.options, 'listener_tokenizer'):
            tokenize = TOKENIZERS[self.options.listener_tokenizer]
        else:
            tokenize = TOKENIZERS['whitespace']

        if init_vectorizer:
            tokenized = [['<s>'] + tokenize(get_desc(inst)) + ['</s>']
                         for inst in training_instances]
            self.seq_vec.add_all(tokenized)
            unk_replaced = self.seq_vec.unk_replace_all(tokenized)
            self.word_counts.update([get_desc(inst) for inst in training_instances])
            config.dump(unk_replaced, 'unk_replaced.train.jsons', lines=True)

        sentences = []
        colors = []
        if self.options.verbosity >= 9:
            print('%s _data_to_arrays:' % self.id)
        for inst in training_instances:
            desc = tokenize(get_desc(inst))
            color = get_color(inst)
            # A numeric color is an index into the instance's alternatives.
            if isinstance(color, numbers.Number):
                color = get_color_indexed(inst)
            if not color:
                assert test
                color = (0.0, 0.0, 0.0)
            # Left-pad with '<s>' so that, with the closing '</s>', the
            # sentence reaches max_len (no padding if already too long).
            s = ['<s>'] * (self.seq_vec.max_len - 1 - len(desc)) + desc
            s.append('</s>')
            if self.options.verbosity >= 9:
                print('%s -> %s' % (repr(s), repr(color)))
            sentences.append(s)
            colors.append(color)

        x = np.zeros((len(sentences), self.seq_vec.max_len), dtype=np.int32)
        y = np.zeros((len(sentences),), dtype=np.int32)
        for i, sentence in enumerate(sentences):
            # Over-long sentences are cut to max_len tokens.
            if len(sentence) > x.shape[1]:
                sentence = sentence[:x.shape[1]]
            x[i, :] = self.seq_vec.vectorize(sentence)
            y[i] = self.color_vec.vectorize(colors[i], hsv=True)

        return [x], [y]

    def _build_model(self, model_class=SimpleLasagneModel):
        '''
        Create the symbolic input/target variables, build the network and
        wrap it in `model_class`, storing the result in `self.model`.

        :param model_class: callable used to construct the model wrapper.
        '''
        id_tag = (self.id + '/') if self.id else ''

        input_var = T.imatrix(id_tag + 'inputs')
        target_var = T.ivector(id_tag + 'targets')

        self.l_out, self.input_layers = self._get_l_out([input_var])
        self.loss = categorical_crossentropy

        self.model = model_class(
            [input_var], [target_var], self.l_out,
            loss=self.loss, optimizer=OPTIMIZERS[self.options.listener_optimizer],
            learning_rate=self.options.listener_learning_rate,
            id=self.id)

    def train_priors(self, training_instances, listener_data=False):
        '''
        Instantiate and train the two priors (`prior_emp`, `prior_smooth`),
        both of the class selected by the `listener_prior` option.
        '''
        prior_class = PRIORS[self.options.listener_prior]
        self.prior_emp = prior_class()  # TODO: accurate values for empirical prior
        self.prior_smooth = prior_class()

        self.prior_emp.train(training_instances, listener_data=listener_data)
        self.prior_smooth.train(training_instances, listener_data=listener_data)

    def _get_l_out(self, input_vars):
        '''
        Build the network: embedding -> two recurrent layers -> dense
        hidden layer -> dense scores over color buckets -> softmax. A
        dropout layer follows each recurrent and hidden layer when
        `listener_dropout` is positive.

        :param input_vars: list whose first element is the symbolic
                           description input.
        :return: `(l_out, [l_in])`, the output layer and the input layers.
        '''
        check_options(self.options)
        id_tag = (self.id + '/') if self.id else ''

        input_var = input_vars[0]

        l_in = InputLayer(shape=(None, self.seq_vec.max_len), input_var=input_var,
                          name=id_tag + 'desc_input')
        l_in_embed = EmbeddingLayer(l_in, input_size=len(self.seq_vec.tokens),
                                    output_size=self.options.listener_cell_size,
                                    name=id_tag + 'desc_embed')

        cell = CELLS[self.options.listener_cell]
        cell_kwargs = {
            'grad_clipping': self.options.listener_grad_clipping,
            'num_units': self.options.listener_cell_size,
        }
        # The forget-gate bias is only passed to LSTM cells; the
        # nonlinearity is passed to every cell type except GRU.
        if self.options.listener_cell == 'LSTM':
            cell_kwargs['forgetgate'] = Gate(b=Constant(self.options.listener_forget_bias))
        if self.options.listener_cell != 'GRU':
            cell_kwargs['nonlinearity'] = NONLINEARITIES[self.options.listener_nonlinearity]

        l_rec1 = cell(l_in_embed, name=id_tag + 'rec1', **cell_kwargs)
        if self.options.listener_dropout > 0.0:
            l_rec1_drop = DropoutLayer(l_rec1, p=self.options.listener_dropout,
                                       name=id_tag + 'rec1_drop')
        else:
            l_rec1_drop = l_rec1
        l_rec2 = cell(l_rec1_drop, name=id_tag + 'rec2', **cell_kwargs)
        if self.options.listener_dropout > 0.0:
            l_rec2_drop = DropoutLayer(l_rec2, p=self.options.listener_dropout,
                                       name=id_tag + 'rec2_drop')
        else:
            l_rec2_drop = l_rec2

        l_hidden = DenseLayer(l_rec2_drop, num_units=self.options.listener_cell_size,
                              nonlinearity=NONLINEARITIES[self.options.listener_nonlinearity],
                              name=id_tag + 'hidden')
        if self.options.listener_dropout > 0.0:
            l_hidden_drop = DropoutLayer(l_hidden, p=self.options.listener_dropout,
                                         name=id_tag + 'hidden_drop')
        else:
            l_hidden_drop = l_hidden
        # One unnormalized score per color bucket, normalized by softmax.
        l_scores = DenseLayer(l_hidden_drop, num_units=self.color_vec.num_types, nonlinearity=None,
                              name=id_tag + 'scores')
        l_out = NonlinearityLayer(l_scores, nonlinearity=softmax, name=id_tag + 'out')

        return l_out, [l_in]

    def sample_prior_smooth(self, num_samples):
        '''Draw `num_samples` samples from `prior_smooth`.'''
        return self.prior_smooth.sample(num_samples)
Beispiel #11
0
class LookupLearner(Learner):
    '''
    A count-based learner: for every input seen in training it stores a
    Counter of the outputs that accompanied it, and predicts the most
    frequent one.

    In listener mode (`options.listener` true) the output is passed through
    `vectorize`; otherwise the input is.
    '''
    def __init__(self):
        '''Read resolution/hsv from the global options and build the vectorizer.'''
        options = config.options()
        # counters[input][output] -> number of times the pair was seen.
        self.counters = defaultdict(Counter)
        if options.listener:
            res = options.listener_color_resolution
            hsv = options.listener_hsv
        else:
            res = options.speaker_color_resolution
            hsv = options.speaker_hsv
        self.res = res
        self.hsv = hsv
        self.init_vectorizer()

    def init_vectorizer(self):
        '''
        Install `vectorize`, `unvectorize` and `score_adjustment`.

        With a usable resolution (`self.res` non-empty and its first entry
        truthy) colors go through a `BucketsVectorizer` and the adjustment
        is `-log(256**3 / num_types)`; otherwise the callables are
        identities and the adjustment is 0.0.
        '''
        if self.res and self.res[0]:
            # A single resolution value is repeated three times.
            if len(self.res) == 1:
                self.res = self.res * 3
            self.color_vec = BucketsVectorizer(self.res, hsv=self.hsv)
            self.vectorize = lambda c: self.color_vec.vectorize(c, hsv=True)
            self.unvectorize = lambda c: self.color_vec.unvectorize(c, hsv=True)
            self.score_adjustment = -np.log((256.0 ** 3) / self.color_vec.num_types)
        else:
            self.vectorize = lambda c: c
            self.unvectorize = lambda c: c
            self.score_adjustment = 0.0

    @property
    def num_params(self):
        '''Total number of distinct (input, output) pairs stored.'''
        return sum(len(c) for c in self.counters.values())

    def train(self, training_instances, validation_instances='ignored', metrics='ignored'):
        '''
        Count every (input, output) pair in `training_instances`.

        :param training_instances: iterable of objects with `input` and
                                   `output` attributes.
        :param validation_instances: ignored.
        :param metrics: ignored.
        '''
        options = config.options()
        for inst in training_instances:
            inp, out = inst.input, inst.output
            if options.listener:
                out = self.vectorize(out)
            else:
                inp = self.vectorize(inp)
            self.counters[inp][out] += 1

    def predict_and_score(self, eval_instances, random='ignored', verbosity=0):
        '''
        Predict the most frequent output for each instance and score the
        instance's actual output.

        :param eval_instances: iterable of objects with `input` and
                               `output` attributes.
        :param random: ignored.
        :param verbosity: added to `options.verbosity` when deciding what
                          to print.
        :return: `(predictions, scores)`, two parallel lists. Each score is
                 the log of the relative frequency of the actual output
                 among the outputs counted for that input (plus
                 `score_adjustment` in listener mode). A frequency of zero
                 yields `-inf` from `np.log`.
        '''
        options = config.options()
        if options.verbosity + verbosity >= 2:
            print('Testing')
        predictions = []
        scores = []
        for inst in eval_instances:
            inp, out = inst.input, inst.output
            if options.listener:
                out = self.vectorize(out)
            else:
                inp = self.vectorize(inp)

            # Use .get() rather than indexing: indexing the defaultdict
            # would insert an empty Counter for every unseen input and so
            # mutate the trained model during evaluation.
            counter = self.counters.get(inp, Counter())
            highest = counter.most_common(1)
            if highest:
                if options.listener:
                    prediction = self.unvectorize(highest[0][0])
                else:
                    prediction = highest[0][0]
            elif options.listener:
                # Fallback prediction for inputs never seen in training.
                prediction = (0, 0, 0)
            else:
                prediction = '<unk>'

            total = sum(counter.values())
            if total:
                if options.verbosity + verbosity >= 9:
                    print('%s -> %s: %s of %s [%s]' % (repr(inp), repr(out), counter[out],
                                                       total, inst.input))
                prob = counter[out] * 1.0 / total
            else:
                if options.verbosity + verbosity >= 9:
                    print('%s -> %s: no data [%s]' % (repr(inp), repr(out), inst.input))
                # No data: probability 1 if the fallback prediction equals
                # the actual output, else 0.
                prob = 1.0 * (inst.output == prediction)
            score = np.log(prob)
            if options.listener:
                score += self.score_adjustment

            predictions.append(prediction)
            scores.append(score)

        return predictions, scores

    def __getstate__(self):
        '''
        Return a picklable state: the counters as plain dicts plus the
        resolution settings. The vectorizer callables are not stored and
        are rebuilt in `__setstate__`.
        '''
        # .items() (not the Python-2-only .iteritems()) works on both
        # Python 2 and Python 3.
        return {
            'counters': {k: dict(v) for k, v in self.counters.items()},
            'res': self.res,
            'hsv': self.hsv,
        }

    def __setstate__(self, state):
        '''Restore from the dict produced by `__getstate__`.'''
        self.res = state['res']
        self.hsv = state['hsv']
        self.init_vectorizer()
        # Iterate over .items(): iterating the dict itself yields only the
        # keys, so unpacking into (k, v) was wrong.
        self.counters = defaultdict(Counter,
                                    {k: Counter(v) for k, v in state['counters'].items()})