Esempio n. 1
0
class UniformPrior(object):
    '''A uniform color prior in RGB space.'''
    def __init__(self, recurrent=False):
        '''
        :param recurrent: selects which input ranks `apply` accepts
                          (2 or 3 when true, 1 or 2 when false).
        '''
        # Only bucket index 0 is ever requested from this vectorizer (see
        # `sample`); presumably resolution [1] means a single bucket covering
        # the whole color space -- confirm against BucketsVectorizer.
        self.sampler = BucketsVectorizer([1], hsv=False)
        self.recurrent = recurrent

    def train(self, training_instances, listener_data='ignored'):
        '''Do nothing: a uniform prior has no parameters to fit.'''
        pass

    def apply(self, input_vars):
        '''
        Build the constant log density of the uniform prior.

        :param input_vars: sequence whose first element is the color
                           variable; only its rank and leading axis are used.
        :return: `-3 * log(256)` (i.e. `log(1 / 256**3)`) times a vector of
                 ones shaped like the leading axis of the color variable.
        :raises AssertionError: if the rank of the color variable is not one
                                of those supported for the current mode.
        '''
        c = input_vars[0]
        # Index away every axis but the first, so that `ones` has one entry
        # per element of the leading axis of `c`.
        if self.recurrent:
            if c.ndim == 2:
                ones = T.ones_like(c[:, 0])
            elif c.ndim == 3:
                ones = T.ones_like(c[:, 0, 0])
            else:
                # Raised explicitly rather than via `assert False`: asserts
                # are stripped under `python -O`, which would leave `ones`
                # unbound and surface as an unrelated NameError below.
                raise AssertionError(
                    'need handling for higher rank color vectors (recurrent): %d' % c.ndim)
        else:
            if c.ndim == 1:
                ones = T.ones_like(c)
            elif c.ndim == 2:
                ones = T.ones_like(c[:, 0])
            else:
                raise AssertionError(
                    'need handling for higher rank color vectors (atomic): %d' % c.ndim)
        return -3.0 * np.log(256.0) * ones

    def sample(self, num_samples):
        '''
        :return: a list of `num_samples` colors sampled uniformly in RGB space,
                 but expressed as HSV triples.
        '''
        # Every sample asks for bucket 0; `random=True` is passed so the
        # vectorizer randomizes the color it returns for that bucket.
        colors = self.sampler.unvectorize_all(np.zeros(num_samples, dtype=np.int32),
                                              random=True, hsv=True)
        return [instance.Instance(c) for c in colors]
Esempio n. 2
0
class ListenerLearner(NeuralLearner):
    '''
    An LSTM-based listener (guesses colors from descriptions).
    '''
    def __init__(self, id=None):
        '''
        :param id: identifier forwarded to the base class; used here as a
                   prefix for variable/layer names and log tags.
        '''
        super(ListenerLearner, self).__init__(id=id)
        # Counts of whole descriptions, filled in by `_data_to_arrays` when it
        # initializes the vectorizer and read by `on_iter_end`.
        self.word_counts = Counter()
        self.seq_vec = SequenceVectorizer(
            unk_threshold=self.options.listener_unk_threshold)
        self.color_vec = BucketsVectorizer(
            self.options.listener_color_resolution,
            hsv=self.options.listener_hsv)

    def predict_and_score(self, eval_instances, random=False, verbosity=0):
        '''
        Predict a color for every instance and score its true output.

        :param eval_instances: instances to evaluate; must support `len()`.
        :param random: if true, draw a bucket from the predicted distribution
                       (via `sample`) instead of taking the argmax.
        :param verbosity: added to `self.options.verbosity` when deciding
                          whether to print the 'Testing' banner.
        :return: a pair `(predictions, scores)`; each score is the log
                 probability of the target bucket plus `bucket_adjustment()`.
        '''
        predictions = []
        scores = []
        batches = iterators.iter_batches(eval_instances,
                                         self.options.listener_eval_batch_size)
        # Ceiling division: number of batches needed to cover all instances.
        num_batches = (len(eval_instances) -
                       1) // self.options.listener_eval_batch_size + 1

        if self.options.verbosity + verbosity >= 2:
            print('Testing')
        progress.start_task('Eval batch', num_batches)
        for batch_num, batch in enumerate(batches):
            progress.progress(batch_num)
            batch = list(batch)

            xs, (y, ) = self._data_to_arrays(batch, test=True)

            probs = self.model.predict(xs)
            if random:
                indices = sample(probs)
                predictions.extend(self.unvectorize(indices, random=True))
            else:
                predictions.extend(self.unvectorize(probs.argmax(axis=1)))
            # Pick out, for each row, the probability of its target bucket.
            scores_arr = np.log(probs[np.arange(len(batch)),
                                      y]) + self.bucket_adjustment()
            scores.extend(scores_arr.tolist())
        progress.end_task()
        if self.options.verbosity >= 9:
            # The format operator must be applied to the string inside the
            # call; applying it to the result of print() fails with the
            # print function (None % tuple).
            print('%s %ss:' % (self.id, 'sample' if random else 'prediction'))
            for inst, prediction in zip(eval_instances, predictions):
                print('%s -> %s' % (repr(inst.input), repr(prediction)))

        return predictions, scores

    def unvectorize(self, indices, random=False):
        '''Convert bucket indices to colors via the color vectorizer (hsv=True).'''
        return self.color_vec.unvectorize_all(indices, random=random, hsv=True)

    def bucket_adjustment(self):
        '''
        Return the negative log of the volume of one color bucket, taking the
        full space to have volume 256**3 split evenly among
        `self.color_vec.num_types` buckets. Adding this to a bucket's log
        probability divides that probability by the bucket volume.
        '''
        bucket_volume = (256.0**3) / self.color_vec.num_types
        return -np.log(bucket_volume)

    def on_iter_end(self, step, writer):
        '''
        Log images of the predicted distributions for the 10 most common
        descriptions in `self.word_counts`, then defer to the base class.

        :param step: step value passed through to `writer.log_image`.
        :param writer: object providing `log_image(step, tag, image)`.
        '''
        most_common = [
            desc for desc, count in self.word_counts.most_common(10)
        ]
        insts = [instance.Instance(input=desc) for desc in most_common]
        # Targets are placeholders here (test=True); only the inputs are used.
        xs, (_, ) = self._data_to_arrays(insts, test=True)
        probs = self.model.predict(xs)
        for i, desc in enumerate(most_common):
            dist = probs[i, :]
            # Zipping with '012' tags (and limits to three) the images
            # returned by the vectorizer.
            for image, channel in zip(
                    self.color_vec.visualize_distribution(dist), '012'):
                writer.log_image(step, '%s/%s/%s' % (self.id, desc, channel),
                                 image)
        super(ListenerLearner, self).on_iter_end(step, writer)

    def _data_to_arrays(self,
                        training_instances,
                        init_vectorizer=False,
                        test=False,
                        inverted=False):
        '''
        Vectorize instances into model input and target arrays.

        :param training_instances: instances to convert.
        :param init_vectorizer: if true, first add the tokenized descriptions
            to the sequence vectorizer, update `self.word_counts`, and dump
            the unk-replaced sentences to 'unk_replaced.train.jsons'.
        :param test: must be true if any instance has a missing (falsy)
            color; such colors are replaced by the placeholder (0.0, 0.0, 0.0).
        :param inverted: if true, descriptions are read from `inst.output`
            and colors from `inst.input` instead of the other way around.
        :return: `([x], [y])` with `x` an int32 array of token indices of
                 shape (num instances, `seq_vec.max_len`) and `y` an int32
                 vector of color bucket indices.
        '''
        def get_multi(val):
            # Unwrap a one-element tuple; anything else passes through.
            if isinstance(val, tuple):
                assert len(val) == 1
                return val[0]
            else:
                return val

        get_i, get_o = (lambda inst: inst.input), (lambda inst: inst.output)
        get_desc, get_color = (get_o, get_i) if inverted else (get_i, get_o)

        # Accessors for instances whose color is given as an index into the
        # instance's list of alternatives.
        get_i_ind, get_o_ind = (
            (lambda inst: inst.alt_inputs[get_multi(inst.input)]),
            (lambda inst: inst.alt_outputs[get_multi(inst.output)]))
        get_color_indexed = get_i_ind if inverted else get_o_ind

        if hasattr(self.options, 'listener_tokenizer'):
            tokenize = TOKENIZERS[self.options.listener_tokenizer]
        else:
            tokenize = TOKENIZERS['whitespace']

        if init_vectorizer:
            tokenized = [['<s>'] + tokenize(get_desc(inst)) + ['</s>']
                         for inst in training_instances]
            self.seq_vec.add_all(tokenized)
            unk_replaced = self.seq_vec.unk_replace_all(tokenized)
            self.word_counts.update(
                [get_desc(inst) for inst in training_instances])
            config.dump(unk_replaced, 'unk_replaced.train.jsons', lines=True)

        sentences = []
        colors = []
        if self.options.verbosity >= 9:
            print('%s _data_to_arrays:' % self.id)
        for inst in training_instances:
            desc = tokenize(get_desc(inst))
            color = get_color(inst)
            if isinstance(color, numbers.Number):
                # A numeric color is an index into the alternatives.
                color = get_color_indexed(inst)
            if not color:
                # Missing colors are only tolerated at test time.
                assert test
                color = (0.0, 0.0, 0.0)
            # Left-pad with '<s>' so that the sentence, including the final
            # '</s>', is `max_len` tokens long (no padding if already longer).
            s = ['<s>'] * (self.seq_vec.max_len - 1 - len(desc)) + desc
            s.append('</s>')
            if self.options.verbosity >= 9:
                print('%s -> %s' % (repr(s), repr(color)))
            sentences.append(s)
            colors.append(color)

        x = np.zeros((len(sentences), self.seq_vec.max_len), dtype=np.int32)
        y = np.zeros((len(sentences), ), dtype=np.int32)
        for i, sentence in enumerate(sentences):
            # Over-long sentences are cut to the first `max_len` tokens.
            if len(sentence) > x.shape[1]:
                sentence = sentence[:x.shape[1]]
            x[i, :] = self.seq_vec.vectorize(sentence)
            y[i] = self.color_vec.vectorize(colors[i], hsv=True)

        return [x], [y]

    def _build_model(self, model_class=SimpleLasagneModel):
        '''
        Create the input/target variables and network, and wrap them in
        `model_class`, storing the result in `self.model`.

        :param model_class: model wrapper to instantiate.
        '''
        id_tag = (self.id + '/') if self.id else ''

        input_var = T.imatrix(id_tag + 'inputs')
        target_var = T.ivector(id_tag + 'targets')

        self.l_out, self.input_layers = self._get_l_out([input_var])
        self.loss = categorical_crossentropy

        self.model = model_class(
            [input_var], [target_var],
            self.l_out,
            loss=self.loss,
            optimizer=OPTIMIZERS[self.options.listener_optimizer],
            learning_rate=self.options.listener_learning_rate,
            id=self.id)

    def train_priors(self, training_instances, listener_data=False):
        '''
        Instantiate and train the priors selected by
        `self.options.listener_prior`, storing them as `self.prior_emp` and
        `self.prior_smooth`.
        '''
        prior_class = PRIORS[self.options.listener_prior]
        self.prior_emp = prior_class(
        )  # TODO: accurate values for empirical prior
        self.prior_smooth = prior_class()

        self.prior_emp.train(training_instances, listener_data=listener_data)
        self.prior_smooth.train(training_instances,
                                listener_data=listener_data)

    def _get_l_out(self, input_vars):
        '''
        Build the network: embedding, two recurrent layers, a dense hidden
        layer and a softmax over color buckets, with dropout after the
        recurrent and hidden layers when `listener_dropout` is positive.

        :param input_vars: sequence whose first element is the input variable.
        :return: `(l_out, [l_in])`, the output layer and the input layers.
        '''
        check_options(self.options)
        id_tag = (self.id + '/') if self.id else ''
        dropout = self.options.listener_dropout

        def maybe_dropout(layer, name):
            # Insert a dropout layer only when a positive rate is configured.
            if dropout > 0.0:
                return DropoutLayer(layer, p=dropout, name=id_tag + name)
            return layer

        input_var = input_vars[0]

        l_in = InputLayer(shape=(None, self.seq_vec.max_len),
                          input_var=input_var,
                          name=id_tag + 'desc_input')
        l_in_embed = EmbeddingLayer(
            l_in,
            input_size=len(self.seq_vec.tokens),
            output_size=self.options.listener_cell_size,
            name=id_tag + 'desc_embed')

        cell = CELLS[self.options.listener_cell]
        cell_kwargs = {
            'grad_clipping': self.options.listener_grad_clipping,
            'num_units': self.options.listener_cell_size,
        }
        # The forget-gate bias is only configured for LSTM cells, and the
        # nonlinearity for every cell type except GRU.
        if self.options.listener_cell == 'LSTM':
            cell_kwargs['forgetgate'] = Gate(
                b=Constant(self.options.listener_forget_bias))
        if self.options.listener_cell != 'GRU':
            cell_kwargs['nonlinearity'] = NONLINEARITIES[
                self.options.listener_nonlinearity]

        l_rec1 = cell(l_in_embed, name=id_tag + 'rec1', **cell_kwargs)
        l_rec1_drop = maybe_dropout(l_rec1, 'rec1_drop')
        l_rec2 = cell(l_rec1_drop, name=id_tag + 'rec2', **cell_kwargs)
        l_rec2_drop = maybe_dropout(l_rec2, 'rec2_drop')

        l_hidden = DenseLayer(
            l_rec2_drop,
            num_units=self.options.listener_cell_size,
            nonlinearity=NONLINEARITIES[self.options.listener_nonlinearity],
            name=id_tag + 'hidden')
        l_hidden_drop = maybe_dropout(l_hidden, 'hidden_drop')
        # Unnormalized scores, one per color bucket, followed by a softmax.
        l_scores = DenseLayer(l_hidden_drop,
                              num_units=self.color_vec.num_types,
                              nonlinearity=None,
                              name=id_tag + 'scores')
        l_out = NonlinearityLayer(l_scores,
                                  nonlinearity=softmax,
                                  name=id_tag + 'out')

        return l_out, [l_in]

    def sample_prior_smooth(self, num_samples):
        '''Draw `num_samples` samples from `self.prior_smooth`.'''
        return self.prior_smooth.sample(num_samples)
Esempio n. 3
0
class ListenerLearner(NeuralLearner):
    '''
    An LSTM-based listener (guesses colors from descriptions).
    '''
    def __init__(self, id=None):
        '''
        :param id: identifier forwarded to the base class; used here as a
                   prefix for variable/layer names and log tags.
        '''
        super(ListenerLearner, self).__init__(id=id)
        # Counts of whole descriptions, filled in by `_data_to_arrays` when it
        # initializes the vectorizer and read by `on_iter_end`.
        self.word_counts = Counter()
        self.seq_vec = SequenceVectorizer(unk_threshold=self.options.listener_unk_threshold)
        self.color_vec = BucketsVectorizer(self.options.listener_color_resolution,
                                           hsv=self.options.listener_hsv)

    def predict_and_score(self, eval_instances, random=False, verbosity=0):
        '''
        Predict a color for every instance and score its true output.

        :param eval_instances: instances to evaluate; must support `len()`.
        :param random: if true, draw a bucket from the predicted distribution
                       (via `sample`) instead of taking the argmax.
        :param verbosity: added to `self.options.verbosity` when deciding
                          whether to print the 'Testing' banner.
        :return: a pair `(predictions, scores)`; each score is the log
                 probability of the target bucket plus `bucket_adjustment()`.
        '''
        predictions = []
        scores = []
        batches = iterators.iter_batches(eval_instances, self.options.listener_eval_batch_size)
        # Ceiling division: number of batches needed to cover all instances.
        num_batches = (len(eval_instances) - 1) // self.options.listener_eval_batch_size + 1

        if self.options.verbosity + verbosity >= 2:
            print('Testing')
        progress.start_task('Eval batch', num_batches)
        for batch_num, batch in enumerate(batches):
            progress.progress(batch_num)
            batch = list(batch)

            xs, (y,) = self._data_to_arrays(batch, test=True)

            probs = self.model.predict(xs)
            # Hook called with each batch's inputs after prediction.
            self.on_predict(xs)
            if random:
                indices = sample(probs)
                predictions.extend(self.unvectorize(indices, random=True))
            else:
                predictions.extend(self.unvectorize(probs.argmax(axis=1)))
            # Pick out, for each row, the probability of its target bucket.
            scores_arr = np.log(probs[np.arange(len(batch)), y]) + self.bucket_adjustment()
            scores.extend(scores_arr.tolist())
        progress.end_task()
        if self.options.verbosity >= 9:
            # The format operator must be applied to the string inside the
            # call; applying it to the result of print() fails with the
            # print function (None % tuple).
            print('%s %ss:' % (self.id, 'sample' if random else 'prediction'))
            for inst, prediction in zip(eval_instances, predictions):
                print('%s -> %s' % (repr(inst.input), repr(prediction)))

        return predictions, scores

    def unvectorize(self, indices, random=False):
        '''Convert bucket indices to colors via the color vectorizer (hsv=True).'''
        return self.color_vec.unvectorize_all(indices, random=random, hsv=True)

    def bucket_adjustment(self):
        '''
        Return the negative log of the volume of one color bucket, taking the
        full space to have volume 256**3 split evenly among
        `self.color_vec.num_types` buckets. Adding this to a bucket's log
        probability divides that probability by the bucket volume.
        '''
        bucket_volume = (256.0 ** 3) / self.color_vec.num_types
        return -np.log(bucket_volume)

    def on_predict(self, xs):
        '''
        Hook called by `predict_and_score` with the input arrays of each
        batch, after the model has predicted on them. Does nothing here.
        '''
        pass

    def on_iter_end(self, step, writer):
        '''
        Log images of the predicted distributions for the 10 most common
        descriptions in `self.word_counts`, then defer to the base class.

        :param step: step value passed through to `writer.log_image`.
        :param writer: object providing `log_image(step, tag, image)`.
        '''
        most_common = [desc for desc, count in self.word_counts.most_common(10)]
        insts = [instance.Instance(input=desc) for desc in most_common]
        # Targets are placeholders here (test=True); only the inputs are used.
        xs, (_,) = self._data_to_arrays(insts, test=True)
        probs = self.model.predict(xs)
        for i, desc in enumerate(most_common):
            dist = probs[i, :]
            # Zipping with '012' tags (and limits to three) the images
            # returned by the vectorizer.
            for image, channel in zip(self.color_vec.visualize_distribution(dist), '012'):
                writer.log_image(step, '%s/%s/%s' % (self.id, desc, channel), image)
        super(ListenerLearner, self).on_iter_end(step, writer)

    def _data_to_arrays(self, training_instances,
                        init_vectorizer=False, test=False, inverted=False):
        '''
        Vectorize instances into model input and target arrays.

        :param training_instances: instances to convert.
        :param init_vectorizer: if true, first add the tokenized descriptions
            to the sequence vectorizer, update `self.word_counts`, and dump
            the unk-replaced sentences to 'unk_replaced.train.jsons'.
        :param test: must be true if any instance has a missing (falsy)
            color; such colors are replaced by the placeholder (0.0, 0.0, 0.0).
        :param inverted: if true, descriptions are read from `inst.output`
            and colors from `inst.input` instead of the other way around.
        :return: `([x], [y])` with `x` an int32 array of token indices of
                 shape (num instances, `seq_vec.max_len`) and `y` an int32
                 vector of color bucket indices.
        '''
        def get_multi(val):
            # Unwrap a one-element tuple; anything else passes through.
            if isinstance(val, tuple):
                assert len(val) == 1
                return val[0]
            else:
                return val

        get_i, get_o = (lambda inst: inst.input), (lambda inst: inst.output)
        get_desc, get_color = (get_o, get_i) if inverted else (get_i, get_o)

        # Accessors for instances whose color is given as an index into the
        # instance's list of alternatives.
        get_i_ind, get_o_ind = ((lambda inst: inst.alt_inputs[get_multi(inst.input)]),
                                (lambda inst: inst.alt_outputs[get_multi(inst.output)]))
        get_color_indexed = get_i_ind if inverted else get_o_ind

        if hasattr(self.options, 'listener_tokenizer'):
            tokenize = TOKENIZERS[self.options.listener_tokenizer]
        else:
            tokenize = TOKENIZERS['whitespace']

        if init_vectorizer:
            tokenized = [['<s>'] + tokenize(get_desc(inst)) + ['</s>']
                         for inst in training_instances]
            self.seq_vec.add_all(tokenized)
            unk_replaced = self.seq_vec.unk_replace_all(tokenized)
            self.word_counts.update([get_desc(inst) for inst in training_instances])
            config.dump(unk_replaced, 'unk_replaced.train.jsons', lines=True)

        sentences = []
        colors = []
        if self.options.verbosity >= 9:
            print('%s _data_to_arrays:' % self.id)
        for inst in training_instances:
            desc = tokenize(get_desc(inst))
            color = get_color(inst)
            if isinstance(color, numbers.Number):
                # A numeric color is an index into the alternatives.
                color = get_color_indexed(inst)
            if not color:
                # Missing colors are only tolerated at test time.
                assert test
                color = (0.0, 0.0, 0.0)
            # Left-pad with '<s>' so that the sentence, including the final
            # '</s>', is `max_len` tokens long (no padding if already longer).
            s = ['<s>'] * (self.seq_vec.max_len - 1 - len(desc)) + desc
            s.append('</s>')
            if self.options.verbosity >= 9:
                print('%s -> %s' % (repr(s), repr(color)))
            sentences.append(s)
            colors.append(color)

        x = np.zeros((len(sentences), self.seq_vec.max_len), dtype=np.int32)
        y = np.zeros((len(sentences),), dtype=np.int32)
        for i, sentence in enumerate(sentences):
            # Over-long sentences are cut to the first `max_len` tokens.
            if len(sentence) > x.shape[1]:
                sentence = sentence[:x.shape[1]]
            x[i, :] = self.seq_vec.vectorize(sentence)
            y[i] = self.color_vec.vectorize(colors[i], hsv=True)

        return [x], [y]

    def _build_model(self, model_class=SimpleLasagneModel):
        '''
        Create the input/target variables and network, and wrap them in
        `model_class`, storing the result in `self.model`.

        :param model_class: model wrapper to instantiate.
        '''
        id_tag = (self.id + '/') if self.id else ''

        input_var = T.imatrix(id_tag + 'inputs')
        target_var = T.ivector(id_tag + 'targets')

        self.l_out, self.input_layers = self._get_l_out([input_var])
        self.loss = categorical_crossentropy

        self.model = model_class(
            [input_var], [target_var], self.l_out,
            loss=self.loss, optimizer=OPTIMIZERS[self.options.listener_optimizer],
            learning_rate=self.options.listener_learning_rate,
            id=self.id)

    def train_priors(self, training_instances, listener_data=False):
        '''
        Instantiate and train the priors selected by
        `self.options.listener_prior`, storing them as `self.prior_emp` and
        `self.prior_smooth`.
        '''
        prior_class = PRIORS[self.options.listener_prior]
        self.prior_emp = prior_class()  # TODO: accurate values for empirical prior
        self.prior_smooth = prior_class()

        self.prior_emp.train(training_instances, listener_data=listener_data)
        self.prior_smooth.train(training_instances, listener_data=listener_data)

    def _get_l_out(self, input_vars):
        '''
        Build the network: embedding, two recurrent layers, a dense hidden
        layer and a softmax over color buckets, with dropout after the
        recurrent and hidden layers when `listener_dropout` is positive.

        :param input_vars: sequence whose first element is the input variable.
        :return: `(l_out, [l_in])`, the output layer and the input layers.
        '''
        check_options(self.options)
        id_tag = (self.id + '/') if self.id else ''
        dropout = self.options.listener_dropout

        def maybe_dropout(layer, name):
            # Insert a dropout layer only when a positive rate is configured.
            if dropout > 0.0:
                return DropoutLayer(layer, p=dropout, name=id_tag + name)
            return layer

        input_var = input_vars[0]

        l_in = InputLayer(shape=(None, self.seq_vec.max_len), input_var=input_var,
                          name=id_tag + 'desc_input')
        l_in_embed = EmbeddingLayer(l_in, input_size=len(self.seq_vec.tokens),
                                    output_size=self.options.listener_cell_size,
                                    name=id_tag + 'desc_embed')

        cell = CELLS[self.options.listener_cell]
        cell_kwargs = {
            'grad_clipping': self.options.listener_grad_clipping,
            'num_units': self.options.listener_cell_size,
        }
        # The forget-gate bias is only configured for LSTM cells, and the
        # nonlinearity for every cell type except GRU.
        if self.options.listener_cell == 'LSTM':
            cell_kwargs['forgetgate'] = Gate(b=Constant(self.options.listener_forget_bias))
        if self.options.listener_cell != 'GRU':
            cell_kwargs['nonlinearity'] = NONLINEARITIES[self.options.listener_nonlinearity]

        l_rec1 = cell(l_in_embed, name=id_tag + 'rec1', **cell_kwargs)
        l_rec1_drop = maybe_dropout(l_rec1, 'rec1_drop')
        l_rec2 = cell(l_rec1_drop, name=id_tag + 'rec2', **cell_kwargs)
        l_rec2_drop = maybe_dropout(l_rec2, 'rec2_drop')

        l_hidden = DenseLayer(l_rec2_drop, num_units=self.options.listener_cell_size,
                              nonlinearity=NONLINEARITIES[self.options.listener_nonlinearity],
                              name=id_tag + 'hidden')
        l_hidden_drop = maybe_dropout(l_hidden, 'hidden_drop')
        # Unnormalized scores, one per color bucket, followed by a softmax.
        l_scores = DenseLayer(l_hidden_drop, num_units=self.color_vec.num_types, nonlinearity=None,
                              name=id_tag + 'scores')
        l_out = NonlinearityLayer(l_scores, nonlinearity=softmax, name=id_tag + 'out')

        return l_out, [l_in]

    def sample_prior_smooth(self, num_samples):
        '''Draw `num_samples` samples from `self.prior_smooth`.'''
        return self.prior_smooth.sample(num_samples)