# Example #1
import colorsys
import numpy as np
import operator
import theano.tensor as T
import skimage.color
from collections import Sequence, Counter
from lasagne.layers import InputLayer, EmbeddingLayer, reshape
from matplotlib.colors import hsv_to_rgb

from stanza.research.rng import get_rng

# Module-level RNG obtained from the shared stanza helper (presumably seeded
# centrally so experiments are reproducible -- confirm in stanza.research.rng).
rng = get_rng()


class SymbolVectorizer(object):
    '''
    Maps symbols from an alphabet/vocabulary of indefinite size to and from
    sequential integer ids.

    >>> vec = SymbolVectorizer()
    >>> vec.add_all(['larry', 'moe', 'larry', 'curly', 'moe'])
    >>> vec.vectorize_all(['curly', 'larry', 'moe', 'pikachu'])
    array([3, 1, 2, 0], dtype=int32)
    >>> vec.unvectorize_all([3, 3, 2])
    ['curly', 'curly', 'moe']
    '''
    def __init__(self):
        # Parallel structures: `tokens` records symbols in insertion order,
        # while the two dicts presumably provide constant-time lookup in each
        # direction (token -> id and id -> token) -- maintained by add().
        self.tokens = []
        self.token_indices = {}
        self.indices_token = {}
        # Register '<unk>' first so it receives id 0; unseen symbols map to it
        # (see the doctest: 'pikachu' -> 0).
        self.add('<unk>')
# Example #2
# Command-line options controlling which data is used for human evaluation.
parser = config.get_options_parser()
parser.add_argument(
    '--split',
    type=str,
    default='dev',
    help='The data split to draw the human evaluation data from.')
parser.add_argument('--test_size',
                    type=int,
                    default=None,
                    help='The number of examples to use in human evaluation. '
                    'If None, use the whole dev/test set.')

# NOTE(review): name suggests the number of examples presented per batch in
# the human evaluation -- confirm against the code that consumes it.
BATCH_SIZE = 10

# Shared RNG from the project's rng module (not Python's `random` module).
random = rng.get_rng()


class HumanListener(learner.Learner):
    """Learner that answers by looking up precollected human responses.

    Loads description -> color-tuple pairs from 'human_listener.txt' into an
    in-memory dict at construction time; it has no trainable parameters.
    """

    def __init__(self):
        self.num_params = 0
        self.memory = {}
        with open('human_listener.txt', 'r') as infile:
            for line in infile:
                # Skip any line that isn't a tab-separated record.
                if '\t' not in line:
                    continue
                desc, color_str = line.strip().split('\t')
                # The color field must be a parenthesized tuple literal,
                # e.g. "(0.5, 0.2, 0.9)".
                assert color_str.startswith('(') and color_str.endswith(')'), color_str
                components = color_str[1:-1].split(', ')
                self.memory[desc] = tuple(float(c) for c in components)
# Example #3
# Name -> update-rule function for every public optimizer in lasagne.updates,
# excluding the helper functions (apply_* and *_constraint), which are not
# optimization rules themselves.
OPTIMIZERS = {
    name: func
    for name, func in lasagne.updates.__dict__.iteritems()
    if (name in lasagne.updates.__all__ and
        not name.startswith('apply_') and not name.endswith('_constraint'))
}

# Short name -> recurrent layer class (the 'Layer' suffix is stripped from the
# key). CustomRecurrentLayer is excluded -- presumably because it does not fit
# the common cell interface; confirm against how CELLS entries are called.
CELLS = {
    name[:-len('Layer')]: func
    for name, func in lasagne.layers.recurrent.__dict__.iteritems()
    if (name in lasagne.layers.recurrent.__all__ and name.endswith('Layer') and
        name != 'CustomRecurrentLayer')
}

# Point lasagne's global RNG at the shared generator so lasagne-internal
# randomness is driven by the same source as the rest of the module.
rng = get_rng()
lasagne.random.set_rng(rng)


def detect_nan(i, node, fn):
    """Theano MonitorMode hook: abort when a node produces a non-finite value.

    Ops that legitimately hold uninitialized or partially-written storage
    (AllocEmpty / IncSubtensor and their GPU counterparts) are skipped. For any
    other node, the first output containing a NaN/inf triggers a debug dump of
    the node and its inputs/outputs, then raises AssertionError.
    """
    skip_ops = (T.AllocEmpty, T.IncSubtensor, G.GpuAllocEmpty, G.GpuIncSubtensor)
    if isinstance(node.op, skip_ops):
        return
    for output in fn.outputs:
        value = output[0]
        # RandomState containers are not numeric; don't test them.
        if isinstance(value, np.random.RandomState):
            continue
        if np.isfinite(value).all():
            continue
        print('*** NaN detected ***')
        theano.printing.debugprint(node)
        print('Inputs : %s' % [input[0] for input in fn.inputs])
        print('Outputs: %s' % [output[0] for output in fn.outputs])
        raise AssertionError
# Example #4
from lasagne.nonlinearities import softmax
from lasagne.updates import rmsprop

from stanza.monitoring import progress
from stanza.research import config, instance, iterators, rng
import color_instances
import speaker
import data_aug
from helpers import ForgetSizeLayer, GaussianScoreLayer, logit_softmax_nd
from neural import NeuralLearner, SimpleLasagneModel
from neural import NONLINEARITIES, OPTIMIZERS, CELLS, sample
from vectorizers import SequenceVectorizer, BucketsVectorizer, SymbolVectorizer
from vectorizers import strip_invalid_tokens, COLOR_REPRS
from tokenizers import TOKENIZERS

# Shared RNG from the project's rng module (not Python's `random` module).
random = rng.get_rng()

# Hyperparameter options for the listener models defined in this module.
parser = config.get_options_parser()
parser.add_argument('--listener_cell_size', type=int, default=20,
                    help='The number of dimensions of all hidden layers and cells in '
                         'the listener model. If 0 and using the AtomicListenerLearner, '
                         'remove all hidden layers and only train a linear classifier.')
parser.add_argument('--listener_forget_bias', type=float, default=5.0,
                    help='The initial value of the forget gate bias in LSTM cells in '
                         'the listener model. A positive initial forget gate bias '
                         'encourages the model to remember everything by default.')
parser.add_argument('--listener_nonlinearity', choices=NONLINEARITIES.keys(), default='tanh',
                    help='The nonlinearity/activation function to use for dense and '
                         'LSTM layers in the listener model.')
parser.add_argument('--listener_cell', choices=CELLS.keys(), default='LSTM',
                    help='The recurrent cell to use for the listener model.')