import colorsys
import numpy as np
import operator
import theano.tensor as T
import skimage.color
# NOTE(review): importing Sequence straight from `collections` works only on
# Python 2 / Python < 3.10 (it moved to collections.abc) — consistent with the
# Python 2 idioms used elsewhere in this project.
from collections import Sequence, Counter
from lasagne.layers import InputLayer, EmbeddingLayer, reshape
from matplotlib.colors import hsv_to_rgb

from stanza.research.rng import get_rng

# Module-level RNG from the project's seeding helper, so vectorizer
# behavior is reproducible across runs.
rng = get_rng()


class SymbolVectorizer(object):
    '''
    Maps symbols from an alphabet/vocabulary of indefinite size to and from
    sequential integer ids.

    >>> vec = SymbolVectorizer()
    >>> vec.add_all(['larry', 'moe', 'larry', 'curly', 'moe'])
    >>> vec.vectorize_all(['curly', 'larry', 'moe', 'pikachu'])
    array([3, 1, 2, 0], dtype=int32)
    >>> vec.unvectorize_all([3, 3, 2])
    ['curly', 'curly', 'moe']
    '''

    def __init__(self):
        # Parallel structures kept in sync by `add` (defined later in the
        # class, outside this chunk): `tokens` preserves insertion order,
        # while the two dicts give O(1) token <-> integer id lookup in
        # both directions.
        self.tokens = []
        self.token_indices = {}
        self.indices_token = {}
        # '<unk>' is registered first, so id 0 is the unknown-token
        # fallback — the doctest above shows the unseen 'pikachu'
        # vectorizing to 0.
        self.add('<unk>')
parser = config.get_options_parser()
parser.add_argument('--split', type=str, default='dev',
                    help='The data split to draw the human evaluation data from.')
parser.add_argument('--test_size', type=int, default=None,
                    help='The number of examples to use in human evaluation. '
                         'If None, use the whole dev/test set.')

# Number of examples grouped together per batch in the human evaluation.
BATCH_SIZE = 10

random = rng.get_rng()


class HumanListener(learner.Learner):
    """A listener whose answers come from a lookup table of previously
    collected human responses, loaded from ``human_listener.txt``
    (one ``description<TAB>(float, float, ...)`` entry per line)."""

    def __init__(self):
        self.num_params = 0
        # Maps a description string to its tuple of color floats.
        self.memory = {}
        with open('human_listener.txt', 'r') as f:
            for raw in f:
                # Lines without a tab (e.g. blanks) carry no entry.
                if '\t' not in raw:
                    continue
                desc, color_str = raw.strip().split('\t')
                # The color field must look like "(x, y, z)"; fail loudly
                # (showing the offending field) if the file is malformed.
                assert color_str.startswith('(') and color_str.endswith(
                    ')'), color_str
                color = tuple(
                    float(d) for d in color_str[1:-1].split(', '))
                self.memory[desc] = color
# Name -> update function for every public optimizer in lasagne.updates,
# excluding the apply_* helpers and *_constraint utilities, which are not
# optimizers. NOTE: dict.iteritems() pins this module to Python 2.
OPTIMIZERS = {
    name: func
    for name, func in lasagne.updates.__dict__.iteritems()
    if (name in lasagne.updates.__all__ and
        not name.startswith('apply_') and
        not name.endswith('_constraint'))
}

# Short cell name (e.g. 'LSTM' for LSTMLayer) -> recurrent layer class, for
# every public *Layer in lasagne.layers.recurrent. CustomRecurrentLayer is
# excluded — presumably because it does not share the simple constructor
# signature of the named cells; confirm against the callers.
CELLS = {
    name[:-len('Layer')]: func
    for name, func in lasagne.layers.recurrent.__dict__.iteritems()
    if (name in lasagne.layers.recurrent.__all__ and
        name.endswith('Layer') and
        name != 'CustomRecurrentLayer')
}

# Seed lasagne's global RNG from the project RNG so weight initialization
# and dropout are reproducible.
rng = get_rng()
lasagne.random.set_rng(rng)


def detect_nan(i, node, fn):
    """Theano monitor hook: raise AssertionError as soon as any op output
    contains a non-finite value, after printing the offending node.

    The (i, node, fn) parameters follow theano MonitorMode's per-op
    callback signature: op index, apply node, and compiled thunk.
    """
    # Allocation and partial-update ops are exempt: AllocEmpty (and its GPU
    # counterpart) returns uninitialized memory by design, and IncSubtensor
    # may legitimately hold garbage outside the updated region.
    # NOTE(review): `G` is not imported in this chunk — presumably the
    # theano GPU op module, imported elsewhere in the file.
    if not isinstance(node.op, (T.AllocEmpty, T.IncSubtensor,
                                G.GpuAllocEmpty, G.GpuIncSubtensor)):
        for output in fn.outputs:
            # RandomState outputs are not numeric arrays; skip them
            # before applying np.isfinite.
            if (not isinstance(output[0], np.random.RandomState) and
                    not np.isfinite(output[0]).all()):
                print('*** NaN detected ***')
                theano.printing.debugprint(node)
                print('Inputs : %s' % [input[0] for input in fn.inputs])
                print('Outputs: %s' % [output[0] for output in fn.outputs])
                raise AssertionError
from lasagne.nonlinearities import softmax
from lasagne.updates import rmsprop
from stanza.monitoring import progress
from stanza.research import config, instance, iterators, rng
import color_instances
import speaker
import data_aug
from helpers import ForgetSizeLayer, GaussianScoreLayer, logit_softmax_nd
from neural import NeuralLearner, SimpleLasagneModel
from neural import NONLINEARITIES, OPTIMIZERS, CELLS, sample
from vectorizers import SequenceVectorizer, BucketsVectorizer, SymbolVectorizer
from vectorizers import strip_invalid_tokens, COLOR_REPRS
from tokenizers import TOKENIZERS

# Project RNG (here `rng` is the stanza.research module, not the get_rng
# function used by other chunks of this project).
random = rng.get_rng()

# Register the listener model's command-line options at import time on the
# shared options parser.
parser = config.get_options_parser()
parser.add_argument('--listener_cell_size', type=int, default=20,
                    help='The number of dimensions of all hidden layers and cells in '
                         'the listener model. If 0 and using the AtomicListenerLearner, '
                         'remove all hidden layers and only train a linear classifier.')
parser.add_argument('--listener_forget_bias', type=float, default=5.0,
                    help='The initial value of the forget gate bias in LSTM cells in '
                         'the listener model. A positive initial forget gate bias '
                         'encourages the model to remember everything by default.')
parser.add_argument('--listener_nonlinearity', choices=NONLINEARITIES.keys(), default='tanh',
                    help='The nonlinearity/activation function to use for dense and '
                         'LSTM layers in the listener model.')
parser.add_argument('--listener_cell', choices=CELLS.keys(), default='LSTM',
                    help='The recurrent cell to use for the listener model.')