def __init__(self, dim_proj, rng, prefix='lstm'):
    """
    Initialize the LSTM params
    """
    # rng is accepted for API symmetry with the other layers but is not
    # used in this body
    self.param_names = []
    params = OrderedDict()
    # Input-to-hidden weights for the four LSTM gates. axis=1 concatenates
    # horizontally, i.e. the resulting shape is dim_proj x (4 * dim_proj)
    W = numpy.concatenate([ortho_weight(dim_proj),
                           ortho_weight(dim_proj),
                           ortho_weight(dim_proj),
                           ortho_weight(dim_proj)], axis=1)
    params[_p(prefix, 'W')] = W
    self.param_names.append(_p(prefix, 'W'))
    # Recurrent (hidden-to-hidden) weights, same layout as W
    U = numpy.concatenate([ortho_weight(dim_proj),
                           ortho_weight(dim_proj),
                           ortho_weight(dim_proj),
                           ortho_weight(dim_proj)], axis=1)
    params[_p(prefix, 'U')] = U
    self.param_names.append(_p(prefix, 'U'))
    b = numpy.zeros((4 * dim_proj,))
    params[_p(prefix, 'b')] = b.astype(theano.config.floatX)
    self.param_names.append(_p(prefix, 'b'))
    self.prefix = prefix
    self.params = params
    self.tparams = init_tparams(params)
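# The snippets here rely on a few helpers (_p, numpy_floatX, ortho_weight,
# init_tparams) whose definitions are not shown. The sketch below follows
# the standard Theano LSTM tutorial versions; the actual definitions in
# this codebase may differ slightly.

from collections import OrderedDict

import numpy
import theano


def _p(pp, name):
    """Join a layer prefix and a parameter name: ('lstm', 'W') -> 'lstm_W'"""
    return '%s_%s' % (pp, name)


def numpy_floatX(data):
    """Cast data to Theano's configured float type"""
    return numpy.asarray(data, dtype=theano.config.floatX)


def ortho_weight(ndim):
    """Square orthogonal matrix from the SVD of a random normal matrix"""
    W = numpy.random.randn(ndim, ndim)
    u, s, v = numpy.linalg.svd(W)
    return u.astype(theano.config.floatX)


def init_tparams(params):
    """Wrap each numpy parameter in a named Theano shared variable"""
    tparams = OrderedDict()
    for kk, pp in params.items():
        tparams[kk] = theano.shared(params[kk], name=kk)
    return tparams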
def __init__(self, dim_proj, ydim, word_dict, random_seed):
    """
    Embedding and classifier params
    """
    self.layers = {}
    self.random_seed = random_seed
    self.dim_proj = dim_proj
    self.ydim = ydim
    self.rng = numpy.random.RandomState(self.random_seed)
    self.params = OrderedDict()
    self.tparams = OrderedDict()
    self.f_pred_prob = None
    self.f_pred = None

    def unpack(source, target):
        for kk, vv in source.items():
            target[kk] = vv

    # Add parameters from the dictionary
    unpack(word_dict.params, self.params)
    unpack(word_dict.tparams, self.tparams)

    # Initialize the LSTM and add its params
    self.layers['lstm'] = LSTM(dim_proj, self.rng)
    unpack(self.layers['lstm'].params, self.params)
    unpack(self.layers['lstm'].tparams, self.tparams)

    # Initialize the classifier params; draw from the seeded RNG rather
    # than the global numpy.random state so runs are reproducible
    other_params = OrderedDict()
    other_params['U'] = 0.01 * self.rng.randn(dim_proj, ydim) \
        .astype(theano.config.floatX)
    other_params['b'] = numpy.zeros((ydim,)).astype(theano.config.floatX)
    other_tparams = init_tparams(other_params)
    unpack(other_params, self.params)
    unpack(other_tparams, self.tparams)
def __init__(self, dim_proj, ydim, word_dict, random_seed,
             use_dropout=True):
    """
    Embedding and classifier params
    """
    self.layers = {}
    self.random_seed = random_seed
    self.dim_proj = dim_proj
    self.ydim = ydim
    self.rng = numpy.random.RandomState(self.random_seed)
    self.params = OrderedDict()
    self.tparams = OrderedDict()
    self.f_cost = None
    self.f_decode = None
    self.use_dropout = use_dropout

    def unpack(source, target):
        for kk, vv in source.items():
            target[kk] = vv

    # Add parameters from the dictionary
    unpack(word_dict.params, self.params)
    unpack(word_dict.tparams, self.tparams)

    # Initialize the two-layer encoder and two-layer decoder LSTMs and
    # add their params
    for name in ('enc_lstm_1', 'enc_lstm_2', 'dec_lstm_1', 'dec_lstm_2'):
        self.layers[name] = LSTM(dim_proj, self.rng, prefix=name)
        unpack(self.layers[name].params, self.params)
        unpack(self.layers[name].tparams, self.tparams)

    # Initialize the classifier params; draw from the seeded RNG rather
    # than the global numpy.random state so runs are reproducible
    other_params = OrderedDict()
    other_params['U'] = 0.01 * self.rng.randn(dim_proj, ydim) \
        .astype(theano.config.floatX)
    other_params['b'] = numpy.zeros((ydim,)).astype(theano.config.floatX)
    other_tparams = init_tparams(other_params)
    unpack(other_params, self.params)
    unpack(other_tparams, self.tparams)
def __init__(self, dim_proj, dim_input=None, prefix='lstm'):
    """
    Initialize the GRU params

    dim_proj  : The embedding dimension of the hidden layer
    dim_input : The embedding dimension of the input
    """
    self.param_names = []
    params = OrderedDict()
    if dim_input is None:
        dim_input = dim_proj
    # Initialize weights using a scaled standard normal distribution,
    # which falls back to orthogonal weights when dim_input == dim_proj.
    # These weights transform the input to the dimensionality of the
    # hidden states. axis=1 concatenates horizontally, i.e. the resulting
    # shape is dim_input x (2 * dim_proj): one block each for the update
    # and reset gates.
    W = numpy.concatenate([norm_init(dim_input, dim_proj),
                           norm_init(dim_input, dim_proj)], axis=1)
    params[_p(prefix, 'W')] = W
    self.param_names.append(_p(prefix, 'W'))

    # Recurrence weights
    U = numpy.concatenate([ortho_weight(dim_proj),
                           ortho_weight(dim_proj)], axis=1)
    params[_p(prefix, 'U')] = U
    self.param_names.append(_p(prefix, 'U'))
    b = numpy.zeros((2 * dim_proj,))
    params[_p(prefix, 'b')] = b.astype(theano.config.floatX)
    self.param_names.append(_p(prefix, 'b'))

    # Parameters for calculating the candidate hidden state
    W_h = norm_init(dim_input, dim_proj)
    params[_p(prefix, 'W_h')] = W_h
    self.param_names.append(_p(prefix, 'W_h'))
    U_h = ortho_weight(dim_proj)
    params[_p(prefix, 'U_h')] = U_h
    self.param_names.append(_p(prefix, 'U_h'))
    b_h = numpy.zeros((dim_proj,))
    params[_p(prefix, 'b_h')] = b_h.astype(theano.config.floatX)
    self.param_names.append(_p(prefix, 'b_h'))

    # Memory of the last final hidden states (not archived)
    self.h_final = None
    self.prefix = prefix
    self.params = params
    self.tparams = init_tparams(params)
    self.dim_proj = dim_proj
    self.dim_input = dim_input
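# norm_init is not shown. Based on the comment above, it is assumed to draw
# scaled standard normal weights and fall back to an orthogonal matrix when
# the two dimensions match; a plausible sketch (the 0.01 scale is a guess,
# not confirmed by the source):


def norm_init(nin, nout, ortho=True, scale=0.01):
    """Scaled normal init; orthogonal when square and ortho=True"""
    if nin == nout and ortho:
        return ortho_weight(nin)
    W = scale * numpy.random.randn(nin, nout)
    return W.astype(theano.config.floatX)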
def __init__(self, sentences, n_words, emb_dim):
    """
    Initializes a dictionary.

    :type sentences: list(str)
    :param sentences: A list of sentences (text) to initialize the
        vocabulary with
    :type n_words: int
    :param n_words: The number of words to retain in the vocab. Less
        frequent words below this threshold are replaced with UNK
    :type emb_dim: int
    :param emb_dim: The dimensionality of the word embeddings
    """
    self.locked = False
    wordcount = dict()
    for ss_ in sentences:
        words = ss_.strip().split()
        for word in words:
            if word not in wordcount:
                wordcount[word] = 0
            wordcount[word] += 1
    # Materialize as lists so they can be indexed (required in Python 3)
    counts = list(wordcount.values())
    keys = list(wordcount.keys())
    self.worddict = dict()
    self.reverse_worddict = dict()
    self.worddict['<UNK>'] = 1
    self.reverse_worddict[1] = '<UNK>'
    self.worddict['<PAD>'] = 0
    self.reverse_worddict[0] = '<PAD>'
    # Sort by frequency (descending) and truncate at n_words
    sorted_idx = numpy.argsort(counts)[::-1][:n_words]
    dict_idx = 2
    for ss_ in sorted_idx:
        if keys[ss_] == '<UNK>':
            continue
        self.worddict[keys[ss_]] = dict_idx
        self.reverse_worddict[dict_idx] = keys[ss_]
        dict_idx += 1
    self.n_words = len(self.worddict)
    self.noise_distribution = None
    self.create_unigram_noise_dist(wordcount)
    self.locked = True
    print("Total words read by dict = %d" % numpy.sum(counts))
    print("Total unique words read by dict = %d" % len(keys))
    print("Total words retained = %d" % len(self.worddict))
    self.embedding_size = emb_dim
    w_emb = self.initialize_embedding()
    params = OrderedDict()
    params['Wemb'] = w_emb
    self.params = params
    self.tparams = init_tparams(params)
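# initialize_embedding is not shown. It presumably returns a random
# (n_words x embedding_size) matrix for Wemb; a minimal sketch under that
# assumption:


def initialize_embedding(self):
    """Random initial word embeddings, one row per vocabulary entry"""
    w_emb = 0.01 * numpy.random.randn(self.n_words, self.embedding_size)
    return w_emb.astype(theano.config.floatX)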
def create_unigram_noise_dist(self, wordcount):
    """
    Creates a unigram noise distribution for NCE

    :type wordcount: dict
    :param wordcount: A dictionary containing frequency counts for words
    """
    counts = numpy.sort(list(wordcount.values()))[::-1]
    # Index 0 is PAD, which is never sampled; index 1 is UNK, which
    # absorbs the mass of all words truncated from the vocabulary. The
    # UNK and PAD symbols themselves are not counted in the second entry.
    freq = [0, sum(counts[self.n_words:])] \
        + list(counts[:(self.n_words - 2)])
    assert len(freq) == self.n_words
    sum_freq = sum(freq)
    noise_distribution = [float(k) / sum_freq for k in freq]
    self.noise_distribution = init_tparams(
        OrderedDict([('noise_d',
                      numpy_floatX(noise_distribution)
                      .reshape(self.n_words,))])
    )['noise_d']
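# Illustration of how the noise distribution might be consumed for NCE:
# draw k negative word ids per data point from the unigram distribution.
# The names below (word_dict, k) are illustrative, not from this codebase.

noise_d = word_dict.noise_distribution.get_value()
noise_d = noise_d / noise_d.sum()  # guard against float32 rounding
k = 25  # negative samples per data point (illustrative value)
negative_ids = numpy.random.choice(len(noise_d), size=k, p=noise_d)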
def __init__(self, dim_proj, dim_input=None, prefix='lstm'):
    """
    Initialize the LSTM params

    dim_proj  : The embedding dimension of the hidden layer
    dim_input : The embedding dimension of the input
    """
    self.param_names = []
    params = OrderedDict()
    if dim_input is None:
        dim_input = dim_proj
    # Input-to-hidden weights for the four gates; norm_init falls back
    # to orthogonal weights when dim_input == dim_proj
    W = numpy.concatenate([
        norm_init(dim_input, dim_proj),
        norm_init(dim_input, dim_proj),
        norm_init(dim_input, dim_proj),
        norm_init(dim_input, dim_proj)
    ], axis=1)
    params[_p(prefix, 'W')] = W
    self.param_names.append(_p(prefix, 'W'))
    # axis=1 concatenates horizontally, i.e. the resulting shape is
    # dim_input x (4 * dim_proj)
    U = numpy.concatenate([
        ortho_weight(dim_proj),
        ortho_weight(dim_proj),
        ortho_weight(dim_proj),
        ortho_weight(dim_proj)
    ], axis=1)
    params[_p(prefix, 'U')] = U
    self.param_names.append(_p(prefix, 'U'))
    b = numpy.zeros((4 * dim_proj,))
    params[_p(prefix, 'b')] = b.astype(theano.config.floatX)
    self.param_names.append(_p(prefix, 'b'))
    # Memory of the last final hidden states
    # TODO: Not archived
    self.h_final = None
    self.dim_proj = dim_proj
    self.dim_input = dim_input
    self.prefix = prefix
    self.params = params
    self.tparams = init_tparams(params)
def __init__(self, dim_proj, dim_input, prefix='logit', ortho=True):
    """
    Initializes the parameters of a logistic regression model

    :type dim_proj: int
    :param dim_proj: The dimensionality of the output (label) layer
    :type dim_input: int
    :param dim_input: The dimensionality of the input layer
    :type prefix: str
    :param prefix: The prefix used to name this layer's parameters
    :type ortho: bool
    :param ortho: Whether to use orthogonal initialization when
        dim_input == dim_proj
    """
    # Initialize the weight matrix, of size dim_input x dim_proj
    self.param_names = []
    params = OrderedDict()
    W = norm_init(dim_input, dim_proj, ortho)
    params[_p(prefix, 'W')] = W
    self.param_names.append(_p(prefix, 'W'))
    b = numpy_floatX(numpy.zeros((dim_proj,)))
    params[_p(prefix, 'b')] = b
    self.param_names.append(_p(prefix, 'b'))
    # Batch normalization params: gamma scales and beta shifts the
    # normalized activations; dim_proj is the output dimension here.
    # Conventionally gamma starts at ones and beta at zeros.
    gamma = numpy_floatX(numpy.ones((dim_proj,)))
    params[_p(prefix, 'gamma')] = gamma
    self.param_names.append(_p(prefix, 'gamma'))
    beta = numpy_floatX(numpy.zeros((dim_proj,)))
    params[_p(prefix, 'beta')] = beta
    self.param_names.append(_p(prefix, 'beta'))
    self.prefix = prefix
    self.params = params
    self.tparams = init_tparams(params)
    # Legacy params
    # Softmax components computed on demand
    self.p_y_given_x = None
    self.y_pred = None
    self.lin_output = None
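# For reference, the gamma/beta parameters above implement batch
# normalization: y = gamma * (x - mean) / sqrt(var + eps) + beta. A small
# Theano sketch of that transform (the eps value is an illustrative choice;
# the actual forward pass of this class is not shown):

import theano.tensor as T


def batch_norm(x, gamma, beta, eps=1e-5):
    """Normalize x over the batch axis, then scale and shift"""
    mean = x.mean(axis=0)
    var = x.var(axis=0)
    return gamma * (x - mean) / T.sqrt(var + eps) + beta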