def generate_sequence(self, init, end, maxlen=100):
    """
    Generate a sequence from the language model, by running the RNN forward
    and selecting, at each timestep, a random word from the emitted
    probability distribution.

    The MultinomialSampler class (in nn.math) may be helpful here for
    sampling a word. Use as:
        y = multinomial_sample(p)
    to sample an index y from the vector of probabilities p.

    Arguments:
        init = index of start word (word_to_num['<s>'])
        end = index of end word (word_to_num['</s>'])
        maxlen = maximum length to generate

    Returns:
        ys = sequence of indices
        J = total cross-entropy loss of generated sequence
    """
    J = 0         # total loss
    ys = [init]   # emitted sequence

    #### YOUR CODE HERE ####
    ns = maxlen
    # hidden states h(t); hs[0] is the initial hidden state (zeros)
    hs = zeros((ns + 1, self.hdim))
    # predicted probability distributions
    ps = zeros((ns, self.vdim))

    H = self.params.H
    U = self.params.U
    L = self.sparams.L

    ##
    # Forward propagation, sampling one word per timestep
    for t in xrange(ns):
        hs[t + 1] = sigmoid(H.dot(hs[t]) + L[ys[t]])
        ps[t] = softmax(U.dot(hs[t + 1]))
        ys.append(multinomial_sample(ps[t]))
        # loss of the word just sampled: -log P(ys[t+1] | ys[:t+1])
        J -= log(ps[t][ys[t + 1]])
        if ys[t + 1] == end:
            break
        if t == ns - 1:
            ys.append(end)
    #### YOUR CODE HERE ####
    return ys, J
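# Hypothetical usage sketch (an assumption, not taken from any of the
# solutions below): `model` is a trained RNNLM instance and word_to_num /
# num_to_word are the vocabulary maps referenced in the docstring above.
seq, J = model.generate_sequence(word_to_num['<s>'], word_to_num['</s>'], maxlen=100)
print " ".join(num_to_word[w] for w in seq)
print "total cross-entropy: %.3f, per-word: %.3f" % (J, J / max(len(seq) - 1, 1))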
def fill_unknowns(words, vocab):
    # Copy the input so the caller's list is not modified in place.
    ret = list(words)
    # Word frequencies, in the same order as vocab.index; multinomial_sample
    # draws an index in proportion to these (unnormalized) weights.
    p = np.array(vocab["freq"], dtype=float)
    for i in range(len(words)):
        if words[i] == "UUUNKKK":
            # replace the unknown token by a word sampled from the unigram
            # frequency distribution
            ret[i] = vocab.index[multinomial_sample(p)]
        else:
            ret[i] = words[i]
    return ret
def fill_unknowns(words):
    #### YOUR CODE HERE ####
    # Relies on a module-level `vocab` table (word index, 'freq' column).
    # Copy the input so the caller's list is not modified in place.
    ret = list(words)
    for i in xrange(len(ret)):
        if ret[i] == 'UUUNKKK':
            # sample a replacement word in proportion to its corpus frequency
            index = multinomial_sample(vocab.freq)
            ret[i] = vocab.index[index]
    #### END YOUR CODE ####
    return ret
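# Hypothetical usage sketch (an assumption, not taken from the solutions
# above): decode a generated index sequence `seq` to words, then replace each
# "UUUNKKK" placeholder with a word drawn in proportion to its corpus
# frequency. Assumes the `vocab` table and num_to_word map used above.
words = [num_to_word[w] for w in seq]
print " ".join(fill_unknowns(words, vocab))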
def generate_sequence(self, init, end, maxlen=100):
    """
    Generate a sequence from the language model, by running the RNN forward
    and selecting, at each timestep, a random word from the emitted
    probability distribution.

    The MultinomialSampler class (in nn.math) may be helpful here for
    sampling a word. Use as:
        y = multinomial_sample(p)
    to sample an index y from the vector of probabilities p.

    Arguments:
        init = index of start word (word_to_num['<s>'])
        end = index of end word (word_to_num['</s>'])
        maxlen = maximum length to generate

    Returns:
        ys = sequence of indices
        J = total cross-entropy loss of generated sequence
    """
    J = 0         # total loss
    ys = [init]   # emitted sequence

    #### YOUR CODE HERE ####
    ns = maxlen
    # hidden states; hs[-1] serves as the initial hidden state (zeros)
    hs = zeros((ns + 1, self.hdim))
    # predicted probability distributions
    ps = zeros((ns, self.vdim))

    for t in xrange(ns):
        hs[t] = sigmoid(self.params.H.dot(hs[t - 1]) + self.sparams.L[ys[t]])
        ps[t] = softmax(self.params.U.dot(hs[t]))
        ys.append(multinomial_sample(ps[t]))
        # loss of the word just sampled
        J -= log(ps[t][ys[t + 1]])
        if ys[t + 1] == end:
            break
        if t == ns - 1:
            ys.append(end)
    #### YOUR CODE HERE ####
    return ys, J
def generate_sequence(self, init, end, maxlen=100):
    """
    Generate a sequence from the language model, by running the RNN forward
    and selecting, at each timestep, a random word from the emitted
    probability distribution.

    The MultinomialSampler class (in nn.math) may be helpful here for
    sampling a word. Use as:
        y = multinomial_sample(p)
    to sample an index y from the vector of probabilities p.

    Arguments:
        init = index of start word (word_to_num['<s>'])
        end = index of end word (word_to_num['</s>'])
        maxlen = maximum length to generate

    Returns:
        ys = sequence of indices
        J = total cross-entropy loss of generated sequence
    """
    J = 0         # total loss
    ys = [init]   # emitted sequence

    #### YOUR CODE HERE ####
    # previous hidden state, kept as a (1, hdim) row vector
    h_ant = zeros((1, self.hdim))
    for step in xrange(maxlen):
        a1 = self.params.H.dot(h_ant.T).T + self.sparams.L[ys[step]]
        h = sigmoid(a1)
        a2 = self.params.U.dot(h.T).T
        y_hat = softmax(a2)
        h_ant = h
        ys.append(multinomial_sample(y_hat))
        # loss of the word just sampled
        J -= log(y_hat[0, ys[step + 1]])
        if ys[-1] == end:
            break
    # close the sequence if </s> was never sampled
    if ys[-1] != end:
        ys.append(end)
    #### YOUR CODE HERE ####
    return ys, J
def generate_sequence(self, init, end, maxlen=100):
    """
    Generate a sequence from the language model, by running the RNN forward
    and selecting, at each timestep, a random word from the emitted
    probability distribution.

    The MultinomialSampler class (in nn.math) may be helpful here for
    sampling a word. Use as:
        y = multinomial_sample(p)
    to sample an index y from the vector of probabilities p.

    Arguments:
        init = index of start word (word_to_num['<s>'])
        end = index of end word (word_to_num['</s>'])
        maxlen = maximum length to generate

    Returns:
        ys = sequence of indices
        J = total cross-entropy loss of generated sequence
    """
    J = 0         # total loss
    ys = [init]   # emitted sequence

    #### YOUR CODE HERE ####
    ps = zeros((maxlen, self.vdim))
    # one extra row so hs[i + 1] is valid at the last step; hs[0] is the
    # initial hidden state (zeros)
    hs = zeros((maxlen + 1, self.hdim))

    H = self.params.H
    L = self.sparams.L
    U = self.params.U

    start = init
    for i in xrange(maxlen):
        hs[i + 1] = sigmoid(H.dot(hs[i]) + L[start])
        ps[i] = softmax(U.dot(hs[i + 1]))
        start = multinomial_sample(ps[i])
        J -= log(ps[i][start])
        ys.append(start)
        if start == end:
            break
    #### YOUR CODE HERE ####
    return ys, J
def generate_sequence(self, init, end, maxlen=100):
    """
    Generate a sequence from the language model, by running the RNN forward
    and selecting, at each timestep, a random word from the emitted
    probability distribution.

    The MultinomialSampler class (in nn.math) may be helpful here for
    sampling a word. Use as:
        y = multinomial_sample(p)
    to sample an index y from the vector of probabilities p.

    Arguments:
        init = index of start word (word_to_num['<s>'])
        end = index of end word (word_to_num['</s>'])
        maxlen = maximum length to generate

    Returns:
        ys = sequence of indices
        J = total cross-entropy loss of generated sequence
    """
    J = 0         # total loss
    ys = [init]   # emitted sequence

    #### YOUR CODE HERE ####
    ns = len(ys)
    t = 0
    nextIdx = init
    hs = zeros((maxlen + 1, self.hdim))   # hs[-1] is the initial state (zeros)
    ps = zeros((maxlen, self.vdim))
    while ns <= maxlen and nextIdx != end:
        hs[t] = sigmoid(self.params.H.dot(hs[t - 1]) + self.sparams.L[ys[t]])
        ps[t] = softmax(self.params.U.dot(hs[t]))
        nextIdx = multinomial_sample(ps[t])
        # loss of the word just sampled
        J -= log(ps[t, nextIdx])
        ys.append(nextIdx)
        ns = len(ys)
        t += 1
    #### YOUR CODE HERE ####
    return ys, J
def generate_sequence(self, init, end, maxlen=100):
    """
    Generate a sequence from the language model, by running the RNN forward
    and selecting, at each timestep, a random word from the emitted
    probability distribution.

    The MultinomialSampler class (in nn.math) may be helpful here for
    sampling a word. Use as:
        y = multinomial_sample(p)
    to sample an index y from the vector of probabilities p.

    Arguments:
        init = index of start word (word_to_num['<s>'])
        end = index of end word (word_to_num['</s>'])
        maxlen = maximum length to generate

    Returns:
        ys = sequence of indices
        J = total cross-entropy loss of generated sequence
    """
    J = 0         # total loss
    ys = [init]   # emitted sequence
    ns = maxlen
    hs = np.zeros((ns + 1, self.hdim))

    #### YOUR CODE HERE ####
    for i in range(ns):
        hs[i + 1] = sigmoid(self.params.H.dot(hs[i]) +
                            self.params.W.dot(self.sparams.L[ys[i]]))
        # output distribution from the hierarchical softmax tree
        p = self.hierarchicalU.getDistribution(hs[i + 1])
        y = multinomial_sample(p)
        ys.append(y)
        # accumulate the loss of the sampled word (including </s>)
        J -= np.log(np.sum(p * make_onehot(y, self.vdim)))
        if y == end:
            break

    ##
    # Regularization: only sum the squared weights of the tree nodes whose
    # gradients are actually updated.
    x = self.hierarchicalU.getSumSquareU(self.hierarchicalU.root)
    Jreg = 0.5 * self.lreg * (np.sum(self.params.H ** 2) +
                              np.sum(self.params.W ** 2) + x)
    #### YOUR CODE HERE ####
    return ys, J + Jreg
def generate_sequence(self, init, end, h0, maxlen=100, words=[]):
    """
    Generate a sequence from the language model, by running the RNN forward
    and selecting, at each timestep, a random word from the emitted
    probability distribution.

    The MultinomialSampler class (in nn.math) may be helpful here for
    sampling a word. Use as:
        y = multinomial_sample(p)
    to sample an index y from the vector of probabilities p.

    Arguments:
        init = index of start word (word_to_num['<s>'])
        end = index of end word (word_to_num['</s>'])
        h0 = initial hidden state
        maxlen = maximum length to generate
        words = optional list of word indices used to seed the sequence

    Returns:
        ys = sequence of indices
        J = total cross-entropy loss of generated sequence
    """
    J = 0         # total loss
    ys = [init]   # emitted sequence
    if len(words) > 0:
        ys.extend(words)

    hs = zeros((maxlen + 1, self.hdim))
    hs[-1] = h0   # hs[-1] serves as the initial hidden state
    ps = zeros((maxlen, self.vdim))

    t = 0
    while ys[t] != end and t < maxlen:
        hs[t, :] = sigmoid(self.params.H.dot(hs[t - 1, :].T).T
                           + self.sparams.L[ys[t], :])
        ps[t, :] = softmax(self.params.U.dot(hs[t, :].T)).T
        # y = argmax(ps[t, :])
        y = multinomial_sample(ps[t, :])
        # only append the sample once past the seed words; inside the seed,
        # the given word (not the sample) is used as the next input
        if t >= len(words):
            ys.append(y)
        J -= log(ps[t, y])
        t += 1
    return ys, J
def generate_sequence(self, d, init, end, maxlen=100):
    """
    Generate a sequence from the language model, by running the RNN forward
    and selecting, at each timestep, a random word from the emitted
    probability distribution.

    The MultinomialSampler class (in nn.math) may be helpful here for
    sampling a word. Use as:
        y = multinomial_sample(p)
    to sample an index y from the vector of probabilities p.

    Arguments:
        d = document index (selects a row of self.sparams.D)
        init = index of start word (word_to_num['<s>'])
        end = index of end word (word_to_num['</s>'])
        maxlen = maximum length to generate

    Returns:
        ys = sequence of indices
        J = total cross-entropy loss of generated sequence
    """
    J = 0         # total loss
    ys = [init]   # emitted sequence

    hs = zeros((maxlen + 1, self.hdim))   # hs[-1] is the initial state (zeros)
    curr = init
    t = 0
    d_vec = self.sparams.D[d]   # document embedding
    while curr != end and len(ys) < maxlen:
        x_t = curr
        zs_t = self.params.H.dot(hs[t - 1]) + self.sparams.L[x_t] + d_vec
        hs[t] = sigmoid(zs_t)
        ps_t = softmax(self.params.U.dot(hs[t]) + self.params.G.dot(d_vec))
        y = multinomial_sample(ps_t)
        ys.append(y)
        curr = y
        J -= log(ps_t[y])
        t += 1
    return ys, J
def generate_sequence(self, init, end, maxlen=100):
    """
    Generate a sequence from the language model, by running the RNN forward
    and selecting, at each timestep, a random word from the emitted
    probability distribution.

    The MultinomialSampler class (in nn.math) may be helpful here for
    sampling a word. Use as:
        y = multinomial_sample(p)
    to sample an index y from the vector of probabilities p.

    Arguments:
        init = index of start word (word_to_num['<s>'])
        end = index of end word (word_to_num['</s>'])
        maxlen = maximum length to generate

    Returns:
        ys = sequence of indices
        J = total cross-entropy loss of generated sequence
    """
    J = 0         # total loss
    ys = [init]   # emitted sequence

    #### YOUR CODE HERE ####
    h = zeros((self.hdim,))
    for i in xrange(maxlen):
        h = sigmoid(dot(self.params.H, h) + self.sparams.L[ys[i]])
        probs = softmax(dot(self.sparams.U, h))
        # exponent < 1 flattens the distribution (higher temperature -> more fun)
        sampled_word = multinomial_sample(probs ** 0.75)
        ys.append(sampled_word)
        J -= log(probs[sampled_word])
        if sampled_word == end:
            break
    #### YOUR CODE HERE ####
    return ys, J
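# Illustrative sketch (an assumption, not part of the solution above): raising
# a probability vector to a power alpha and renormalizing flattens it when
# alpha < 1 (more random samples) and sharpens it when alpha > 1. The snippet
# above passes the unnormalized probs ** 0.75 straight to multinomial_sample,
# which is fine as long as the sampler treats its input as unnormalized
# weights, as the snippets here assume.
import numpy as np

def reweight(p, alpha):
    q = np.asarray(p, dtype=float) ** alpha
    return q / q.sum()

p = np.array([0.7, 0.2, 0.1])
print reweight(p, 0.75)   # flatter than p
print reweight(p, 2.0)    # sharper than p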
def generate_sequence(self, init, end, maxlen=100):
    """
    Generate a sequence from the language model, by running the RNN forward
    and selecting, at each timestep, a random word from the emitted
    probability distribution.

    The MultinomialSampler class (in nn.math) may be helpful here for
    sampling a word. Use as:
        y = multinomial_sample(p)
    to sample an index y from the vector of probabilities p.

    Arguments:
        init = list of start word indices (e.g. [word_to_num['<s>']])
        end = index of end word (word_to_num['</s>'])
        maxlen = maximum length to generate

    Returns:
        ys = sequence of indices
        J = total cross-entropy loss of generated sequence
    """
    J = 0            # total loss
    ys = list(init)  # emitted sequence (copy, so the caller's list is untouched)

    #### YOUR CODE HERE ####
    h = np.zeros(self.hdim)
    # Prime the hidden state on all seed words except the last one; the last
    # seed word is fed in the first iteration of the sampling loop below.
    for x in ys[:-1]:
        z = self.params.H.dot(h) + self.sparams.L[x]
        h = sigmoid(z)

    while ys[-1] != end and len(ys) < maxlen:
        x = ys[-1]
        z = self.params.H.dot(h) + self.sparams.L[x]
        h = sigmoid(z)
        y_hat = softmax(self.params.U.dot(h))
        y = multinomial_sample(y_hat)
        J -= np.log(y_hat[y])
        ys.append(y)
    #### YOUR CODE HERE ####
    return ys, J
def generate_sequence(self, init, end, maxlen=100):
    """
    Generate a sequence from the language model, by running the RNN forward
    and selecting, at each timestep, a random word from the emitted
    probability distribution.

    The MultinomialSampler class (in nn.math) may be helpful here for
    sampling a word. Use as:
        y = multinomial_sample(p)
    to sample an index y from the vector of probabilities p.

    Arguments:
        init = index of start word (word_to_num['<s>'])
        end = index of end word (word_to_num['</s>'])
        maxlen = maximum length to generate

    Returns:
        ys = sequence of indices
        J = total cross-entropy loss of generated sequence
    """
    J = 0         # total loss
    ys = [init]   # emitted sequence

    #### YOUR CODE HERE ####
    h = zeros(self.hdim)
    t = 1
    while t < maxlen:
        # hidden state, shape (hdim,)
        h = sigmoid(self.params.H.dot(h) + self.sparams.L[ys[t - 1]])
        # output distribution, shape (vdim,)
        p = softmax(self.params.U.dot(h))
        ys.append(multinomial_sample(p))
        J -= log(p[ys[t]])
        if ys[t] == end:
            break
        t += 1
    #### YOUR CODE HERE ####
    return ys, J
def generate_sequence(self, init, end, maxlen=100):
    """
    Generate a sequence from the language model, by running the RNN forward
    and selecting, at each timestep, a random word from the emitted
    probability distribution.

    The MultinomialSampler class (in nn.math) may be helpful here for
    sampling a word. Use as:
        y = multinomial_sample(p)
    to sample an index y from the vector of probabilities p.

    Arguments:
        init = index of start word (word_to_num['<s>'])
        end = index of end word (word_to_num['</s>'])
        maxlen = maximum length to generate

    Returns:
        ys = sequence of indices
        J = total cross-entropy loss of generated sequence
    """
    J = 0         # total loss
    ys = [init]   # emitted sequence

    # generate one word at a time and feed it back in as the next input
    #### YOUR CODE HERE ####
    t = 0
    hs = zeros(self.hdim)
    while len(ys) <= maxlen and ys[-1] != end:
        hs = sigmoid(self.params.H.dot(hs) + self.sparams.L[ys[t]])
        ps = softmax(self.params.U.dot(hs))
        y = multinomial_sample(ps)
        J -= log(ps[y])
        ys.append(y)
        t += 1
    #### YOUR CODE HERE ####
    return ys, J
def generate_sequence(self, init, end, maxlen=100):
    """
    Generate a sequence from the language model, by running the RNN forward
    and selecting, at each timestep, a random word from the emitted
    probability distribution.

    The MultinomialSampler class (in nn.math) may be helpful here for
    sampling a word. Use as:
        y = multinomial_sample(p)
    to sample an index y from the vector of probabilities p.

    Arguments:
        init = index of start word (word_to_num['<s>'])
        end = index of end word (word_to_num['</s>'])
        maxlen = maximum length to generate

    Returns:
        ys = sequence of indices
        J = total cross-entropy loss of generated sequence
    """
    J = 0         # total loss
    ys = [init]   # emitted sequence

    #### YOUR CODE HERE ####
    h = zeros(self.hdim)
    for t in xrange(1, maxlen + 1):
        x = ys[t - 1]
        h = sigmoid(dot(self.params.H, h) + self.sparams.L[x])
        y_hat = softmax(dot(self.params.U, h))
        y = multinomial_sample(y_hat)
        # print "selected %d of %d with p=%f" % (y, y_hat.shape[0], y_hat[y])
        J -= log(y_hat[y])
        ys.append(y)
        if y == end:
            break
    # close the sequence if </s> was never sampled
    if ys[-1] != end:
        ys.append(end)
    #### YOUR CODE HERE ####
    return ys, J
def generate_sequence(self, init, end, maxlen=100):
    """
    Generate a sequence from the language model, by running the RNN forward
    and selecting, at each timestep, a random word from the emitted
    probability distribution.

    The MultinomialSampler class (in nn.math) may be helpful here for
    sampling a word. Use as:
        y = multinomial_sample(p)
    to sample an index y from the vector of probabilities p.

    Arguments:
        init = index of start word (word_to_num['<s>'])
        end = index of end word (word_to_num['</s>'])
        maxlen = maximum length to generate

    Returns:
        ys = sequence of indices
        J = total cross-entropy loss of generated sequence
    """
    J = 0         # total loss
    ys = [init]   # emitted sequence

    #### YOUR CODE HERE ####
    ns = maxlen
    # hidden states; hs[-1] is the initial hidden state, which is zeros here
    hs = zeros((ns + 1, self.hdim))
    # predicted probability distributions
    ps = zeros((ns, self.vdim))

    for t in xrange(maxlen):
        hs[t] = sigmoid(self.params.H.dot(hs[t - 1]) + self.sparams.L[ys[t]])
        probs = softmax(self.params.U.dot(hs[t]))
        y = multinomial_sample(probs)
        ys.append(y)
        J -= log(probs[y])
        if y == end:
            break
    #### YOUR CODE HERE ####
    return ys, J
def generate_sequence(self, init, end, maxlen=100):
    """
    Generate a sequence from the language model, by running the RNN forward
    and selecting, at each timestep, a random word from the emitted
    probability distribution.

    The MultinomialSampler class (in nn.math) may be helpful here for
    sampling a word. Use as:
        y = multinomial_sample(p)
    to sample an index y from the vector of probabilities p.

    Arguments:
        init = index of start word (word_to_num['<s>'])
        end = index of end word (word_to_num['</s>'])
        maxlen = maximum length to generate

    Returns:
        ys = sequence of indices
        J = total cross-entropy loss of generated sequence
    """
    J = 0         # total loss
    ys = [init]   # emitted sequence

    hs = zeros((maxlen + 1, self.hdim))   # hs[-1] is the initial state (zeros)
    for w in range(maxlen):
        z1 = self.params.H.dot(hs[w - 1]) + self.sparams.L[ys[w]]
        hs[w] = sigmoid(z1)
        z2 = self.params.U.dot(hs[w])
        ps = softmax(z2)
        y = multinomial_sample(ps)
        ys.append(y)
        J -= log(ps[y])
        if y == end:
            break
    return ys, J
def generate_sequence(self, init, end, maxlen=100):
    """
    Generate a sequence from the language model, by running the RNN forward
    and selecting, at each timestep, a random word from the emitted
    probability distribution.

    The MultinomialSampler class (in nn.math) may be helpful here for
    sampling a word. Use as:
        y = multinomial_sample(p)
    to sample an index y from the vector of probabilities p.

    Arguments:
        init = index of start word (word_to_num['<s>'])
        end = index of end word (word_to_num['</s>'])
        maxlen = maximum length to generate

    Returns:
        ys = sequence of indices
        J = total cross-entropy loss of generated sequence
    """
    J = 0         # total loss
    ys = [init]   # emitted sequence

    #### YOUR CODE HERE ####
    hs = zeros((maxlen + 1, self.hdim))   # hs[-1] is the initial state (zeros)
    for t in xrange(maxlen):
        hs[t] = sigmoid(dot(self.params.H, hs[t - 1]) + self.sparams.L[ys[t]])
        y_hat = softmax(dot(self.params.U, hs[t]))
        y_index = multinomial_sample(y_hat)
        ys.append(y_index)
        J -= log(y_hat[y_index])
        if y_index == end:
            break
    #### YOUR CODE HERE ####
    return ys, J
def generate_sequence(self, init, end, maxlen=100):
    """
    Generate a sequence from the language model, by running the RNN forward
    and selecting, at each timestep, a random word from the emitted
    probability distribution.

    The MultinomialSampler class (in nn.math) may be helpful here for
    sampling a word. Use as:
        y = multinomial_sample(p)
    to sample an index y from the vector of probabilities p.

    Arguments:
        init = index of start word (word_to_num['<s>'])
        end = index of end word (word_to_num['</s>'])
        maxlen = maximum length to generate

    Returns:
        ys = sequence of indices
        J = total cross-entropy loss of generated sequence
    """
    J = 0         # total loss
    ys = [init]   # emitted sequence

    #### YOUR CODE HERE ####
    t = 0
    hs = zeros(self.hdim)
    while len(ys) <= maxlen and ys[-1] != end:
        hs = sigmoid(self.params.H.dot(hs) + self.sparams.L[ys[t], :])
        ps = softmax(self.params.U.dot(hs))
        y = multinomial_sample(ps)
        J -= log(ps[y])
        ys.append(y)
        t += 1
    #### YOUR CODE HERE ####
    return ys, J
def generate_sequence(self, init, end, maxlen=100):
    """
    Generate a sequence from the language model, by running the RNN forward
    and selecting, at each timestep, a random word from the emitted
    probability distribution.

    The MultinomialSampler class (in nn.math) may be helpful here for
    sampling a word. Use as:
        y = multinomial_sample(p)
    to sample an index y from the vector of probabilities p.

    Arguments:
        init = index of start word (word_to_num['<s>'])
        end = index of end word (word_to_num['</s>'])
        maxlen = maximum length to generate

    Returns:
        ys = sequence of indices
        J = total cross-entropy loss of generated sequence
    """
    J = 0         # total loss
    ys = [init]   # emitted sequence
    ht = zeros(self.hdim)

    #### YOUR CODE HERE ####
    for t in range(maxlen):
        if ys[-1] == end:
            break
        ht = sigmoid(dot(self.params.H, ht) + self.sparams.L[ys[t], :])
        pt = softmax(dot(self.params.U, ht))
        yt = multinomial_sample(pt)
        ys.append(yt)
        J -= log(pt[yt])
    #### YOUR CODE HERE ####
    return ys, J
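# Illustrative sketch of the sampling primitive used throughout: an assumed
# stand-in for nn.math's multinomial_sample / MultinomialSampler, not the
# actual implementation. It draws index i with probability proportional to
# p[i]; rescaling by cdf[-1] means unnormalized weights also work.
import numpy as np

def multinomial_sample_sketch(p):
    cdf = np.cumsum(np.asarray(p, dtype=float))
    return int(np.searchsorted(cdf, np.random.random() * cdf[-1]))

# quick sanity check: empirical frequencies should approach [0.1, 0.6, 0.3]
draws = [multinomial_sample_sketch([0.1, 0.6, 0.3]) for _ in xrange(10000)]
print np.bincount(draws, minlength=3) / 10000.0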