def softmax(x): """ Compute the softmax function in tensorflow. You might find the tensorflow functions tf.exp, tf.reduce_max, tf.reduce_sum, tf.expand_dims useful. (Many solutions are possible, so you may not need to use all of these functions). Recall also that many common tensorflow operations are sugared (e.g. x * y does a tensor multiplication if x and y are both tensors). Make sure to implement the numerical stability fixes as in the previous homework! Args: x: tf.Tensor with shape (n_samples, n_features). Note feature vectors are represented by row-vectors. (For simplicity, no need to handle 1-d input as in the previous homework) Returns: out: tf.Tensor with shape (n_sample, n_features). You need to construct this tensor in this problem. """ ### YOUR CODE HERE x_max = dy.max_dim(x, 1) x_sub = dy.colwise_add(x, -x_max) x_exp = dy.exp(x_sub) sum_exp = dy.colwise_add(dy.zeroes(x.dim()[0]), dy.sum_cols(x_exp)) out = dy.cdiv(x_exp, sum_exp) ### END YOUR CODE return out
def word_repr(self, char_seq):
    # obtain the word representation when given its character sequence
    wlen = len(char_seq)
    if 'rgW%d' % wlen not in self.param_exprs:
        # Cache the per-length parameter expressions on first use.
        self.param_exprs['rgW%d' % wlen] = dy.parameter(self.params['reset_gate_W'][wlen - 1])
        self.param_exprs['rgb%d' % wlen] = dy.parameter(self.params['reset_gate_b'][wlen - 1])
        self.param_exprs['cW%d' % wlen] = dy.parameter(self.params['com_W'][wlen - 1])
        self.param_exprs['cb%d' % wlen] = dy.parameter(self.params['com_b'][wlen - 1])
        self.param_exprs['ugW%d' % wlen] = dy.parameter(self.params['update_gate_W'][wlen - 1])
        self.param_exprs['ugb%d' % wlen] = dy.parameter(self.params['update_gate_b'][wlen - 1])

    chars = dy.concatenate(char_seq)
    reset_gate = dy.logistic(self.param_exprs['rgW%d' % wlen] * chars
                             + self.param_exprs['rgb%d' % wlen])
    comb = dy.concatenate([
        dy.tanh(self.param_exprs['cW%d' % wlen] * dy.cmult(reset_gate, chars)
                + self.param_exprs['cb%d' % wlen]),
        chars,
    ])
    update_logits = self.param_exprs['ugW%d' % wlen] * comb + self.param_exprs['ugb%d' % wlen]
    # One softmax over the (wlen + 1) candidates per output dimension.
    update_gate = dy.transpose(dy.concatenate_cols(
        [dy.softmax(dy.pickrange(update_logits, i * (wlen + 1), (i + 1) * (wlen + 1)))
         for i in range(self.options['ndims'])]))
    # The following implementation of softmax is not numerically safe, but faster:
    # exp_update_logits = dy.exp(dy.reshape(update_logits, (self.options['ndims'], wlen + 1)))
    # update_gate = dy.cdiv(exp_update_logits,
    #                       dy.concatenate_cols([dy.sum_cols(exp_update_logits)] * (wlen + 1)))
    # assert not np.isnan(update_gate.npvalue()).any()
    word = dy.sum_cols(dy.cmult(update_gate, dy.reshape(comb, (self.options['ndims'], wlen + 1))))
    return word
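# A toy, self-contained sketch of the gating arithmetic in word_repr above,
# with hypothetical sizes (d = 3 embedding dims, w = 2 characters) and uniform
# logits so the expected output is easy to check by hand. Note that DyNet's
# reshape is column-major, so row i of the reshaped comb holds dimension i of
# the composed vector followed by dimension i of each character vector.
import dynet as dy

d, w = 3, 2
dy.renew_cg()
comb = dy.inputVector([float(i) for i in range(d * (w + 1))])
logits = dy.inputVector([0.0] * (d * (w + 1)))  # all-zero logits -> uniform gates
gate = dy.transpose(dy.concatenate_cols(
    [dy.softmax(dy.pickrange(logits, i * (w + 1), (i + 1) * (w + 1)))
     for i in range(d)]))
word = dy.sum_cols(dy.cmult(gate, dy.reshape(comb, (d, w + 1))))
print(word.npvalue())  # [3. 4. 5.]: each output dim averages its w + 1 candidates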
def log_sum_exp(scores, n_tags):
    npval = scores.npvalue()
    argmax_score = np.argmax(npval)
    max_score_expr = dy.pick(scores, argmax_score)
    max_score_expr_broadcast = dy.concatenate([max_score_expr] * n_tags)
    # Subtract the max before exponentiating so exp() cannot overflow.
    return max_score_expr + dy.log(
        dy.sum_cols(dy.transpose(dy.exp(scores - max_score_expr_broadcast))))
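# A minimal check of the trick above -- a sketch assuming numpy/dynet are
# imported as np/dy as in the snippet, with illustrative values: subtracting
# the max before exponentiating avoids overflow, yet the result equals the
# direct formula.
import numpy as np
import dynet as dy

dy.renew_cg()
v = [2.0, 1.0, -0.5]
scores = dy.inputVector(v)
lse = log_sum_exp(scores, n_tags=3).value()
assert np.isclose(lse, np.log(np.sum(np.exp(v))))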
def __call__(self, *args):
    U = [dy.parameter(U_) for U_ in self.U]
    out = U[0] * args[0]
    for x, u in zip(args[1:], U[1:]):
        out = dy.cmult(out, u * x)
    out = dy.sum_cols(dy.transpose(out))
    return out
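# A standalone sketch of the same multilinear score outside the class --
# hypothetical shapes and values, assuming DyNet 2.x (dy.ParameterCollection)
# and a version that still provides dy.sum_cols: for two inputs it computes
# sum_i (U1 x1)_i * (U2 x2)_i.
import dynet as dy

m = dy.ParameterCollection()
U1 = m.add_parameters((4, 3))  # maps a 3-d input into the shared 4-d space
U2 = m.add_parameters((4, 5))  # maps a 5-d input into the shared 4-d space

dy.renew_cg()
x1 = dy.inputVector([1.0, 2.0, 3.0])
x2 = dy.inputVector([1.0] * 5)
prod = dy.cmult(dy.parameter(U1) * x1, dy.parameter(U2) * x2)
score = dy.sum_cols(dy.transpose(prod))  # scalar score
print(score.value())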
def log_sum_exp(scores):
    # Nested inside a method: `self` comes from the enclosing scope, and the
    # "+ 2" presumably accounts for the CRF's extra start and end tags.
    npval = scores.npvalue()
    argmax_score = np.argmax(npval)
    max_score_expr = dynet.pick(scores, argmax_score)
    max_score_expr_broadcast = dynet.concatenate([max_score_expr] * (self.n_tags + 2))
    return max_score_expr + dynet.log(
        dynet.sum_cols(
            dynet.transpose(
                dynet.exp(scores - max_score_expr_broadcast))))
def softmax(x):
    ### YOUR CODE HERE
    x_max = dy.max_dim(x, 1)
    x_sub = dy.colwise_add(x, -x_max)
    x_exp = dy.exp(x_sub)
    x_sum = dy.sum_cols(x_exp)
    x_tmp = dy.zeroes(x.dim()[0])
    x_tmp = dy.colwise_add(x_tmp, x_sum)
    out = dy.cdiv(x_exp, x_tmp)
    ### END YOUR CODE
    return out
# Per-element unary functions.
e = dy.tanh(e1)
e = dy.exp(e1)
e = dy.log(e1)
e = dy.logistic(e1)  # Sigmoid(x)
e = dy.rectify(e1)   # Relu (= max(x,0))
e = dy.softsign(e1)  # x/(1+|x|)

# softmaxes
e = dy.softmax(e1)
e = dy.log_softmax(e1, restrict=[])  # restrict is a set of indices.
                                     # if not empty, only entries in restrict are part
                                     # of the softmax computation, others get 0.

e = dy.sum_cols(e1)

# Picking values from vector expressions
e = dy.pick(e1, k)          # k is an unsigned integer, e1 is a vector. return e1[k]
e = e1[k]                   # same
e = dy.pickrange(e1, k, v)  # like python's e1[k:v] for lists. e1 is an Expression, k,v integers.
e = e1[k:v]                 # same
e = dy.pickneglogsoftmax(e1, k)  # k is an unsigned integer.
                                 # equivalent to: pick(-log(dy.softmax(e1)), k)

# Neural net stuff
e = dy.noise(e1, stddev)  # add noise to each element, drawn from a gaussian
                          # with standard deviation stddev
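# A tiny runnable sketch exercising a few of the ops above (illustrative values):
import dynet as dy

dy.renew_cg()
e1 = dy.inputVector([0.5, -1.0, 2.0])
probs = dy.softmax(e1)              # 3-vector summing to 1
p0 = dy.pick(probs, 0)              # scalar, same as probs[0]
sub = e1[0:2]                       # first two entries, via the pickrange sugar
loss = dy.pickneglogsoftmax(e1, 2)  # -log(softmax(e1)[2]); a classification loss
print(probs.npvalue(), p0.value(), loss.value())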