import dynet as dy


def softmax(x):
    """
    Compute the softmax function in tensorflow.

    You might find the tensorflow functions tf.exp, tf.reduce_max,
    tf.reduce_sum, tf.expand_dims useful. (Many solutions are possible, so you may
    not need to use all of these functions). Recall also that many common
    tensorflow operations are sugared (e.g. x * y does a tensor multiplication
    if x and y are both tensors). Make sure to implement the numerical stability
    fixes as in the previous homework!

    Args:
        x:   tf.Tensor with shape (n_samples, n_features). Note feature vectors are
                  represented by row-vectors. (For simplicity, no need to handle 1-d
                  input as in the previous homework)
    Returns:
        out: tf.Tensor with shape (n_sample, n_features). You need to construct this
                  tensor in this problem.
    """

    ### YOUR CODE HERE
    # Subtract each row's max before exponentiating, for numerical stability.
    x_max = dy.max_dim(x, 1)
    x_sub = dy.colwise_add(x, -x_max)
    x_exp = dy.exp(x_sub)
    # Broadcast the per-row sums across all columns, then divide elementwise.
    sum_exp = dy.colwise_add(dy.zeroes(x.dim()[0]), dy.sum_cols(x_exp))

    out = dy.cdiv(x_exp, sum_exp)
    ### END YOUR CODE

    return out
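A quick way to sanity-check the stable version is to feed it logits large enough to overflow a naive exp. A minimal sketch, assuming DyNet is installed and the softmax above is in scope:

import numpy as np
import dynet as dy

dy.renew_cg()
# Row maxima near 1000 would overflow exp() without the max-subtraction trick.
x = dy.inputTensor(np.array([[1001.0, 1002.0], [3.0, 4.0]]))
probs = softmax(x).npvalue()
assert np.allclose(probs.sum(axis=1), 1.0)  # each row sums to 1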
Example #2
    def word_repr(self, char_seq):
        # obtain the word representation when given its character sequence
        wlen = len(char_seq)
        if 'rgW%d'%wlen not in self.param_exprs:
            self.param_exprs['rgW%d'%wlen] = dy.parameter(self.params['reset_gate_W'][wlen-1])
            self.param_exprs['rgb%d'%wlen] = dy.parameter(self.params['reset_gate_b'][wlen-1])
            self.param_exprs['cW%d'%wlen] = dy.parameter(self.params['com_W'][wlen-1])
            self.param_exprs['cb%d'%wlen] = dy.parameter(self.params['com_b'][wlen-1])
            self.param_exprs['ugW%d'%wlen] = dy.parameter(self.params['update_gate_W'][wlen-1])
            self.param_exprs['ugb%d'%wlen] = dy.parameter(self.params['update_gate_b'][wlen-1])
          
        chars = dy.concatenate(char_seq)
        # GRU-style gating over the concatenated character vectors.
        reset_gate = dy.logistic(self.param_exprs['rgW%d'%wlen] * chars + self.param_exprs['rgb%d'%wlen])
        comb = dy.concatenate([dy.tanh(self.param_exprs['cW%d'%wlen] * dy.cmult(reset_gate,chars) + self.param_exprs['cb%d'%wlen]),chars])
        update_logits = self.param_exprs['ugW%d'%wlen] * comb + self.param_exprs['ugb%d'%wlen]
        
        # One safe softmax per output dimension, over wlen+1 candidate columns each.
        update_gate = dy.transpose(dy.concatenate_cols([dy.softmax(dy.pickrange(update_logits,i*(wlen+1),(i+1)*(wlen+1))) for i in range(self.options['ndims'])]))
        
        # The following softmax implementation is not numerically safe, but faster...
        #exp_update_logits = dy.exp(dy.reshape(update_logits,(self.options['ndims'],wlen+1)))
        #update_gate = dy.cdiv(exp_update_logits, dy.concatenate_cols([dy.sum_cols(exp_update_logits)] *(wlen+1)))
        #assert (not np.isnan(update_gate.npvalue()).any())

        word = dy.sum_cols(dy.cmult(update_gate,dy.reshape(comb,(self.options['ndims'],wlen+1))))
        return word
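The commented-out variant above skips the max-subtraction, which is exactly why it is unsafe: exp overflows for large logits and the division yields NaNs. A minimal illustration in plain NumPy (the values are hypothetical):

import numpy as np

logits = np.array([800.0, 802.0])
unsafe = np.exp(logits) / np.exp(logits).sum()  # inf / inf -> nan
stable = np.exp(logits - logits.max())
stable /= stable.sum()  # ~[0.119, 0.881]
print(unsafe, stable)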
Example #3
def log_sum_exp(scores, n_tags):
    # Numerically stable log-sum-exp: subtract the max score before
    # exponentiating, then add it back after the log.
    npval = scores.npvalue()
    argmax_score = np.argmax(npval)
    max_score_expr = dy.pick(scores, argmax_score)
    max_score_expr_broadcast = dy.concatenate([max_score_expr] * n_tags)
    return max_score_expr + dy.log(
        dy.sum_cols(dy.transpose(dy.exp(scores - max_score_expr_broadcast))))
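A quick check of this against NumPy's reference result; a minimal sketch assuming DyNet is installed and that scores is a plain vector of per-tag scores:

import numpy as np
import dynet as dy

dy.renew_cg()
scores = dy.inputTensor(np.array([1.0, 2.0, 3.0]))
lse = log_sum_exp(scores, n_tags=3).npvalue()
assert np.allclose(lse, np.logaddexp.reduce([1.0, 2.0, 3.0]))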
Example #4
    def __call__(self, *args):
        U = [dy.parameter(U_) for U_ in self.U]

        # Multiplicative composition: project each input, then take the
        # elementwise product of the projections.
        out = U[0] * args[0]
        for x, u in zip(args[1:], U[1:]):
            out = dy.cmult(out, u * x)

        # Collapse to a score by summing all entries (transpose + sum_cols).
        out = dy.sum_cols(dy.transpose(out))
        return out
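A hedged usage sketch for this layer; the surrounding class is not shown, so the parameter collection, the (4, 3) shapes, and the two-input setup below are assumptions for illustration only:

import numpy as np
import dynet as dy

m = dy.ParameterCollection()
U = [m.add_parameters((4, 3)) for _ in range(2)]  # hypothetical: one projection per input

dy.renew_cg()
a = dy.inputTensor(np.random.rand(3))
b = dy.inputTensor(np.random.rand(3))
out = dy.cmult(dy.parameter(U[0]) * a, dy.parameter(U[1]) * b)
score = dy.sum_cols(dy.transpose(out))  # 1-d expression holding the summed score
print(score.npvalue())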
Example #5
def log_sum_exp(scores):
    # Same stable log-sum-exp as above; `self.n_tags` is captured from the
    # enclosing method, since this was originally a nested function.
    npval = scores.npvalue()
    argmax_score = np.argmax(npval)
    max_score_expr = dynet.pick(scores, argmax_score)
    max_score_expr_broadcast = dynet.concatenate([max_score_expr] *
                                                 (self.n_tags + 2))
    return max_score_expr + dynet.log(
        dynet.sum_cols(
            dynet.transpose(
                dynet.exp(scores - max_score_expr_broadcast))))
Example #6
def softmax(x):
    ### YOUR CODE HERE
    x_max = dy.max_dim(x, 1)
    x_sub = dy.colwise_add(x, -x_max)
    x_exp = dy.exp(x_sub)
    x_sum = dy.sum_cols(x_exp)
    x_tmp = dy.zeroes(x.dim()[0])
    x_tmp = dy.colwise_add(x_tmp, x_sum)
    out = dy.cdiv(x_exp, x_tmp)
    ### END YOUR CODE
    return out
Example #7
    def word_repr(self, char_seq):
        # obtain the word representation when given its character sequence
        wlen = len(char_seq)
        if 'rgW%d' % wlen not in self.param_exprs:
            self.param_exprs['rgW%d' % wlen] = dy.parameter(
                self.params['reset_gate_W'][wlen - 1])
            self.param_exprs['rgb%d' % wlen] = dy.parameter(
                self.params['reset_gate_b'][wlen - 1])
            self.param_exprs['cW%d' % wlen] = dy.parameter(
                self.params['com_W'][wlen - 1])
            self.param_exprs['cb%d' % wlen] = dy.parameter(
                self.params['com_b'][wlen - 1])
            self.param_exprs['ugW%d' % wlen] = dy.parameter(
                self.params['update_gate_W'][wlen - 1])
            self.param_exprs['ugb%d' % wlen] = dy.parameter(
                self.params['update_gate_b'][wlen - 1])

        chars = dy.concatenate(char_seq)
        reset_gate = dy.logistic(self.param_exprs['rgW%d' % wlen] * chars +
                                 self.param_exprs['rgb%d' % wlen])
        comb = dy.concatenate([
            dy.tanh(self.param_exprs['cW%d' % wlen] *
                    dy.cmult(reset_gate, chars) +
                    self.param_exprs['cb%d' % wlen]), chars
        ])
        update_logits = self.param_exprs[
            'ugW%d' % wlen] * comb + self.param_exprs['ugb%d' % wlen]

        update_gate = dy.transpose(
            dy.concatenate_cols([
                dy.softmax(
                    dy.pickrange(update_logits, i * (wlen + 1),
                                 (i + 1) * (wlen + 1)))
                for i in range(self.options['ndims'])
            ]))

        # The following softmax implementation is not numerically safe, but faster...
        #exp_update_logits = dy.exp(dy.reshape(update_logits,(self.options['ndims'],wlen+1)))
        #update_gate = dy.cdiv(exp_update_logits, dy.concatenate_cols([dy.sum_cols(exp_update_logits)] *(wlen+1)))
        #assert (not np.isnan(update_gate.npvalue()).any())

        word = dy.sum_cols(
            dy.cmult(update_gate,
                     dy.reshape(comb, (self.options['ndims'], wlen + 1))))
        return word
Example #8
# Per-element unary functions.
e = dy.tanh(e1)
e = dy.exp(e1)
e = dy.log(e1)
e = dy.logistic(e1)  # Sigmoid(x)
e = dy.rectify(e1)  # Relu (= max(x,0))
e = dy.softsign(e1)  # x/(1+|x|)

# softmaxes
e = dy.softmax(e1)
e = dy.log_softmax(e1, restrict=[])  # restrict is a set of indices.
# if not empty, only entries in restrict are part
# of softmax computation, others get 0.

e = dy.sum_cols(e1)

# Picking values from vector expressions
e = dy.pick(e1, k)  # k is unsigned integer, e1 is vector. return e1[k]
e = e1[k]  # same

e = dy.pickrange(e1, k, v)  # like python's e1[k:v] for lists. e1 is an Expression, k,v integers.
e = e1[k:v]  # same

e = dy.pickneglogsoftmax(e1, k)  # k is unsigned integer. equiv to: pick(-log(dy.softmax(e1)), k)

# Neural net stuff
dy.noise(e1, stddev)  # add noise to each element from a gaussian with standard-dev = stddev
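The snippets above leave e1, k, v, and stddev undefined; a minimal setup to run them, assuming DyNet is installed:

import numpy as np
import dynet as dy

dy.renew_cg()
e1 = dy.inputTensor(np.array([1.0, -2.0, 3.0]))
k, v, stddev = 0, 2, 0.1
print(dy.softmax(e1).npvalue())          # probabilities summing to 1
print(dy.pickrange(e1, k, v).npvalue())  # entries e1[0:2]
print(dy.noise(e1, stddev).npvalue())    # e1 plus gaussian noise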