Code example #1
File: topic_lstm.py  Project: tbepler/rnn
import theano
import theano.tensor as T

# The remaining names (LSTM, Linear, DirichletTransition, Emmission, NullNoise,
# cross_entropy, confusion, logsoftmax) are assumed to come from other modules
# of the tbepler/rnn project; their exact import paths are not shown here.

class TopicLSTM(object):
    def __init__(self, n_in, units, n_topics, sparsity=0, noise=NullNoise(), trans_weight=1.0):
        # Bidirectional encoder: the forward LSTM reads the input left to right,
        # the backward LSTM maps its states into topic space right to left.
        self.forward = LSTM(n_in, units)
        self.backward = LSTM(units, n_topics)
        self.linear = Linear(n_topics, n_topics)
        self.trans = DirichletTransition(n_topics)
        self.emit = Emmission(n_topics, n_in)
        self.sparsity = sparsity
        self.noise = noise
        self.n_topics = n_topics
        self.n_in = n_in
        self.trans_weight = trans_weight

    @property
    def weights(self):
        return self.forward.weights + self.backward.weights + self.trans.weights + self.emit.weights

    def transform(self, X, mask=None):
        # Forward pass left to right, then backward pass right to left,
        # followed by a linear projection to log topic probabilities.
        Z_f, _ = self.forward.scanl(X, mask=mask)
        Z, _ = self.backward.scanr(Z_f, mask=mask) #, activation=softmax)
        return logsoftmax(self.linear(Z))

    def loss(self, X, mask=None, flank=0, Z=None):
        if Z is None:
            Z = self.transform(self.noise(X), mask=mask)
        E = self.emit(Z)  # per-position emission log probabilities
        L = cross_entropy(E, X)
        C = confusion(T.argmax(E, axis=-1), X, E.shape[-1])
        if mask is not None:
            # Zero the loss and confusion counts at padded positions.
            L *= T.shape_padright(mask)
            C *= T.shape_padright(T.shape_padright(mask))
        n = X.shape[0]
        # Trim flank positions from both ends of the sequence.
        return L[flank:n-flank], C[flank:n-flank]

    def gradient(self, X, mask=None, flank=0):
        Z = self.transform(self.noise(X), mask=mask)
        n = Z.shape[0]
        L, C = self.loss(X, mask=mask, flank=flank, Z=Z)
        loss = T.sum(L) #/ self.n_in
        # The Dirichlet transition log probability rewards smooth topic
        # trajectories, so it is subtracted with weight trans_weight.
        Tr = self.trans(Z)
        if mask is not None:
            Tr *= mask
        if self.trans_weight > 0:
            loss -= self.trans_weight*T.sum(Tr[flank:n-flank]) #/ self.n_topics
        m = n - 2*flank
        #loss += self.trans.regularizer()*m/self.n_topics
        if self.sparsity > 0:
            # Sparsity penalty on the log topic assignments, masked to
            # exclude padded positions.
            R = self.sparsity*Z
            if mask is not None:
                R *= T.shape_padright(mask)
            loss += T.sum(R[flank:n-flank])
        gW = theano.grad(loss, self.weights, disconnected_inputs='warn')
        return gW, [L.sum(axis=[0,1]), C.sum(axis=[0,1])]
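
A minimal usage sketch for this class. Everything below is an assumption rather than part of the original snippet: the symbolic input shapes, the hyperparameter values, and that self.weights holds Theano shared variables suitable for in-place SGD updates.

import theano
import theano.tensor as T

# Hypothetical dimensions: 20 input symbols, 50 LSTM units, 10 topics.
model = TopicLSTM(n_in=20, units=50, n_topics=10, sparsity=0.01)

X = T.imatrix('X')       # (length, batch) matrix of symbol indices
mask = T.matrix('mask')  # 1.0 at real positions, 0.0 at padding
gW, (L, C) = model.gradient(X, mask=mask)

# Plain SGD over the model's weight variables.
lr = 0.01
updates = [(w, w - lr*g) for w, g in zip(model.weights, gW)]
train = theano.function([X, mask], [L, C], updates=updates)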
Code example #2
File: dirichlet_lstm.py  Project: tbepler/rnn
import theano
import theano.tensor as T

# Same assumption as in code example #1: LSTM, DirichletTransition, Emmission,
# NullNoise, cross_entropy, confusion and logsoftmax come from other modules
# of the tbepler/rnn project.

class TopicLSTM(object):
    def __init__(self, n_in, units, n_topics, sparsity=0, noise=NullNoise()):
        self.forward = LSTM(n_in, units)
        self.backward = LSTM(units, n_topics)
        self.trans = DirichletTransition(n_topics)
        self.emit = Emmission(n_topics, n_in)
        self.sparsity = sparsity
        self.noise = noise

    @property
    def weights(self):
        return self.forward.weights + self.backward.weights + self.trans.weights + self.emit.weights

    def transform(self, X, mask=None):
        # In this variant the scan results are used without unpacking and the
        # backward pass applies logsoftmax directly as its activation.
        Z_f = self.forward.scanl(X, mask=mask)
        Z = self.backward.scanr(Z_f, mask=None, activation=logsoftmax)
        return Z

    def loss(self, X, mask=None, flank=0, Z=None):
        if Z is None:
            Z = self.transform(self.noise(X), mask=mask)
        Tr = self.trans(Z)  # per-position transition log probability
        E = self.emit(Z)    # per-position emission log probabilities
        # The transition term is folded directly into the emission scores
        # before the cross entropy, rather than added to the loss separately.
        L = cross_entropy(T.shape_padright(Tr) + E, X)
        C = confusion(T.argmax(E, axis=-1), X, E.shape[-1])
        if mask is not None:
            L *= T.shape_padright(mask)
            C *= T.shape_padright(T.shape_padright(mask))
        n = X.shape[0]
        return L[flank:n-flank], C[flank:n-flank]

    def gradient(self, X, mask=None, flank=0):
        Z = self.transform(self.noise(X), mask=mask)
        L, C = self.loss(X, mask=mask, flank=flank, Z=Z)
        loss = T.sum(L)
        n = Z.shape[0]
        if self.sparsity > 0:
            # Sparsity penalty on the log topic assignments, masked to
            # exclude padded positions.
            R = self.sparsity*Z
            if mask is not None:
                R *= T.shape_padright(mask)
            loss += T.sum(R[flank:n-flank])
        gW = theano.grad(loss, self.weights)
        # This variant returns the un-reduced loss and confusion tensors.
        return gW, [L, C]
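
The two examples differ mainly in where the Dirichlet transition term enters. In topic_lstm.py the transition log probability Tr is subtracted from the summed loss with an explicit weight, trans_weight. In dirichlet_lstm.py, assuming cross_entropy(E, X) computes the negative log probability of the observed symbols under the log scores E, adding T.shape_padright(Tr) to E shifts every class score by the same per-position transition log probability, so the transition term is folded into the per-position loss with an implicit weight of 1 and is masked and flank-trimmed together with it. The first example also reduces L and C over the sequence and batch axes before returning, while this one returns the full tensors.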