class TopicLSTM(object):
    """Bidirectional-LSTM topic model.

    A forward LSTM encodes the input, a backward LSTM maps the encoding to
    ``n_topics`` dimensions, a linear layer mixes topics, and ``logsoftmax``
    produces log-topic-probabilities per position.  A Dirichlet transition
    prior (weighted by ``trans_weight``) and an optional sparsity penalty
    regularize the topic sequence; an emission model reconstructs the input.

    Parameters
    ----------
    n_in : input alphabet/feature size.
    units : hidden units of the forward LSTM.
    n_topics : number of topics (output dimension).
    sparsity : weight of the sparsity penalty on Z (0 disables it).
    noise : callable applied to X before transform; defaults to a fresh
        ``NullNoise()`` per instance.
    trans_weight : weight of the Dirichlet transition log-probability term.
    """

    def __init__(self, n_in, units, n_topics, sparsity=0, noise=None, trans_weight=1.0):
        # `noise=None` sentinel instead of `noise=NullNoise()`: a def-time
        # default instance would be shared by every TopicLSTM object
        # (mutable-default-argument pitfall).
        self.forward = LSTM(n_in, units)
        self.backward = LSTM(units, n_topics)
        self.linear = Linear(n_topics, n_topics)
        self.trans = DirichletTransition(n_topics)
        self.emit = Emmission(n_topics, n_in)
        self.sparsity = sparsity
        self.noise = NullNoise() if noise is None else noise
        self.n_topics = n_topics
        self.n_in = n_in
        self.trans_weight = trans_weight

    @property
    def weights(self):
        # BUG FIX: self.linear.weights was missing from this list even though
        # the linear layer is applied in transform() and feeds the loss, so
        # theano.grad never produced gradients for it and it was never
        # trained.  (Assumes Linear exposes .weights like the other layers —
        # TODO confirm.)
        return (self.forward.weights + self.backward.weights
                + self.linear.weights + self.trans.weights + self.emit.weights)

    def transform(self, X, mask=None):
        """Return per-position log topic probabilities for X."""
        # scanl/scanr return (output, state); only the output is needed.
        Z_f, _ = self.forward.scanl(X, mask=mask)
        Z, _ = self.backward.scanr(Z_f, mask=mask)
        return logsoftmax(self.linear(Z))

    def loss(self, X, mask=None, flank=0, Z=None):
        """Return (cross-entropy, confusion) with `flank` positions trimmed
        from both ends of the time axis.  Z is recomputed (with noise) when
        not supplied."""
        if Z is None:
            Z = self.transform(self.noise(X), mask=mask)
        E = self.emit(Z)
        L = cross_entropy(E, X)
        C = confusion(T.argmax(E, axis=-1), X, E.shape[-1])
        if mask is not None:
            # Broadcast the (time, batch) mask over the trailing dims.
            L *= T.shape_padright(mask)
            C *= T.shape_padright(T.shape_padright(mask))
        n = X.shape[0]
        return L[flank:n-flank], C[flank:n-flank]

    def gradient(self, X, mask=None, flank=0):
        """Return (gradients w.r.t. self.weights, [summed loss, summed confusion]).

        The objective is reconstruction cross-entropy minus the weighted
        Dirichlet transition log-probability, plus the sparsity penalty.
        """
        Z = self.transform(self.noise(X), mask=mask)
        n = Z.shape[0]
        L, C = self.loss(X, mask=mask, flank=flank, Z=Z)
        loss = T.sum(L)
        Tr = self.trans(Z)
        if mask is not None:
            Tr *= mask
        if self.trans_weight > 0:
            # Subtract: higher transition log-probability lowers the loss.
            loss -= self.trans_weight * T.sum(Tr[flank:n-flank])
        if self.sparsity > 0:
            R = self.sparsity * Z
            if mask is not None:
                R *= T.shape_padright(mask)
            loss += T.sum(R[flank:n-flank])
        # 'warn' rather than 'raise': some listed weights may legitimately be
        # disconnected (e.g. when trans_weight == 0 the transition term drops
        # out of the graph).
        gW = theano.grad(loss, self.weights, disconnected_inputs='warn')
        return gW, [L.sum(axis=[0, 1]), C.sum(axis=[0, 1])]
# NOTE(review): this class has the same name as the TopicLSTM defined above
# and therefore shadows it — only this definition is visible to importers.
# It looks like an older variant (no linear layer, no trans_weight); rename
# or remove one of the two.
class TopicLSTM(object):
    """Bidirectional-LSTM topic model (older variant).

    Forward LSTM encodes X; backward LSTM produces log-topic-probabilities
    directly via a ``logsoftmax`` output activation.  The Dirichlet
    transition term is folded into the per-position cross-entropy rather
    than added as a separate objective term.
    """

    def __init__(self, n_in, units, n_topics, sparsity=0, noise=None):
        # `noise=None` sentinel instead of `noise=NullNoise()`: a def-time
        # default instance would be shared by every TopicLSTM object
        # (mutable-default-argument pitfall).
        self.forward = LSTM(n_in, units)
        self.backward = LSTM(units, n_topics)
        self.trans = DirichletTransition(n_topics)
        self.emit = Emmission(n_topics, n_in)
        self.sparsity = sparsity
        self.noise = NullNoise() if noise is None else noise

    @property
    def weights(self):
        return (self.forward.weights + self.backward.weights
                + self.trans.weights + self.emit.weights)

    def transform(self, X, mask=None):
        """Return per-position log topic probabilities for X."""
        # NOTE(review): scanl/scanr results are not unpacked here, whereas
        # the sibling class unpacks (output, state) pairs — presumably this
        # targets a different LSTM API version; confirm.  Likewise scanr is
        # called with mask=None rather than mask=mask; preserved as written.
        Z_f = self.forward.scanl(X, mask=mask)
        Z = self.backward.scanr(Z_f, mask=None, activation=logsoftmax)
        return Z

    def loss(self, X, mask=None, flank=0, Z=None):
        """Return (cross-entropy, confusion) with `flank` positions trimmed
        from both ends of the time axis.  Z is recomputed (with noise) when
        not supplied."""
        if Z is None:
            Z = self.transform(self.noise(X), mask=mask)
        Tr = self.trans(Z)
        E = self.emit(Z)
        # Transition log-probability is broadcast-added to the emission
        # scores before the cross-entropy.
        L = cross_entropy(T.shape_padright(Tr) + E, X)
        C = confusion(T.argmax(E, axis=-1), X, E.shape[-1])
        if mask is not None:
            L *= T.shape_padright(mask)
            C *= T.shape_padright(T.shape_padright(mask))
        n = X.shape[0]
        return L[flank:n-flank], C[flank:n-flank]

    def gradient(self, X, mask=None, flank=0):
        """Return (gradients w.r.t. self.weights, [L, C]) for one batch."""
        Z = self.transform(self.noise(X), mask=mask)
        L, C = self.loss(X, mask=mask, flank=flank, Z=Z)
        loss = T.sum(L)
        n = Z.shape[0]
        # BUG FIX: this read `self.sparcity` (AttributeError — __init__ sets
        # self.sparsity), and the masking line multiplied R by
        # shape_padright(R) instead of shape_padright(mask).
        if self.sparsity > 0:
            R = self.sparsity * Z
            if mask is not None:
                R *= T.shape_padright(mask)
            loss += T.sum(R[flank:n-flank])
        gW = theano.grad(loss, self.weights)
        return gW, [L, C]