def lossAndGrads(self, batchAndTargets, computeGrads=True):
        """Score a batch with the hidden-layer model and optionally backpropagate.

        Args:
            batchAndTargets: sized sequence of ``(context_tokens, target)``
                pairs. The context tokens index rows of ``self.wvec``; every
                example is assumed to carry the same number of tokens so the
                lookup can be flattened per example -- TODO confirm with
                callers.
            computeGrads: when false, return right after the loss and skip
                the backward pass.

        Returns:
            ``(preds, probs, loss)`` when ``computeGrads`` is false, otherwise
            ``(preds, probs, loss, grads)`` where ``grads`` maps ``'Wa'``,
            ``'Wz'`` and ``'wvec'`` to the gradient of that parameter.
            ``preds`` is the argmax of the scores along axis 1; ``probs`` and
            ``loss`` are whatever ``softmaxLossAndGrads`` returns.
        """
        batch_size = len(batchAndTargets)
        ctxtoks = [tokens for tokens, _ in batchAndTargets]
        tgts = [target for _, target in batchAndTargets]

        # Forward pass: look up the context rows, flatten them to one row per
        # example, then apply the hidden layer, relu and the output layer.
        embedded = self.wvec[ctxtoks, :].reshape((batch_size, -1))
        hidden = relu(np.dot(embedded, self.Wz))
        scores = np.dot(hidden, self.Wa)
        preds = np.argmax(scores, axis=1)

        loss, probs, dscores = softmaxLossAndGrads(scores, tgts)
        if not computeGrads:
            return preds, probs, loss

        # Backward pass through the output layer.
        dWa = np.dot(hidden.T, dscores)
        dhidden = np.dot(dscores, self.Wa.T)

        # drelu is handed the upstream gradient and the activation output.
        dz = drelu(dhidden, hidden)

        # Backward pass through the hidden layer; the input gradient is
        # un-flattened to (example, position, self.wdim).
        dWz = np.dot(embedded.T, dz)
        dembedded = np.dot(dz, self.Wz.T).reshape((batch_size, -1, self.wdim))

        # Scatter the per-position gradients back onto the rows of wvec; a
        # token that occurs more than once accumulates every contribution.
        dwvec = np.zeros_like(self.wvec)
        for tokens, position_grads in zip(ctxtoks, dembedded):
            for tok, dvec in zip(tokens, position_grads):
                dwvec[tok] += dvec

        # Each gradient gets ``self.reg`` times its parameter added; for wvec
        # only the entries whose accumulated gradient is nonzero receive it.
        touched = dwvec != 0
        grads = {
            'Wa': dWa + self.reg * self.Wa,
            'Wz': dWz + self.reg * self.Wz,
            'wvec': dwvec + self.reg * self.wvec * touched,
        }

        return preds, probs, loss, grads
Exemple #2
0
    def lossAndGrads(self, batchAndTargets, computeGrads=True):
        """Run the forward pass on a batch and, if requested, the backward pass.

        Args:
            batchAndTargets: sized sequence of ``(context_tokens, target)``
                pairs; the context tokens are used as row indices into
                ``self.wvec``. All examples presumably have the same number
                of context tokens -- verify against the caller.
            computeGrads: set to false to get only predictions,
                probabilities and loss.

        Returns:
            A tuple ``(preds, probs, loss)``; when ``computeGrads`` is true a
            fourth element ``grads`` is appended, a dict with the keys
            ``'Wa'``, ``'Wz'`` and ``'wvec'``.
        """
        n_examples = len(batchAndTargets)
        ctxtoks = [ctx for ctx, _tgt in batchAndTargets]
        tgts = [tgt for _ctx, tgt in batchAndTargets]

        # Forward: one flattened row of looked-up vectors per example.
        flat_ctx = self.wvec[ctxtoks, :].reshape((n_examples, -1))
        pre_act = np.dot(flat_ctx, self.Wz)
        act = relu(pre_act)
        scores = np.dot(act, self.Wa)
        preds = np.argmax(scores, axis=1)

        loss, probs, dscores = softmaxLossAndGrads(scores, tgts)
        outputs = (preds, probs, loss)
        if not computeGrads:
            return outputs

        # Backward: output layer first, then through relu to the hidden layer.
        dWa = np.dot(act.T, dscores)
        d_act = np.dot(dscores, self.Wa.T)
        d_pre_act = drelu(d_act, act)
        dWz = np.dot(flat_ctx.T, d_pre_act)

        # Gradient w.r.t. the looked-up vectors, restored to one
        # self.wdim-sized slice per (example, position).
        d_ctx = np.dot(d_pre_act, self.Wz.T)
        d_ctx = d_ctx.reshape((n_examples, -1, self.wdim))

        # Accumulate into the rows of wvec that were looked up; repeated
        # tokens sum their contributions.
        dwvec = np.zeros_like(self.wvec)
        for row, toks in enumerate(ctxtoks):
            for pos, tok in enumerate(toks):
                dwvec[tok] += d_ctx[row, pos]

        # The self.reg term is added everywhere for Wa and Wz, but for wvec
        # only where the accumulated gradient is nonzero.
        grads = dict(
            Wa=dWa + self.reg * self.Wa,
            Wz=dWz + self.reg * self.Wz,
            wvec=dwvec + self.reg * self.wvec * (dwvec != 0),
        )

        return outputs + (grads,)
    def lossAndGrads(self, batchAndTargets, computeGrads=True):
        """Score a batch with the attention-weighted model and optionally backpropagate.

        Args:
            batchAndTargets: sized sequence of ``(context_tokens, target)``
                pairs. The context tokens index rows of ``self.wvec``; every
                example is assumed to carry the same number of tokens so the
                lookup forms a 3-D array -- TODO confirm with callers.
            computeGrads: when false, return right after the loss and skip
                the backward pass.

        Returns:
            ``(preds, probs, loss)`` when ``computeGrads`` is false, otherwise
            ``(preds, probs, loss, grads)`` where ``grads`` maps ``'W'``,
            ``'attn'`` and ``'wvec'`` to the gradient of that parameter.
        """
        # Not used further down; kept so inputs without a length are still
        # rejected before anything else happens.
        batch_size = len(batchAndTargets)
        ctxtoks = [tokens for tokens, _ in batchAndTargets]
        tgts = [target for _, target in batchAndTargets]

        # Forward pass. The lookup is indexed (example, position, feature);
        # moving the position axis last lets np.dot contract it with
        # self.attn, leaving one combined vector per example.
        context = self.wvec[ctxtoks, :].transpose((0, 2, 1))
        attnvec = np.dot(context, self.attn)
        scores = np.dot(attnvec, self.W)
        preds = np.argmax(scores, axis=1)

        loss, probs, dscores = softmaxLossAndGrads(scores, tgts)
        if not computeGrads:
            return preds, probs, loss

        # Backward pass through the output layer.
        dW = np.dot(attnvec.T, dscores)
        dattnvec = np.dot(dscores, self.W.T)

        # attnvec = context . attn, so the gradient for attn sums over the
        # example and feature axes, and the gradient for context is the
        # upstream gradient scaled by each position's weight.
        dattn = np.tensordot(context, dattnvec, axes=([0, 1], [0, 1]))
        dcontext = dattnvec[..., np.newaxis] * self.attn

        # Scatter back onto the rows of wvec. Each per-example slice is
        # (feature, position), so it is transposed to walk it one position
        # at a time; repeated tokens accumulate.
        dwvec = np.zeros_like(self.wvec)
        for tokens, dexample in zip(ctxtoks, dcontext):
            for tok, dvec in zip(tokens, dexample.T):
                dwvec[tok] += dvec

        # Each gradient gets ``self.reg`` times its parameter added; for wvec
        # only the entries whose accumulated gradient is nonzero receive it.
        touched = dwvec != 0
        grads = {
            'W': dW + self.reg * self.W,
            'attn': dattn + self.reg * self.attn,
            'wvec': dwvec + self.reg * self.wvec * touched,
        }

        return preds, probs, loss, grads
Exemple #4
0
    def lossAndGrads(self, batchAndTargets, computeGrads=True):
        """Run the attention-weighted forward pass and, if requested, the backward pass.

        Args:
            batchAndTargets: sized sequence of ``(context_tokens, target)``
                pairs; the context tokens are used as row indices into
                ``self.wvec``. All examples presumably have the same number
                of context tokens -- verify against the caller.
            computeGrads: set to false to get only predictions,
                probabilities and loss.

        Returns:
            A tuple ``(preds, probs, loss)``; when ``computeGrads`` is true a
            fourth element ``grads`` is appended, a dict with the keys
            ``'W'``, ``'attn'`` and ``'wvec'``.
        """
        # The length itself is unused below; the call is retained so that
        # un-sized inputs keep failing up front.
        n_examples = len(batchAndTargets)
        ctxtoks = [ctx for ctx, _tgt in batchAndTargets]
        tgts = [tgt for _ctx, tgt in batchAndTargets]

        # Forward: looked-up vectors with the last two axes swapped, so the
        # final axis (context position) is what self.attn is contracted with.
        looked_up = self.wvec[ctxtoks, :]
        context = np.transpose(looked_up, (0, 2, 1))
        attnvec = np.dot(context, self.attn)
        scores = np.dot(attnvec, self.W)
        preds = np.argmax(scores, axis=1)

        loss, probs, dscores = softmaxLossAndGrads(scores, tgts)
        outputs = (preds, probs, loss)
        if not computeGrads:
            return outputs

        # Backward: output layer, then the position-weighting step.
        dW = np.dot(attnvec.T, dscores)
        d_attnvec = np.dot(dscores, self.W.T)
        d_attn = np.tensordot(context, d_attnvec, axes=([0, 1], [0, 1]))
        # Broadcast the upstream gradient over positions, scaled by attn.
        d_context = d_attnvec[..., None] * self.attn

        # Accumulate into the rows of wvec that were looked up; the position
        # index is the last axis of d_context. Repeated tokens sum.
        dwvec = np.zeros_like(self.wvec)
        for row, toks in enumerate(ctxtoks):
            for pos, tok in enumerate(toks):
                dwvec[tok] += d_context[row, :, pos]

        # The self.reg term is added everywhere for W and attn, but for wvec
        # only where the accumulated gradient is nonzero.
        grads = dict(
            W=dW + self.reg * self.W,
            attn=d_attn + self.reg * self.attn,
            wvec=dwvec + self.reg * self.wvec * (dwvec != 0),
        )

        return outputs + (grads,)