Esempio n. 1
0
    def _pairwise_gradients(self, pxs, nxs):
        """Compute margin-ranking gradients for paired positive/negative triples.

        Both batches are scored via ``self._scores`` and passed through
        ``self.af.f``. Only pairs with ``nscores + self.margin > pscores``
        (margin violations) contribute to the gradients.

        Side effects:
            Sets ``self.nviolations`` to the number of violating pairs.

        Args:
            pxs: positive examples, in the format ``unzip_triples`` accepts.
            nxs: negative examples, aligned index-by-index with ``pxs``.

        Returns:
            ``None`` when no pair violates the margin; otherwise a dict
            ``{'E': (ge, eidx), 'R': (gr, ridx)}`` with the gradient rows and
            the indices they belong to.
        """
        # indices of positive examples
        sp, pp, op = unzip_triples(pxs)
        # indices of negative examples
        sn, pn, on = unzip_triples(nxs)

        pscores = self.af.f(self._scores(sp, pp, op))
        nscores = self.af.f(self._scores(sn, pn, on))

        # find examples that violate the margin
        ind = np.where(nscores + self.margin > pscores)[0]
        self.nviolations = len(ind)
        if len(ind) == 0:
            return

        # restrict everything to the violating pairs
        sp, sn = list(sp[ind]), list(sn[ind])
        op, on = list(op[ind]), list(on[ind])
        pp, pn = list(pp[ind]), list(pn[ind])
        # np.newaxis turns the per-example factors into column vectors so
        # they broadcast across the embedding components
        gpscores = -self.af.g_given_f(pscores[ind])[:, np.newaxis]
        gnscores = self.af.g_given_f(nscores[ind])[:, np.newaxis]

        # relation gradients
        # NOTE: the former timing instrumentation evaluated ccorr/cconv one
        # extra time each and threw the result away; it has been removed.
        ridx, Sm, n = grad_sum_matrix(pp + pn)
        grp = gpscores * ccorr(self.E[sp], self.E[op])
        grn = gnscores * ccorr(self.E[sn], self.E[on])

        # Sm.dot sums the contributions per relation; dividing by n averages
        # them before the regularisation term is added.
        gr = Sm.dot(np.vstack((grp, grn))) / n
        gr += self.rparam * self.R[ridx]

        # entity gradients (no regularisation term is added here)
        eidx, Sm, n = grad_sum_matrix(sp + sn + op + on)
        geip = gpscores * ccorr(self.R[pp], self.E[op])
        gein = gnscores * ccorr(self.R[pn], self.E[on])
        gejp = gpscores * cconv(self.E[sp], self.R[pp])
        gejn = gnscores * cconv(self.E[sn], self.R[pn])
        ge = Sm.dot(np.vstack((geip, gein, gejp, gejn))) / n

        return {'E': (ge, eidx), 'R': (gr, ridx)}
Esempio n. 2
0
    def _pairwise_gradients(self, pxs, nxs):
        """Build ``E``/``R`` gradients from the margin-violating pairs of a batch.

        A pair violates the margin when ``nscores + self.margin > pscores``.
        Only the L1 variant (``self.l1`` truthy) is implemented.

        Side effects:
            Sets ``self.nviolations`` to the number of violating pairs.

        Returns:
            ``None`` if nothing violates the margin, else
            ``{'E': (ge, eidx), 'R': (gr, ridx)}``. When
            ``self.updateOffsetE > 0`` the entity part is truncated and the
            relation part is replaced by empty lists.

        Raises:
            NotImplementedError: if ``self.l1`` is falsy.
        """
        pos_s, pos_p, pos_o = unzip_triples(pxs)
        neg_s, neg_p, neg_o = unzip_triples(nxs)

        pos_scores = self._scores(pos_s, pos_p, pos_o)
        neg_scores = self._scores(neg_s, neg_p, neg_o)
        violators = np.where(neg_scores + self.margin > pos_scores)[0]

        self.nviolations = len(violators)
        if not len(violators):
            # every pair in the batch already satisfies the margin
            return

        # keep only the violating pairs
        pos_s, neg_s = list(pos_s[violators]), list(neg_s[violators])
        pos_p, neg_p = list(pos_p[violators]), list(neg_p[violators])
        pos_o, neg_o = list(pos_o[violators]), list(neg_o[violators])

        # residuals object - relation - subject
        pos_grad = self.E[pos_o] - self.R[pos_p] - self.E[pos_s]
        neg_grad = self.E[neg_o] - self.R[neg_p] - self.E[neg_s]

        if not self.l1:
            raise NotImplementedError()
        pos_grad = np.sign(-pos_grad)
        neg_grad = -np.sign(-neg_grad)

        # entity gradients: subjects get +grad, objects get -grad
        eidx, ent_sum, ent_cnt = grad_sum_matrix(pos_s + pos_o + neg_s + neg_o)
        ent_terms = np.vstack((pos_grad, -pos_grad, neg_grad, -neg_grad))
        ge = ent_sum.dot(ent_terms) / ent_cnt

        # relation gradients
        ridx, rel_sum, rel_cnt = grad_sum_matrix(pos_p + neg_p)
        gr = rel_sum.dot(np.vstack((pos_grad, neg_grad))) / rel_cnt

        if self.updateOffsetE > 0:
            # Keep rows from the first entity index >= the offset onward.
            # NOTE(review): this relies on eidx being sorted ascending —
            # confirm against grad_sum_matrix.
            at_or_above = np.where(eidx >= self.updateOffsetE)[0]
            if len(at_or_above) > 0:
                first = at_or_above[0]
                ge = ge[first:]
                eidx = eidx[first:]
            else:
                ge = []
                eidx = []
            # relations receive no update in this mode
            gr = []
            ridx = []

        return {'E': (ge, eidx), 'R': (gr, ridx)}
Esempio n. 3
0
    def _pairwise_gradients(self, pxs, nxs):
        """Build ``E``/``R`` gradients from the margin-violating pairs of a batch.

        Scores are passed through ``self.af.f``; a pair violates the margin
        when ``nscores + self.margin > pscores``. ``ccorr`` and ``cconv`` are
        helpers defined elsewhere (presumably circular correlation and
        convolution — confirm).

        Side effects:
            Sets ``self.nviolations`` to the number of violating pairs.

        Returns:
            ``None`` if nothing violates the margin, else
            ``{'E': (ge, eidx), 'R': (gr, ridx)}``. When
            ``self.updateOffsetE > 0`` the entity part is truncated and the
            relation part is replaced by empty lists.
        """
        pos_s, pos_p, pos_o = unzip_triples(pxs)
        neg_s, neg_p, neg_o = unzip_triples(nxs)

        pos_act = self.af.f(self._scores(pos_s, pos_p, pos_o))
        neg_act = self.af.f(self._scores(neg_s, neg_p, neg_o))

        violators = np.where(neg_act + self.margin > pos_act)[0]
        self.nviolations = len(violators)
        if not len(violators):
            return

        # keep only the violating pairs
        pos_s, neg_s = list(pos_s[violators]), list(neg_s[violators])
        pos_o, neg_o = list(pos_o[violators]), list(neg_o[violators])
        pos_p, neg_p = list(pos_p[violators]), list(neg_p[violators])
        # column vectors so they broadcast across the embedding components
        pos_coef = -self.af.g_given_f(pos_act[violators])[:, np.newaxis]
        neg_coef = self.af.g_given_f(neg_act[violators])[:, np.newaxis]

        # relation gradients, averaged per relation and then regularised
        ridx, rel_sum, rel_cnt = grad_sum_matrix(pos_p + neg_p)
        rel_terms = np.vstack((
            pos_coef * ccorr(self.E[pos_s], self.E[pos_o]),
            neg_coef * ccorr(self.E[neg_s], self.E[neg_o]),
        ))
        gr = rel_sum.dot(rel_terms) / rel_cnt
        gr += self.rparam * self.R[ridx]

        # entity gradients (no regularisation term is added here)
        eidx, ent_sum, ent_cnt = grad_sum_matrix(pos_s + neg_s + pos_o + neg_o)
        ent_terms = np.vstack((
            pos_coef * ccorr(self.R[pos_p], self.E[pos_o]),
            neg_coef * ccorr(self.R[neg_p], self.E[neg_o]),
            pos_coef * cconv(self.E[pos_s], self.R[pos_p]),
            neg_coef * cconv(self.E[neg_s], self.R[neg_p]),
        ))
        ge = ent_sum.dot(ent_terms) / ent_cnt

        if self.updateOffsetE > 0:
            # Keep rows from the first entity index >= the offset onward.
            # NOTE(review): this relies on eidx being sorted ascending —
            # confirm against grad_sum_matrix.
            at_or_above = np.where(eidx >= self.updateOffsetE)[0]
            if len(at_or_above) > 0:
                first = at_or_above[0]
                ge = ge[first:]
                eidx = eidx[first:]
            else:
                ge = []
                eidx = []
            # relations receive no update in this mode
            gr = []
            ridx = []

        return {'E': (ge, eidx), 'R': (gr, ridx)}
Esempio n. 4
0
    def pairwise_gradients(self, pxs, nxs):
        """Build ``E``/``R`` gradients from the margin-violating pairs of a batch.

        A pair violates the margin when ``nscores + self.margin > pscores``.
        Only the L1 variant (``self.l1`` truthy) is implemented.

        Side effects:
            Sets ``self.nviolations`` to the number of violating pairs.

        Returns:
            ``None`` if nothing violates the margin, else
            ``{'E': (ge, eidx), 'R': (gr, ridx)}``.

        Raises:
            NotImplementedError: if ``self.l1`` is falsy.
        """
        # split positive and negative triples into subject/predicate/object
        pos_s, pos_p, pos_o = unzip_triples(pxs)
        neg_s, neg_p, neg_o = unzip_triples(nxs)

        # score both batches and locate the pairs that violate the margin
        pos_scores = self._scores(pos_s, pos_p, pos_o)
        neg_scores = self._scores(neg_s, neg_p, neg_o)
        violators = np.where(neg_scores + self.margin > pos_scores)[0]

        self.nviolations = len(violators)
        if not len(violators):
            # nothing violates the margin, so there is nothing to update
            return

        # keep only the violating pairs
        pos_s, neg_s = list(pos_s[violators]), list(neg_s[violators])
        pos_o, neg_o = list(pos_o[violators]), list(neg_o[violators])
        pos_p, neg_p = list(pos_p[violators]), list(neg_p[violators])

        # residuals subject + relation - object
        pos_grad = self.E[pos_s] + self.R[pos_p] - self.E[pos_o]
        neg_grad = self.E[neg_s] + self.R[neg_p] - self.E[neg_o]

        if not self.l1:
            raise NotImplementedError()
        # L1: the gradient is the element-wise sign of the residual
        # (np.sign yields -1, 0 or 1), negated for the negative triples
        pos_grad = np.sign(pos_grad)
        neg_grad = -np.sign(neg_grad)

        # entity gradients: subjects get +grad, objects get -grad
        eidx, ent_sum, ent_cnt = grad_sum_matrix(pos_s + pos_o + neg_s + neg_o)
        ent_terms = np.vstack((pos_grad, -pos_grad, neg_grad, -neg_grad))
        ge = ent_sum.dot(ent_terms) / ent_cnt

        # relation gradients
        ridx, rel_sum, rel_cnt = grad_sum_matrix(pos_p + neg_p)
        gr = rel_sum.dot(np.vstack((pos_grad, neg_grad))) / rel_cnt
        return {'E': (ge, eidx), 'R': (gr, ridx)}
Esempio n. 5
0
    def _pairwise_gradients(self, pxs, nxs):
        """Build ``E``/``R`` gradients from the margin-violating pairs of a batch.

        Scores are passed through ``self.af.f``; a pair violates the margin
        when ``nscores + self.margin > pscores``. ``ccorr`` and ``cconv`` are
        helpers defined elsewhere (presumably circular correlation and
        convolution — confirm).

        Side effects:
            Sets ``self.nviolations`` to the number of violating pairs.

        Returns:
            ``None`` if nothing violates the margin, else
            ``{'E': (ge, eidx), 'R': (gr, ridx)}``.
        """
        pos_s, pos_p, pos_o = unzip_triples(pxs)
        neg_s, neg_p, neg_o = unzip_triples(nxs)

        pos_act = self.af.f(self._scores(pos_s, pos_p, pos_o))
        neg_act = self.af.f(self._scores(neg_s, neg_p, neg_o))

        violators = np.where(neg_act + self.margin > pos_act)[0]
        self.nviolations = len(violators)
        if not len(violators):
            return

        # keep only the violating pairs
        pos_s, neg_s = list(pos_s[violators]), list(neg_s[violators])
        pos_o, neg_o = list(pos_o[violators]), list(neg_o[violators])
        pos_p, neg_p = list(pos_p[violators]), list(neg_p[violators])
        # column vectors so they broadcast across the embedding components
        pos_coef = -self.af.g_given_f(pos_act[violators])[:, np.newaxis]
        neg_coef = self.af.g_given_f(neg_act[violators])[:, np.newaxis]

        # relation gradients, averaged per relation and then regularised
        ridx, rel_sum, rel_cnt = grad_sum_matrix(pos_p + neg_p)
        rel_terms = np.vstack((
            pos_coef * ccorr(self.E[pos_s], self.E[pos_o]),
            neg_coef * ccorr(self.E[neg_s], self.E[neg_o]),
        ))
        gr = rel_sum.dot(rel_terms) / rel_cnt
        gr += self.rparam * self.R[ridx]

        # entity gradients (no regularisation term is added here)
        eidx, ent_sum, ent_cnt = grad_sum_matrix(pos_s + neg_s + pos_o + neg_o)
        ent_terms = np.vstack((
            pos_coef * ccorr(self.R[pos_p], self.E[pos_o]),
            neg_coef * ccorr(self.R[neg_p], self.E[neg_o]),
            pos_coef * cconv(self.E[pos_s], self.R[pos_p]),
            neg_coef * cconv(self.E[neg_s], self.R[neg_p]),
        ))
        ge = ent_sum.dot(ent_terms) / ent_cnt

        return {'E': (ge, eidx), 'R': (gr, ridx)}
Esempio n. 6
0
    def _pairwise_gradients(self, pxs, nxs):
        """Build ``E``/``R`` gradients from the margin-violating pairs of a batch.

        A pair violates the margin when ``nscores + self.margin > pscores``.
        Only the L1 variant (``self.l1`` truthy) is implemented.

        Side effects:
            Sets ``self.nviolations`` to the number of violating pairs.

        Returns:
            ``None`` if nothing violates the margin, else
            ``{'E': (ge, eidx), 'R': (gr, ridx)}``.

        Raises:
            NotImplementedError: if ``self.l1`` is falsy.
        """
        pos_s, pos_p, pos_o = unzip_triples(pxs)
        neg_s, neg_p, neg_o = unzip_triples(nxs)

        pos_scores = self._scores(pos_s, pos_p, pos_o)
        neg_scores = self._scores(neg_s, neg_p, neg_o)
        violators = np.where(neg_scores + self.margin > pos_scores)[0]

        self.nviolations = len(violators)
        if not len(violators):
            # every pair in the batch already satisfies the margin
            return

        # keep only the violating pairs
        pos_s, neg_s = list(pos_s[violators]), list(neg_s[violators])
        pos_p, neg_p = list(pos_p[violators]), list(neg_p[violators])
        pos_o, neg_o = list(pos_o[violators]), list(neg_o[violators])

        # residuals object - relation - subject
        pos_grad = self.E[pos_o] - self.R[pos_p] - self.E[pos_s]
        neg_grad = self.E[neg_o] - self.R[neg_p] - self.E[neg_s]

        if not self.l1:
            raise NotImplementedError()
        pos_grad = np.sign(-pos_grad)
        neg_grad = -np.sign(-neg_grad)

        # entity gradients: subjects get +grad, objects get -grad
        eidx, ent_sum, ent_cnt = grad_sum_matrix(pos_s + pos_o + neg_s + neg_o)
        ent_terms = np.vstack((pos_grad, -pos_grad, neg_grad, -neg_grad))
        ge = ent_sum.dot(ent_terms) / ent_cnt

        # relation gradients
        ridx, rel_sum, rel_cnt = grad_sum_matrix(pos_p + neg_p)
        gr = rel_sum.dot(np.vstack((pos_grad, neg_grad))) / rel_cnt
        return {'E': (ge, eidx), 'R': (gr, ridx)}
Esempio n. 7
0
    def gradients(self, xys):
        """Compute logistic-loss gradients for the labelled triples in ``xys``.

        ``xys`` is iterated as ``(triple, label)`` pairs, where each triple
        unpacks as ``(s, o, p)``.

        Side effects:
            Sets ``self.loss`` to ``sum(logaddexp(0, -y * score))``.

        Returns:
            ``{'E': (ge, eidx), 'W': (gw, pidx)}`` with the gradient rows for
            ``self.E`` and ``self.W`` and the indices they belong to.
        """
        ss, ps, os, ys = unzip_triples(xys, with_ys=True)

        # The memoised helpers avoid recomputing a product when the same
        # triple occurs more than once in the batch.
        @memoized
        def subj_times_w(s, o, p):
            return dot(self.E[s], self.W[p])

        @memoized
        def w_times_obj(s, o, p):
            return dot(self.W[p], self.E[o])

        EW = np.array([subj_times_w(*triple) for (triple, _) in xys])
        WE = np.array([w_times_obj(*triple) for (triple, _) in xys])

        yscores = ys * np.sum(self.E[ss] * WE, axis=1)
        self.loss = np.sum(np.logaddexp(0, -yscores))
        # per-example factor, as a column so it broadcasts over components
        fs = -(ys * af.Sigmoid.f(-yscores))[:, np.newaxis]

        # one ncomp x ncomp gradient per predicate present in the batch
        pidx = np.unique(ps)
        gw = np.zeros((len(pidx), self.ncomp, self.ncomp))
        for slot, p in enumerate(pidx):
            rows = np.where(ps == p)[0]
            if len(rows) == 1:
                gw[slot] += fs[rows] * np.outer(self.E[ss[rows]],
                                                self.E[os[rows]])
            else:
                weighted_objs = fs[rows] * self.E[os[rows]]
                gw[slot] += dot(self.E[ss[rows]].T, weighted_objs) / len(rows)
            gw[slot] += self.rparam * self.W[p]

        # entity gradients: subject rows first, then object rows
        eidx, ent_sum, ent_cnt = grad_sum_matrix(list(ss) + list(os))
        ge = ent_sum.dot(np.vstack((fs * WE, fs * EW))) / ent_cnt
        ge += self.rparam * self.E[eidx]

        return {'E': (ge, eidx), 'W': (gw, pidx)}
Esempio n. 8
0
    def _gradients(self, xys):
        """Compute logistic-loss gradients for the labelled triples in ``xys``.

        ``ccorr`` and ``cconv`` are helpers defined elsewhere (presumably
        circular correlation and convolution — confirm).

        Side effects:
            Sets ``self.loss`` to ``sum(logaddexp(0, -y * score))``.

        Returns:
            ``{'E': (ge, eidx), 'R': (gr, ridx)}`` with the gradient rows for
            ``self.E`` and ``self.R`` and the indices they belong to.
        """
        ss, ps, os, ys = unzip_triples(xys, with_ys=True)

        signed_scores = ys * self._scores(ss, ps, os)
        self.loss = np.sum(np.logaddexp(0, -signed_scores))
        # per-example factor, as a column so it broadcasts over components
        coef = -(ys * af.Sigmoid.f(-signed_scores))[:, np.newaxis]

        # relation gradients, averaged per relation and then regularised
        ridx, rel_sum, rel_cnt = grad_sum_matrix(ps)
        rel_terms = coef * ccorr(self.E[ss], self.E[os])
        gr = rel_sum.dot(rel_terms) / rel_cnt
        gr += self.rparam * self.R[ridx]

        # entity gradients: subject rows first, then object rows
        eidx, ent_sum, ent_cnt = grad_sum_matrix(list(ss) + list(os))
        subj_terms = coef * ccorr(self.R[ps], self.E[os])
        obj_terms = coef * cconv(self.E[ss], self.R[ps])
        ge = ent_sum.dot(np.vstack((subj_terms, obj_terms))) / ent_cnt
        ge += self.rparam * self.E[eidx]

        return {'E': (ge, eidx), 'R': (gr, ridx)}
Esempio n. 9
0
    def _gradients(self, xys):
        """Compute logistic-loss gradients for the labelled triples in ``xys``.

        ``ccorr`` and ``cconv`` are helpers defined elsewhere (presumably
        circular correlation and convolution — confirm).

        Side effects:
            Sets ``self.loss`` to ``sum(logaddexp(0, -y * score))``.

        Returns:
            ``{'E': (ge, eidx), 'R': (gr, ridx)}`` with the gradient rows for
            ``self.E`` and ``self.R`` and the indices they belong to.
        """
        ss, ps, os, ys = unzip_triples(xys, with_ys=True)

        signed_scores = ys * self._scores(ss, ps, os)
        self.loss = np.sum(np.logaddexp(0, -signed_scores))
        # per-example factor, as a column so it broadcasts over components
        coef = -(ys * af.Sigmoid.f(-signed_scores))[:, np.newaxis]

        # relation gradients, averaged per relation and then regularised
        ridx, rel_sum, rel_cnt = grad_sum_matrix(ps)
        rel_terms = coef * ccorr(self.E[ss], self.E[os])
        gr = rel_sum.dot(rel_terms) / rel_cnt
        gr += self.rparam * self.R[ridx]

        # entity gradients: subject rows first, then object rows
        eidx, ent_sum, ent_cnt = grad_sum_matrix(list(ss) + list(os))
        subj_terms = coef * ccorr(self.R[ps], self.E[os])
        obj_terms = coef * cconv(self.E[ss], self.R[ps])
        ge = ent_sum.dot(np.vstack((subj_terms, obj_terms))) / ent_cnt
        ge += self.rparam * self.E[eidx]

        return {'E': (ge, eidx), 'R': (gr, ridx)}
Esempio n. 10
0
    def _gradients(self, xys):
        """Compute logistic-loss gradients for the labelled triples in ``xys``.

        ``xys`` is iterated as ``(triple, label)`` pairs, where each triple
        unpacks as ``(s, o, p)``.

        Side effects:
            Sets ``self.loss`` to ``sum(logaddexp(0, -y * score))``.

        Returns:
            ``{'E': (ge, eidx), 'W': (gw, pidx)}`` with the gradient rows for
            ``self.E`` and ``self.W`` and the indices they belong to.
        """
        ss, ps, os, ys = unzip_triples(xys, with_ys=True)

        # The memoised helpers avoid recomputing a product when the same
        # triple occurs more than once in the batch.
        @memoized
        def subj_times_w(s, o, p):
            return dot(self.E[s], self.W[p])

        @memoized
        def w_times_obj(s, o, p):
            return dot(self.W[p], self.E[o])

        EW = np.array([subj_times_w(*triple) for (triple, _) in xys])
        WE = np.array([w_times_obj(*triple) for (triple, _) in xys])

        yscores = ys * np.sum(self.E[ss] * WE, axis=1)
        self.loss = np.sum(np.logaddexp(0, -yscores))
        # per-example factor, as a column so it broadcasts over components
        fs = -(ys * af.Sigmoid.f(-yscores))[:, np.newaxis]

        # one ncomp x ncomp gradient per predicate present in the batch
        pidx = np.unique(ps)
        gw = np.zeros((len(pidx), self.ncomp, self.ncomp))
        for slot, p in enumerate(pidx):
            rows = np.where(ps == p)[0]
            if len(rows) == 1:
                gw[slot] += fs[rows] * np.outer(self.E[ss[rows]],
                                                self.E[os[rows]])
            else:
                weighted_objs = fs[rows] * self.E[os[rows]]
                gw[slot] += dot(self.E[ss[rows]].T, weighted_objs) / len(rows)
            gw[slot] += self.rparam * self.W[p]

        # entity gradients: subject rows first, then object rows
        eidx, ent_sum, ent_cnt = grad_sum_matrix(list(ss) + list(os))
        ge = ent_sum.dot(np.vstack((fs * WE, fs * EW))) / ent_cnt
        ge += self.rparam * self.E[eidx]

        return {'E': (ge, eidx), 'W': (gw, pidx)}
Esempio n. 11
0
    def _pairwise_gradients(self, pxs, nxs):
        """Compute margin-ranking gradients for paired positive/negative triples.

        ``pxs`` and ``nxs`` are iterated as ``(triple, label)`` pairs, where
        each triple unpacks as ``(s, o, p)``. Scores are passed through
        ``self.af.f``; a pair violates the margin when
        ``nscores + self.margin > pscores``.

        Side effects:
            Sets ``self.nviolations`` to the number of violating pairs.

        Returns:
            ``None`` if nothing violates the margin, else
            ``{'E': (ge, eidx), 'W': (gw, pidx)}`` with the gradient rows for
            ``self.E`` and ``self.W`` and the indices they belong to.
        """
        # indices of positive examples
        sp, pp, op = unzip_triples(pxs)
        # indices of negative examples
        sn, pn, on = unzip_triples(nxs)

        # Keep the raw (s, o, p) triples as plain tuples. Stacking triples
        # and labels into a single array would create a ragged array, which
        # current NumPy releases refuse to build.
        ptriples = [x for (x, _) in pxs]
        ntriples = [x for (x, _) in nxs]

        # The memoised helpers avoid recomputing a product when the same
        # triple occurs more than once in the batch.
        @memoized
        def _EW(s, o, p):
            return dot(self.E[s], self.W[p])

        @memoized
        def _WE(s, o, p):
            return dot(self.W[p], self.E[o])

        WEp = np.array([_WE(*x) for x in ptriples])
        WEn = np.array([_WE(*x) for x in ntriples])
        pscores = self.af.f(np.sum(self.E[sp] * WEp, axis=1))
        nscores = self.af.f(np.sum(self.E[sn] * WEn, axis=1))

        # find examples that violate the margin
        ind = np.where(nscores + self.margin > pscores)[0]
        self.nviolations = len(ind)
        if len(ind) == 0:
            return

        # per-example factors, as columns so they broadcast over components
        gpscores = -self.af.g_given_f(pscores)[:, np.newaxis]
        gnscores = self.af.g_given_f(nscores)[:, np.newaxis]

        # relation gradients
        # NOTE(review): these use every example in the batch, not only the
        # margin violators selected by ``ind`` — confirm this is intended.
        pidx = np.unique(list(pp) + list(pn))
        gw = np.zeros((len(pidx), self.ncomp, self.ncomp))
        for pid, p in enumerate(pidx):
            # np.where returns a tuple of index arrays; take element [0] so
            # that len() is the number of matching examples. Measuring the
            # tuple itself always gave 1, which made the old assert and
            # empty check vacuous and fixed the divisor below at 2.
            ppidx = np.where(pp == p)[0]
            npidx = np.where(pn == p)[0]
            gw[pid] += dot(self.E[sp[ppidx]].T,
                           gpscores[ppidx] * self.E[op[ppidx]])
            gw[pid] += dot(self.E[sn[npidx]].T,
                           gnscores[npidx] * self.E[on[npidx]])
            gw[pid] += self.rparam * self.W[p]
            # p comes from the union of pp and pn, so the count is >= 1
            gw[pid] /= (len(ppidx) + len(npidx))

        # entity gradients (violating pairs only)
        sp, sn = list(sp[ind]), list(sn[ind])
        op, on = list(op[ind]), list(on[ind])
        gpscores, gnscores = gpscores[ind], gnscores[ind]
        EWp = np.array([_EW(*ptriples[i]) for i in ind])
        EWn = np.array([_EW(*ntriples[i]) for i in ind])
        eidx, Sm, n = grad_sum_matrix(sp + sn + op + on)
        ge = (Sm.dot(
            np.vstack(
                (gpscores * WEp[ind], gnscores * WEn[ind], gpscores * EWp,
                 gnscores * EWn))) + self.rparam * self.E[eidx]) / n

        return {'E': (ge, eidx), 'W': (gw, pidx)}
Esempio n. 12
0
    def _pairwise_gradients(self, pxs, nxs):
        """Compute margin-ranking gradients for paired positive/negative triples.

        A pair violates the margin when ``nscores + self.margin > pscores``.
        Gradients are built from the violating pairs only, using the sign of
        the residual when ``self.l1`` is truthy and the residual itself
        otherwise.

        Side effects:
            * Increments ``self.E.violations[u]`` once per violating pair for
              every distinct entity ``u`` appearing in that pair.
            * Sets ``self.nviolations`` to the number of violating pairs.

        Returns:
            ``None`` if nothing violates the margin, else
            ``{'E': (ge, eidx), 'R': (gr, ridx)}`` with the gradient rows for
            ``self.E`` and ``self.R`` and the indices they belong to.
        """
        # indices of positive triples
        sp, pp, op = unzip_triples(pxs)
        # indices of negative triples
        sn, pn, on = unzip_triples(nxs)

        pscores = self._scores(sp, pp, op)
        nscores = self._scores(sn, pn, on)

        # pairs whose scores are not separated by at least the margin
        ind = np.where(nscores + self.margin > pscores)[0]

        # Count a violation for each distinct entity of a violating pair;
        # the set keeps an entity from being counted twice for one pair.
        for i in ind:
            for u in {sn[i], on[i], sp[i], op[i]}:
                self.E.violations[u] += 1

        self.nviolations = len(ind)

        # all examples in the batch satisfy the margin criterion
        if len(ind) == 0:
            return

        sp = list(sp[ind])
        sn = list(sn[ind])
        pp = list(pp[ind])
        pn = list(pn[ind])
        op = list(op[ind])
        on = list(on[ind])

        # residuals object - relation - subject
        pg = self.E[op] - self.R[pp] - self.E[sp]
        ng = self.E[on] - self.R[pn] - self.E[sn]

        if self.l1:
            # L1: use the element-wise sign. The positive residual is
            # negated first; the negative residual keeps its sign.
            pg = np.sign(-pg)
            ng = np.sign(ng)
        else:
            # otherwise use the residual itself, negated for the positive
            # triples and unchanged for the negative ones
            pg = -pg

        # Entity gradients: subjects get +grad, objects get -grad.
        # Sm.dot sums the stacked rows per entity in eidx and dividing by n
        # averages them.
        eidx, Sm, n = grad_sum_matrix(sp + op + sn + on)
        ge = Sm.dot(np.vstack((pg, -pg, ng, -ng))) / n

        # relation gradients
        ridx, Sm, n = grad_sum_matrix(pp + pn)
        gr = Sm.dot(np.vstack((pg, ng))) / n

        return {'E': (ge, eidx), 'R': (gr, ridx)}
Esempio n. 13
0
    def _pairwise_gradients(self, pxs, nxs):
        """Compute margin-ranking gradients for paired positive/negative triples.

        ``pxs`` and ``nxs`` are iterated as ``(triple, label)`` pairs, where
        each triple unpacks as ``(s, o, p)``. Scores are passed through
        ``self.af.f``; a pair violates the margin when
        ``nscores + self.margin > pscores``.

        Side effects:
            Sets ``self.nviolations`` to the number of violating pairs.

        Returns:
            ``None`` if nothing violates the margin, else
            ``{'E': (ge, eidx), 'W': (gw, pidx)}`` with the gradient rows for
            ``self.E`` and ``self.W`` and the indices they belong to.
        """
        # indices of positive examples
        sp, pp, op = unzip_triples(pxs)
        # indices of negative examples
        sn, pn, on = unzip_triples(nxs)

        # Keep the raw (s, o, p) triples as plain tuples. Stacking triples
        # and labels into a single array would create a ragged array, which
        # current NumPy releases refuse to build.
        ptriples = [x for (x, _) in pxs]
        ntriples = [x for (x, _) in nxs]

        # The memoised helpers avoid recomputing a product when the same
        # triple occurs more than once in the batch.
        @memoized
        def _EW(s, o, p):
            return dot(self.E[s], self.W[p])

        @memoized
        def _WE(s, o, p):
            return dot(self.W[p], self.E[o])

        WEp = np.array([_WE(*x) for x in ptriples])
        WEn = np.array([_WE(*x) for x in ntriples])
        pscores = self.af.f(np.sum(self.E[sp] * WEp, axis=1))
        nscores = self.af.f(np.sum(self.E[sn] * WEn, axis=1))

        # find examples that violate the margin
        ind = np.where(nscores + self.margin > pscores)[0]
        self.nviolations = len(ind)
        if len(ind) == 0:
            return

        # per-example factors, as columns so they broadcast over components
        gpscores = -self.af.g_given_f(pscores)[:, np.newaxis]
        gnscores = self.af.g_given_f(nscores)[:, np.newaxis]

        # relation gradients
        # NOTE(review): these use every example in the batch, not only the
        # margin violators selected by ``ind`` — confirm this is intended.
        pidx = np.unique(list(pp) + list(pn))
        gw = np.zeros((len(pidx), self.ncomp, self.ncomp))
        for pid, p in enumerate(pidx):
            # np.where returns a tuple of index arrays; take element [0] so
            # that len() is the number of matching examples. Measuring the
            # tuple itself always gave 1, which made the old assert and
            # empty check vacuous and fixed the divisor below at 2.
            ppidx = np.where(pp == p)[0]
            npidx = np.where(pn == p)[0]
            gw[pid] += dot(self.E[sp[ppidx]].T, gpscores[ppidx] * self.E[op[ppidx]])
            gw[pid] += dot(self.E[sn[npidx]].T, gnscores[npidx] * self.E[on[npidx]])
            gw[pid] += self.rparam * self.W[p]
            # p comes from the union of pp and pn, so the count is >= 1
            gw[pid] /= (len(ppidx) + len(npidx))

        # entity gradients (violating pairs only)
        sp, sn = list(sp[ind]), list(sn[ind])
        op, on = list(op[ind]), list(on[ind])
        gpscores, gnscores = gpscores[ind], gnscores[ind]
        EWp = np.array([_EW(*ptriples[i]) for i in ind])
        EWn = np.array([_EW(*ntriples[i]) for i in ind])
        eidx, Sm, n = grad_sum_matrix(sp + sn + op + on)
        ge = (Sm.dot(np.vstack((
            gpscores * WEp[ind], gnscores * WEn[ind],
            gpscores * EWp, gnscores * EWn
        ))) + self.rparam * self.E[eidx]) / n

        return {'E': (ge, eidx), 'W': (gw, pidx)}