Ejemplo n.º 1
0
    def _cross_prod(self):
        """Compute the cross product of the normalized matrix block by block.

        The entity table (``self.ent_table``), the attribute tables
        (``self.att_table``) and the key arrays (``self.kfkds``) are combined
        through the ``comp`` helpers; the joined matrix is never built here.

        Returns
        -------
        res
            If ``self.trans`` is false, the ``(ns, ns)`` array filled in by
            ``comp.expand_add`` (``ns`` is the length of the first key array).
            If ``self.trans`` is true, a square matrix with ``ds + sum(dr)``
            rows laid out as ``[entity | attribute 0 | attribute 1 | ...]``.
            It is assembled with ``scipy.sparse`` stacking when every
            attribute table is sparse, and is a dense ndarray otherwise.
        """
        s = self.ent_table
        r = self.att_table
        k = self.kfkds
        ns = k[0].shape[0]
        ds = s.shape[1]
        nr = [t.shape[0] for t in r]
        dr = [t.shape[1] for t in r]
        all_sparse = all(map(sp.issparse, r))

        if not self.trans:
            if s.size > 0:
                res = self._t_cross(s)
            else:
                res = np.zeros((ns, ns), dtype=float, order='C')
            if all_sparse:
                cross_r = [self._t_cross(t).toarray() for t in r]
            else:
                cross_r = [self._t_cross(t) for t in r]
            comp.expand_add(ns, len(k), k, cross_r, nr, res)
            return res

        # Per-table group vectors filled in by comp.group from an all-ones
        # row (presumably the number of entity rows per attribute row --
        # confirm against comp).
        other = np.ones((1, ns))
        v = [np.zeros((1, t.shape[0]), dtype=float) for t in r]
        comp.group(ns, len(k), 1, k, nr, other, v)

        if all_sparse:
            # The tables are accessed through .row/.col/.data, so they are
            # expected to expose the COO attributes.
            size = r[0].size
            data = np.empty(size)
            comp.multiply_sparse(size, r[0].row, r[0].data, np.sqrt(v[0]),
                                 data)
            diag_part = self._cross(
                sp.coo_matrix((data, (r[0].row, r[0].col))))
            if ds > 0:
                m = np.zeros((nr[0], ds))
                comp.group_left(ns, ds, s, k[0], m)
                # Off-diagonal blocks are p.T (top right) and p (bottom left).
                p = self._cross(r[0], m)
                s_part = self._cross(s)
                res = sp.vstack((np.hstack((s_part, p.T)),
                                 sp.hstack((p, diag_part))))
            else:
                res = diag_part

            # Multi-table join: append one block row/column per extra table.
            for i in range(1, len(k)):
                ps = []
                if ds > 0:
                    m = np.zeros((nr[i], ds))
                    comp.group_left(ns, ds, s, k[i], m)
                    ps += [self._cross(r[i], m)]

                size = r[i].size
                data = np.empty(size)
                comp.multiply_sparse(size, r[i].row, r[i].data,
                                     np.sqrt(v[i]), data)
                diag_part = self._cross(
                    sp.coo_matrix((data, (r[i].row, r[i].col))))

                for j in range(i):
                    ps += [r[i].tocsr()[k[i]].T.dot(r[j].tocsr()[k[j]])]

                res = sp.vstack(
                    (sp.hstack((res, sp.vstack([blk.T for blk in ps]))),
                     sp.hstack(ps + [diag_part])))
            return res

        # Dense attribute tables: fill a preallocated square matrix.
        nt = ds + sum(dr)
        res = np.empty((nt, nt))

        data = np.empty(r[0].shape, order='C')
        comp.multiply(nr[0], dr[0], r[0], v[0], data)
        res[ds:ds + dr[0], ds:ds + dr[0]] = self._cross(data)

        if ds > 0:
            m = np.zeros((nr[0], ds))
            comp.group_left(ns, ds, s, k[0], m)
            res[ds:ds + dr[0], :ds] = self._cross(r[0], m)
            res[:ds, ds:ds + dr[0]] = res[ds:ds + dr[0], :ds].T
            res[:ds, :ds] = self._cross(s)

        # Multi-table join.
        for i in range(1, len(k)):
            # Column range of table i. These offsets are needed even when
            # the entity table is empty, so compute them unconditionally.
            ni1 = ds + sum(dr[:i])
            ni2 = ni1 + dr[i]

            if ds > 0:
                m = np.zeros((nr[i], ds))
                comp.group_left(ns, ds, s, k[i], m)
                res[ni1:ni2, :ds] = self._cross(r[i], m)
                res[:ds, ni1:ni2] = res[ni1:ni2, :ds].T

            # Diagonal block of table i.
            data = np.empty(r[i].shape, order='C')
            comp.multiply(nr[i], dr[i], r[i], v[i], data)
            res[ni1:ni2, ni1:ni2] = self._cross(data)

            for j in range(i):
                dj1 = ds + sum(dr[:j])
                dj2 = dj1 + dr[j]

                if (ns * 1.0 / nr[j]) > (1 + nr[j] * 1.0 / dr[j]):
                    # Go through the (nr[i], nr[j]) key co-occurrence matrix
                    # instead of expanding both tables to ns rows.
                    # NOTE(review): m.T.dot(r[j]) is only shape-compatible
                    # when nr[i] == nr[j] unless comp fills m transposed --
                    # confirm.
                    m = np.zeros((nr[i], nr[j]), order='C')
                    comp.group_k_by_k(nr[i], nr[j], ns, k[i], k[j], m)
                    res[ni1:ni2, dj1:dj2] = r[i].T.dot(m.T.dot(r[j]))
                else:
                    res[ni1:ni2, dj1:dj2] = r[i][k[i]].T.dot(r[j][k[j]])
                res[dj1:dj2, ni1:ni2] = res[ni1:ni2, dj1:dj2].T
        return res
Ejemplo n.º 2
0
    def _cross_prod_w(self, w):
        """Compute the weighted cross product ``X * A * X.T``.

        ``A`` is a diagonal matrix and ``w`` holds its diagonal.

        Parameters
        ----------
        w : ndarray
            Diagonal of ``A``; converted to float. When ``self.trans`` is
            false it is sliced per column block (``ds`` entries for the
            entity table, then ``t.shape[1]`` per attribute table). When
            ``self.trans`` is true it is used as one weight per entity row.

        Returns
        -------
        res
            If ``self.trans`` is false, the ``(ns, ns)`` array filled in by
            ``comp.expand_add``. If ``self.trans`` is true, a square matrix
            with ``ds + sum(dr)`` rows laid out as
            ``[entity | attribute 0 | attribute 1 | ...]``, assembled with
            ``scipy.sparse`` stacking when every attribute table is sparse
            and as a dense ndarray otherwise.
        """
        w = w.astype(float)

        s = self.ent_table
        r = self.att_table
        k = self.kfkds

        ns = k[0].shape[0]
        ds = s.shape[1]
        nr = [t.shape[0] for t in r]
        dr = [t.shape[1] for t in r]
        # Loop-invariant, so evaluate it once instead of once per table.
        all_sparse = all(map(sp.issparse, r))

        if not self.trans:
            if s.size > 0:
                res = self._t_cross_w(s, w[0:ds])
            else:
                res = np.zeros((ns, ns), dtype=float, order='C')

            count = ds
            cross_r = []
            for t in r:
                part = self._t_cross_w(t, w[count:count + t.shape[1]])
                cross_r.append(part.toarray() if all_sparse else part)
                count += t.shape[1]

            comp.expand_add(ns, len(k), k, cross_r, nr, res)
            return res

        # Use the weights (instead of ones) as the 'other' row for grouping.
        other = w.reshape((1, -1)).astype(float)
        s2 = w.reshape(-1, 1) * np.array(s)
        v = [np.zeros((1, t.shape[0]), dtype=float) for t in r]
        comp.group(ns, len(k), 1, k, nr, other, v)

        if all_sparse:
            # The tables are accessed through .row/.col/.data, so they are
            # expected to expose the COO attributes.
            size = r[0].size
            data = np.empty(size)
            comp.multiply_sparse(size, r[0].row, r[0].data, np.sqrt(v[0]),
                                 data)
            diag_part = self._cross(
                sp.coo_matrix((data, (r[0].row, r[0].col))))
            if ds > 0:
                m = np.zeros((nr[0], ds))
                comp.group_left(ns, ds, s2, k[0], m)
                # Off-diagonal blocks are p.T (top right) and p (bottom left).
                p = self._cross(r[0], m)
                s_part = self._cross(s, s2)
                res = sp.vstack((np.hstack((s_part, p.T)),
                                 sp.hstack((p, diag_part))))
            else:
                res = diag_part

            # Multi-table join: append one block row/column per extra table.
            for i in range(1, len(k)):
                ps = []
                if ds > 0:
                    m = np.zeros((nr[i], ds))
                    comp.group_left(ns, ds, s2, k[i], m)
                    ps += [self._cross(r[i], m)]

                size = r[i].size
                data = np.empty(size)
                comp.multiply_sparse(size, r[i].row, r[i].data,
                                     np.sqrt(v[i]), data)
                diag_part = self._cross(
                    sp.coo_matrix((data, (r[i].row, r[i].col))))

                for j in range(i):
                    ps += [
                        r[i].tocsr()[k[i]].T.dot(
                            r[j].tocsr()[k[j]].multiply(w.reshape(-1, 1)))
                    ]

                res = sp.vstack(
                    (sp.hstack((res, sp.vstack([blk.T for blk in ps]))),
                     sp.hstack(ps + [diag_part])))
            return res

        # Dense attribute tables: fill a preallocated square matrix.
        nt = ds + sum(dr)
        res = np.empty((nt, nt))

        data = np.empty(r[0].shape, order='C')
        comp.multiply(nr[0], dr[0], r[0], v[0], data)
        res[ds:ds + dr[0], ds:ds + dr[0]] = self._cross(data)

        if ds > 0:
            m = np.zeros((nr[0], ds))
            comp.group_left(ns, ds, s2, k[0], m)
            res[ds:ds + dr[0], :ds] = self._cross(r[0], m)
            res[:ds, ds:ds + dr[0]] = res[ds:ds + dr[0], :ds].T
            res[:ds, :ds] = self._cross(s, s2)

        # Multi-table join.
        for i in range(1, len(k)):
            # Column range of table i. These offsets are needed even when
            # the entity table is empty, so compute them unconditionally.
            ni1 = ds + sum(dr[:i])
            ni2 = ni1 + dr[i]

            if ds > 0:
                m = np.zeros((nr[i], ds))
                comp.group_left(ns, ds, s2, k[i], m)
                res[ni1:ni2, :ds] = self._cross(r[i], m)
                res[:ds, ni1:ni2] = res[ni1:ni2, :ds].T

            # Diagonal block of table i.
            data = np.empty(r[i].shape, order='C')
            comp.multiply(nr[i], dr[i], r[i], v[i], data)
            res[ni1:ni2, ni1:ni2] = self._cross(data)

            for j in range(i):
                dj1 = ds + sum(dr[:j])
                dj2 = dj1 + dr[j]
                if (ns * 1.0 / nr[j]) > (1 + nr[j] * 1.0 / dr[j]):
                    # The weighted variant in comp adds w instead of 1 when
                    # counting the members of each key group.
                    # NOTE(review): m.T.dot(r[j]) is only shape-compatible
                    # when nr[i] == nr[j] unless comp fills m transposed --
                    # confirm.
                    m = np.zeros((nr[i], nr[j]), order='C')
                    comp.group_k_by_k_w(nr[i], nr[j], ns, w, k[i], k[j], m)
                    res[ni1:ni2, dj1:dj2] = r[i].T.dot(m.T.dot(r[j]))
                else:
                    weighted = w.reshape(-1, 1) * np.array(r[i][k[i]])
                    res[ni1:ni2, dj1:dj2] = weighted.T.dot(r[j][k[j]])
                res[dj1:dj2, ni1:ni2] = res[ni1:ni2, dj1:dj2].T

        return res
Ejemplo n.º 3
0
    def _cross_prod(self):
        """Compute the cross product of the normalized matrix block by block.

        Reads ``self.ent_table``, ``self.att_table`` and ``self.kfkds``.
        With ``self.second_order`` set, it also reads
        ``self.shadow_ent_table`` and ``self.shadow_att_table``.

        Returns
        -------
        res
            * ``self.trans`` false: ``NotImplemented`` when every attribute
              table is sparse, otherwise the ``(ns, ns)`` array filled in by
              ``comp.expand_add``.
              NOTE(review): this path returns before the ``self.a`` /
              ``self.b`` handling below -- confirm that is intended.
            * ``self.trans`` true: a square matrix laid out as
              ``[entity | attribute 0 | ...]``, multiplied by ``self.a ** 2``
              when ``self.a != 1.0``. ``NotImplemented`` is returned when
              ``self.b != 0.0``.
        """
        s = self.ent_table
        r = self.att_table
        k = self.kfkds
        ns = k[0].shape[0]
        ds = s.shape[1]
        nr = [t.shape[0] for t in r]
        dr = [t.shape[1] for t in r]
        all_sparse = all(map(sp.issparse, r))

        if not self.trans:
            if all_sparse:
                return NotImplemented

            if s.size > 0:
                res = self._t_cross(s)
            else:
                res = np.zeros((ns, ns), dtype=float, order='C')

            # Not every table is sparse here, so no densifying is done.
            cross_r = [self._t_cross(t) for t in r]
            comp.expand_add(ns, len(k), k, cross_r, nr, res)
            return res

        # Per-table group vectors filled in by comp.group from an all-ones
        # row (presumably the number of entity rows per attribute row --
        # confirm against comp).
        other = np.ones((1, ns))
        v = [np.zeros((1, t.shape[0]), dtype=float) for t in r]
        comp.group(ns, len(k), 1, k, nr, other, v)

        if all_sparse:
            # The tables are accessed through .row/.col/.data, so they are
            # expected to expose the COO attributes.
            size = r[0].size
            data = np.empty(size)
            comp.multiply_sparse(size, r[0].row, r[0].data, np.sqrt(v[0]),
                                 data)
            diag_part = self._cross(
                sp.coo_matrix((data, (r[0].row, r[0].col))))
            if ds > 0:
                m = np.zeros((nr[0], ds))
                comp.group_left(ns, ds, s, k[0], m)
                # Off-diagonal blocks are p.T (top right) and p (bottom left).
                p = self._cross(r[0], m)
                s_part = self._cross(self.ent_table)
                res = sp.vstack((np.hstack((s_part, p.T)),
                                 sp.hstack((p, diag_part))))
            else:
                res = diag_part

            # Multi-table join: append one block row/column per extra table.
            for i in range(1, len(k)):
                ps = []
                if ds > 0:
                    m = np.zeros((nr[i], ds))
                    comp.group_left(ns, ds, s, k[i], m)
                    ps += [self._cross(r[i], m)]

                size = r[i].size
                data = np.empty(size)
                comp.multiply_sparse(size, r[i].row, r[i].data,
                                     np.sqrt(v[i]), data)
                diag_part = self._cross(
                    sp.coo_matrix((data, (r[i].row, r[i].col))))

                for j in range(i):
                    ps += [r[i].tocsr()[k[i]].T.dot(r[j].tocsr()[k[j]])]

                res = sp.vstack(
                    (sp.hstack((res, sp.vstack([blk.T for blk in ps]))),
                     sp.hstack(ps + [diag_part])))
        else:
            s = np.ascontiguousarray(s)
            if self.second_order:
                # Only the first attribute table is used on this path.
                nt = self.shape[0]
                res = np.empty((nt, nt))
                data = sp.diags(np.sqrt(v[0]), [0]) * r[0]
                res[ds:ds + dr[0], ds:ds + dr[0]] = self._cross(data)

                if ds > 0:
                    shadow_s = self.shadow_ent_table
                    shadow_r = self.shadow_att_table[0]
                    off = ds + dr[0]

                    # p1: attribute x entity block and its transpose.
                    m1 = np.zeros((nr[0], ds), dtype=float)
                    comp.group_left(ns, ds, s, k[0], m1)
                    res[ds:off, :ds] = r[0].T.dot(m1)
                    res[:ds, ds:off] = res[ds:off, :ds].T
                    # s: entity x entity block.
                    res[:ds, :ds] = self._cross(self.ent_table)

                    r_p = base.data_interaction_rr(shadow_s, shadow_r[k[0]])
                    # p2: entity x interaction block and its transpose.
                    res[:ds, off:] = s.T.dot(r_p)
                    res[off:, :ds] = res[:ds, off:].T
                    # p3: attribute x interaction block and its transpose.
                    m = np.zeros((nr[0], shadow_s.shape[1]), dtype=float)
                    comp.group_left(ns, shadow_s.shape[1], shadow_s, k[0], m)
                    ksr = base.data_interaction_rr(m, shadow_r)
                    res[ds:off, off:] = self._cross(r[0], ksr)
                    res[off:, ds:off] = res[ds:off, off:].T
                    # Interaction x interaction block.
                    rrx = base.data_interaction_rr(shadow_r, shadow_r)
                    ssx = base.data_interaction_rr(shadow_s, shadow_s)
                    dss = shadow_s.shape[1] * shadow_s.shape[1]
                    kss = np.zeros((nr[0], dss), dtype=float)
                    comp.group_left(ns, dss, ssx, k[0], kss)
                    res[off:, off:] = rrx.T.dot(kss).reshape(
                        (r_p.shape[1], r_p.shape[1]))
            else:
                # Size the result for all attribute tables so the
                # multi-table loop below has room for its blocks. For a
                # single table this equals ds + dr[0].
                nt = ds + sum(dr)
                res = np.empty((nt, nt))
                data = sp.diags(np.sqrt(v[0]), [0]) * r[0]
                res[ds:ds + dr[0], ds:ds + dr[0]] = self._cross(data)
                if ds > 0:
                    m = np.zeros((nr[0], ds))
                    comp.group_left(ns, ds, s, k[0], m)
                    res[ds:ds + dr[0], :ds] = self._cross(r[0], m)
                    res[:ds, ds:ds + dr[0]] = res[ds:ds + dr[0], :ds].T
                    res[:ds, :ds] = self._cross(self.ent_table)

                # Multi-table join.
                for i in range(1, len(k)):
                    # Column range of table i. These offsets are needed
                    # even when the entity table is empty, so compute them
                    # unconditionally.
                    ni1 = ds + sum(dr[:i])
                    ni2 = ni1 + dr[i]

                    if ds > 0:
                        m = np.zeros((nr[i], ds))
                        comp.group_left(ns, ds, s, k[i], m)
                        res[ni1:ni2, :ds] = self._cross(r[i], m)
                        res[:ds, ni1:ni2] = res[ni1:ni2, :ds].T

                    # Diagonal block of table i.
                    # NOTE(review): table 0 is scaled with sqrt(v[0]) via
                    # sp.diags while later tables go through comp.multiply
                    # with v[i] -- confirm the two are equivalent.
                    data = np.empty(r[i].shape, order='C')
                    comp.multiply(nr[i], dr[i], r[i], v[i], data)
                    res[ni1:ni2, ni1:ni2] = self._cross(data)

                    for j in range(i):
                        dj1 = ds + sum(dr[:j])
                        dj2 = dj1 + dr[j]

                        if (ns * 1.0 / nr[j]) > (1 + nr[j] * 1.0 / dr[j]):
                            m = np.zeros((nr[i], nr[j]), order='C')
                            comp.group_k_by_k(nr[i], nr[j], ns, k[i], k[j],
                                              m)
                            res[ni1:ni2, dj1:dj2] = r[i].T.dot(
                                m.T.dot(r[j]))
                        else:
                            res[ni1:ni2, dj1:dj2] = r[i][k[i]].T.dot(
                                r[j][k[j]])
                        res[dj1:dj2, ni1:ni2] = res[ni1:ni2, dj1:dj2].T

        if self.a != 1.0:
            res = res * np.power(self.a, 2)
        if self.b != 0.0:
            return NotImplemented

        return res
Ejemplo n.º 4
0
    def _cross_prod_hess(self, w):
        """Calculate X * A * X.T, where A is a diagonal matrix.

        Parameters
        ----------
        w : ndarray
            Diagonal of A; converted to float. When ``self.trans`` is true it
            is used as one weight per entity row, shape (num_samples,). When
            ``self.trans`` is false it is sliced per column block (``ds``
            entries for the entity table, then ``t.shape[1]`` per attribute
            table).

        Returns
        -------
        res : X * A * X.T
            If ``self.trans`` is false, the ``(ns, ns)`` array filled in by
            ``comp.expand_add``. If ``self.trans`` is true, a square matrix
            with ``ds + sum(dr)`` rows laid out as
            ``[entity | attribute 0 | attribute 1 | ...]``.

        Examples
        --------
        T = Entity Table:
                [[ 1.  2.]
                 [ 4.  3.]
                 [ 5.  6.]
                 [ 8.  7.]
                 [ 9.  1.]]
            Attribute Table:
                [[ 1.1  2.2]
                 [ 3.3  4.4]]
            K:
                [[0, 1, 1, 0, 1]]
        >>> T._cross_prod_hess(np.arange(5))
            [[ 582.,  276.,  174.,  248.],
             [ 276.,  232.,   78.,  118.],
             [ 174.,   78.,   66.,   90.],
             [ 248.,  118.,   90.,  124.]]

        """

        w = w.astype(float)
        s = self.ent_table
        r = self.att_table
        k = self.kfkds
        ns = k[0].shape[0]
        ds = s.shape[1]
        nr = [t.shape[0] for t in r]
        dr = [t.shape[1] for t in r]
        # Loop-invariant, so evaluate it once instead of once per table.
        all_sparse = all(map(sp.issparse, r))

        if not self.trans:
            if s.size > 0:
                res = self._t_cross_w(s, w[0:ds])
            else:
                res = np.zeros((ns, ns), dtype=float, order='C')
            count = ds
            cross_r = []
            for t in r:
                part = self._t_cross_w(t, w[count:count + t.shape[1]])
                cross_r.append(part.toarray() if all_sparse else part)
                count += t.shape[1]
            comp.expand_add(ns, len(k), k, cross_r, nr, res)
            return res

        # Use the weights (instead of ones) as the 'other' row for grouping.
        other = w.reshape((1, -1)).astype(float)
        s2 = w.reshape(-1, 1) * np.array(s)
        v = [np.zeros((1, t.shape[0]), dtype=float) for t in r]
        comp.group(ns, len(k), 1, k, nr, other, v)

        if all_sparse:
            # The tables are accessed through .row/.col/.data, so they are
            # expected to expose the COO attributes.
            size = r[0].size
            data = np.empty(size)
            comp.multiply_sparse(size, r[0].row, r[0].data, np.sqrt(v[0]),
                                 data)
            diag_part = self._cross(
                sp.coo_matrix((data, (r[0].row, r[0].col))))
            if ds > 0:
                m = np.zeros((nr[0], ds))
                comp.group_left(ns, ds, s2, k[0], m)
                # Off-diagonal blocks are p.T (top right) and p (bottom left).
                p = self._cross(r[0], m)
                s_part = self._cross(s, s2)
                res = sp.vstack((np.hstack((s_part, p.T)),
                                 sp.hstack((p, diag_part))))
            else:
                res = diag_part

            # Multi-table join: append one block row/column per extra table.
            for i in range(1, len(k)):
                ps = []
                if ds > 0:
                    m = np.zeros((nr[i], ds))
                    comp.group_left(ns, ds, s2, k[i], m)
                    ps += [self._cross(r[i], m)]

                size = r[i].size
                data = np.empty(size)
                comp.multiply_sparse(size, r[i].row, r[i].data,
                                     np.sqrt(v[i]), data)
                diag_part = self._cross(
                    sp.coo_matrix((data, (r[i].row, r[i].col))))

                for j in range(i):
                    ps += [
                        r[i].tocsr()[k[i]].T.dot(
                            r[j].tocsr()[k[j]].multiply(w.reshape(-1, 1)))
                    ]

                res = sp.vstack(
                    (sp.hstack((res, sp.vstack([blk.T for blk in ps]))),
                     sp.hstack(ps + [diag_part])))
            return res

        # Dense attribute tables: fill a preallocated square matrix.
        nt = ds + sum(dr)
        res = np.empty((nt, nt))

        data = np.empty(r[0].shape, order='C')
        comp.multiply(nr[0], dr[0], r[0], v[0], data)
        res[ds:ds + dr[0], ds:ds + dr[0]] = self._cross(data)

        if ds > 0:
            m = np.zeros((nr[0], ds))
            comp.group_left(ns, ds, s2, k[0], m)
            res[ds:ds + dr[0], :ds] = self._cross(r[0], m)
            res[:ds, ds:ds + dr[0]] = res[ds:ds + dr[0], :ds].T
            res[:ds, :ds] = self._cross(s, s2)

        # Multi-table join.
        for i in range(1, len(k)):
            # Column range of table i. These offsets are needed even when
            # the entity table is empty, so compute them unconditionally.
            ni1 = ds + sum(dr[:i])
            ni2 = ni1 + dr[i]

            if ds > 0:
                m = np.zeros((nr[i], ds))
                comp.group_left(ns, ds, s2, k[i], m)
                res[ni1:ni2, :ds] = self._cross(r[i], m)
                res[:ds, ni1:ni2] = res[ni1:ni2, :ds].T

            # Diagonal block of table i.
            data = np.empty(r[i].shape, order='C')
            comp.multiply(nr[i], dr[i], r[i], v[i], data)
            res[ni1:ni2, ni1:ni2] = self._cross(data)

            for j in range(i):
                dj1 = ds + sum(dr[:j])
                dj2 = dj1 + dr[j]
                if (ns * 1.0 / nr[j]) > (1 + nr[j] * 1.0 / dr[j]):
                    # The weighted variant in comp adds w instead of 1 when
                    # counting the members of each key group.
                    # NOTE(review): m.T.dot(r[j]) is only shape-compatible
                    # when nr[i] == nr[j] unless comp fills m transposed --
                    # confirm.
                    m = np.zeros((nr[i], nr[j]), order='C')
                    comp.group_k_by_k_w(nr[i], nr[j], ns, w, k[i], k[j], m)
                    res[ni1:ni2, dj1:dj2] = r[i].T.dot(m.T.dot(r[j]))
                else:
                    weighted = w.reshape(-1, 1) * np.array(r[i][k[i]])
                    res[ni1:ni2, dj1:dj2] = weighted.T.dot(r[j][k[j]])
                res[dj1:dj2, ni1:ni2] = res[ni1:ni2, dj1:dj2].T

        return res