def _cross_prod(self):
    """Assemble the cross-product matrix of this normalized matrix.

    The result is built from the entity table (``self.ent_table``), the
    attribute tables (``self.att_table``) and the key columns
    (``self.kfkds``) without materializing the join.

    * ``self.trans`` false: an ``ns x ns`` dense array, where ``ns`` is the
      length of the first key column. It starts from ``_t_cross`` of the
      entity table (or zeros when the entity table is empty) and
      ``comp.expand_add`` folds in the per-attribute-table ``_t_cross``
      results.
    * ``self.trans`` true: a square matrix of side ``ds + sum(dr)`` (entity
      columns followed by each attribute table's columns), assembled block
      by block. It is a scipy sparse matrix when every attribute table is
      sparse, otherwise a dense ndarray.

    Returns
    -------
    res : ndarray or scipy sparse matrix
        The assembled cross-product matrix.

    Notes
    -----
    ``_cross`` / ``_t_cross`` and the ``comp`` kernels are defined elsewhere;
    presumably ``_cross(a, b)`` is ``a.T.dot(b)`` (``b`` defaulting to ``a``)
    and ``_t_cross(a)`` is ``a.dot(a.T)`` -- confirm against their
    definitions.
    """
    s = self.ent_table
    r = self.att_table
    k = self.kfkds
    ns = k[0].shape[0]
    ds = s.shape[1]
    nr = [t.shape[0] for t in r]
    dr = [t.shape[1] for t in r]

    if not self.trans:
        if s.size > 0:
            res = self._t_cross(s)
        else:
            res = np.zeros((ns, ns), dtype=float, order='C')
        if all(map(sp.issparse, r)):
            cross_r = [self._t_cross(t).toarray() for t in r]
        else:
            cross_r = [self._t_cross(t) for t in r]
        comp.expand_add(ns, len(k), k, cross_r, nr, res)
        return res

    # v[i] is filled by comp.group from an all-ones row vector, i.e. it is
    # accumulated per attribute-table row through key column k[i].
    other = np.ones((1, ns))
    v = [np.zeros((1, t.shape[0]), dtype=float) for t in r]

    if all(map(sp.issparse, r)):
        comp.group(ns, len(k), 1, k, nr, other, v)
        size = r[0].size
        data = np.empty(size)
        # part 2 and 3 are p.T and p
        comp.multiply_sparse(size, r[0].row, r[0].data, np.sqrt(v[0]), data)
        # Pass the shape explicitly: without it coo_matrix infers the shape
        # from the largest index and drops empty trailing rows/columns.
        diag_part = self._cross(
            sp.coo_matrix((data, (r[0].row, r[0].col)), shape=r[0].shape))
        if ds > 0:
            m = np.zeros((nr[0], ds))
            comp.group_left(ns, ds, s, k[0], m)
            p = self._cross(r[0], m)
            s_part = self._cross(self.ent_table)
            res = sp.vstack((np.hstack((s_part, p.T)),
                             sp.hstack((p, diag_part))))
        else:
            res = diag_part

        # multi-table join: append one block row/column per extra table
        for i in range(1, len(k)):
            ps = []
            if ds > 0:
                m = np.zeros((nr[i], ds))
                comp.group_left(ns, ds, s, k[i], m)
                ps += [self._cross(r[i], m)]
            # cp (KRi)
            size = r[i].size
            data = np.empty(size)
            comp.multiply_sparse(size, r[i].row, r[i].data,
                                 np.sqrt(v[i]), data)
            diag_part = self._cross(
                sp.coo_matrix((data, (r[i].row, r[i].col)),
                              shape=r[i].shape))
            for j in range(i):
                ps += [r[i].tocsr()[k[i]].T.dot(r[j].tocsr()[k[j]])]
            res = sp.vstack((sp.hstack((res, sp.vstack([p.T for p in ps]))),
                             sp.hstack(ps + [diag_part])))
        return res

    nt = ds + sum(dr)
    res = np.empty((nt, nt))
    data = np.empty(r[0].shape, order='C')
    comp.group(ns, len(k), 1, k, nr, other, v)
    comp.multiply(r[0].shape[0], r[0].shape[1], r[0], v[0], data)
    res[ds:ds + dr[0], ds:ds + dr[0]] = self._cross(data)
    if ds > 0:
        m = np.zeros((nr[0], ds))
        comp.group_left(ns, ds, s, k[0], m)
        res[ds:ds + dr[0], :ds] = self._cross(r[0], m)
        res[:ds, ds:ds + dr[0]] = res[ds:ds + dr[0], :ds].T
        res[:ds, :ds] = self._cross(self.ent_table)

    # multi-table join
    for i in range(1, len(k)):
        # Column range of table i. Computed unconditionally so the blocks
        # below are also well-defined when the entity table has no columns.
        ni1 = ds + sum(dr[:i])
        ni2 = ni1 + dr[i]
        if ds > 0:
            m = np.zeros((nr[i], ds))
            comp.group_left(ns, ds, s, k[i], m)
            res[ni1:ni2, :ds] = self._cross(r[i], m)
            res[:ds, ni1:ni2] = res[ni1:ni2, :ds].T

        # cp(KRi)
        data = np.empty(r[i].shape, order='C')
        comp.multiply(r[i].shape[0], r[i].shape[1], r[i], v[i], data)
        res[ni1:ni2, ni1:ni2] = self._cross(data)

        for j in range(i):
            dj1 = ds + sum(dr[:j])
            dj2 = dj1 + dr[j]
            # Choose between grouping the key pairs first and gathering
            # the joined rows, based on the table sizes.
            if (ns * 1.0 / nr[j]) > (1 + nr[j] * 1.0 / dr[j]):
                m = np.zeros((nr[i], nr[j]), order='C')
                comp.group_k_by_k(nr[i], nr[j], ns, k[i], k[j], m)
                res[ni1:ni2, dj1:dj2] = r[i].T.dot(m.T.dot(r[j]))
            else:
                res[ni1:ni2, dj1:dj2] = r[i][k[i]].T.dot(r[j][k[j]])
            # mirror the off-diagonal block
            res[dj1:dj2, ni1:ni2] = res[ni1:ni2, dj1:dj2].T
    return res
def _cross_prod(self):
    """Assemble the cross-product matrix, with optional second-order terms.

    Works from the entity table (``self.ent_table``), the attribute tables
    (``self.att_table``) and the key columns (``self.kfkds``) without
    materializing the join.

    * ``self.trans`` false: returns ``NotImplemented`` when every attribute
      table is sparse; otherwise returns an ``ns x ns`` dense array built
      from ``_t_cross`` and ``comp.expand_add``. This path returns before
      the ``self.a`` / ``self.b`` handling below.
    * ``self.trans`` true, all attribute tables sparse: a scipy sparse
      block matrix.
    * ``self.trans`` true, dense, ``self.second_order`` set: a
      ``self.shape[0]``-square array that additionally holds the blocks for
      the interaction columns derived from ``self.shadow_ent_table`` and
      ``self.shadow_att_table[0]`` (first attribute table only).
    * ``self.trans`` true, dense, first order: a square array of side
      ``ds + sum(dr)``.

    For the ``self.trans`` paths the result is multiplied by ``self.a ** 2``
    when ``self.a != 1.0``, and ``NotImplemented`` is returned when
    ``self.b != 0.0``.

    Returns
    -------
    res : ndarray, scipy sparse matrix, or NotImplemented
    """
    s = self.ent_table
    r = self.att_table
    k = self.kfkds
    ns = k[0].shape[0]
    ds = s.shape[1]
    nr = [t.shape[0] for t in r]
    dr = [t.shape[1] for t in r]

    if not self.trans:
        if all(map(sp.issparse, r)):
            return NotImplemented
        if s.size > 0:
            res = self._t_cross(s)
        else:
            res = np.zeros((ns, ns), dtype=float, order='C')
        # Unreachable-true after the guard above; kept as in the original.
        if all(map(sp.issparse, r)):
            cross_r = [self._t_cross(t).toarray() for t in r]
        else:
            cross_r = [self._t_cross(t) for t in r]
        comp.expand_add(ns, len(k), k, cross_r, nr, res)
        return res

    if all(map(sp.issparse, r)):
        other = np.ones((1, ns))
        v = [np.zeros((1, t.shape[0]), dtype=float) for t in r]
        comp.group(ns, len(k), 1, k, nr, other, v)
        size = r[0].size
        data = np.empty(size)
        # part 2 and 3 are p.T and p
        comp.multiply_sparse(size, r[0].row, r[0].data, np.sqrt(v[0]), data)
        # Pass the shape explicitly: without it coo_matrix infers the shape
        # from the largest index and drops empty trailing rows/columns.
        diag_part = self._cross(
            sp.coo_matrix((data, (r[0].row, r[0].col)), shape=r[0].shape))
        if ds > 0:
            m = np.zeros((nr[0], ds))
            comp.group_left(ns, ds, s, k[0], m)
            p = self._cross(r[0], m)
            s_part = self._cross(self.ent_table)
            res = sp.vstack((np.hstack((s_part, p.T)),
                             sp.hstack((p, diag_part))))
        else:
            res = diag_part

        # multi-table join
        for i in range(1, len(k)):
            ps = []
            if ds > 0:
                m = np.zeros((nr[i], ds))
                comp.group_left(ns, ds, s, k[i], m)
                ps += [self._cross(r[i], m)]
            # cp (KRi)
            size = r[i].size
            data = np.empty(size)
            comp.multiply_sparse(size, r[i].row, r[i].data,
                                 np.sqrt(v[i]), data)
            diag_part = self._cross(
                sp.coo_matrix((data, (r[i].row, r[i].col)),
                              shape=r[i].shape))
            for j in range(i):
                ps += [r[i].tocsr()[k[i]].T.dot(r[j].tocsr()[k[j]])]
            res = sp.vstack((sp.hstack((res, sp.vstack([p.T for p in ps]))),
                             sp.hstack(ps + [diag_part])))
    else:
        s = np.ascontiguousarray(s)
        other = np.ones((1, ns))
        v = [np.zeros((1, t.shape[0]), dtype=float) for t in r]
        if self.second_order:
            nt = self.shape[0]
            res = np.empty((nt, nt))
            comp.group(ns, len(k), 1, k, nr, other, v)
            data = sp.diags(np.sqrt(v[0]), [0]) * r[0]
            res[ds:ds + dr[0], ds:ds + dr[0]] = self._cross(data)
            if ds > 0:
                # p1
                m1 = np.zeros((nr[0], ds), dtype=float)
                comp.group_left(ns, ds, s, k[0], m1)
                res[ds:ds + dr[0], :ds] = r[0].T.dot(m1)
                res[:ds, ds:ds + dr[0]] = res[ds:ds + dr[0], :ds].T
                # s
                res[:ds, :ds] = self._cross(self.ent_table)
            # NOTE(review): the interaction blocks below read only the
            # shadow tables (and zero-width slices of s when ds == 0), so
            # they are kept outside the `ds > 0` guard -- confirm nesting.
            shadow_s = self.shadow_ent_table
            shadow_r0 = self.shadow_att_table[0]
            r_p = base.data_interaction_rr(shadow_s, shadow_r0[k[0]])
            # p2
            res[:ds, ds + dr[0]:] = s.T.dot(r_p)
            res[ds + dr[0]:, :ds] = res[:ds, ds + dr[0]:].T
            # p3
            m = np.zeros((nr[0], shadow_s.shape[1]), dtype=float)
            comp.group_left(ns, shadow_s.shape[1], shadow_s, k[0], m)
            ksr = base.data_interaction_rr(m, shadow_r0)
            res[ds:ds + dr[0], ds + dr[0]:] = self._cross(r[0], ksr)
            res[ds + dr[0]:, ds:ds + dr[0]] = res[ds:ds + dr[0], ds + dr[0]:].T
            # interaction x interaction block
            rrx = base.data_interaction_rr(shadow_r0, shadow_r0)
            ssx = base.data_interaction_rr(shadow_s, shadow_s)
            dss = shadow_s.shape[1] * shadow_s.shape[1]
            kss = np.zeros((nr[0], dss), dtype=float)
            comp.group_left(ns, dss, ssx, k[0], kss)
            res[ds + dr[0]:, ds + dr[0]:] = rrx.T.dot(kss).reshape(
                (r_p.shape[1], r_p.shape[1]))
        else:
            # Size the result for every attribute table: the multi-table
            # loop below writes blocks past the first table's columns.
            nt = ds + sum(dr)
            comp.group(ns, len(k), 1, k, nr, other, v)
            res = np.empty((nt, nt))
            data = sp.diags(np.sqrt(v[0]), [0]) * r[0]
            res[ds:ds + dr[0], ds:ds + dr[0]] = self._cross(data)
            if ds > 0:
                m = np.zeros((nr[0], ds))
                comp.group_left(ns, ds, s, k[0], m)
                res[ds:ds + dr[0], :ds] = self._cross(r[0], m)
                res[:ds, ds:ds + dr[0]] = res[ds:ds + dr[0], :ds].T
                res[:ds, :ds] = self._cross(self.ent_table)

            # multi-table join
            for i in range(1, len(k)):
                # Column range of table i. Computed unconditionally so the
                # blocks below are well-defined when ds == 0 as well.
                ni1 = ds + sum(dr[:i])
                ni2 = ni1 + dr[i]
                if ds > 0:
                    m = np.zeros((nr[i], ds))
                    comp.group_left(ns, ds, s, k[i], m)
                    res[ni1:ni2, :ds] = self._cross(r[i], m)
                    res[:ds, ni1:ni2] = res[ni1:ni2, :ds].T

                # cp(KRi)
                data = np.empty(r[i].shape, order='C')
                comp.multiply(r[i].shape[0], r[i].shape[1], r[i], v[i], data)
                res[ni1:ni2, ni1:ni2] = self._cross(data)

                for j in range(i):
                    dj1 = ds + sum(dr[:j])
                    dj2 = dj1 + dr[j]
                    if (ns * 1.0 / nr[j]) > (1 + nr[j] * 1.0 / dr[j]):
                        m = np.zeros((nr[i], nr[j]), order='C')
                        comp.group_k_by_k(nr[i], nr[j], ns, k[i], k[j], m)
                        res[ni1:ni2, dj1:dj2] = r[i].T.dot(m.T.dot(r[j]))
                    else:
                        res[ni1:ni2, dj1:dj2] = r[i][k[i]].T.dot(r[j][k[j]])
                    # mirror the off-diagonal block
                    res[dj1:dj2, ni1:ni2] = res[ni1:ni2, dj1:dj2].T

    # Scalar transform handling: scaling by a squares in the product; a
    # non-zero offset b is not supported here.
    if self.a != 1.0:
        res = res * np.power(self.a, 2)
    if self.b != 0.0:
        return NotImplemented
    return res
def _cross_prod_w(self, w):
    """Calculate X * A * X.T, where A is a diagonal matrix.

    ``X`` is this normalized matrix (entity table, attribute tables and key
    columns) and ``w`` holds the diagonal of ``A``.

    Parameters
    ----------
    w : ndarray
        Diagonal of ``A``; cast to float. When ``self.trans`` is false it is
        sliced per table by column count (entity columns first, then each
        attribute table). When ``self.trans`` is true it is applied per row
        of the entity table.

    Returns
    -------
    res : ndarray or scipy sparse matrix
        ``ns x ns`` dense array when ``self.trans`` is false; otherwise a
        square matrix of side ``ds + sum(dr)``, sparse when every attribute
        table is sparse.
    """
    w = w.astype(float)
    s = self.ent_table
    r = self.att_table
    k = self.kfkds
    ns = k[0].shape[0]
    ds = s.shape[1]
    nr = [t.shape[0] for t in r]
    dr = [t.shape[1] for t in r]
    all_sparse = all(map(sp.issparse, r))

    if not self.trans:
        if s.size > 0:
            res = self._t_cross_w(s, w[0:ds])
        else:
            res = np.zeros((ns, ns), dtype=float, order='C')
        count = ds
        cross_r = []
        for t in r:
            block = self._t_cross_w(t, w[count:count + t.shape[1]])
            cross_r.append(block.toarray() if all_sparse else block)
            count += t.shape[1]
        comp.expand_add(ns, len(k), k, cross_r, nr, res)
        return res

    # change the 'other' as weight to group
    other = w.reshape((1, -1)).astype(float)
    # entity table with each row scaled by its weight
    s2 = w.reshape(-1, 1) * np.array(s)
    v = [np.zeros((1, t.shape[0]), dtype=float) for t in r]

    if all_sparse:
        comp.group(ns, len(k), 1, k, nr, other, v)
        size = r[0].size
        data = np.empty(size)
        # part 2 and 3 are p.T and p
        comp.multiply_sparse(size, r[0].row, r[0].data, np.sqrt(v[0]), data)
        # Pass the shape explicitly: without it coo_matrix infers the shape
        # from the largest index and drops empty trailing rows/columns.
        diag_part = self._cross(
            sp.coo_matrix((data, (r[0].row, r[0].col)), shape=r[0].shape))
        if ds > 0:
            m = np.zeros((nr[0], ds))
            comp.group_left(ns, ds, s2, k[0], m)
            p = self._cross(r[0], m)
            s_part = self._cross(s, s2)
            res = sp.vstack((np.hstack((s_part, p.T)),
                             sp.hstack((p, diag_part))))
        else:
            res = diag_part

        # multi-table join
        for i in range(1, len(k)):
            ps = []
            if ds > 0:
                m = np.zeros((nr[i], ds))
                comp.group_left(ns, ds, s2, k[i], m)
                ps += [self._cross(r[i], m)]
            # cp (KRi)
            size = r[i].size
            data = np.empty(size)
            comp.multiply_sparse(size, r[i].row, r[i].data,
                                 np.sqrt(v[i]), data)
            diag_part = self._cross(
                sp.coo_matrix((data, (r[i].row, r[i].col)),
                              shape=r[i].shape))
            for j in range(i):
                ps += [
                    r[i].tocsr()[k[i]].T.dot(
                        r[j].tocsr()[k[j]].multiply(w.reshape(-1, 1)))
                ]
            res = sp.vstack((sp.hstack((res, sp.vstack([p.T for p in ps]))),
                             sp.hstack(ps + [diag_part])))
        return res

    nt = ds + sum(dr)
    res = np.empty((nt, nt))
    data = np.empty(r[0].shape, order='C')
    comp.group(ns, len(k), 1, k, nr, other, v)
    comp.multiply(r[0].shape[0], r[0].shape[1], r[0], v[0], data)
    res[ds:ds + dr[0], ds:ds + dr[0]] = self._cross(data)
    if ds > 0:
        m = np.zeros((nr[0], ds))
        comp.group_left(ns, ds, s2, k[0], m)
        res[ds:ds + dr[0], :ds] = self._cross(r[0], m)
        res[:ds, ds:ds + dr[0]] = res[ds:ds + dr[0], :ds].T
        res[:ds, :ds] = self._cross(s, s2)

    # multi-table join
    for i in range(1, len(k)):
        # Column range of table i. Computed unconditionally so the blocks
        # below are also well-defined when the entity table has no columns.
        ni1 = ds + sum(dr[:i])
        ni2 = ni1 + dr[i]
        if ds > 0:
            m = np.zeros((nr[i], ds))
            comp.group_left(ns, ds, s2, k[i], m)
            res[ni1:ni2, :ds] = self._cross(r[i], m)
            res[:ds, ni1:ni2] = res[ni1:ni2, :ds].T

        # cp(KRi)
        data = np.empty(r[i].shape, order='C')
        comp.multiply(r[i].shape[0], r[i].shape[1], r[i], v[i], data)
        res[ni1:ni2, ni1:ni2] = self._cross(data)

        for j in range(i):
            dj1 = ds + sum(dr[:j])
            dj2 = dj1 + dr[j]
            if (ns * 1.0 / nr[j]) > (1 + nr[j] * 1.0 / dr[j]):
                m = np.zeros((nr[i], nr[j]), order='C')
                # Update in comp.cpp. When count the number in each group,
                # add w instead of 1.
                comp.group_k_by_k_w(nr[i], nr[j], ns, w, k[i], k[j], m)
                res[ni1:ni2, dj1:dj2] = r[i].T.dot(m.T.dot(r[j]))
            else:
                res[ni1:ni2, dj1:dj2] = (
                    w.reshape(-1, 1) * np.array(r[i][k[i]])
                ).T.dot(r[j][k[j]])
            # mirror the off-diagonal block
            res[dj1:dj2, ni1:ni2] = res[ni1:ni2, dj1:dj2].T
    return res
def _right_matrix_multiplication(self, n_matrix, other, permute=None):
    """Multiply ``other`` by the normalized matrix ``n_matrix`` from the left.

    ``other`` is first grouped per attribute table through the key columns
    (``comp.group``); the grouped pieces are multiplied with the attribute
    tables and stacked horizontally next to ``other`` times the entity
    table. With ``self.second_order`` set, the interaction terms built from
    the shadow tables are appended as further columns. With
    ``self.identity`` set, the grouped pieces themselves are also inserted
    before each attribute-table product.

    Parameters
    ----------
    n_matrix : normalized matrix
        Object providing ``ent_table``, ``kfkds`` and ``att_table``.
    other : ndarray
        Left operand; cast to a C-ordered float array. Its row count ``nw``
        is the row count of the result.
    permute : index array, optional
        When given, reorders the summed dense entity-x-attribute interaction
        terms before they are reshaped to ``nw`` rows.

    Returns
    -------
    total : ndarray or matrix
        Horizontally stacked product, scaled by ``self.a`` when
        ``self.a != 1.0`` and offset by ``self.b * other.sum(axis=1)`` when
        ``self.b != 0.0``.
    """
    # np.float was only an alias of the builtin and was removed in
    # NumPy 1.24; the builtin selects the same float64 dtype.
    other = other.astype(float, order='C')
    s = n_matrix.ent_table
    k = n_matrix.kfkds
    r = n_matrix.att_table
    ns = k[0].shape[0]
    nr = [t.shape[0] for t in r]
    nk = len(k)
    nw = other.shape[0]
    res = [np.zeros((nw, t.shape[0]), dtype=float) for t in r]
    comp.group(ns, nk, nw, k, nr, other, res)

    rr = []
    if self.second_order:
        if self.shadow_ent_table is not None:
            for i in range(len(r)):
                # sr
                ss = self.shadow_ent_table
                r1 = self.shadow_att_table[i]
                k1 = self.kfkds[i]
                # TODO: 1. avoid materializing a map 2. use 2d array in transformation
                if not sp.issparse(s) and not sp.issparse(r1):
                    u = np.zeros((r1.shape[0], ss.shape[1] * nw),
                                 dtype=float, order='C')
                    comp.group_left(
                        ss.shape[0], ss.shape[1] * nw,
                        np.ascontiguousarray(
                            base.data_interaction_rr(ss, np.matrix(other).T)),
                        k1, u)
                    if permute is None:
                        rr += [np.matrix(
                            base.data_interaction_rr(r1, u)
                            .sum(axis=0).reshape(nw, -1))]
                    else:
                        rr += [np.matrix(
                            base.data_interaction_rr(r1, u)
                            .sum(axis=0)[permute].reshape(nw, -1))]
                else:
                    u = np.zeros((nw, r1.shape[1] * ss.shape[1]),
                                 dtype=float, order='C')
                    comp.data_interaction_group_sparse(
                        ns, len(ss.data), ss.shape[0], ss.shape[1],
                        len(r1.data), r1.shape[0], r1.shape[1], nw,
                        ss.row, ss.col, ss.data,
                        r1.row, r1.col, r1.data,
                        k1, k1, np.ascontiguousarray(other), u)
                    # NOTE(review): unlike every other branch, `u` is not
                    # appended to `rr` here, so the sparse S x R terms are
                    # dropped from the output -- confirm whether intended.

        # NOTE(review): nesting reconstructed -- this loop reads only the
        # shadow attribute tables, so it is placed outside the
        # `shadow_ent_table is not None` guard; confirm.
        for i in range(len(r)):
            for j in range(i + 1, len(r)):
                # The original binds (i, j) and then swaps; the net effect
                # is that table j plays the role of r1 and table i of r2.
                r1, r2 = self.shadow_att_table[j], self.shadow_att_table[i]
                k1, k2 = self.kfkds[j], self.kfkds[i]
                if not sp.issparse(r1) and not sp.issparse(r2):
                    u = np.zeros((r2.shape[0], r1.shape[1] * nw),
                                 dtype=float, order='C')
                    comp.data_interaction_group(
                        ns, r1.shape[1], nw, k1, k2, r1,
                        np.ascontiguousarray(other), u)
                    rr += [np.matrix(
                        base.data_interaction_rr(r2, u)
                        .sum(axis=0).reshape(nw, -1))]
                else:
                    u = np.zeros((nw, r1.shape[1] * r2.shape[1]),
                                 dtype=float, order='C')
                    # expand r1 through its key column before the kernel
                    r1 = r1.tocsr()[k1].tocoo()
                    comp.data_interaction_group_sparse(
                        ns, len(r1.data), r1.shape[0], r1.shape[1],
                        len(r2.data), r2.shape[0], r2.shape[1], nw,
                        r1.row, r1.col, r1.data,
                        r2.row, r2.col, r2.data,
                        k1, k2, np.ascontiguousarray(other), u)
                    rr += [u]

    # Entity-table part; omitted entirely when the entity table has no columns.
    if s.shape[1] != 0:
        parts = [other * s if sp.issparse(s) else other.dot(s)]
    else:
        parts = []
    for i, t in enumerate(r):
        if self.identity:
            parts.append(np.matrix(res[i]))
        parts.append(res[i] * t if sp.issparse(t) else res[i].dot(t))
    parts += rr
    total = np.hstack(parts)

    if self.a != 1.0:
        total = total * self.a
    if self.b != 0.0:
        total = total + self.b * other.sum(axis=1)
    return total
def _cross_prod_hess(self, w):
    """Calculate X * A * X.T, where A is a diagonal matrix.

    ``X`` is this normalized matrix and ``w`` holds the diagonal of ``A``.

    Parameters
    ----------
    w : ndarray
        Diagonal of ``A``; cast to float. When ``self.trans`` is true it has
        one entry per row of the entity table (``num_samples``). When
        ``self.trans`` is false it is sliced per table by column count
        (entity columns first, then each attribute table).

    Returns
    -------
    res : ndarray or scipy sparse matrix
        X * A * X.T. ``ns x ns`` dense when ``self.trans`` is false;
        otherwise square of side ``ds + sum(dr)``, sparse when every
        attribute table is sparse.

    Examples
    --------
    With ``T`` the transposed normalized matrix (``trans`` set) built from

    Entity Table:
        [[ 1.  2.]
         [ 4.  3.]
         [ 5.  6.]
         [ 8.  7.]
         [ 9.  1.]]
    Attribute Table:
        [[ 1.  2.]
         [ 3.  4.]]
    K:
        [[0, 1, 1, 0, 1]]

    ``T._cross_prod_hess(np.arange(5))`` gives

        [[ 582.,  276.,  174.,  248.],
         [ 276.,  232.,   78.,  118.],
         [ 174.,   78.,   66.,   90.],
         [ 248.,  118.,   90.,  124.]]
    """
    w = w.astype(float)
    s = self.ent_table
    r = self.att_table
    k = self.kfkds
    ns = k[0].shape[0]
    ds = s.shape[1]
    nr = [t.shape[0] for t in r]
    dr = [t.shape[1] for t in r]
    all_sparse = all(map(sp.issparse, r))

    if not self.trans:
        if s.size > 0:
            res = self._t_cross_w(s, w[0:ds])
        else:
            res = np.zeros((ns, ns), dtype=float, order='C')
        count = ds
        cross_r = []
        for t in r:
            block = self._t_cross_w(t, w[count:count + t.shape[1]])
            cross_r.append(block.toarray() if all_sparse else block)
            count += t.shape[1]
        comp.expand_add(ns, len(k), k, cross_r, nr, res)
        return res

    # change the 'other' as weight to group
    other = w.reshape((1, -1)).astype(float)
    # entity table with each row scaled by its weight
    s2 = w.reshape(-1, 1) * np.array(s)
    v = [np.zeros((1, t.shape[0]), dtype=float) for t in r]

    if all_sparse:
        comp.group(ns, len(k), 1, k, nr, other, v)
        size = r[0].size
        data = np.empty(size)
        comp.multiply_sparse(size, r[0].row, r[0].data, np.sqrt(v[0]), data)
        # Pass the shape explicitly: without it coo_matrix infers the shape
        # from the largest index and drops empty trailing rows/columns.
        diag_part = self._cross(
            sp.coo_matrix((data, (r[0].row, r[0].col)), shape=r[0].shape))
        if ds > 0:
            m = np.zeros((nr[0], ds))
            comp.group_left(ns, ds, s2, k[0], m)
            p = self._cross(r[0], m)
            s_part = self._cross(s, s2)
            res = sp.vstack((np.hstack((s_part, p.T)),
                             sp.hstack((p, diag_part))))
        else:
            res = diag_part

        # multi-table join
        for i in range(1, len(k)):
            ps = []
            if ds > 0:
                m = np.zeros((nr[i], ds))
                comp.group_left(ns, ds, s2, k[i], m)
                ps += [self._cross(r[i], m)]
            size = r[i].size
            data = np.empty(size)
            comp.multiply_sparse(size, r[i].row, r[i].data,
                                 np.sqrt(v[i]), data)
            diag_part = self._cross(
                sp.coo_matrix((data, (r[i].row, r[i].col)),
                              shape=r[i].shape))
            for j in range(i):
                ps += [
                    r[i].tocsr()[k[i]].T.dot(
                        r[j].tocsr()[k[j]].multiply(w.reshape(-1, 1)))
                ]
            res = sp.vstack((sp.hstack((res, sp.vstack([p.T for p in ps]))),
                             sp.hstack(ps + [diag_part])))
        return res

    nt = ds + sum(dr)
    res = np.empty((nt, nt))
    data = np.empty(r[0].shape, order='C')
    comp.group(ns, len(k), 1, k, nr, other, v)
    comp.multiply(r[0].shape[0], r[0].shape[1], r[0], v[0], data)
    res[ds:ds + dr[0], ds:ds + dr[0]] = self._cross(data)
    if ds > 0:
        m = np.zeros((nr[0], ds))
        comp.group_left(ns, ds, s2, k[0], m)
        res[ds:ds + dr[0], :ds] = self._cross(r[0], m)
        res[:ds, ds:ds + dr[0]] = res[ds:ds + dr[0], :ds].T
        res[:ds, :ds] = self._cross(s, s2)

    # multi-table join
    for i in range(1, len(k)):
        # Column range of table i. Computed unconditionally so the blocks
        # below are also well-defined when the entity table has no columns.
        ni1 = ds + sum(dr[:i])
        ni2 = ni1 + dr[i]
        if ds > 0:
            m = np.zeros((nr[i], ds))
            comp.group_left(ns, ds, s2, k[i], m)
            res[ni1:ni2, :ds] = self._cross(r[i], m)
            res[:ds, ni1:ni2] = res[ni1:ni2, :ds].T

        data = np.empty(r[i].shape, order='C')
        comp.multiply(r[i].shape[0], r[i].shape[1], r[i], v[i], data)
        res[ni1:ni2, ni1:ni2] = self._cross(data)

        for j in range(i):
            dj1 = ds + sum(dr[:j])
            dj2 = dj1 + dr[j]
            if (ns * 1.0 / nr[j]) > (1 + nr[j] * 1.0 / dr[j]):
                m = np.zeros((nr[i], nr[j]), order='C')
                # Update in comp.cpp. When count the number in each group,
                # add w instead of 1.
                comp.group_k_by_k_w(nr[i], nr[j], ns, w, k[i], k[j], m)
                res[ni1:ni2, dj1:dj2] = r[i].T.dot(m.T.dot(r[j]))
            else:
                res[ni1:ni2, dj1:dj2] = (
                    w.reshape(-1, 1) * np.array(r[i][k[i]])
                ).T.dot(r[j][k[j]])
            # mirror the off-diagonal block
            res[dj1:dj2, ni1:ni2] = res[ni1:ni2, dj1:dj2].T
    return res