def _cross_prod(self):
    """Assemble the cross-product matrix of this normalized matrix.

    The result is built block by block from the entity table
    (``self.ent_table``), the attribute tables (``self.att_table``) and the
    foreign-key index arrays (``self.kfkds``).

    * ``self.trans`` falsy: start from ``self._t_cross(ent_table)`` (or an
      ``(ns, ns)`` zero array when the entity table has no elements) and
      let ``comp.expand_add`` fold in the ``_t_cross`` of every attribute
      table.
    * ``self.trans`` truthy: build a square matrix whose rows/columns are
      the entity-table columns followed by the columns of each attribute
      table, in order.  When every attribute table is a scipy sparse
      matrix the blocks are stacked with ``sp.hstack``/``sp.vstack``;
      otherwise a dense ``ndarray`` is filled in place.

    Returns
    -------
    res : ndarray or scipy sparse matrix
        The assembled cross-product matrix.
    """
    s = self.ent_table
    r = self.att_table
    k = self.kfkds
    ns = k[0].shape[0]
    ds = s.shape[1]
    nr = [t.shape[0] for t in r]
    dr = [t.shape[1] for t in r]
    all_sparse = all(map(sp.issparse, r))

    if not self.trans:
        if s.size > 0:
            res = self._t_cross(s)
        else:
            res = np.zeros((ns, ns), dtype=float, order='C')
        if all_sparse:
            cross_r = [self._t_cross(t).toarray() for t in r]
        else:
            cross_r = [self._t_cross(t) for t in r]
        comp.expand_add(ns, len(k), k, cross_r, nr, res)
        return res

    # v[i] is filled by comp.group from an all-ones row; presumably the
    # number of entity rows referencing each attribute row -- confirm in comp.
    other = np.ones((1, ns))
    v = [np.zeros((1, n), dtype=float) for n in nr]
    comp.group(ns, len(k), 1, k, nr, other, v)

    if all_sparse:
        def scaled_self_cross(idx):
            # Scale the stored entries of table idx by sqrt(v[idx]) and
            # cross the result with itself.  The shape is passed explicitly
            # so that trailing empty rows/columns are not dropped by
            # coo_matrix's shape inference.
            table = r[idx]
            scaled = np.empty(table.size)
            comp.multiply_sparse(table.size, table.row, table.data,
                                 np.sqrt(v[idx]), scaled)
            return self._cross(
                sp.coo_matrix((scaled, (table.row, table.col)),
                              shape=table.shape))

        diag_part = scaled_self_cross(0)
        if ds > 0:
            m = np.zeros((nr[0], ds))
            comp.group_left(ns, ds, s, k[0], m)
            # part 2 and 3 are p.T and p
            p = self._cross(r[0], m)
            s_part = self._cross(s)
            res = sp.vstack((np.hstack((s_part, p.T)),
                             sp.hstack((p, diag_part))))
        else:
            res = diag_part

        # multi-table join: append one block row/column per extra table
        for i in range(1, len(k)):
            ps = []
            if ds > 0:
                m = np.zeros((nr[i], ds))
                comp.group_left(ns, ds, s, k[i], m)
                ps.append(self._cross(r[i], m))
            diag_part = scaled_self_cross(i)
            expanded_i = r[i].tocsr()[k[i]]
            for j in range(i):
                ps.append(expanded_i.T.dot(r[j].tocsr()[k[j]]))
            res = sp.vstack((
                sp.hstack((res, sp.vstack([blk.T for blk in ps]))),
                sp.hstack(ps + [diag_part])))
        return res

    # Dense case.  offsets[i]:offsets[i + 1] is the row/column range of
    # attribute table i in the result.  Computing the ranges up front keeps
    # them defined when the entity table has no columns (ds == 0).
    offsets = [ds]
    for width in dr:
        offsets.append(offsets[-1] + width)
    nt = offsets[-1]
    res = np.empty((nt, nt))

    if ds > 0:
        res[:ds, :ds] = self._cross(s)

    for i in range(len(k)):
        ni1, ni2 = offsets[i], offsets[i + 1]
        if ds > 0:
            m = np.zeros((nr[i], ds))
            comp.group_left(ns, ds, s, k[i], m)
            res[ni1:ni2, :ds] = self._cross(r[i], m)
            res[:ds, ni1:ni2] = res[ni1:ni2, :ds].T

        # cp(KRi)
        data = np.empty(r[i].shape, order='C')
        comp.multiply(nr[i], dr[i], r[i], v[i], data)
        res[ni1:ni2, ni1:ni2] = self._cross(data)

        for j in range(i):
            dj1, dj2 = offsets[j], offsets[j + 1]
            if (ns * 1.0 / nr[j]) > (1 + nr[j] * 1.0 / dr[j]):
                m = np.zeros((nr[i], nr[j]), order='C')
                comp.group_k_by_k(nr[i], nr[j], ns, k[i], k[j], m)
                # NOTE(review): m is allocated (nr[i], nr[j]), so
                # m.T.dot(r[j]) only conforms when nr[i] == nr[j] --
                # verify against how comp.group_k_by_k lays out m.
                res[ni1:ni2, dj1:dj2] = r[i].T.dot(m.T.dot(r[j]))
            else:
                res[ni1:ni2, dj1:dj2] = r[i][k[i]].T.dot(r[j][k[j]])
            res[dj1:dj2, ni1:ni2] = res[ni1:ni2, dj1:dj2].T
    return res
def _cross_prod_w(self, w):
    """Assemble the weighted cross-product ``X * A * X.T``.

    ``A`` is a diagonal matrix and ``w`` holds its diagonal.

    * ``self.trans`` falsy: ``w`` is sliced per column block
      (``w[0:ds]`` for the entity table, then one slice per attribute
      table) and passed to ``self._t_cross_w``; ``comp.expand_add`` folds
      the attribute-table results into the entity-table result.
    * ``self.trans`` truthy: ``w`` is applied row-wise to the entity table
      and used as the grouping weight, and a square matrix is built whose
      rows/columns are the entity-table columns followed by the columns
      of each attribute table, in order (sparse when every attribute
      table is a scipy sparse matrix, dense otherwise).

    Parameters
    ----------
    w : ndarray
        Diagonal of ``A``; converted with ``astype(float)``.

    Returns
    -------
    res : ndarray or scipy sparse matrix
    """
    w = w.astype(float)
    s = self.ent_table
    r = self.att_table
    k = self.kfkds
    ns = k[0].shape[0]
    ds = s.shape[1]
    nr = [t.shape[0] for t in r]
    dr = [t.shape[1] for t in r]
    # Loop-invariant, so evaluate it once instead of once per table.
    all_sparse = all(map(sp.issparse, r))

    # offsets[i]:offsets[i + 1] is the column range of attribute table i.
    offsets = [ds]
    for width in dr:
        offsets.append(offsets[-1] + width)

    if not self.trans:
        if s.size > 0:
            res = self._t_cross_w(s, w[0:ds])
        else:
            res = np.zeros((ns, ns), dtype=float, order='C')
        cross_r = []
        for i, t in enumerate(r):
            part = self._t_cross_w(t, w[offsets[i]:offsets[i + 1]])
            cross_r.append(part.toarray() if all_sparse else part)
        comp.expand_add(ns, len(k), k, cross_r, nr, res)
        return res

    # Use the weights as 'other' so comp.group accumulates w per group.
    other = w.reshape((1, -1)).astype(float)
    w_col = w.reshape(-1, 1)
    s2 = w_col * np.array(s)
    v = [np.zeros((1, n), dtype=float) for n in nr]
    comp.group(ns, len(k), 1, k, nr, other, v)

    if all_sparse:
        def scaled_self_cross(idx):
            # Scale the stored entries of table idx by sqrt(v[idx]) and
            # cross the result with itself.  The shape is passed explicitly
            # so that trailing empty rows/columns are not dropped by
            # coo_matrix's shape inference.
            table = r[idx]
            scaled = np.empty(table.size)
            comp.multiply_sparse(table.size, table.row, table.data,
                                 np.sqrt(v[idx]), scaled)
            return self._cross(
                sp.coo_matrix((scaled, (table.row, table.col)),
                              shape=table.shape))

        diag_part = scaled_self_cross(0)
        if ds > 0:
            m = np.zeros((nr[0], ds))
            comp.group_left(ns, ds, s2, k[0], m)
            # part 2 and 3 are p.T and p
            p = self._cross(r[0], m)
            s_part = self._cross(s, s2)
            res = sp.vstack((np.hstack((s_part, p.T)),
                             sp.hstack((p, diag_part))))
        else:
            res = diag_part

        # multi-table join: append one block row/column per extra table
        for i in range(1, len(k)):
            ps = []
            if ds > 0:
                m = np.zeros((nr[i], ds))
                comp.group_left(ns, ds, s2, k[i], m)
                ps.append(self._cross(r[i], m))
            # cp (KRi)
            diag_part = scaled_self_cross(i)
            expanded_i = r[i].tocsr()[k[i]]
            for j in range(i):
                ps.append(
                    expanded_i.T.dot(r[j].tocsr()[k[j]].multiply(w_col)))
            res = sp.vstack((
                sp.hstack((res, sp.vstack([blk.T for blk in ps]))),
                sp.hstack(ps + [diag_part])))
        return res

    # Dense case.  The block ranges come from the precomputed offsets so
    # they stay defined when the entity table has no columns (ds == 0).
    nt = offsets[-1]
    res = np.empty((nt, nt))

    if ds > 0:
        res[:ds, :ds] = self._cross(s, s2)

    for i in range(len(k)):
        ni1, ni2 = offsets[i], offsets[i + 1]
        if ds > 0:
            m = np.zeros((nr[i], ds))
            comp.group_left(ns, ds, s2, k[i], m)
            res[ni1:ni2, :ds] = self._cross(r[i], m)
            res[:ds, ni1:ni2] = res[ni1:ni2, :ds].T

        # cp(KRi)
        data = np.empty(r[i].shape, order='C')
        comp.multiply(nr[i], dr[i], r[i], v[i], data)
        res[ni1:ni2, ni1:ni2] = self._cross(data)

        for j in range(i):
            dj1, dj2 = offsets[j], offsets[j + 1]
            if (ns * 1.0 / nr[j]) > (1 + nr[j] * 1.0 / dr[j]):
                m = np.zeros((nr[i], nr[j]), order='C')
                # Weighted variant in comp.cpp: when counting the members
                # of each group, add w instead of 1.
                comp.group_k_by_k_w(nr[i], nr[j], ns, w, k[i], k[j], m)
                # NOTE(review): m is allocated (nr[i], nr[j]), so
                # m.T.dot(r[j]) only conforms when nr[i] == nr[j] --
                # verify against how comp.group_k_by_k_w lays out m.
                res[ni1:ni2, dj1:dj2] = r[i].T.dot(m.T.dot(r[j]))
            else:
                res[ni1:ni2, dj1:dj2] = (
                    w_col * np.array(r[i][k[i]])).T.dot(r[j][k[j]])
            res[dj1:dj2, ni1:ni2] = res[ni1:ni2, dj1:dj2].T
    return res
def _cross_prod(self):
    """Assemble the cross-product matrix of this normalized matrix.

    The result is built block by block from the entity table
    (``self.ent_table``), the attribute tables (``self.att_table``) and the
    foreign-key index arrays (``self.kfkds``).

    * ``self.trans`` falsy: returns ``NotImplemented`` when every attribute
      table is sparse; otherwise starts from ``self._t_cross(ent_table)``
      (or an ``(ns, ns)`` zero array when the entity table has no
      elements) and lets ``comp.expand_add`` fold in the ``_t_cross`` of
      every attribute table.  ``self.a``/``self.b`` are not consulted on
      this path.
    * ``self.trans`` truthy: builds a square matrix whose leading
      rows/columns are the entity-table columns followed by attribute
      table columns.  With ``self.second_order`` set (dense tables only)
      extra blocks derived from ``self.shadow_ent_table`` and
      ``self.shadow_att_table[0]`` via ``base.data_interaction_rr`` are
      appended; only the first attribute table is used there.  The result
      is multiplied by ``self.a ** 2`` when ``self.a != 1.0``, and
      ``NotImplemented`` is returned when ``self.b != 0.0``.

    Returns
    -------
    res : ndarray, scipy sparse matrix, or NotImplemented
    """
    s = self.ent_table
    r = self.att_table
    k = self.kfkds
    ns = k[0].shape[0]
    ds = s.shape[1]
    nr = [t.shape[0] for t in r]
    dr = [t.shape[1] for t in r]
    all_sparse = all(map(sp.issparse, r))

    if not self.trans:
        if all_sparse:
            return NotImplemented
        if s.size > 0:
            res = self._t_cross(s)
        else:
            res = np.zeros((ns, ns), dtype=float, order='C')
        # At least one table is dense here, so no .toarray() conversion
        # is applied (the all-sparse case returned above).
        cross_r = [self._t_cross(t) for t in r]
        comp.expand_add(ns, len(k), k, cross_r, nr, res)
        return res

    # v[i] is filled by comp.group from an all-ones row; presumably the
    # number of entity rows referencing each attribute row -- confirm in comp.
    other = np.ones((1, ns))
    v = [np.zeros((1, n), dtype=float) for n in nr]
    comp.group(ns, len(k), 1, k, nr, other, v)

    if all_sparse:
        def scaled_self_cross(idx):
            # Scale the stored entries of table idx by sqrt(v[idx]) and
            # cross the result with itself.  The shape is passed explicitly
            # so that trailing empty rows/columns are not dropped by
            # coo_matrix's shape inference.
            table = r[idx]
            scaled = np.empty(table.size)
            comp.multiply_sparse(table.size, table.row, table.data,
                                 np.sqrt(v[idx]), scaled)
            return self._cross(
                sp.coo_matrix((scaled, (table.row, table.col)),
                              shape=table.shape))

        diag_part = scaled_self_cross(0)
        if ds > 0:
            m = np.zeros((nr[0], ds))
            comp.group_left(ns, ds, s, k[0], m)
            # part 2 and 3 are p.T and p
            p = self._cross(r[0], m)
            s_part = self._cross(self.ent_table)
            res = sp.vstack((np.hstack((s_part, p.T)),
                             sp.hstack((p, diag_part))))
        else:
            res = diag_part

        # multi-table join: append one block row/column per extra table
        for i in range(1, len(k)):
            ps = []
            if ds > 0:
                m = np.zeros((nr[i], ds))
                comp.group_left(ns, ds, s, k[i], m)
                ps.append(self._cross(r[i], m))
            # cp (KRi)
            diag_part = scaled_self_cross(i)
            expanded_i = r[i].tocsr()[k[i]]
            for j in range(i):
                ps.append(expanded_i.T.dot(r[j].tocsr()[k[j]]))
            res = sp.vstack((
                sp.hstack((res, sp.vstack([blk.T for blk in ps]))),
                sp.hstack(ps + [diag_part])))
    else:
        s = np.ascontiguousarray(s)
        r0_end = ds + dr[0]
        if self.second_order:
            shadow_s = self.shadow_ent_table
            shadow_r0 = self.shadow_att_table[0]
            nt = self.shape[0]
            res = np.empty((nt, nt))

            data = sp.diags(np.sqrt(v[0]), [0]) * r[0]
            res[ds:r0_end, ds:r0_end] = self._cross(data)
            if ds > 0:
                # p1
                m1 = np.zeros((nr[0], ds), dtype=float)
                comp.group_left(ns, ds, s, k[0], m1)
                res[ds:r0_end, :ds] = r[0].T.dot(m1)
                res[:ds, ds:r0_end] = res[ds:r0_end, :ds].T
                # s
                res[:ds, :ds] = self._cross(self.ent_table)

            r_p = base.data_interaction_rr(shadow_s, shadow_r0[k[0]])
            # p2
            res[:ds, r0_end:] = s.T.dot(r_p)
            res[r0_end:, :ds] = res[:ds, r0_end:].T
            # p3
            m = np.zeros((nr[0], shadow_s.shape[1]), dtype=float)
            comp.group_left(ns, shadow_s.shape[1], shadow_s, k[0], m)
            ksr = base.data_interaction_rr(m, shadow_r0)
            res[ds:r0_end, r0_end:] = self._cross(r[0], ksr)
            res[r0_end:, ds:r0_end] = res[ds:r0_end, r0_end:].T

            rrx = base.data_interaction_rr(shadow_r0, shadow_r0)
            ssx = base.data_interaction_rr(shadow_s, shadow_s)
            dss = shadow_s.shape[1] * shadow_s.shape[1]
            kss = np.zeros((nr[0], dss), dtype=float)
            comp.group_left(ns, dss, ssx, k[0], kss)
            res[r0_end:, r0_end:] = rrx.T.dot(kss).reshape(
                (r_p.shape[1], r_p.shape[1]))
        else:
            # offsets[i]:offsets[i + 1] is the row/column range of
            # attribute table i.  The result is sized for ALL attribute
            # tables so the multi-table loop below has room to write its
            # blocks, and the ranges stay defined when ds == 0.
            offsets = [ds]
            for width in dr:
                offsets.append(offsets[-1] + width)
            nt = offsets[-1]
            res = np.empty((nt, nt))

            data = sp.diags(np.sqrt(v[0]), [0]) * r[0]
            res[ds:r0_end, ds:r0_end] = self._cross(data)
            if ds > 0:
                m = np.zeros((nr[0], ds))
                comp.group_left(ns, ds, s, k[0], m)
                res[ds:r0_end, :ds] = self._cross(r[0], m)
                res[:ds, ds:r0_end] = res[ds:r0_end, :ds].T
                res[:ds, :ds] = self._cross(self.ent_table)

            # multi-table join
            for i in range(1, len(k)):
                ni1, ni2 = offsets[i], offsets[i + 1]
                if ds > 0:
                    m = np.zeros((nr[i], ds))
                    comp.group_left(ns, ds, s, k[i], m)
                    res[ni1:ni2, :ds] = self._cross(r[i], m)
                    res[:ds, ni1:ni2] = res[ni1:ni2, :ds].T

                # cp(KRi)
                data = np.empty(r[i].shape, order='C')
                comp.multiply(nr[i], dr[i], r[i], v[i], data)
                res[ni1:ni2, ni1:ni2] = self._cross(data)

                for j in range(i):
                    dj1, dj2 = offsets[j], offsets[j + 1]
                    if (ns * 1.0 / nr[j]) > (1 + nr[j] * 1.0 / dr[j]):
                        m = np.zeros((nr[i], nr[j]), order='C')
                        comp.group_k_by_k(nr[i], nr[j], ns, k[i], k[j], m)
                        # NOTE(review): m is allocated (nr[i], nr[j]), so
                        # m.T.dot(r[j]) only conforms when
                        # nr[i] == nr[j] -- verify against how
                        # comp.group_k_by_k lays out m.
                        res[ni1:ni2, dj1:dj2] = r[i].T.dot(m.T.dot(r[j]))
                    else:
                        res[ni1:ni2, dj1:dj2] = (
                            r[i][k[i]].T.dot(r[j][k[j]]))
                    res[dj1:dj2, ni1:ni2] = res[ni1:ni2, dj1:dj2].T

    if self.a != 1.0:
        res = res * np.power(self.a, 2)
    if self.b != 0.0:
        return NotImplemented
    return res
def _cross_prod_hess(self, w):
    """Calculate X * A * X.T.

    A is a diagonal matrix, and w is the array holding the diagonal of A.

    When ``self.trans`` is falsy, ``w`` is sliced per column block
    (``w[0:ds]`` for the entity table, then one slice per attribute table)
    and handed to ``self._t_cross_w``.  When ``self.trans`` is truthy,
    ``w`` is applied row-wise to the entity table and used as the grouping
    weight; the result is a square matrix whose rows/columns are the
    entity-table columns followed by the columns of each attribute table
    (sparse when every attribute table is a scipy sparse matrix, dense
    otherwise).

    Parameters
    ----------
    w : ndarray, shape (num_samples,)
        array holding the diagonal of A; converted with ``astype(float)``

    Returns
    -------
    res : X * A * X.T

    Examples
    --------
    T = Entity Table:
            [[ 1.  2.]
             [ 4.  3.]
             [ 5.  6.]
             [ 8.  7.]
             [ 9.  1.]]
        Attribute Table:
            [[ 1.1  2.2]
             [ 3.3  4.4]]
        K:
            [[0, 1, 1, 0, 1]]
    >>> T._cross_prod_hess(np.arange(5))
        [[ 582.,  276.,  174.,  248.],
         [ 276.,  232.,   78.,  118.],
         [ 174.,   78.,   66.,   90.],
         [ 248.,  118.,   90.,  124.]]
    """
    w = w.astype(float)
    s = self.ent_table
    r = self.att_table
    k = self.kfkds
    ns = k[0].shape[0]
    ds = s.shape[1]
    nr = [t.shape[0] for t in r]
    dr = [t.shape[1] for t in r]
    # Loop-invariant, so evaluate it once instead of once per table.
    all_sparse = all(map(sp.issparse, r))

    # offsets[i]:offsets[i + 1] is the column range of attribute table i.
    offsets = [ds]
    for width in dr:
        offsets.append(offsets[-1] + width)

    if not self.trans:
        if s.size > 0:
            res = self._t_cross_w(s, w[0:ds])
        else:
            res = np.zeros((ns, ns), dtype=float, order='C')
        cross_r = []
        for i, t in enumerate(r):
            part = self._t_cross_w(t, w[offsets[i]:offsets[i + 1]])
            cross_r.append(part.toarray() if all_sparse else part)
        comp.expand_add(ns, len(k), k, cross_r, nr, res)
        return res

    # Use the weights as 'other' so comp.group accumulates w per group.
    other = w.reshape((1, -1)).astype(float)
    w_col = w.reshape(-1, 1)
    s2 = w_col * np.array(s)
    v = [np.zeros((1, n), dtype=float) for n in nr]
    comp.group(ns, len(k), 1, k, nr, other, v)

    if all_sparse:
        def scaled_self_cross(idx):
            # Scale the stored entries of table idx by sqrt(v[idx]) and
            # cross the result with itself.  The shape is passed explicitly
            # so that trailing empty rows/columns are not dropped by
            # coo_matrix's shape inference.
            table = r[idx]
            scaled = np.empty(table.size)
            comp.multiply_sparse(table.size, table.row, table.data,
                                 np.sqrt(v[idx]), scaled)
            return self._cross(
                sp.coo_matrix((scaled, (table.row, table.col)),
                              shape=table.shape))

        diag_part = scaled_self_cross(0)
        if ds > 0:
            m = np.zeros((nr[0], ds))
            comp.group_left(ns, ds, s2, k[0], m)
            p = self._cross(r[0], m)
            s_part = self._cross(s, s2)
            res = sp.vstack((np.hstack((s_part, p.T)),
                             sp.hstack((p, diag_part))))
        else:
            res = diag_part

        # multi-table join: append one block row/column per extra table
        for i in range(1, len(k)):
            ps = []
            if ds > 0:
                m = np.zeros((nr[i], ds))
                comp.group_left(ns, ds, s2, k[i], m)
                ps.append(self._cross(r[i], m))
            diag_part = scaled_self_cross(i)
            expanded_i = r[i].tocsr()[k[i]]
            for j in range(i):
                ps.append(
                    expanded_i.T.dot(r[j].tocsr()[k[j]].multiply(w_col)))
            res = sp.vstack((
                sp.hstack((res, sp.vstack([blk.T for blk in ps]))),
                sp.hstack(ps + [diag_part])))
        return res

    # Dense case.  The block ranges come from the precomputed offsets so
    # they stay defined when the entity table has no columns (ds == 0).
    nt = offsets[-1]
    res = np.empty((nt, nt))

    if ds > 0:
        res[:ds, :ds] = self._cross(s, s2)

    for i in range(len(k)):
        ni1, ni2 = offsets[i], offsets[i + 1]
        if ds > 0:
            m = np.zeros((nr[i], ds))
            comp.group_left(ns, ds, s2, k[i], m)
            res[ni1:ni2, :ds] = self._cross(r[i], m)
            res[:ds, ni1:ni2] = res[ni1:ni2, :ds].T

        data = np.empty(r[i].shape, order='C')
        comp.multiply(nr[i], dr[i], r[i], v[i], data)
        res[ni1:ni2, ni1:ni2] = self._cross(data)

        for j in range(i):
            dj1, dj2 = offsets[j], offsets[j + 1]
            if (ns * 1.0 / nr[j]) > (1 + nr[j] * 1.0 / dr[j]):
                m = np.zeros((nr[i], nr[j]), order='C')
                # Weighted variant in comp.cpp: when counting the members
                # of each group, add w instead of 1.
                comp.group_k_by_k_w(nr[i], nr[j], ns, w, k[i], k[j], m)
                # NOTE(review): m is allocated (nr[i], nr[j]), so
                # m.T.dot(r[j]) only conforms when nr[i] == nr[j] --
                # verify against how comp.group_k_by_k_w lays out m.
                res[ni1:ni2, dj1:dj2] = r[i].T.dot(m.T.dot(r[j]))
            else:
                res[ni1:ni2, dj1:dj2] = (
                    w_col * np.array(r[i][k[i]])).T.dot(r[j][k[j]])
            res[dj1:dj2, ni1:ni2] = res[ni1:ni2, dj1:dj2].T
    return res