def calculateError(partition):
    """ Calculate the Frobenius norm of the difference between the tensor slices and the decomposed tensor. """
    ret = []
    rows = list(partition)
    normX = 0.0
    error = 0.0
    for row in rows:
        Xi = row[1]
        Ci = row[2]
        normX = normX + np.square(norm(Xi, 2))
        error = error + np.square(norm(Xi - kruskal_to_tensor([Ci, A, B]), 2))
        '''
        # element-wise alternative that avoids materializing the reconstructed slice
        (Ki, I, J) = Xi.shape
        for i in range(0, I):
            for j in range(0, J):
                for k in range(0, Ki):
                    sum = 0.0
                    for r in range(0, R):
                        sum = sum + A.item(i, r) * B.item(j, r) * Ci.item(k, r)
                    x = Xi.item((k, i, j))
                    error = error + np.square(sum) - (2.0 * sum * x)
                    normX = normX + np.square(x)
        '''
    ret.append(['error', error])
    ret.append(['normX', normX])
    return ret
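# Minimal driver-side sketch (hypothetical names): reduce the per-partition
# ['error', e] / ['normX', n] pairs emitted by calculateError into a single
# relative Frobenius error. Assumes `slicesRDD` is a Spark RDD of
# (label, Xi, Ci) rows and that A and B are visible on the workers, as above.
def relativeError(slicesRDD):
    pairs = slicesRDD.mapPartitions(calculateError).collect()
    error = sum(v for k, v in pairs if k == 'error')
    normX = sum(v for k, v in pairs if k == 'normX')
    # ||X - X_hat||_F / ||X||_F
    return np.sqrt(error) / np.sqrt(normX)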
def createTensorSlice(partition):
    """ Spark job to generate one noisy tensor slice per row and move it to HDFS. """
    ret = []
    rows = list(partition)
    rowCount = len(rows)
    stepSize = rowCount
    for row in rows:
        if c > 0:
            Ci = createCollinearMatrix(Ki, R, c)
        else:
            Ci = np.random.rand(Ki, R)
        Xi = kruskal_to_tensor([Ci, A, B])
        N1 = np.random.randn(Ki, I, J)
        N2 = np.random.randn(Ki, I, J)
        normXi = norm(Xi, 2)
        normN1 = norm(N1, 2)
        normN2 = norm(N2, 2)
        filename = 'X-' + str(row * Ki)
        for l1 in l1Range:
            for l2 in l2Range:
                add = '-C' + str(c) + '-L1_' + str(l1) + '-L2_' + str(l2) + '-' + str(globalN) + '/'
                newOutputDir = outputDir + add
                newHDFSDir = hdfsDir + add
                if l1 > 0:
                    # homoscedastic noise, scaled so it makes up ~l1 percent of the noisy slice's squared norm
                    Xi1 = Xi + math.pow((100.0 / l1) - 1, -0.5) * (normXi / normN1) * N1
                else:
                    Xi1 = Xi
                if l2 > 0:
                    # heteroscedastic noise, proportional to the (already noisy) data values
                    N2Xi1 = N2 * Xi1
                    Xi2 = Xi1 + math.pow((100.0 / l2) - 1, -0.5) * (norm(Xi1, 2) / norm(N2Xi1, 2)) * N2Xi1
                else:
                    Xi2 = Xi1
                np.save(newOutputDir + filename, Xi2)
                subprocess.call(['hadoop fs -moveFromLocal ' + newOutputDir + filename + '.npy ' + newHDFSDir], shell=True)
        ret.append(row)
    return ret
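# Standalone check (hypothetical sizes, not part of the Spark job) of the l1
# scaling used above: eta = ((100/l1) - 1)**-0.5 * (||X|| / ||N||) * N makes the
# added noise account for roughly l1 percent of the noisy slice's squared norm,
# since ||eta||^2 / (||X||^2 + ||eta||^2) = l1/100 when eta is ~orthogonal to X.
import numpy as np

l1 = 10.0                                  # target noise level in percent
X = np.random.rand(5, 20, 30)
N = np.random.randn(5, 20, 30)
eta = ((100.0 / l1) - 1) ** -0.5 * (np.linalg.norm(X) / np.linalg.norm(N)) * N
print(np.linalg.norm(eta) ** 2 / np.linalg.norm(X + eta) ** 2)  # close to 0.10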
def saveFactorMatrices(partition):
    """ Spark job to solve for and save each Ci factor matrix. """
    ret = []
    rows = list(partition)
    error = 0.0
    for row in rows:
        label = row[0]
        Xi = row[1]
        Ki = Xi.shape[0]
        dashIdx = label.rindex('-')
        dotIdx = label.rindex('.')
        labelId = int(label[dashIdx + 1:dotIdx])
        # solve for Ci
        Ci = np.zeros((Ki, R))
        ZiTZic = tensorOps.ZTZ(A, B)
        XiZic = np.dot(unfold(Xi, 0), khatri_rao([Ci, A, B], skip_matrix=0))
        if regularization > 0:
            ZiTZic = ZiTZic + regulParam * eye
        Ci = solve(ZiTZic.T, XiZic.T).T
        if outputDir != '':
            # save Ci
            filename = './Ci-' + str(labelId)
            np.save(filename, Ci)
            # save A & B once, with the first slice
            if labelId == 0:
                filename = './A'
                np.save(filename, A)
                filename = './B'
                np.save(filename, B)
        error = error + np.square(norm(Xi - kruskal_to_tensor([Ci, A, B]), 2))
    if outputDir != '':
        subprocess.call(['hadoop fs -moveFromLocal ' + './*.npy ' + outputDir], shell=True)
    ret.append(['error', error])
    return ret
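# Minimal sketch (hypothetical local paths and slice id): reload the factors
# saved by saveFactorMatrices and rebuild one slice for inspection. Imports
# mirror those used in the snippets here.
import numpy as np
from tensorly.kruskal import kruskal_to_tensor

A = np.load('./A.npy')
B = np.load('./B.npy')
Ci = np.load('./Ci-0.npy')               # the slice whose labelId is 0
Xi_hat = kruskal_to_tensor([Ci, A, B])   # reconstructed slice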
import numpy as np
from tensorly.kruskal import kruskal_to_tensor
from tensorly.tenalg import norm
from tensortools.cpfit import _compute_squared_recon_error_naive, _compute_squared_recon_error

# make factors
dims = [20, 30, 40]
ndim = len(dims)
rank = 5
factors = [np.random.randn(n, rank) for n in dims]

# make data
tensor = kruskal_to_tensor(factors)
norm_tensor = norm(tensor, 2)

# the two error routines should agree on the exact factors ...
err1 = _compute_squared_recon_error_naive(tensor, factors, norm_tensor)
err2 = _compute_squared_recon_error(tensor, factors, norm_tensor)

# ... and on a random (wrong) set of factors
f2 = [np.random.randn(n, rank) for n in dims]
err3 = _compute_squared_recon_error_naive(tensor, f2, norm_tensor)
err4 = _compute_squared_recon_error(tensor, f2, norm_tensor)

assert np.abs(err1 - err2) < 1e-6
assert np.abs(err3 - err4) < 1e-6
def singleModeALSstep(partition):
    """ Runs a single step of Alternating Least Squares to solve for one of
    A (mode = 1), B (mode = 2), or C (mode = 3). """
    ret = []
    rows = list(partition)
    ZiTZi = 0
    XiZi = 0
    error = 0.0
    for row in rows:
        label = row[0]
        Xi = row[1]
        Ki = Xi.shape[0]
        # Only select sketched rows when sketching is actually in effect; make sure
        # not to skip over a slice if we're calculating the error on the full tensor.
        if ((sketching > 0 and sketchingRate < 1.0) or (decompMode == 3 and errorCalcSketchingRate < 1)) and not (decompMode == 3 and errorCalcSketchingRate == 1) and not (decompMode == 3 and onUpdateWeightLoop):
            dashIdx = label.rindex('-')
            dotIdx = label.rindex('.')
            labelId = int(label[dashIdx + 1:dotIdx])
            minIndex = labelId
            maxIndex = labelId + Ki - 1
            # NOTE: this selects rows of C even when calculating the full error;
            # it is not clear those rows are actually used in that case.
            selectRowsC = sketchingRowsC[(sketchingRowsC >= minIndex) & (sketchingRowsC <= maxIndex)]
            selectRowsC = selectRowsC - minIndex
            if len(selectRowsC) == 0:
                continue

        # always solve for Ci first!
        Ci = np.zeros((Ki, R))
        if (decompMode < 3 and (sketching == 1 or sketching >= 3) and sketchingRate < 1.0) or (decompMode == 3 and 0 < errorCalcSketchingRate < 1) and not onUpdateWeightLoop:
            ZiTZic = tensorOps.ZTZ(A[sketchingRowsA, :], B[sketchingRowsB, :])
            XiZic = np.dot(unfold(Xi[:, sketchingRowsA, :][:, :, sketchingRowsB], 0),
                           khatri_rao([Ci, A[sketchingRowsA, :], B[sketchingRowsB, :]], skip_matrix=0))
        # no separate sketching == 2 branch is needed, since it matches the else
        else:
            ZiTZic = tensorOps.ZTZ(A, B)
            XiZic = np.dot(unfold(Xi, 0), khatri_rao([Ci, A, B], skip_matrix=0))
        if regularization > 0:
            ZiTZic = ZiTZic + regulParam * eye
        # a regularization == 2 variant would also need Ci here, which is not available yet
        Ci = solve(ZiTZic.T, XiZic.T).T

        if decompMode == 1:
            if (sketching == 1 or sketching >= 3) and sketchingRate < 1.0:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B[sketchingRowsB, :], Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(unfold(Xi[selectRowsC, :, :][:, :, sketchingRowsB], 1),
                                     khatri_rao([Ci[selectRowsC, :], A, B[sketchingRowsB, :]], skip_matrix=1))
            elif sketching == 2:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B, Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(unfold(Xi[selectRowsC, :, :], 1),
                                     khatri_rao([Ci[selectRowsC, :], A, B], skip_matrix=1))
            else:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B, Ci)
                XiZi = XiZi + np.dot(unfold(Xi, 1), khatri_rao([Ci, A, B], skip_matrix=1))
        elif decompMode == 2:
            if (sketching == 1 or sketching >= 3) and sketchingRate < 1.0:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A[sketchingRowsA, :], Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(unfold(Xi[selectRowsC, :, :][:, sketchingRowsA, :], 2),
                                     khatri_rao([Ci[selectRowsC, :], A[sketchingRowsA, :], B], skip_matrix=2))
            elif sketching == 2:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A, Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(unfold(Xi[selectRowsC, :, :], 2),
                                     khatri_rao([Ci[selectRowsC, :], A, B], skip_matrix=2))
            else:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A, Ci)
                XiZi = XiZi + np.dot(unfold(Xi, 2), khatri_rao([Ci, A, B], skip_matrix=2))
        elif decompMode == 3:
            if 0 < errorCalcSketchingRate < 1 and not onUpdateWeightLoop:
                error = error + np.square(norm(
                    Xi[selectRowsC, :, :][:, sketchingRowsA, :][:, :, sketchingRowsB]
                    - kruskal_to_tensor([Ci[selectRowsC, :], A[sketchingRowsA, :], B[sketchingRowsB, :]]), 2))
            elif sketching == 2:
                error = error + np.square(norm(
                    Xi[selectRowsC, :, :] - kruskal_to_tensor([Ci[selectRowsC, :], A, B]), 2))
            else:
                error = error + np.square(norm(Xi - kruskal_to_tensor([Ci, A, B]), 2))
        else:
            print('Unknown decomposition mode. Catastrophic error. Failing now...')

    if (len(rows) > 0) and (decompMode < 3):
        ret.append(['ZTZ', ZiTZi])
        ret.append(['XZ', XiZi])
    elif decompMode == 3:
        ret.append(['error', error])
    del ZiTZi, XiZi
    return ret
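# Minimal driver-side sketch (hypothetical names): sum the per-partition
# ['ZTZ', ...] and ['XZ', ...] partials emitted by singleModeALSstep for
# decompMode 1 or 2 and solve the normal equations for that mode's factor
# matrix, mirroring the worker-side solve for Ci. Assumes `slicesRDD` is the
# RDD of slices and that the globals used by singleModeALSstep are already set;
# numpy.linalg.solve stands in for the `solve` used in the worker code.
import numpy as np

def updateFactor(slicesRDD, regulParam=0.0):
    partials = slicesRDD.mapPartitions(singleModeALSstep).collect()
    ZTZ = sum(v for k, v in partials if k == 'ZTZ')    # R x R Gram matrix
    XZ = sum(v for k, v in partials if k == 'XZ')      # (I or J) x R
    if regulParam > 0:
        ZTZ = ZTZ + regulParam * np.eye(ZTZ.shape[0])
    # new A (decompMode == 1) or B (decompMode == 2)
    return np.linalg.solve(ZTZ.T, XZ.T).T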
def singleModeALSstep(partition):
    """ Runs a single step of Alternating Least Squares to solve for one of
    A (mode = 1), B (mode = 2), or C (mode = 3). """
    ret = []
    rows = list(partition)
    ZiTZi = 0
    XiZi = 0
    error = 0.0
    for row in rows:
        label = row[0]
        Xi = row[1]
        Ki = Xi.shape[0]
        if sketching > 0:
            dashIdx = label.rindex('-')
            dotIdx = label.rindex('.')
            labelId = int(label[dashIdx + 1:dotIdx])
            minIndex = labelId
            maxIndex = labelId + Ki - 1
            selectRowsC = sketchingRowsC[(sketchingRowsC >= minIndex) & (sketchingRowsC <= maxIndex)]
            selectRowsC = selectRowsC - minIndex
            if len(selectRowsC) == 0:
                continue

        # always solve for Ci first!
        Ci = np.zeros((Ki, R))
        if sketching == 1 or sketching == 3:
            ZiTZic = tensorOps.ZTZ(A[sketchingRowsA, :], B[sketchingRowsB, :])
            XiZic = np.dot(unfold(Xi[:, sketchingRowsA, :][:, :, sketchingRowsB], 0),
                           khatri_rao([Ci, A[sketchingRowsA, :], B[sketchingRowsB, :]], skip_matrix=0))
        # no separate sketching == 2 branch is needed, since it matches the else
        else:
            ZiTZic = tensorOps.ZTZ(A, B)
            XiZic = np.dot(unfold(Xi, 0), khatri_rao([Ci, A, B], skip_matrix=0))
        if regularization > 0:
            ZiTZic = ZiTZic + regulParam * eye
        # a regularization == 2 variant would also need Ci here, which is not available yet
        Ci = solve(ZiTZic.T, XiZic.T).T

        if decompMode == 1:
            if sketching == 1 or sketching == 3:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B[sketchingRowsB, :], Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(unfold(Xi[selectRowsC, :, :][:, :, sketchingRowsB], 1),
                                     khatri_rao([Ci[selectRowsC, :], A, B[sketchingRowsB, :]], skip_matrix=1))
            elif sketching == 2:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B, Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(unfold(Xi[selectRowsC, :, :], 1),
                                     khatri_rao([Ci[selectRowsC, :], A, B], skip_matrix=1))
            else:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B, Ci)
                XiZi = XiZi + np.dot(unfold(Xi, 1), khatri_rao([Ci, A, B], skip_matrix=1))
        elif decompMode == 2:
            if sketching == 1 or sketching == 3:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A[sketchingRowsA, :], Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(unfold(Xi[selectRowsC, :, :][:, sketchingRowsA, :], 2),
                                     khatri_rao([Ci[selectRowsC, :], A[sketchingRowsA, :], B], skip_matrix=2))
            elif sketching == 2:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A, Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(unfold(Xi[selectRowsC, :, :], 2),
                                     khatri_rao([Ci[selectRowsC, :], A, B], skip_matrix=2))
            else:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A, Ci)
                XiZi = XiZi + np.dot(unfold(Xi, 2), khatri_rao([Ci, A, B], skip_matrix=2))
        elif decompMode == 3:
            if sketching == 1 or sketching == 3:
                error = error + np.square(norm(
                    Xi[selectRowsC, :, :][:, sketchingRowsA, :][:, :, sketchingRowsB]
                    - kruskal_to_tensor([Ci[selectRowsC, :], A[sketchingRowsA, :], B[sketchingRowsB, :]]), 2))
            elif sketching == 2:
                error = error + np.square(norm(
                    Xi[selectRowsC, :, :] - kruskal_to_tensor([Ci[selectRowsC, :], A, B]), 2))
            else:
                error = error + np.square(norm(Xi - kruskal_to_tensor([Ci, A, B]), 2))
        else:
            print('Unknown decomposition mode. Catastrophic error. Failing now...')

    if (len(rows) > 0) and (decompMode < 3):
        ret.append(['ZTZ', ZiTZi])
        ret.append(['XZ', XiZi])
    elif decompMode == 3:
        ret.append(['error', error])
    del ZiTZi, XiZi
    return ret
def calculateErrorTensorly(tensor, A, B, C):
    """ Relative error: Frobenius norm of the residual divided by the Frobenius norm of the data. """
    return norm(tensor - kruskal_to_tensor([C, A, B]), 2) / calculateFNormXTensorly(tensor)
def _compute_squared_recon_error(tensor, kruskal_factors, norm_tensor):
    """ Computes the norm of the residuals divided by the norm of the data. """
    return tensorly.tenalg.norm(tensor - kruskal_to_tensor(kruskal_factors), 2) / norm_tensor
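# Small usage sketch: for an exact decomposition the relative error returned by
# _compute_squared_recon_error is ~0. Imports mirror the earlier test snippet;
# sizes and rank are hypothetical.
import numpy as np
import tensorly.tenalg
from tensorly.kruskal import kruskal_to_tensor
from tensorly.tenalg import norm

factors = [np.random.randn(n, 5) for n in (40, 20, 30)]
X = kruskal_to_tensor(factors)
print(_compute_squared_recon_error(X, factors, norm(X, 2)))  # ~0 up to rounding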