def saveFactorMatrices(partition):
    """
    Spark job to solve for and save each Ci factor matrix.

    For every row of the partition, ``row[1]`` (``Xi``) is used to solve the
    least-squares system for that slice's ``Ci`` while the module-level ``A``
    and ``B`` stay fixed.  ``labelId`` is the integer found between the last
    '-' and the last '.' of the row label ``row[0]``.

    When ``outputDir`` is not the empty string, each ``Ci`` is written to
    ``./Ci-<labelId>.npy`` (and ``A`` / ``B`` to ``./A.npy`` / ``./B.npy`` for
    the row whose ``labelId`` is 0); afterwards the files written by this call
    are moved to ``outputDir`` with ``hadoop fs -moveFromLocal``.

    Reads the module-level names R, A, B, regularization, regulParam, eye and
    outputDir.

    Returns:
        ``[['error', e]]`` where ``e`` is the sum, over the rows of the
        partition, of ``np.square(norm(Xi - kruskal_to_tensor([Ci, A, B]), 2))``.

    Raises:
        ValueError: if a row label has no '-' or '.', or the text between
            them is not an integer.
    """
    rows = list(partition)
    error = 0.0
    savedFiles = []

    # The left-hand side of the normal equations depends only on A, B and the
    # regularisation settings, never on the row, so it is built once (lazily,
    # so an empty partition does not touch A/B at all).
    ZiTZic = None

    for row in rows:
        label = row[0]
        Xi = row[1]
        Ki = Xi.shape[0]
        dashIdx = label.rindex('-')
        dotIdx = label.rindex('.')
        labelId = int(label[dashIdx + 1:dotIdx])

        if ZiTZic is None:
            ZiTZic = tensorOps.ZTZ(A, B)
            if regularization > 0:
                ZiTZic = ZiTZic + regulParam * eye

        # solve for Ci
        # The zero matrix only fills position 0 of the factor list, which
        # skip_matrix=0 asks khatri_rao to leave out.
        Ci = np.zeros((Ki, R))
        XiZic = np.dot(unfold(Xi, 0), khatri_rao([Ci, A, B], skip_matrix=0))
        Ci = solve(ZiTZic.T, XiZic.T).T

        if outputDir != '':
            # save Ci (explicit '.npy' so the recorded name is exactly the
            # file np.save creates)
            toSave = [('./Ci-' + str(labelId) + '.npy', Ci)]

            # save A & B
            if labelId == 0:
                toSave.append(('./A.npy', A))
                toSave.append(('./B.npy', B))

            for filename, matrix in toSave:
                np.save(filename, matrix)
                if filename not in savedFiles:
                    savedFiles.append(filename)

        error = error + np.square(norm(Xi - kruskal_to_tensor([Ci, A, B]), 2))

    if outputDir != '' and savedFiles:
        # Pass an argument vector instead of a shell string: outputDir is not
        # interpreted by a shell (spaces / metacharacters are safe), and only
        # the files written above are moved rather than every ./*.npy that
        # happens to be in the working directory.
        subprocess.call(['hadoop', 'fs', '-moveFromLocal'] + savedFiles + [outputDir])

    return [['error', error]]
def rowNormCMatrix(partition):
    """
    Calculate squared row norm of C factor matrices

    For every row of the partition, ``row[1]`` (``Xi``) is used to solve for
    that slice's ``Ci`` with the module-level ``A`` and ``B`` held fixed, and
    the squared Euclidean norm of each row of ``Ci`` is computed.

    Reads the module-level names R, A, B, regularization, regulParam and eye.

    Returns:
        A list with one ``[labelId, rowNormCi]`` entry per partition row, where
        ``labelId`` is the integer between the last '-' and the last '.' of
        the row label ``row[0]`` and ``rowNormCi`` is
        ``np.square(np.linalg.norm(Ci, axis=1))``.  Empty for an empty
        partition.

    Raises:
        ValueError: if a row label has no '-' or '.', or the text between
            them is not an integer.
    """
    rows = list(partition)
    if not rows:
        return []

    # The left-hand side of the normal equations depends only on A, B and the
    # regularisation settings, so compute it once instead of once per row.
    ZiTZic = tensorOps.ZTZ(A, B)
    if regularization > 0:
        ZiTZic = ZiTZic + regulParam * eye
    lhs = ZiTZic.T

    ret = []
    for row in rows:
        label = row[0]
        Xi = row[1]
        Ki = Xi.shape[0]

        # The zero matrix only fills position 0 of the factor list, which
        # skip_matrix=0 asks khatri_rao to leave out.
        Ci = np.zeros((Ki, R))
        XiZic = np.dot(unfold(Xi, 0), khatri_rao([Ci, A, B], skip_matrix=0))
        Ci = solve(lhs, XiZic.T).T

        dashIdx = label.rindex('-')
        dotIdx = label.rindex('.')
        labelId = int(label[dashIdx + 1:dotIdx])

        rowNormCi = np.square(np.linalg.norm(Ci, axis=1))
        ret.append([labelId, rowNormCi])
    return ret
def singleModeALSstep(partition):
    """
    Runs a single step of Alternating Least Squares to solve for one of A (mode = 1),
    B (mode = 2), or C (mode = 3) matrix.

    Each row of the partition is ``(label, Xi)``.  For every row the slice's
    ``Ci`` is solved first (with the module-level ``A`` and ``B`` fixed), then,
    depending on the module-level ``decompMode``:

    * 1 or 2: the row's contribution to the normal equations for A or B is
      added to the running ``ZiTZi`` / ``XiZi`` accumulators;
    * 3: the row's squared reconstruction error is added to ``error``;
    * anything else: a message is printed and the row contributes nothing.

    When row sampling is active, the rows of this slice listed in
    ``sketchingRowsC`` are selected (the slice's first global row index is the
    integer between the last '-' and the last '.' of the label); a slice with
    no selected rows is skipped entirely.

    Reads the module-level names decompMode, sketching, sketchingRate,
    errorCalcSketchingRate, onUpdateWeightLoop, sketchingRowsA, sketchingRowsB,
    sketchingRowsC, R, A, B, regularization, regulParam and eye.

    Returns:
        ``[['ZTZ', ZiTZi], ['XZ', XiZi]]`` when the partition is non-empty and
        ``decompMode < 3`` (both accumulators are still the integer 0 if every
        row was skipped); ``[['error', error]]`` when ``decompMode == 3``;
        otherwise an empty list.
    """
    # NOTE(review): this file contains a second ``def singleModeALSstep`` further
    # down.  If both definitions live in the same module the later one replaces
    # this one at import time -- confirm which version is meant to be active.
    ret = []
    rows = list(partition)
    ZiTZi = 0
    XiZi = 0
    error = 0.0

    for row in rows:
        label = row[0]
        Xi = row[1]
        Ki = Xi.shape[0]

        # None means "no row subset was chosen for this slice: use every row".
        # Previously the name was simply left unbound in that case, and the
        # ``sketching == 2`` branches below raised NameError (e.g. decompMode
        # 1/2 with sketchingRate >= 1, or decompMode 3 with
        # errorCalcSketchingRate == 1 or onUpdateWeightLoop set).
        selectRowsC = None

        # make sure not to skip over slice if we're calculating error on full tensor
        if (((sketching > 0 and sketchingRate < 1.0)
                or (decompMode == 3 and errorCalcSketchingRate < 1))
                and not (decompMode == 3 and errorCalcSketchingRate == 1)
                and not (decompMode == 3 and onUpdateWeightLoop)):
            dashIdx = label.rindex('-')
            dotIdx = label.rindex('.')
            labelId = int(label[dashIdx + 1:dotIdx])
            minIndex = labelId
            maxIndex = labelId + Ki - 1
            # NOTE(dalia): is this a problem?  This will select rows of C when
            # calculating the full error, but it is not clear these rows are used.
            selectRowsC = sketchingRowsC[(sketchingRowsC >= minIndex) & (sketchingRowsC <= maxIndex)]
            # convert global row indices into indices local to this slice
            selectRowsC = selectRowsC - minIndex
            if len(selectRowsC) == 0:
                continue

        # always solve for Ci first!
        # The zero matrix only fills position 0 of the factor list, which
        # skip_matrix=0 asks khatri_rao to leave out.
        Ci = np.zeros((Ki, R))
        # ``and`` binds tighter than ``or``: ``not onUpdateWeightLoop`` only
        # qualifies the decompMode == 3 alternative.  The parentheses below
        # spell out that existing precedence; they do not change it.
        if ((decompMode < 3 and (sketching == 1 or sketching >= 3) and sketchingRate < 1.0)
                or ((decompMode == 3 and 0 < errorCalcSketchingRate < 1)
                    and not onUpdateWeightLoop)):
            ZiTZic = tensorOps.ZTZ(A[sketchingRowsA, :], B[sketchingRowsB, :])
            XiZic = np.dot(
                unfold(Xi[:, sketchingRowsA, :][:, :, sketchingRowsB], 0),
                khatri_rao([Ci, A[sketchingRowsA, :], B[sketchingRowsB, :]], skip_matrix=0))
        # don't need a sketching == 2, since else is the same
        else:
            ZiTZic = tensorOps.ZTZ(A, B)
            XiZic = np.dot(unfold(Xi, 0), khatri_rao([Ci, A, B], skip_matrix=0))
        if regularization > 0:
            ZiTZic = ZiTZic + regulParam * eye
        Ci = solve(ZiTZic.T, XiZic.T).T

        if decompMode == 1:
            if (sketching == 1 or sketching >= 3) and sketchingRate < 1.0:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B[sketchingRowsB, :], Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :][:, :, sketchingRowsB], 1),
                    khatri_rao([Ci[selectRowsC, :], A, B[sketchingRowsB, :]], skip_matrix=1))
            elif sketching == 2 and selectRowsC is not None:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B, Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :], 1),
                    khatri_rao([Ci[selectRowsC, :], A, B], skip_matrix=1))
            else:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B, Ci)
                XiZi = XiZi + np.dot(unfold(Xi, 1), khatri_rao([Ci, A, B], skip_matrix=1))
        elif decompMode == 2:
            if (sketching == 1 or sketching >= 3) and sketchingRate < 1.0:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A[sketchingRowsA, :], Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :][:, sketchingRowsA, :], 2),
                    khatri_rao([Ci[selectRowsC, :], A[sketchingRowsA, :], B], skip_matrix=2))
            elif sketching == 2 and selectRowsC is not None:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A, Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :], 2),
                    khatri_rao([Ci[selectRowsC, :], A, B], skip_matrix=2))
            else:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A, Ci)
                XiZi = XiZi + np.dot(unfold(Xi, 2), khatri_rao([Ci, A, B], skip_matrix=2))
        elif decompMode == 3:
            if 0 < errorCalcSketchingRate < 1 and not onUpdateWeightLoop:
                # error on the sampled sub-tensor only
                error = error + np.square(norm(
                    Xi[selectRowsC, :, :][:, sketchingRowsA, :][:, :, sketchingRowsB]
                    - kruskal_to_tensor([Ci[selectRowsC, :],
                                         A[sketchingRowsA, :],
                                         B[sketchingRowsB, :]]), 2))
            elif sketching == 2 and selectRowsC is not None:
                error = error + np.square(norm(
                    Xi[selectRowsC, :, :]
                    - kruskal_to_tensor([Ci[selectRowsC, :], A, B]), 2))
            else:
                # error on the full slice
                error = error + np.square(norm(Xi - kruskal_to_tensor([Ci, A, B]), 2))
        else:
            # NOTE(review): despite the wording nothing is raised here; the row
            # is simply ignored.  Kept as-is so callers see no new exception.
            print('Unknown decomposition mode. Catastrophic error. Failing now...')

    if (len(rows) > 0) and (decompMode < 3):
        ret.append(['ZTZ', ZiTZi])
        ret.append(['XZ', XiZi])
    elif decompMode == 3:
        ret.append(['error', error])
    return ret
def singleModeALSstep(partition):
    """
    Run one Alternating Least Squares step over a partition of tensor slices.

    Each partition entry is ``(label, Xi)``.  The slice's ``Ci`` is always
    solved first; then, according to the module-level ``decompMode``, the
    slice contributes to the normal equations for A (1) or B (2), or to the
    squared reconstruction error (3).  Any other mode prints a message and the
    slice contributes nothing.

    When ``sketching > 0`` only the rows of the slice listed in
    ``sketchingRowsC`` take part (the slice's first global row index is the
    integer between the last '-' and the last '.' of the label), and a slice
    with no listed rows is skipped.  ``sketching`` 1 or 3 additionally
    restricts A and B to ``sketchingRowsA`` / ``sketchingRowsB``.

    Returns ``[['ZTZ', ...], ['XZ', ...]]`` for a non-empty partition with
    ``decompMode < 3``, ``[['error', ...]]`` for ``decompMode == 3``, and an
    empty list otherwise.
    """
    # NOTE(review): an earlier ``def singleModeALSstep`` exists in this file; if
    # both are in the same module, this later definition is the one that is used.
    slices = list(partition)
    gramSum = 0
    rhsSum = 0
    sqError = 0.0

    for entry in slices:
        name = entry[0]
        Xi = entry[1]
        Ki = Xi.shape[0]

        if sketching > 0:
            dashPos = name.rindex('-')
            dotPos = name.rindex('.')
            firstRow = int(name[dashPos + 1:dotPos])
            lastRow = firstRow + Ki - 1
            inSlice = (sketchingRowsC >= firstRow) & (sketchingRowsC <= lastRow)
            # indices of the sampled rows, relative to the start of this slice
            selectRowsC = sketchingRowsC[inSlice] - firstRow
            if len(selectRowsC) == 0:
                continue

        # True when A and B are restricted to their sampled rows.
        abSketched = sketching in (1, 3)

        # ---- solve for Ci (always done first) ----
        # Position 0 of the factor list is left out by skip_matrix=0; the
        # zero matrix is only a placeholder for it.
        placeholder = np.zeros((Ki, R))
        if abSketched:
            Ause = A[sketchingRowsA, :]
            Buse = B[sketchingRowsB, :]
            Xab = Xi[:, sketchingRowsA, :][:, :, sketchingRowsB]
        else:
            # sketching == 2 needs no special case here: it uses full A and B
            Ause = A
            Buse = B
            Xab = Xi
        normalMat = tensorOps.ZTZ(Ause, Buse)
        rhs = np.dot(unfold(Xab, 0),
                     khatri_rao([placeholder, Ause, Buse], skip_matrix=0))
        if regularization > 0:
            normalMat = normalMat + regulParam * eye
        Ci = solve(normalMat.T, rhs.T).T

        if decompMode not in (1, 2, 3):
            print('Unknown decomposition mode. Catastrophic error. Failing now...')
            continue

        # Rows of Ci / Xi that take part in the update or error term.
        if abSketched or sketching == 2:
            Cuse = Ci[selectRowsC, :]
            Xrows = Xi[selectRowsC, :, :]
        else:
            Cuse = Ci
            Xrows = Xi

        if decompMode == 1:
            # update for A: B may be row-sampled, A is used in full
            Xpart = Xrows[:, :, sketchingRowsB] if abSketched else Xrows
            gramSum = gramSum + tensorOps.ZTZ(Buse, Cuse)
            rhsSum = rhsSum + np.dot(
                unfold(Xpart, 1),
                khatri_rao([Cuse, A, Buse], skip_matrix=1))
        elif decompMode == 2:
            # update for B: A may be row-sampled, B is used in full
            Xpart = Xrows[:, sketchingRowsA, :] if abSketched else Xrows
            gramSum = gramSum + tensorOps.ZTZ(Ause, Cuse)
            rhsSum = rhsSum + np.dot(
                unfold(Xpart, 2),
                khatri_rao([Cuse, Ause, B], skip_matrix=2))
        else:
            # decompMode == 3: squared reconstruction error of the slice
            if abSketched:
                target = Xrows[:, sketchingRowsA, :][:, :, sketchingRowsB]
            else:
                target = Xrows
            residual = target - kruskal_to_tensor([Cuse, Ause, Buse])
            sqError = sqError + np.square(norm(residual, 2))

    if slices and decompMode < 3:
        return [['ZTZ', gramSum], ['XZ', rhsSum]]
    if decompMode == 3:
        return [['error', sqError]]
    return []