예제 #1
0
파일: CPD-MWU.py 프로젝트: kaggour/CPD-MWU
def calculateError(partition):
    """
    Accumulate squared-error statistics over one partition of tensor slices.

    Each element of ``partition`` is indexed as ``row[1]`` (the data slice
    ``Xi``) and ``row[2]`` (its factor matrix ``Ci``).  The slice is compared
    with its reconstruction ``kruskal_to_tensor([Ci, A, B])`` where ``A`` and
    ``B`` are module-level factor matrices.

    Returns a two-element list:

    * ``['error', e]`` -- sum over slices of ``norm(Xi - reconstruction, 2) ** 2``
    * ``['normX', n]`` -- sum over slices of ``norm(Xi, 2) ** 2``

    Note that both values are *squared* sums; no square root or ratio is
    taken here.
    """
    normX = 0.0
    error = 0.0
    # Iterate the partition directly: every slice is visited exactly once, so
    # there is no need to hold all of them in a list at the same time.
    for row in partition:
        Xi = row[1]
        Ci = row[2]
        normX = normX + np.square(norm(Xi, 2))
        error = error + np.square(norm(Xi - kruskal_to_tensor([Ci, A, B]), 2))
    return [['error', error], ['normX', normX]]
예제 #2
0
def createTensorSlice(partition):
    """
    Generate, perturb and store one synthetic tensor slice per partition entry.

    For every ``row`` in ``partition`` a factor matrix ``Ci`` is drawn
    (collinear via ``createCollinearMatrix`` when the module-level ``c`` is
    positive, uniform random otherwise) and the clean slice
    ``Xi = kruskal_to_tensor([Ci, A, B])`` is built.  For each pair
    ``(l1, l2)`` from ``l1Range`` x ``l2Range`` a noisy copy is produced:

    * ``l1 > 0`` adds ``(100 / l1 - 1) ** -0.5 * (|Xi| / |N1|) * N1``
    * ``l2 > 0`` adds ``(100 / l2 - 1) ** -0.5 * (|Xi1| / |N2 * Xi1|) * (N2 * Xi1)``

    where ``N1`` and ``N2`` are standard-normal tensors.  The result is saved
    as ``X-<row * Ki>.npy`` under ``outputDir`` + a per-setting sub-directory
    and then moved to the matching directory under ``hdfsDir`` with
    ``hadoop fs -moveFromLocal``.

    Relies on module-level ``c``, ``Ki``, ``R``, ``I``, ``J``, ``A``, ``B``,
    ``l1Range``, ``l2Range``, ``globalN``, ``outputDir`` and ``hdfsDir``.

    Returns the partition entries as a list.
    """
    rows = list(partition)

    for row in rows:
        if c > 0:
            Ci = createCollinearMatrix(Ki, R, c)
        else:
            Ci = np.random.rand(Ki, R)
        Xi = kruskal_to_tensor([Ci, A, B])
        N1 = np.random.randn(Ki, I, J)
        N2 = np.random.randn(Ki, I, J)
        normXi = norm(Xi, 2)
        normN1 = norm(N1, 2)

        filename = 'X-' + str(row * Ki)

        for l1 in l1Range:
            # The l1 perturbation does not depend on l2, so build it once per
            # l1.  100.0 forces true division: with integer levels, Python 2's
            # `100 / l1` would truncate and distort the noise scale.
            if l1 > 0:
                scale1 = math.pow((100.0 / l1) - 1, -0.5)
                Xi1 = Xi + scale1 * (normXi / normN1) * N1
            else:
                Xi1 = Xi

            for l2 in l2Range:
                add = ('-C' + str(c) + '-L1_' + str(l1) + '-L2_' + str(l2) +
                       '-' + str(globalN) + '/')
                newOutputDir = outputDir + add
                newHDFSDir = hdfsDir + add

                if l2 > 0:
                    N2Xi1 = N2 * Xi1
                    scale2 = math.pow((100.0 / l2) - 1, -0.5)
                    Xi2 = Xi1 + scale2 * (norm(Xi1, 2) / norm(N2Xi1, 2)) * N2Xi1
                else:
                    Xi2 = Xi1

                # np.save appends the '.npy' suffix to the given name.
                np.save(newOutputDir + filename, Xi2)
                # Pass an argument vector instead of a shell string so that
                # spaces or shell metacharacters in the directories cannot
                # split or alter the command.  As before, the exit status of
                # the move is not checked.
                subprocess.call([
                    'hadoop', 'fs', '-moveFromLocal',
                    newOutputDir + filename + '.npy', newHDFSDir
                ])

    return rows
예제 #3
0
def saveFactorMatrices(partition):
    """
    Spark job to solve for and save each Ci factor matrix.

    Each element of ``partition`` is indexed as ``row[0]`` (a label whose
    integer id sits between its last ``'-'`` and its last ``'.'``) and
    ``row[1]`` (the tensor slice ``Xi``).  ``Ci`` is obtained by solving the
    (optionally regularized) normal equations built from the module-level
    factor matrices ``A`` and ``B``.

    When the module-level ``outputDir`` is non-empty, every ``Ci`` is written
    to ``./Ci-<id>.npy`` (plus ``./A.npy`` and ``./B.npy`` for the slice whose
    id is 0) and, after the loop, all ``./*.npy`` files are moved to
    ``outputDir`` with ``hadoop fs -moveFromLocal``.

    Returns ``[['error', e]]`` where ``e`` is the sum over slices of
    ``norm(Xi - kruskal_to_tensor([Ci, A, B]), 2) ** 2``.
    """
    saving = outputDir != ''

    # The left-hand side of the normal equations depends only on A, B and the
    # regularization settings, so it is built once rather than once per slice.
    ZiTZic = tensorOps.ZTZ(A, B)
    if regularization > 0:
        ZiTZic = ZiTZic + regulParam * eye

    error = 0.0
    # Slices are visited exactly once, so the partition is streamed instead of
    # being copied into a list first.
    for row in partition:
        label = row[0]
        Xi = row[1]
        Ki = Xi.shape[0]
        labelId = int(label[label.rindex('-') + 1:label.rindex('.')])

        # solve for Ci; the zero matrix only fills the mode-0 position that
        # khatri_rao is told to skip.
        XiZic = np.dot(unfold(Xi, 0),
                       khatri_rao([np.zeros((Ki, R)), A, B], skip_matrix=0))
        Ci = solve(ZiTZic.T, XiZic.T).T

        if saving:
            np.save('./Ci-' + str(labelId), Ci)
            # A and B are shared by all slices; store them once, alongside
            # the slice with id 0.
            if labelId == 0:
                np.save('./A', A)
                np.save('./B', B)

        error = error + np.square(norm(Xi - kruskal_to_tensor([Ci, A, B]), 2))

    if saving:
        # The shell is needed here to expand the ./*.npy glob.  With
        # shell=True the command must be a single string.
        subprocess.call('hadoop fs -moveFromLocal ' + './*.npy ' + outputDir,
                        shell=True)

    return [['error', error]]
예제 #4
0
import numpy as np
from tensorly.kruskal import kruskal_to_tensor
from tensorly.tenalg import norm
from tensortools.cpfit import _compute_squared_recon_error_naive, _compute_squared_recon_error

# Problem setup: a 20 x 30 x 40 tensor generated from 5-column factors.
dims = [20, 30, 40]
ndim = len(dims)
rank = 5

# One random (size x rank) factor matrix per mode, and the tensor they span.
factors = [np.random.randn(size, rank) for size in dims]
tensor = kruskal_to_tensor(factors)
norm_tensor = norm(tensor, 2)

# Error of the generating factors, computed by both implementations.
err1 = _compute_squared_recon_error_naive(tensor, factors, norm_tensor)
err2 = _compute_squared_recon_error(tensor, factors, norm_tensor)

# Error of an unrelated random set of factors, computed by both again.
f2 = [np.random.randn(size, rank) for size in dims]
err3 = _compute_squared_recon_error_naive(tensor, f2, norm_tensor)
err4 = _compute_squared_recon_error(tensor, f2, norm_tensor)

# The naive and the optimized implementation must agree in both cases.
assert abs(err1 - err2) < 1e-6
assert abs(err3 - err4) < 1e-6
예제 #5
0
파일: CPD-MWU.py 프로젝트: kaggour/CPD-MWU
def singleModeALSstep(partition):
    """
    Runs a single step of Alternating Least Squares to solve for one of A (mode = 1),
    B (mode = 2), or C (mode = 3) matrix.

    Each element of ``partition`` is indexed as ``row[0]`` (a label whose
    integer offset sits between its last ``'-'`` and its last ``'.'``) and
    ``row[1]`` (the tensor slice ``Xi``).  For every slice the local factor
    ``Ci`` is solved first; what is accumulated afterwards depends on the
    module-level ``decompMode``:

    * 1 -- ``ZTZ`` / ``XZ`` terms for the update of ``A``
    * 2 -- ``ZTZ`` / ``XZ`` terms for the update of ``B``
    * 3 -- squared reconstruction error of the slice

    The module-level ``sketching``, ``sketchingRate``,
    ``errorCalcSketchingRate`` and ``onUpdateWeightLoop`` decide whether the
    sampled index sets ``sketchingRowsA`` / ``sketchingRowsB`` /
    ``sketchingRowsC`` are applied.

    Returns ``[['ZTZ', ...], ['XZ', ...]]`` for modes below 3 when the
    partition is non-empty, ``[['error', ...]]`` for mode 3, and ``[]``
    otherwise.
    """
    ret = []
    rows = list(partition)
    if not rows:
        # Nothing to accumulate; only the error mode reports a (zero) result.
        if decompMode == 3:
            ret.append(['error', 0.0])
        return ret

    ZiTZi = 0
    XiZi = 0
    error = 0.0

    # Every switch below depends only on module-level settings, so each is
    # evaluated once instead of once per slice.

    # Restrict a slice to the sampled rows of C (and skip slices that contain
    # none of them) -- but never while computing the error on the full tensor.
    selectRows = (((sketching > 0 and sketchingRate < 1.0)
                   or (decompMode == 3 and errorCalcSketchingRate < 1))
                  and not (decompMode == 3 and errorCalcSketchingRate == 1)
                  and not (decompMode == 3 and onUpdateWeightLoop))

    # Use the sampled rows of A and B.  `and` binds tighter than `or`, so
    # `not onUpdateWeightLoop` only qualifies the mode-3 alternative; the
    # parentheses make the original grouping explicit.
    sketchAB = ((decompMode < 3 and (sketching == 1 or sketching >= 3)
                 and sketchingRate < 1.0)
                or ((decompMode == 3 and 0 < errorCalcSketchingRate < 1)
                    and not onUpdateWeightLoop))

    if sketchAB:
        Ask = A[sketchingRowsA, :]
        Bsk = B[sketchingRowsB, :]
    else:
        Ask = A
        Bsk = B

    # Left-hand side of the Ci normal equations: identical for every slice.
    ZiTZic = tensorOps.ZTZ(Ask, Bsk)
    if regularization > 0:
        ZiTZic = ZiTZic + regulParam * eye

    for row in rows:
        label = row[0]
        Xi = row[1]
        Ki = Xi.shape[0]

        if selectRows:
            # The label offset is the global C index of this slice's first row.
            minIndex = int(label[label.rindex('-') + 1:label.rindex('.')])
            maxIndex = minIndex + Ki - 1
            inSlice = (sketchingRowsC >= minIndex) & (sketchingRowsC <= maxIndex)
            selectRowsC = sketchingRowsC[inSlice] - minIndex
            if len(selectRowsC) == 0:
                continue
        else:
            # Without row selection every row of the slice takes part.  The
            # original left selectRowsC unbound here, so the `sketching == 2`
            # paths below raised UnboundLocalError (e.g. full error
            # calculation in mode 3).
            # NOTE(review): confirm that "all rows" is the intended meaning.
            selectRowsC = np.arange(Ki)

        # always solve for Ci first!  The zero matrix only fills the mode-0
        # position that khatri_rao is told to skip.
        if sketchAB:
            XiFit = Xi[:, sketchingRowsA, :][:, :, sketchingRowsB]
        else:
            XiFit = Xi
        XiZic = np.dot(unfold(XiFit, 0),
                       khatri_rao([np.zeros((Ki, R)), Ask, Bsk], skip_matrix=0))
        Ci = solve(ZiTZic.T, XiZic.T).T

        # Rows of C (and the matching rows of Xi) used for the accumulation.
        if sketchAB or sketching == 2:
            Cuse = Ci[selectRowsC, :]
            Xuse = Xi[selectRowsC, :, :]
        else:
            Cuse = Ci
            Xuse = Xi

        if decompMode == 1:
            Xm = Xuse[:, :, sketchingRowsB] if sketchAB else Xuse
            ZiTZi = ZiTZi + tensorOps.ZTZ(Bsk, Cuse)
            XiZi = XiZi + np.dot(unfold(Xm, 1),
                                 khatri_rao([Cuse, A, Bsk], skip_matrix=1))
        elif decompMode == 2:
            Xm = Xuse[:, sketchingRowsA, :] if sketchAB else Xuse
            ZiTZi = ZiTZi + tensorOps.ZTZ(Ask, Cuse)
            XiZi = XiZi + np.dot(unfold(Xm, 2),
                                 khatri_rao([Cuse, Ask, B], skip_matrix=2))
        elif decompMode == 3:
            if sketchAB:
                Xm = Xuse[:, sketchingRowsA, :][:, :, sketchingRowsB]
            else:
                Xm = Xuse
            error = error + np.square(
                norm(Xm - kruskal_to_tensor([Cuse, Ask, Bsk]), 2))
        else:
            # Parenthesized form prints the same text on Python 2 and 3.
            print('Unknown decomposition mode. Catastrophic error. Failing now...')

    if decompMode < 3:
        ret.append(['ZTZ', ZiTZi])
        ret.append(['XZ', XiZi])
    elif decompMode == 3:
        ret.append(['error', error])
    return ret
예제 #6
0
def singleModeALSstep(partition):
    """
    Runs a single step of Alternating Least Squares to solve for one of A (mode = 1),
    B (mode = 2), or C (mode = 3) matrix.

    Each element of ``partition`` is indexed as ``row[0]`` (a label whose
    integer offset sits between its last ``'-'`` and its last ``'.'``) and
    ``row[1]`` (the tensor slice ``Xi``).  For every slice the local factor
    ``Ci`` is solved first; what is accumulated afterwards depends on the
    module-level ``decompMode``:

    * 1 -- ``ZTZ`` / ``XZ`` terms for the update of ``A``
    * 2 -- ``ZTZ`` / ``XZ`` terms for the update of ``B``
    * 3 -- squared reconstruction error of the slice

    The module-level ``sketching`` selects the sampling applied:

    * 1 or 3 -- sampled rows of ``A``, ``B`` and ``C``
    * 2      -- sampled rows of ``C`` only
    * other  -- no sampling in the accumulation (any positive value still
      skips slices that contain no sampled row of ``C``)

    Returns ``[['ZTZ', ...], ['XZ', ...]]`` for modes below 3 when the
    partition is non-empty, ``[['error', ...]]`` for mode 3, and ``[]``
    otherwise.
    """
    ret = []
    rows = list(partition)
    if not rows:
        # Nothing to accumulate; only the error mode reports a (zero) result.
        if decompMode == 3:
            ret.append(['error', 0.0])
        return ret

    ZiTZi = 0
    XiZi = 0
    error = 0.0

    # The sampling switches depend only on module-level settings, so they and
    # everything derived from A and B alone are prepared once, not per slice.
    sketchAB = sketching == 1 or sketching == 3
    sketchC = sketchAB or sketching == 2

    if sketchAB:
        Ask = A[sketchingRowsA, :]
        Bsk = B[sketchingRowsB, :]
    else:
        Ask = A
        Bsk = B

    # Left-hand side of the Ci normal equations: identical for every slice.
    ZiTZic = tensorOps.ZTZ(Ask, Bsk)
    if regularization > 0:
        ZiTZic = ZiTZic + regulParam * eye

    for row in rows:
        label = row[0]
        Xi = row[1]
        Ki = Xi.shape[0]

        if sketching > 0:
            # The label offset is the global C index of this slice's first row.
            minIndex = int(label[label.rindex('-') + 1:label.rindex('.')])
            maxIndex = minIndex + Ki - 1
            inSlice = (sketchingRowsC >= minIndex) & (sketchingRowsC <= maxIndex)
            selectRowsC = sketchingRowsC[inSlice] - minIndex
            if len(selectRowsC) == 0:
                continue

        # always solve for Ci first!  The zero matrix only fills the mode-0
        # position that khatri_rao is told to skip.
        if sketchAB:
            XiFit = Xi[:, sketchingRowsA, :][:, :, sketchingRowsB]
        else:
            XiFit = Xi
        XiZic = np.dot(unfold(XiFit, 0),
                       khatri_rao([np.zeros((Ki, R)), Ask, Bsk], skip_matrix=0))
        Ci = solve(ZiTZic.T, XiZic.T).T

        # Rows of C (and the matching rows of Xi) used for the accumulation.
        if sketchC:
            Cuse = Ci[selectRowsC, :]
            Xuse = Xi[selectRowsC, :, :]
        else:
            Cuse = Ci
            Xuse = Xi

        if decompMode == 1:
            Xm = Xuse[:, :, sketchingRowsB] if sketchAB else Xuse
            ZiTZi = ZiTZi + tensorOps.ZTZ(Bsk, Cuse)
            XiZi = XiZi + np.dot(unfold(Xm, 1),
                                 khatri_rao([Cuse, A, Bsk], skip_matrix=1))
        elif decompMode == 2:
            Xm = Xuse[:, sketchingRowsA, :] if sketchAB else Xuse
            ZiTZi = ZiTZi + tensorOps.ZTZ(Ask, Cuse)
            XiZi = XiZi + np.dot(unfold(Xm, 2),
                                 khatri_rao([Cuse, Ask, B], skip_matrix=2))
        elif decompMode == 3:
            if sketchAB:
                Xm = Xuse[:, sketchingRowsA, :][:, :, sketchingRowsB]
            else:
                Xm = Xuse
            error = error + np.square(
                norm(Xm - kruskal_to_tensor([Cuse, Ask, Bsk]), 2))
        else:
            # Parenthesized form prints the same text on Python 2 and 3.
            print('Unknown decomposition mode. Catastrophic error. Failing now...')

    if decompMode < 3:
        ret.append(['ZTZ', ZiTZi])
        ret.append(['XZ', XiZi])
    elif decompMode == 3:
        ret.append(['error', error])
    return ret
예제 #7
0
def calculateErrorTensorly(tensor, A, B, C):
    """
    Return the reconstruction error of the factors relative to the data.

    The tensor is rebuilt with ``kruskal_to_tensor([C, A, B])``; the order-2
    norm of the residual is divided by ``calculateFNormXTensorly(tensor)``.
    """
    reconstruction = kruskal_to_tensor([C, A, B])
    residualNorm = norm(tensor - reconstruction, 2)
    return residualNorm / calculateFNormXTensorly(tensor)
예제 #8
0
def _compute_squared_recon_error(tensor, kruskal_factors, norm_tensor):
    """Return the norm of the residual divided by the norm of the data.

    The residual is ``tensor - kruskal_to_tensor(kruskal_factors)`` and
    ``norm_tensor`` is the caller-supplied norm of ``tensor``.  Despite the
    function name, neither quantity is squared here.
    """
    residual = tensor - kruskal_to_tensor(kruskal_factors)
    residual_norm = tensorly.tenalg.norm(residual, 2)
    return residual_norm / norm_tensor