Example #1
0
def saveFactorMatrices(partition):
    """
    Spark job to solve for and save each Ci factor matrix.

    Every row of ``partition`` is read as ``(label, Xi)``.  The integer found
    between the last '-' and the last '.' of ``label`` is used as the id in
    the saved file name.  For each row, Ci is obtained from
    ``solve(ZiTZic.T, XiZic.T).T`` using the module-level factor matrices
    ``A`` and ``B`` (plus ``regulParam * eye`` when ``regularization > 0``).

    When the module-level ``outputDir`` is not empty, Ci is written to
    ``./Ci-<id>.npy``; the row whose id is 0 also writes ``./A.npy`` and
    ``./B.npy``.  After the loop, the files written by *this* call are moved
    to ``outputDir`` with ``hadoop fs -moveFromLocal``.

    Returns ``[['error', error]]`` where ``error`` is the sum over rows of
    ``np.square(norm(Xi - kruskal_to_tensor([Ci, A, B]), 2))``.
    """
    error = 0.0
    # Local .npy files produced by this task; only these get uploaded.
    savedFiles = []

    for row in partition:
        label = row[0]
        Xi = row[1]
        Ki = Xi.shape[0]
        dashIdx = label.rindex('-')
        dotIdx = label.rindex('.')
        labelId = int(label[dashIdx + 1:dotIdx])

        # solve for Ci
        # The zero matrix only fills slot 0 of the list handed to
        # khatri_rao; that slot is the one excluded via skip_matrix=0.
        Ci = np.zeros((Ki, R))
        # NOTE(review): ZTZ(A, B) and the Khatri-Rao product do not use
        # anything row-specific; they could be hoisted out of the loop if
        # both helpers are side-effect free -- confirm before changing.
        ZiTZic = tensorOps.ZTZ(A, B)
        XiZic = np.dot(unfold(Xi, 0), khatri_rao([Ci, A, B], skip_matrix=0))
        if regularization > 0:
            ZiTZic = ZiTZic + regulParam * eye
        Ci = solve(ZiTZic.T, XiZic.T).T

        if outputDir != '':
            # save Ci (explicit .npy suffix: np.save would append it anyway,
            # and we need the real on-disk name for the upload below)
            filename = './Ci-' + str(labelId) + '.npy'
            np.save(filename, Ci)
            savedFiles.append(filename)

            # save A & B exactly once, from the row with id 0
            if labelId == 0:
                for filename, matrix in (('./A.npy', A), ('./B.npy', B)):
                    np.save(filename, matrix)
                    savedFiles.append(filename)

        error = error + np.square(norm(Xi - kruskal_to_tensor([Ci, A, B]), 2))

    if savedFiles:
        # Upload only what this task wrote.  A './*.npy' shell glob would
        # also grab files that other tasks sharing the working directory are
        # still writing, and a shell string breaks on an outputDir that
        # contains spaces or shell metacharacters.
        uniqueFiles = sorted(set(savedFiles))
        subprocess.call(['hadoop', 'fs', '-moveFromLocal']
                        + uniqueFiles + [outputDir])

    return [['error', error]]
Example #2
0
def rowNormCMatrix(partition):
    """
    Calculate squared row norm of C factor matrices

    Every row of ``partition`` is read as ``(label, Xi)``.  For each row, Ci
    is obtained from ``solve(ZiTZic.T, XiZic.T).T`` using the module-level
    factor matrices ``A`` and ``B`` (plus ``regulParam * eye`` when
    ``regularization > 0``).

    Returns a list with one ``[labelId, rowNormCi]`` pair per input row, where
    ``labelId`` is the integer between the last '-' and the last '.' of the
    label and ``rowNormCi`` holds the squared 2-norm of every row of Ci.
    An empty partition yields an empty list.
    """
    ret = []
    for row in partition:
        label = row[0]
        Xi = row[1]
        Ki = Xi.shape[0]

        # The zero matrix only fills slot 0 of the list handed to
        # khatri_rao; that slot is the one excluded via skip_matrix=0.
        Ci = np.zeros((Ki, R))
        ZiTZic = tensorOps.ZTZ(A, B)
        XiZic = np.dot(unfold(Xi, 0), khatri_rao([Ci, A, B], skip_matrix=0))
        if regularization > 0:
            ZiTZic = ZiTZic + regulParam * eye
        Ci = solve(ZiTZic.T, XiZic.T).T

        # id encoded in the label, between the last '-' and the last '.'
        dashIdx = label.rindex('-')
        dotIdx = label.rindex('.')
        labelId = int(label[dashIdx + 1:dotIdx])

        rowNormCi = np.square(np.linalg.norm(Ci, axis=1))
        ret.append([labelId, rowNormCi])
    return ret
Example #3
0
def singleModeALSstep(partition):
    """
    Runs a single step of Alternating Least Squares to solve for one of A (mode = 1),
    B (mode = 2), or C (mode = 3) matrix.

    Every row of ``partition`` is read as ``(label, Xi)``.  For each row the
    slice's own factor Ci is solved first; then, depending on the
    module-level ``decompMode``:

    * 1 or 2 -- the row's ZTZ and XZ terms are added to running sums;
    * 3      -- the squared norm of the row's residual is added to a running
                error.

    Sketching is controlled by module-level settings (``sketching``,
    ``sketchingRate``, ``errorCalcSketchingRate``, ``onUpdateWeightLoop``,
    ``sketchingRowsA``, ``sketchingRowsB``, ``sketchingRowsC``).  When row
    sampling applies, the integer between the last '-' and the last '.' of
    the label is taken as the slice's first global row index, and a slice
    with no sampled rows is skipped.

    Returns a list of ``[key, value]`` pairs:
    ``[['ZTZ', ...], ['XZ', ...]]`` when ``decompMode < 3`` and the partition
    is non-empty (both values stay 0 if every slice was skipped),
    ``[['error', error]]`` when ``decompMode == 3``, otherwise an empty list.
    """
    ret = []
    rows = list(partition)
    ZiTZi = 0
    XiZi = 0
    error = 0.0

    for row in rows:
        label = row[0]
        Xi = row[1]
        Ki = Xi.shape[0]

        # Default: every row of this slice.  The `sketching == 2` branches
        # below index with selectRowsC even when the sampling guard is not
        # taken (sketchingRate >= 1.0, or a full-error pass in mode 3);
        # without this default they would raise NameError.
        selectRowsC = np.arange(Ki)

        # make sure not to skip over slice if we're calculating error on full tensor
        if (((sketching > 0 and sketchingRate < 1.0)
             or (decompMode == 3 and errorCalcSketchingRate < 1))
                and not (decompMode == 3 and errorCalcSketchingRate == 1)
                and not (decompMode == 3 and onUpdateWeightLoop)):
            dashIdx = label.rindex('-')
            dotIdx = label.rindex('.')
            labelId = int(label[dashIdx + 1:dotIdx])
            minIndex = labelId
            maxIndex = labelId + Ki - 1
            # keep the sampled global row indices that fall inside this
            # slice, then shift them to slice-local indices
            selectRowsC = sketchingRowsC[(sketchingRowsC >= minIndex)
                                         & (sketchingRowsC <= maxIndex)]
            selectRowsC = selectRowsC - minIndex
            if len(selectRowsC) == 0:
                continue

        # always solve for Ci first!
        # The zero matrix only fills slot 0 of the list handed to
        # khatri_rao; that slot is the one excluded via skip_matrix=0.
        Ci = np.zeros((Ki, R))
        # Parentheses spell out the original precedence: `and` binds tighter
        # than `or`, so `not onUpdateWeightLoop` only qualifies the mode-3
        # alternative.
        if ((decompMode < 3 and (sketching == 1 or sketching >= 3)
             and sketchingRate < 1.0)
                or ((decompMode == 3 and 0 < errorCalcSketchingRate < 1)
                    and not onUpdateWeightLoop)):
            ZiTZic = tensorOps.ZTZ(A[sketchingRowsA, :], B[sketchingRowsB, :])
            XiZic = np.dot(
                unfold(Xi[:, sketchingRowsA, :][:, :, sketchingRowsB], 0),
                khatri_rao([Ci, A[sketchingRowsA, :], B[sketchingRowsB, :]],
                           skip_matrix=0))
        # don't need a sketching == 2, since else is the same
        else:
            ZiTZic = tensorOps.ZTZ(A, B)
            XiZic = np.dot(unfold(Xi, 0),
                           khatri_rao([Ci, A, B], skip_matrix=0))
        if regularization > 0:
            ZiTZic = ZiTZic + regulParam * eye
        Ci = solve(ZiTZic.T, XiZic.T).T

        if decompMode == 1:
            if (sketching == 1 or sketching >= 3) and sketchingRate < 1.0:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B[sketchingRowsB, :],
                                              Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :][:, :, sketchingRowsB], 1),
                    khatri_rao([Ci[selectRowsC, :], A, B[sketchingRowsB, :]],
                               skip_matrix=1))
            elif sketching == 2:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B, Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :], 1),
                    khatri_rao([Ci[selectRowsC, :], A, B], skip_matrix=1))
            else:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B, Ci)
                XiZi = XiZi + np.dot(unfold(Xi, 1),
                                     khatri_rao([Ci, A, B], skip_matrix=1))
        elif decompMode == 2:
            if (sketching == 1 or sketching >= 3) and sketchingRate < 1.0:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A[sketchingRowsA, :],
                                              Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :][:, sketchingRowsA, :], 2),
                    khatri_rao([Ci[selectRowsC, :], A[sketchingRowsA, :], B],
                               skip_matrix=2))
            elif sketching == 2:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A, Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :], 2),
                    khatri_rao([Ci[selectRowsC, :], A, B], skip_matrix=2))
            else:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A, Ci)
                XiZi = XiZi + np.dot(unfold(Xi, 2),
                                     khatri_rao([Ci, A, B], skip_matrix=2))
        elif decompMode == 3:
            if 0 < errorCalcSketchingRate < 1 and not onUpdateWeightLoop:
                # error on the sampled sub-tensor only
                sampledXi = Xi[selectRowsC, :, :][:, sketchingRowsA, :][
                    :, :, sketchingRowsB]
                error = error + np.square(
                    norm(sampledXi - kruskal_to_tensor([
                        Ci[selectRowsC, :], A[sketchingRowsA, :],
                        B[sketchingRowsB, :]
                    ]), 2))
            elif sketching == 2:
                error = error + np.square(
                    norm(Xi[selectRowsC, :, :]
                         - kruskal_to_tensor([Ci[selectRowsC, :], A, B]), 2))
            else:
                # error on the full slice
                error = error + np.square(
                    norm(Xi - kruskal_to_tensor([Ci, A, B]), 2))
        else:
            # Parenthesised single-argument form: prints the same text on
            # Python 2 and is valid syntax on Python 3.
            print('Unknown decomposition mode. Catastrophic error. Failing now...')

    if rows and decompMode < 3:
        ret.append(['ZTZ', ZiTZi])
        ret.append(['XZ', XiZi])
    elif decompMode == 3:
        ret.append(['error', error])
    return ret
Example #4
0
def singleModeALSstep(partition):
    """
    Runs a single step of Alternating Least Squares to solve for one of A (mode = 1),
    B (mode = 2), or C (mode = 3) matrix.

    Every row of ``partition`` is read as ``(label, Xi)``.  For each row the
    slice's own factor Ci is solved first; then, depending on the
    module-level ``decompMode``:

    * 1 or 2 -- the row's ZTZ and XZ terms are added to running sums;
    * 3      -- the squared norm of the row's residual is added to a running
                error.

    When the module-level ``sketching`` is positive, the integer between the
    last '-' and the last '.' of the label is taken as the slice's first
    global row index, the entries of ``sketchingRowsC`` falling inside the
    slice are selected, and a slice with no selected rows is skipped.

    Returns a list of ``[key, value]`` pairs:
    ``[['ZTZ', ...], ['XZ', ...]]`` when ``decompMode < 3`` and the partition
    is non-empty (both values stay 0 if every slice was skipped),
    ``[['error', error]]`` when ``decompMode == 3``, otherwise an empty list.
    """
    ret = []
    rows = list(partition)
    ZiTZi = 0
    XiZi = 0
    error = 0.0

    for row in rows:
        label = row[0]
        Xi = row[1]
        Ki = Xi.shape[0]
        if sketching > 0:
            dashIdx = label.rindex('-')
            dotIdx = label.rindex('.')
            labelId = int(label[dashIdx + 1:dotIdx])
            minIndex = labelId
            maxIndex = labelId + Ki - 1
            # keep the sampled global row indices that fall inside this
            # slice, then shift them to slice-local indices
            selectRowsC = sketchingRowsC[(sketchingRowsC >= minIndex)
                                         & (sketchingRowsC <= maxIndex)]
            selectRowsC = selectRowsC - minIndex
            if len(selectRowsC) == 0:
                continue

        # always solve for Ci first!
        # The zero matrix only fills slot 0 of the list handed to
        # khatri_rao; that slot is the one excluded via skip_matrix=0.
        Ci = np.zeros((Ki, R))
        if sketching == 1 or sketching == 3:
            ZiTZic = tensorOps.ZTZ(A[sketchingRowsA, :], B[sketchingRowsB, :])
            XiZic = np.dot(
                unfold(Xi[:, sketchingRowsA, :][:, :, sketchingRowsB], 0),
                khatri_rao([Ci, A[sketchingRowsA, :], B[sketchingRowsB, :]],
                           skip_matrix=0))
        # don't need a sketching == 2, since else is the same
        else:
            ZiTZic = tensorOps.ZTZ(A, B)
            XiZic = np.dot(unfold(Xi, 0),
                           khatri_rao([Ci, A, B], skip_matrix=0))
        if regularization > 0:
            ZiTZic = ZiTZic + regulParam * eye
        Ci = solve(ZiTZic.T, XiZic.T).T

        if decompMode == 1:
            if sketching == 1 or sketching == 3:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B[sketchingRowsB, :],
                                              Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :][:, :, sketchingRowsB], 1),
                    khatri_rao([Ci[selectRowsC, :], A, B[sketchingRowsB, :]],
                               skip_matrix=1))
            elif sketching == 2:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B, Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :], 1),
                    khatri_rao([Ci[selectRowsC, :], A, B], skip_matrix=1))
            else:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B, Ci)
                XiZi = XiZi + np.dot(unfold(Xi, 1),
                                     khatri_rao([Ci, A, B], skip_matrix=1))
        elif decompMode == 2:
            if sketching == 1 or sketching == 3:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A[sketchingRowsA, :],
                                              Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :][:, sketchingRowsA, :], 2),
                    khatri_rao([Ci[selectRowsC, :], A[sketchingRowsA, :], B],
                               skip_matrix=2))
            elif sketching == 2:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A, Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :], 2),
                    khatri_rao([Ci[selectRowsC, :], A, B], skip_matrix=2))
            else:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A, Ci)
                XiZi = XiZi + np.dot(unfold(Xi, 2),
                                     khatri_rao([Ci, A, B], skip_matrix=2))
        elif decompMode == 3:
            if sketching == 1 or sketching == 3:
                # error on the sampled sub-tensor only
                sampledXi = Xi[selectRowsC, :, :][:, sketchingRowsA, :][
                    :, :, sketchingRowsB]
                error = error + np.square(
                    norm(sampledXi - kruskal_to_tensor([
                        Ci[selectRowsC, :], A[sketchingRowsA, :],
                        B[sketchingRowsB, :]
                    ]), 2))
            elif sketching == 2:
                error = error + np.square(
                    norm(Xi[selectRowsC, :, :]
                         - kruskal_to_tensor([Ci[selectRowsC, :], A, B]), 2))
            else:
                # error on the full slice
                error = error + np.square(
                    norm(Xi - kruskal_to_tensor([Ci, A, B]), 2))
        else:
            # Parenthesised single-argument form: prints the same text on
            # Python 2 and is valid syntax on Python 3.
            print('Unknown decomposition mode. Catastrophic error. Failing now...')

    if rows and decompMode < 3:
        ret.append(['ZTZ', ZiTZi])
        ret.append(['XZ', XiZi])
    elif decompMode == 3:
        ret.append(['error', error])
    return ret