예제 #1
0
def useHier(topX, regX, R, hierIters, hierInner, regIters, regInner,
            tensorInfo):
    """
    Two-stage CP-APR: factor topX, then use the result to initialize the
    factorization of regX.

    Parameters
    ------------
    topX : the tensor factored first
    regX : the tensor factored second, starting from the expanded topX factors
    R : the rank used for both factorizations
    hierIters : maxiters for the topX run
    hierInner : maxinner for the topX run
    regIters : maxiters for the regX run
    regInner : maxinner for the regX run
    tensorInfo : mapping with the keys 'diag', 'med', 'diagHier', 'medHier',
                 'diagHierCount' and 'medHierCount'

    Output
    -----------
    (Y1, topY1, top1stats, top1mstats, ystats, mstats) where Y1, ystats and
    mstats come from the regX run and topY1, top1stats and top1mstats from
    the topX run (topY1 after normalize_sort(1) and zeroSmallFactors)
    """
    topY1, top1stats, top1mstats = CP_APR.cp_apr(topX,
                                                 R,
                                                 maxiters=hierIters,
                                                 maxinner=hierInner)
    # reduce them to probability and then just sort them
    topY1.normalize_sort(1)
    topY1 = pmdTools.zeroSmallFactors(topY1, 1e-4)
    ### Expand the mode-1 and mode-2 factors: every entry of 'diag' / 'med'
    ### receives the row of the hierarchy index it maps to, divided by the
    ### count stored for that hierarchy index.
    Udiag = np.zeros((len(tensorInfo['diag']), R))
    Umed = np.zeros((len(tensorInfo['med']), R))
    for idx, diag in enumerate(tensorInfo['diag']):
        topDiagIdx = tensorInfo['diagHier'][diag]
        diagCount = tensorInfo['diagHierCount'][topDiagIdx]
        Udiag[idx, :] = topY1.U[1][topDiagIdx, :] / diagCount
    for idx, med in enumerate(tensorInfo['med']):
        topMedIdx = tensorInfo['medHier'][med]
        medCount = tensorInfo['medHierCount'][topMedIdx]
        Umed[idx, :] = topY1.U[2][topMedIdx, :] / medCount
    ### Patient factors stays the same (copied so topY1 is not shared)
    Mtop = ktensor.ktensor(np.ones(R), [topY1.U[0].copy(), Udiag, Umed])
    # The second run must factor the regX argument; the previous code
    # referenced an undefined name (X1) here and never used regX.
    Y1, ystats, mstats = CP_APR.cp_apr(regX,
                                       R,
                                       Minit=Mtop,
                                       maxiters=regIters,
                                       maxinner=regInner)
    return Y1, topY1, top1stats, top1mstats, ystats, mstats
예제 #2
0
    def evaluatePredictionAUC_1(self,experCount):
        """
        Factor self.X with CP-APR, row-normalize the mode-0 factor matrix and
        write the raw and factor-based feature sets to text files.

        Parameters
        ------------
        experCount : value appended (via str()) to the output file names

        The four files rawdataX_, rawdataY_, cprdataX_ and cprdataY_ are
        written under self.data_dir + 'experiment_runprecess/'.
        """
        # NOTE(review): run, sumBaseAUC and sumCprAUC are never read in the
        # visible part of this method -- presumably used further down; confirm.
        run = 0
        sumBaseAUC=0.0
        sumCprAUC=0.0
        MCPR, cpstats, mstats = CP_APR.cp_apr(self.X, self.R, maxiters=1, maxinner=7)

        #MCPR.normalize_sort(1)
        # presumably moves the component weights into mode 0 -- TODO confirm
        MCPR.redistribute(0)
        ## scale by summing across the rows
        totWeight = np.sum(MCPR.U[0], axis=1)
        # rows whose total weight is (numerically) zero cannot be normalized
        zeroIdx = np.where(totWeight < 1e-100)[0]
        if len(zeroIdx) > 0:
            # for the zero ones we're going to evenly distribute
            evenDist = np.repeat(1.0 / self.R, len(zeroIdx)*self.R)
            MCPR.U[0][zeroIdx, :] = evenDist.reshape((len(zeroIdx), self.R))
            # recompute so the replaced rows now sum to one
            totWeight = np.sum(MCPR.U[0], axis=1)
        # expand the row sums to the factor's shape and divide, so every row
        # of MCPR.U[0] sums to one
        twMat = np.repeat(totWeight, self.R).reshape(self.X.shape[0], self.R)
        MCPR.U[0] = MCPR.U[0] / twMat

        # raw* files hold self.rawFeatures / self.Y, cpr* files hold the
        # normalized mode-0 factors / self.Y
        rawXfile=self.data_dir+'experiment_runprecess/rawdataX_'+str(experCount)+'.csv'
        rawYfile=self.data_dir+'experiment_runprecess/rawdataY_'+str(experCount)+'.csv'
        cprXfile=self.data_dir+'experiment_runprecess/cprdataX_'+str(experCount)+'.csv'
        cprYfile=self.data_dir+'experiment_runprecess/cprdataY_'+str(experCount)+'.csv'
        np.savetxt(rawXfile,self.rawFeatures)
        np.savetxt(rawYfile,self.Y)
        np.savetxt(cprXfile, MCPR.U[0])
        np.savetxt(cprYfile,self.Y)
예제 #3
0
def factorTensor(X):
    """
    Run CP-APR on X from a fixed random seed and return the cleaned factors.

    Reads seed, R, tol, outerIters, innerIters and zeroThr from the
    enclosing scope.  The factorization is passed through normalize_sort(1)
    and decompTools.zeroSmallFactors before it is returned; the other two
    values returned by cp_apr are discarded.
    """
    # re-seed on every call so the initialization is always the same
    np.random.seed(seed)
    aprSettings = dict(tol=tol, maxiters=outerIters, maxinner=innerIters)
    factors, _iterStats, _modelStats = CP_APR.cp_apr(X, R, **aprSettings)
    factors.normalize_sort(1)
    return decompTools.zeroSmallFactors(factors, zeroThr=zeroThr)
예제 #4
0
def factorTensor(X):
    """
    Factor X with CP-APR, starting from the same random state on every call.

    seed, R, tol, outerIters, innerIters and zeroThr are taken from the
    enclosing scope.  Returns the factorization after normalize_sort(1) and
    decompTools.zeroSmallFactors; cp_apr's two statistics values are dropped.
    """
    # fixed seed -> identical initialization for every call
    np.random.seed(seed)
    aprResult = CP_APR.cp_apr(X,
                              R,
                              tol=tol,
                              maxiters=outerIters,
                              maxinner=innerIters)
    decomposition, _iterStats, _modelStats = aprResult
    decomposition.normalize_sort(1)
    cleaned = decompTools.zeroSmallFactors(decomposition, zeroThr=zeroThr)
    return cleaned
예제 #5
0
 def findFactors(self, trainX, zeroThr=1e-4):
     """
     Factor trainX with CP-APR and wrap the factors in a KLProjection.

     Parameters
     ------------
     trainX : the tensor to factor
     zeroThr : entries of the factor matrices below this value are set to 0

     Output
     -----------
     KLProjection.KLProjection built from the thresholded factors and self.R
     """
     basis, _cpstats, _mstats = CP_APR.cp_apr(trainX,
                                              R=self.R,
                                              maxiters=self.outerIter,
                                              maxinner=self.innerIter)
     basis.normalize_sort(1)
     # clear the small entries of every mode's factor matrix in place
     for mode in range(basis.ndims()):
         factor = basis.U[mode]
         factor[np.where(factor < zeroThr)] = 0
     return KLProjection.KLProjection(basis.U, self.R)
예제 #6
0
def findFactors(X, R=100, outerIter=70, innerIter=10, zeroThr=1e-4):
    """
    Factor X with CP-APR and build a KLProjection from the result.

    Parameters
    ------------
    X : the tensor to factor
    R : the rank of the factorization
    outerIter : passed to cp_apr as maxiters
    innerIter : passed to cp_apr as maxinner
    zeroThr : threshold handed to decompTools.zeroSmallFactors

    Output
    -----------
    (KLProjection of the factors, the factorization itself, the third value
    returned by cp_apr)
    """
    aprOptions = {"R": R, "maxiters": outerIter, "maxinner": innerIter}
    basis, _cpstats, modelStats = CP_APR.cp_apr(X, **aprOptions)
    basis.normalize_sort(1)
    basis = decompTools.zeroSmallFactors(basis, zeroThr)
    projection = KLProjection.KLProjection(basis.U, R)
    return projection, basis, modelStats
예제 #7
0
 def findFactors(self, trainX, zeroThr=1e-4):
     """
     Compute the CP-APR factor basis of trainX as a KLProjection.

     Parameters
     ------------
     trainX : the tensor to factor
     zeroThr : factor entries smaller than this are replaced by 0

     Output
     -----------
     KLProjection.KLProjection(factors, self.R)
     """
     aprResult = CP_APR.cp_apr(trainX, R=self.R, maxiters=self.outerIter, maxinner=self.innerIter)
     factors, _cpstats, _mstats = aprResult
     factors.normalize_sort(1)
     # threshold each factor matrix in place
     for modeIdx in range(factors.ndims()):
         modeFactor = factors.U[modeIdx]
         smallEntries = np.where(modeFactor < zeroThr)
         modeFactor[smallEntries] = 0
     return KLProjection.KLProjection(factors.U, self.R)
예제 #8
0
def useHier(topX, regX, R, hierIters, hierInner, regIters, regInner, tensorInfo):
    """
    Factor topX with CP-APR and use its factors as the starting point for
    the CP-APR factorization of regX.

    Parameters
    ------------
    topX : the tensor factored first
    regX : the tensor factored second, initialized from the topX factors
    R : the rank of both factorizations
    hierIters, hierInner : maxiters / maxinner of the topX run
    regIters, regInner : maxiters / maxinner of the regX run
    tensorInfo : mapping with the keys 'diag', 'med', 'diagHier', 'medHier',
                 'diagHierCount' and 'medHierCount'

    Output
    -----------
    (Y1, topY1, top1stats, top1mstats, ystats, mstats): Y1, ystats, mstats
    belong to the regX run, the other three to the topX run
    """
    topY1, top1stats, top1mstats = CP_APR.cp_apr(topX, R, maxiters=hierIters, maxinner=hierInner)
    # reduce them to probability and then just sort them
    topY1.normalize_sort(1)
    topY1 = pmdTools.zeroSmallFactors(topY1, 1e-4)
    ### Each 'diag' / 'med' entry inherits the factor row of the hierarchy
    ### index it maps to, divided by the count stored for that index
    Udiag = np.zeros((len(tensorInfo['diag']), R))
    Umed = np.zeros((len(tensorInfo['med']), R))
    for idx, diag in enumerate(tensorInfo['diag']):
        topDiagIdx = tensorInfo['diagHier'][diag]
        diagCount = tensorInfo['diagHierCount'][topDiagIdx]
        Udiag[idx, :] = topY1.U[1][topDiagIdx, :] / diagCount
    for idx, med in enumerate(tensorInfo['med']):
        topMedIdx = tensorInfo['medHier'][med]
        medCount = tensorInfo['medHierCount'][topMedIdx]
        Umed[idx, :] = topY1.U[2][topMedIdx, :] / medCount
    ### Patient factors stays the same (a copy, so topY1 is left untouched)
    Mtop = ktensor.ktensor(np.ones(R), [topY1.U[0].copy(), Udiag, Umed])
    # Factor the regX argument; the old code referenced the undefined name
    # X1 here, leaving regX unused.
    Y1, ystats, mstats = CP_APR.cp_apr(regX, R, Minit=Mtop, maxiters=regIters, maxinner=regInner)
    return Y1, topY1, top1stats, top1mstats, ystats, mstats
예제 #9
0
def solveSharedMode(X, M, R, sm, isConverged, maxInnerIters=10, epsilon=1e-10, tol=1e-4):
    """
    Run the inner update loop for a factor matrix shared by several tensors.

    On every pass Phi is accumulated over all rows of sm, the shared factor
    is multiplied element-wise by it, re-normalized, and pushed to the other
    factorizations through updateSharedFactors.

    Parameters
    ------------
    X : indexable collection of tensors
    M : indexable collection of factorizations, one per tensor in X
    R : the rank (not used by this routine)
    sm : 2-d array, each row is (tensor index, mode index) of one mode that
         shares the factor; row 0 names the factor that is updated
    isConverged : convergence flag that is passed along
    maxInnerIters : the max number of inner iterations (expected to be >= 1,
                    otherwise the returned values are never assigned)
    epsilon : passed to CP_APR.calculatePhi
    tol : the loop stops once the violation drops below this value

    Output
    -----------
    (M, Phi, index of the last inner pass, kktModeViolation, isConverged);
    isConverged is set to False when the loop went past its first pass
    """
    row = sm.shape[0]
    firstI = sm[0, 0]    # first i that should be updated
    firstN = sm[0, 1]    # first n that should be updated
    for innerIter in range(maxInnerIters):
        # accumulate Phi over every (tensor, mode) pair sharing the factor
        Phi = np.zeros(M[firstI].U[firstN].shape)
        for k in range(row):
            j = sm[k, 0]
            n = sm[k, 1]
            Pi = CP_APR.calculatePi(X[j], M[j], n)
            Phi = Phi + CP_APR.calculatePhi(X[j], M[j], n, Pi, epsilon)
        # check for convergence: largest absolute entry of min(factor, 1 - Phi).
        # The accumulated Phi is used here; the previous code referenced an
        # undefined name (PhiHat).
        kktModeViolation = np.max(np.abs(np.minimum(M[firstI].U[firstN], 1 - Phi).flatten()))
        if kktModeViolation < tol:
            break
        M[firstI].U[firstN] = np.multiply(M[firstI].U[firstN], Phi)
        M[firstI].normalize_mode(firstN, 1)
        # update the shared factors
        M = updateSharedFactors(M, sm)
    if innerIter > 0:
        isConverged = False
    return M, Phi, innerIter, kktModeViolation, isConverged
예제 #10
0
 def projectSlice(self, X, n, iters=100, epsilon=1e-10, convTol=1e-4):
     """
     Project a slice, solving for the factors of the nth mode

     Parameters
     ------------
     X : the tensor to project onto the basis
     n : the mode to project onto
     iters : the max number of inner iterations
     epsilon : parameter to avoid dividing by zero
     convTol : the convergence tolerance

     Output
     -----------
     the projection matrix: the solved mode-n factors with every row scaled
     to sum to one (rows with no weight become uniform, 1 / self.R each)

     Raises
     -----------
     ValueError if a basis matrix does not match the size of X along its mode
     """
     ## Setup the 'initial guess': random factors for mode n, the stored
     ## basis for every other mode
     F = []
     for m in range(X.ndims()):
         if m == n:
             F.append(np.random.rand(X.shape[m], self.R))
             continue
         ## double check the shape is the right dimensions
         if self.basis[m].shape[0] != X.shape[m]:
             raise ValueError("Shape of the tensor X is incorrect")
         F.append(self.basis[m])
     M = ktensor.ktensor(np.ones(self.R), F)
     ## Solve for the subproblem
     M, Phi, totIter, kktMV = CP_APR.solveForModeB(X, M, n, iters, epsilon,
                                                   convTol)
     ## scale by summing across the rows
     totWeight = np.sum(M.U[n], axis=1)
     zeroIdx = np.where(totWeight < 1e-100)[0]
     if len(zeroIdx) > 0:
         # for the zero ones we're going to evenly distribute
         M.U[n][zeroIdx, :] = np.full((len(zeroIdx), self.R), 1.0 / self.R)
         totWeight = np.sum(M.U[n], axis=1)
     # expand the row sums to the factor's shape so the division is row-wise
     twMat = np.repeat(totWeight, self.R).reshape(X.shape[n], self.R)
     M.U[n] = M.U[n] / twMat
     return M.U[n]
예제 #11
0
 def projectSlice(self, X, n, iters=10, epsilon=1e-10, convTol=1e-4):
     """
     Solve for the mode-n factors of X while the other modes stay fixed to
     the stored basis, and return them row-normalized.

     Parameters
     ------------
     X : the tensor to project onto the basis
     n : the mode to project onto
     iters : the max number of inner iterations
     epsilon : parameter to avoid dividing by zero
     convTol : the convergence tolerance

     Output
     -----------
     the projection matrix (each row sums to one)
     """
     ## Initial guess: random for mode n, the basis everywhere else
     guess = []
     for mode in range(X.ndims()):
         if mode == n:
             guess.append(np.random.rand(X.shape[mode], self.R))
             continue
         ## the basis must have one row per index of X along this mode
         if self.basis[mode].shape[0] != X.shape[mode]:
             raise ValueError("Shape of the tensor X is incorrect")
         guess.append(self.basis[mode])
     model = ktensor.ktensor(np.ones(self.R), guess)
     ## Solve for the subproblem
     solved = CP_APR.solveForModeB(X, model, n, iters, epsilon, convTol)
     model, _phi, _totIter, _kktMV = solved
     ## Row sums of the solved factor
     rowWeight = np.sum(model.U[n], axis=1)
     emptyRows = np.where(rowWeight < 1e-100)[0]
     if len(emptyRows) > 0:
         # rows without any weight get a uniform distribution instead
         model.U[n][emptyRows, :] = np.full((len(emptyRows), self.R), 1.0 / self.R)
         rowWeight = np.sum(model.U[n], axis=1)
     # repeat every row sum across the R columns, then divide element-wise
     weightMat = np.repeat(rowWeight, self.R).reshape(X.shape[n], self.R)
     model.U[n] = model.U[n] / weightMat
     return model.U[n]
예제 #12
0
 def solveUnsharedMode(self, mode, isConverged):
     """
     Solve the subproblem for a mode that is not shared between tensors.
     This is simply the same as the MM approach for CP-APR.

     Parameters
     ----------
     mode : a length 2 array that contains the ith tensor in position 0 and the nth mode in position 1
     isConverged : passing along the convergence parameter

     Output
     ----------
     (Phi, inner iteration count, mode violation, isConverged) where the
     first three come from CP_APR.solveForModeB and isConverged is set to
     False when the iteration count is above zero
     """
     tensorIdx, modeIdx = mode[0], mode[1]
     ## Shift the weight in this factorization from lambda to the mode
     self.M[tensorIdx].redistribute(modeIdx)
     solved = CP_APR.solveForModeB(self.X[tensorIdx], self.M[tensorIdx],
                                   modeIdx, self.maxInnerIters,
                                   self.epsilon, self.tol)
     self.M[tensorIdx], Phi, innerIters, modeViolation = solved
     if innerIters > 0:
         isConverged = False
     # Shift weight from the mode back to lambda
     self.M[tensorIdx].normalize_mode(modeIdx, 1)
     ## Normalize the lambda to all the others
     self.shareLambda(tensorIdx)
     return Phi, innerIters, modeViolation, isConverged
예제 #13
0
def decomposeCountTensor(filename, R, outerIters=20, innerIters=10, convergeTol=1e-2, zeroTol=1e-4):
    """
    Load the tensor data from a file and then decompose it using CP_APR
    with the specified rank

    Parameters:
    filename - the file that stores the sparse tensor representation using numpy
    R - the rank of the tensor
    outerIters - the maximum number of outer iterations
    innerIters - the maximum number of inner iterations
    convergeTol - the convergence tolerance
    zeroTol - the amount to zero out the factors

    Output:
    (Y, iterStats, modelStats) - Y is the factorization after
    normalize_sort(1) and zeroSmallFactors; iterStats and modelStats are the
    second and third values returned by CP_APR.cp_apr
    """
    countTensor = sptensor.loadTensor(filename)
    aprResult = CP_APR.cp_apr(countTensor,
                              R,
                              tol=convergeTol,
                              maxiters=outerIters,
                              maxinner=innerIters)
    factors, iterStats, modelStats = aprResult
    # normalize the factors using the 1 norm and then sort in descending order
    factors.normalize_sort(1)
    return zeroSmallFactors(factors, zeroThr=zeroTol), iterStats, modelStats
예제 #14
0
import CP_APR
import ktensor
""" 
Test file associated with the CP decomposition using APR
"""
""" Test factorization of sparse matrix """
# NOTE(review): np, sptensor and tensor are used below but are not among the
# imports visible in this snippet -- confirm they are imported elsewhere.
# Sparse test tensor: five nonzero entries given as (subscript row, value).
subs = np.array([[0, 3, 1], [1, 0, 1], [1, 2, 1], [1, 3, 1], [3, 0, 0]])
vals = np.array([[1], [1], [1], [1], [3]])
siz = np.array([5, 5, 2])  # 5x5x2 tensor
X = sptensor.sptensor(subs, vals, siz)
# Fixed rank-4 initial factor matrices (one per mode: 5x4, 5x4, 2x4) so the
# run does not depend on random initialization.
U0 = np.array([[0.7689, 0.8843, 0.7487, 0.0900],
               [0.1673, 0.5880, 0.8256, 0.1117],
               [0.8620, 0.1548, 0.7900, 0.1363],
               [0.9899, 0.1999, 0.3185, 0.6787],
               [0.5144, 0.4070, 0.5341, 0.4952]])
U1 = np.array([[0.1897, 0.5606, 0.8790, 0.9900],
               [0.4950, 0.9296, 0.9889, 0.5277],
               [0.1476, 0.6967, 0.0006, 0.4795],
               [0.0550, 0.5828, 0.8654, 0.8013],
               [0.8507, 0.8154, 0.6126, 0.2278]])
U2 = np.array([[0.4981, 0.5747, 0.7386, 0.2467],
               [0.9009, 0.8452, 0.5860, 0.6664]])
Minit = ktensor.ktensor(np.ones(4), [U0, U1, U2])
# fms of the initial guess against itself; the value is not checked here
fms = Minit.fms(Minit)

# Rank-4 factorization starting from Minit, at most 100 outer iterations
Y, cpstats, modelStats = CP_APR.cp_apr(X, 4, Minit=Minit, maxiters=100)
Y.normalize_sort(1)
""" Test factorization of regular matrix """
# Tensor built from the values 1..24 with shape [3, 4, 2]
X = tensor.tensor(range(1, 25), [3, 4, 2])
# prints the whole tuple returned by cp_apr (Python 2 print statement)
print CP_APR.cp_apr(X, 4)
예제 #15
0
    # --- SP_NTF run, reported as "Marble": construction + computeDecomp are timed
    startTime = time.time()
    spntf = SP_NTF.SP_NTF(X,
                          R=R,
                          alpha=alpha,
                          maxinner=INNER_ITER,
                          maxiters=MAX_ITER)
    Yinfo = spntf.computeDecomp(gamma=gamma)
    ## calculate all the requested entries (scoring is not part of the timing)
    marbleElapse = time.time() - startTime
    marbleFMS, marbleFOS, marbleNNZ = calculateValues(
        TM, spntf.M[SP_NTF.REG_LOCATION])

    # --- plain CP-APR run, re-seeded with the same seed before it starts
    np.random.seed(seed)
    startTime = time.time()
    YCP, ycpstats, mstats = CP_APR.cp_apr(X,
                                          R=R,
                                          maxinner=INNER_ITER,
                                          maxiters=MAX_ITER)
    cpaprElapse = time.time() - startTime
    cpaprFMS, cpaprFOS, cpaprNNZ = calculateValues(TM, YCP)

    # --- "Limestone": the CP-APR factors hard-thresholded per mode with
    # gamma[n]; YCP is modified in place
    for n in range(YCP.ndims()):
        YCP.U[n] = tensorTools.hardThresholdMatrix(YCP.U[n], gamma[n])
    limestoneFMS, limestoneFOS, limestoneNNZ = calculateValues(TM, YCP)

    # Every list follows the order given under "Order".  The thresholding
    # step is not timed, so Limestone reports the CP-APR time.
    sampleResult = {
        "Order": ["Marble", "CPAPR", "Limestone"],
        "FMS": [marbleFMS, cpaprFMS, limestoneFMS],
        "FOS": [marbleFOS, cpaprFOS, limestoneFOS],
        "CompTime": [marbleElapse, cpaprElapse, cpaprElapse],
        "NNZ": [marbleNNZ, cpaprNNZ, limestoneNNZ]
    }
예제 #16
0
## calculate diagnosis-medication combination: every [a, b] pair of
## yaxis[1] x yaxis[2] in itertools.product order (getDBEntry looks the
## pairs up by row position)
diagMed = [[a, b] for a, b in itertools.product(yaxis[1], yaxis[2])] 

def getDBEntry(featureName, m):
    """
    Flatten the nonzero entries of m into database rows.

    For each of the first R columns of m, every nonzero entry becomes one
    row holding the matching diagMed pair, the column index and the value.
    exptID and featureName are then prepended to every row.  R, diagMed and
    exptID are read from the enclosing scope.

    Parameters
    ------------
    featureName : label stored in the second column of every row
    m : 2-d array indexed as m[row, column]

    Output
    -----------
    2-d array with the columns exptID, featureName, pair (two columns),
    column index, value
    """
    # start with a placeholder row so there is always something to stack on
    rows = np.zeros((1, 4))
    for factor in range(R):
        # get the nonzero indices of this column
        nonzero = np.flatnonzero(m[:, factor])
        pairs = np.array(diagMed)[nonzero]
        factorCol = np.repeat(factor, len(nonzero))
        weights = m[nonzero, factor]
        rows = np.vstack((rows, np.column_stack((pairs, factorCol, weights))))
    # drop the placeholder row again
    rows = np.delete(rows, (0), axis=0)
    nRows = rows.shape[0]
    exptCol = np.repeat(exptID, nRows)
    nameCol = np.repeat(featureName, nRows)
    return np.column_stack((exptCol, nameCol, rows))

# Seed first so the CP-APR initialization is reproducible
np.random.seed(seed)
M, cpstats, mstats = CP_APR.cp_apr(X, R, maxiters=iters, maxinner=innerIter)
M.normalize_sort(1)
## Threshold the values
# NOTE(review): range(1,2) only visits mode 1, yet modes 1 and 2 are both
# combined below -- confirm mode 2 is meant to stay unthresholded.
for n in range(1,2):
    zeroIdx = np.where(M.U[n] < modeThr)
    M.U[n][zeroIdx] = 0
## Get the diagnosis-medication matrix
ptfMatrix = khatrirao.khatrirao(M.U[1], M.U[2])
dbOutput = getDBEntry("CP-APR", ptfMatrix)

# NMF on the same data for comparison
flatX = sptenmat.sptenmat(X, [0]).tocsrmat() # matricize along the first mode
nmfModel = nimfa.mf(flatX, method="nmf", max_iter=iters, rank=R)
nmfResult = nimfa.mf_run(nmfModel)
# transpose the coefficient matrix, then l1-normalize along axis 0
nmfBasis = nmfResult.coef().transpose()
nmfBasis = preprocessing.normalize(nmfBasis, norm="l1", axis=0)
nmfBasis = nmfBasis.toarray()
예제 #17
0
    diagIdx = idx / 470
    medIdx = idx % 470
    return axisList[1][diagIdx] + axisList[2][medIdx]
    
# Walk the train/test splits; splits are skipped (and the counter n advanced)
# until n equals nSample.
for train, test in ttss:
    if n != nSample:
        n = n + 1
        continue
    else:
        # The training tensor has X's shape except along mode 0, which is
        # the number of training rows.  The old code wrote len(train) into
        # train[0], corrupting the first training index and leaving
        # trainShape untouched.
        trainShape = list(X.shape)
        trainShape[0] = len(train)
        trainX = predictionModel.tensorSubset(X, train, trainShape)
        
        ## Do the tensor factorization
        np.random.seed(seed)
        M, cpstats, mstats = CP_APR.cp_apr(trainX, R, maxiters=outerIter, maxinner=10)
        M.normalize_sort(1)
        M.writeRawFile(factorFile)
        # Dump the factors with the experiment id as the leading column
        Yout = decompTools.getDBOutput(M, yaxis)
        Yout = np.column_stack((np.repeat(exptID, Yout.shape[0]), Yout))
        np.savetxt(Youtfile, Yout, fmt="%s", delimiter="|")
        # SQL script that loads the factor file and records the model.
        # NOTE(review): the handle is not closed in the visible code, and
        # innerIter is recorded although cp_apr above ran with maxinner=10
        # -- confirm both.
        sqlOut = file(Ysqlfile, "w")
        sqlOut.write("load data local infile '/home/joyce/workspace/Health/analysis/tensor/{0}' into table tensor_factors fields terminated by '|'  ;\n".format(Youtfile))
        sqlOut.write("insert into tensor_models(expt_ID, label_ID, description, rank, iterations, inner_iterations, seed, least_squares, log_likelihood, kkt_violation) values({0}, {1}, \'{2}\', {3}, {4}, {5}, {6}, {7}, {8}, {9});\n".format(exptID, labelID, exptDesc, R, outerIter, innerIter, seed, mstats['LS'], mstats['LL'], mstats['KKT']))

        # Project every row of X onto the learned basis, fit on the training
        # rows and score the test rows
        klp = KLProjection.KLProjection(M.U, M.R)
        ptfFeat = klp.projectSlice(X, 0)
        trainY = Y[train]
        predModel.fit(ptfFeat[train, :], trainY)
        ptfPred = predModel.predict_proba(ptfFeat[test,:])
        fpr, tpr, thresholds = metrics.roc_curve(Y[test], ptfPred[:, 1], pos_label=1)
예제 #18
0
flatX = sptenmat.sptenmat(xprime, [0]).tocsrmat() # matricize along the first mode
# placeholder first row so np.vstack can be used below; removed at the end
stats = np.zeros((1,6))

# Each timing row is [R, iters, pn, sample index, method name, seconds]

## NMF Timing
for k in range(samples):
    startTime = time.time()
    nmfModel = nimfa.mf(flatX, method="nmf", max_iter=iters, rank=R)
    nmfResult = nimfa.mf_run(nmfModel)
    elapsed = time.time() - startTime
    stats = np.vstack((stats, np.array([R, iters, pn, k, "NMF", elapsed])))
    
## PCA Timing
for k in range(samples):
    startTime = time.time()
    pcaModel.fit(flatX)
    elapsed = time.time() - startTime
    stats = np.vstack((stats, np.array([R, iters, pn, k, "PCA", elapsed])))

## Tensor factorization timing
for k in range(samples):
    startTime = time.time()
    CP_APR.cp_apr(xprime, R, maxiters=iters)
    elapsed = time.time() - startTime
    stats = np.vstack((stats, np.array([R, iters, pn, k, "CP_APR", elapsed])))
    
# drop the placeholder row
stats = np.delete(stats, (0), axis=0)

outFile = "results/patient-cpu-{0}.csv".format(pn)
np.savetxt(outFile, stats,  fmt="%s", delimiter="|")
# print the SQL statement that loads the result file (Python 2 print statement)
print "load data local infile '/home/joyce/workspace/Health/analysis/tensor/{0}' into table comp_metrics fields terminated by '|'  ;\n".format(outFile)
예제 #19
0
def findFactors(X, R=100, outerIter=70, innerIter=10, zeroThr=1e-4):
    """ Find the factor basis for this tensor """
    M, cpstats, mstats = CP_APR.cp_apr(X, R=R, maxiters=outerIter, maxinner=innerIter)
    M.normalize_sort(1)
    M = decompTools.zeroSmallFactors(M, zeroThr)
    return KLProjection.KLProjection(M.U, R), M, mstats
예제 #20
0
# Run settings.  NOTE(review): `iter` shadows the builtin of the same name,
# and innerIter is not read in the visible part of this script.
R = args.rank
seed = args.seed
iter = args.iterations
innerIter = 10
tol = 1e-2
zeroThr = 1e-5

# input file and output file: args.inputFile is a template that is filled
# with "data" for the tensor and "info" for the axis information
inputFile = args.inputFile.format("data")
yaxis = decompTools.loadAxisInfo(args.inputFile.format("info"))

print "Starting Tensor Factorization with ID:{0}".format(exptID)
X = sptensor.loadTensor(inputFile)
# seed before factoring so the run is reproducible
np.random.seed(seed)
Y, ls = CP_ALS.cp_als(X, R, tol=tol, maxiters=iter)
ll = CP_APR.loglikelihood(X, Y)

# normalize the factors using the 1 norm and then sort in descending order
Y.normalize_sort(1)
Y = decompTools.zeroSmallFactors(Y, zeroThr=zeroThr)

Y.writeRawFile("results/als-raw-{0}.dat".format(exptID))
Youtfile = "results/als-db-{0}-{1}.csv".format(exptID, iter)
Ysqlfile = "results/als-sql-{0}.sql".format(exptID)
# save the decomposition with the experiment id as the leading column
Yout = decompTools.getDBOutput(Y, yaxis)
Yout = np.column_stack((np.repeat(exptID, Yout.shape[0]), Yout))
np.savetxt(Youtfile, Yout, fmt="%s", delimiter="|")

# SQL script that loads the factor file.  file() is the Python 2 builtin;
# NOTE(review): the handle is not closed in the visible code.
sqlOut = file(Ysqlfile, "w")
sqlOut.write("load client from /home/joyceho/workspace/tensor/{0} of del modified by coldel| insert into joyceho.tensor_factors;\n".format(Youtfile))
예제 #21
0
# One dict of timing information is appended per processed split
compOut = []

# Splits are skipped (and the counter n advanced) until n equals nSample.
for train, test in ttss:
    if n != nSample:
        n = n + 1
        continue
    else:
        # The training tensor has X's shape except along mode 0, which is
        # the number of training rows.  The old code wrote len(train) into
        # train[0], corrupting the first training index and leaving
        # trainShape untouched.
        trainShape = list(X.shape)
        trainShape[0] = len(train)
        trainX = predictionModel.tensorSubset(X, train, trainShape)

        ## Do the tensor factorization (timed together with the clean-up)
        np.random.seed(seed)
        startTime = time.time()
        M, cpstats, mstats = CP_APR.cp_apr(trainX,
                                           R,
                                           maxiters=outerIter,
                                           maxinner=10)
        M.normalize_sort(1)
        # zero out the small factors
        # NOTE(review): this loop reuses n, the split counter tested at the
        # top of the outer loop (it is 1 afterwards), and range(1, 2) only
        # visits mode 1 -- confirm both are intended.
        for n in range(1, 2):
            zeroIdx = np.where(M.U[n] < zeroThr)
            M.U[n][zeroIdx] = 0
        elapsed = time.time() - startTime
        compOut.append({
            "expt": exptID,
            "R": R,
            "Outer": outerIter,
            "Model": "Limestone",
            "Comp": elapsed
        })
예제 #22
0
# Run settings.  NOTE(review): `iter` shadows the builtin of the same name,
# and innerIter is not read in the visible part of this script.
R = args.rank
seed = args.seed
iter = args.iterations
innerIter = 10
tol = 1e-2
zeroThr = 1e-5

# input file and output file: args.inputFile is a template that is filled
# with "data" for the tensor and "info" for the axis information
inputFile = args.inputFile.format("data")
yaxis = decompTools.loadAxisInfo(args.inputFile.format("info"))

print "Starting Tensor Factorization with ID:{0}".format(exptID)
X = sptensor.loadTensor(inputFile)
# seed before factoring so the run is reproducible
np.random.seed(seed)
Y, ls = CP_ALS.cp_als(X, R, tol=tol, maxiters=iter)
ll = CP_APR.loglikelihood(X, Y)

# normalize the factors using the 1 norm and then sort in descending order
Y.normalize_sort(1)
Y = decompTools.zeroSmallFactors(Y, zeroThr=zeroThr)

Y.writeRawFile("results/als-raw-{0}.dat".format(exptID))
Youtfile = "results/als-db-{0}-{1}.csv".format(exptID, iter)
Ysqlfile = "results/als-sql-{0}.sql".format(exptID)
# save the decomposition with the experiment id as the leading column
Yout = decompTools.getDBOutput(Y, yaxis)
Yout = np.column_stack((np.repeat(exptID, Yout.shape[0]), Yout))
np.savetxt(Youtfile, Yout, fmt="%s", delimiter="|")

# file() is the Python 2 builtin for opening files
sqlOut = file(Ysqlfile, "w")
sqlOut.write(
예제 #23
0
	return fms, fos, nnz

# Ten repetitions; each one seeds numpy with sample*1000 and compares
# SP_NTF ("Marble"), CP-APR and thresholded CP-APR ("Limestone").
for sample in range(10):
	seed = sample*1000
	np.random.seed(seed)
	## solve the solution (construction + computeDecomp are timed)
	startTime = time.time()
	spntf = SP_NTF.SP_NTF(X, R=R, alpha=alpha, maxinner=INNER_ITER, maxiters=MAX_ITER)
	Yinfo = spntf.computeDecomp(gamma=gamma)
	## calculate all the requested entries (scoring is not part of the timing)
	marbleElapse = time.time() - startTime
	marbleFMS, marbleFOS, marbleNNZ = calculateValues(TM, spntf.M[SP_NTF.REG_LOCATION])

	# re-seed so CP-APR starts from the same random state as SP_NTF did
	np.random.seed(seed)
	startTime = time.time()
	YCP, ycpstats, mstats = CP_APR.cp_apr(X, R=R, maxinner=INNER_ITER, maxiters=MAX_ITER)
	cpaprElapse = time.time() - startTime
	cpaprFMS, cpaprFOS, cpaprNNZ = calculateValues(TM, YCP)

	# hard-threshold the CP-APR factors per mode with gamma[n] (in place)
	for n in range(YCP.ndims()):
		YCP.U[n] = tensorTools.hardThresholdMatrix(YCP.U[n], gamma[n])
	limestoneFMS, limestoneFOS, limestoneNNZ = calculateValues(TM, YCP)

	# Every list follows the order given under "Order".  The thresholding
	# step is not timed, so Limestone reports the CP-APR time.
	sampleResult = {
	"Order": ["Marble", "CPAPR", "Limestone"],
	"FMS":[marbleFMS, cpaprFMS, limestoneFMS],
	"FOS":[marbleFOS, cpaprFOS, limestoneFOS],
	"CompTime": [marbleElapse, cpaprElapse, cpaprElapse],
	"NNZ": [marbleNNZ, cpaprNNZ, limestoneNNZ]
	}
	# results are keyed by the sample index as a string
	data[str(sample)] = sampleResult
예제 #24
0
# NOTE(review): this fragment contains a `return`, so it is presumably the
# body of a function whose header is not visible here.
## connection to mongo-db
client = MongoClient()
db = client.gravel
exptDB = db.factor

## verify the experimentID is okay: stop if a document with this id exists
if exptDB.find({"id": exptID}).count():
	print "Experiment ID already exists, select another"
	return

print "Starting Tensor Factorization with ID:{0}".format(exptID)
np.random.seed(seed)

## factorize using CP_APR (this is the original)
Y, iterStats, modelStats = CP_APR.cp_apr(X, R, tol=tol, maxiters=outerIters, maxinner=innerIters)

##


Y.writeRawFile("results/apr-raw-{0}.dat".format(exptID))
# NOTE(review): `iter` is not assigned in the visible code (the run above
# uses outerIters) -- unless it is defined elsewhere this formats the
# builtin function; confirm.
Youtfile = "results/apr-db-{0}-{1}.csv".format(exptID, iter)
Ysqlfile = "results/apr-sql-{0}.sql".format(exptID)
# save the decomposition with the experiment id as the leading column
Yout = decompTools.getDBOutput(Y, yaxis)
Yout = np.column_stack((np.repeat(exptID, Yout.shape[0]), Yout))
np.savetxt(Youtfile, Yout, fmt="%s", delimiter="|")

# SQL script that loads the factor file and records the model.
# NOTE(review): labelID, exptDesc, iter, innerIter and mstats are not
# assigned in the visible code (the cp_apr result above is stored as
# modelStats and the run uses innerIters) -- confirm they exist.  The file
# handle is not closed in the visible code either.
sqlOut = file(Ysqlfile, "w")
sqlOut.write("load data local infile '/home/joyce/workspace//Health/analysis/tensor/{0}' into table tensor_factors fields terminated by '|'  ;\n".format(Youtfile))
sqlOut.write("insert into tensor_models(expt_ID, label_ID, description, rank, iterations, inner_iterations, seed, least_squares, log_likelihood, kkt_violation) values({0}, {1}, \'{2}\', {3}, {4}, {5}, {6}, {7}, {8}, {9});\n".format(exptID, labelID, exptDesc, R, iter, innerIter, seed, mstats['LS'], mstats['LL'], mstats['KKT']))
예제 #25
0
import numpy as np;
import CP_APR
import ktensor
import KLProjection

""" 
Test file associated with the CP decomposition using APR
"""

""" Test factorization of sparse matrix """
# NOTE(review): sptensor is used below but is not among the imports visible
# in this snippet -- confirm it is imported.
# Sparse test tensor: five nonzero entries given as (subscript row, value).
subs = np.array([[0,3,1], [1,0,1], [1,2,1], [1,3,1], [3,0,0]]);
vals = np.array([[1],[1],[1],[1],[3]]);
siz = np.array([5,5,2]) # 5x5x2 tensor
X = sptensor.sptensor(subs, vals, siz)
# Fixed rank-4 initial factor matrices (5x4, 5x4, 2x4) so the run does not
# depend on random initialization.
U0 = np.array([[0.7689, 0.8843, 0.7487, 0.0900], [0.1673, 0.5880, 0.8256, 0.1117], [0.8620, 0.1548, 0.7900, 0.1363], [0.9899, 0.1999, 0.3185, 0.6787], [0.5144, 0.4070, 0.5341, 0.4952]])
U1 = np.array([[0.1897, 0.5606, 0.8790, 0.9900], [0.4950, 0.9296, 0.9889, 0.5277], [0.1476, 0.6967, 0.0006, 0.4795], [0.0550, 0.5828, 0.8654, 0.8013], [0.8507, 0.8154, 0.6126, 0.2278]])
U2 = np.array([[0.4981, 0.5747, 0.7386, 0.2467], [0.9009, 0.8452, 0.5860, 0.6664]])
Minit = ktensor.ktensor(np.ones(4), [U0, U1, U2])
# fms of the initial guess against itself; the value is not checked here
fms = Minit.fms(Minit)

# Rank-4 factorization starting from Minit, at most 100 outer iterations
Y, cpstats, modelStats = CP_APR.cp_apr(X,4, Minit=Minit, maxiters=100);
Y.normalize_sort(1)

# Second tensor with two nonzeros; it has 2 indices along mode 0 and the
# same sizes as X along modes 1 and 2
subs2 = np.array([[0,3,1], [1,2,0]])
vals2 = np.array([[1], [1]])
siz2 = np.array([2,5,2])
Xhat = sptensor.sptensor(subs2, vals2, siz2)

# Project Xhat onto the learned factors along mode 0; seeded because
# projectSlice draws a random initial guess.  The result is not checked.
klproj = KLProjection.KLProjection(Y.U, 4)
np.random.seed(10)
klproj.projectSlice(Xhat, 0)
예제 #26
0
# NOTE(review): this fragment contains a `return`, so it is presumably the
# body of a function whose header is not visible here.
client = MongoClient()
db = client.gravel
exptDB = db.factor

## verify the experimentID is okay: stop if a document with this id exists
if exptDB.find({"id": exptID}).count():
    print "Experiment ID already exists, select another"
    return

print "Starting Tensor Factorization with ID:{0}".format(exptID)
np.random.seed(seed)

## factorize using CP_APR (this is the original)
Y, iterStats, modelStats = CP_APR.cp_apr(X,
                                         R,
                                         tol=tol,
                                         maxiters=outerIters,
                                         maxinner=innerIters)

##

Y.writeRawFile("results/apr-raw-{0}.dat".format(exptID))
# NOTE(review): `iter` is not assigned in the visible code (the run above
# uses outerIters) -- unless it is defined elsewhere this formats the
# builtin function; confirm.
Youtfile = "results/apr-db-{0}-{1}.csv".format(exptID, iter)
Ysqlfile = "results/apr-sql-{0}.sql".format(exptID)
# save the decomposition with the experiment id as the leading column
Yout = decompTools.getDBOutput(Y, yaxis)
Yout = np.column_stack((np.repeat(exptID, Yout.shape[0]), Yout))
np.savetxt(Youtfile, Yout, fmt="%s", delimiter="|")

# file() is the Python 2 builtin for opening files
sqlOut = file(Ysqlfile, "w")
sqlOut.write(
예제 #27
0
 def logLikelihood(self):
     """
     Total log likelihood over all tensors: the sum of
     CP_APR.loglikelihood(self.X[i], self.M[i]) for every index i of self.X
     """
     perTensor = []
     for i in range(len(self.X)):
         ## log likelihood of the ith tensor under its own factorization
         perTensor.append(CP_APR.loglikelihood(self.X[i], self.M[i]))
     return np.sum(np.array(perTensor))