예제 #1
0
def useHier(topX, regX, R, hierIters, hierInner, regIters, regInner,
            tensorInfo):
    """Factorize the top-level tensor and use it to warm-start a second run.

    CP_APR is first run on ``topX``. Its result is passed through
    ``normalize_sort(1)`` and ``pmdTools.zeroSmallFactors`` (threshold 1e-4).
    For every entry of ``tensorInfo['diag']`` / ``tensorInfo['med']`` the
    matching row of the top-level mode-1 / mode-2 factor matrix is looked up
    through ``tensorInfo['diagHier']`` / ``tensorInfo['medHier']`` and divided
    by the corresponding ``diagHierCount`` / ``medHierCount`` value. The
    resulting matrices, together with a copy of the top-level mode-0 factors,
    form the initial guess (``Minit``) of a second CP_APR run on ``regX``.

    Parameters:
    topX - tensor factorized first (presumably the coarse/hierarchy level)
    regX - tensor factorized second, initialized from the first result
    R - rank used for both factorizations
    hierIters, hierInner - maxiters / maxinner for the first run
    regIters, regInner - maxiters / maxinner for the second run
    tensorInfo - dict with keys 'diag', 'med', 'diagHier', 'medHier',
                 'diagHierCount' and 'medHierCount'

    Output:
    (Y1, topY1, top1stats, top1mstats, ystats, mstats) - the second
    factorization, the processed first factorization, and the two statistics
    objects returned by each cp_apr call.
    """
    topY1, top1stats, top1mstats = CP_APR.cp_apr(topX,
                                                 R,
                                                 maxiters=hierIters,
                                                 maxinner=hierInner)
    # normalize/sort the top-level factors, then drop the tiny entries
    topY1.normalize_sort(1)
    topY1 = pmdTools.zeroSmallFactors(topY1, 1e-4)

    # Build the mode-1 and mode-2 initial factors from the top-level ones.
    # The mode-0 factors are reused unchanged (copied below).
    Udiag = np.zeros((len(tensorInfo['diag']), R))
    Umed = np.zeros((len(tensorInfo['med']), R))
    for idx, diag in enumerate(tensorInfo['diag']):
        topDiagIdx = tensorInfo['diagHier'][diag]
        diagCount = tensorInfo['diagHierCount'][topDiagIdx]
        # presumably diagCount is the number of entries sharing this parent,
        # so the parent's weight is split evenly -- confirm with the caller
        Udiag[idx, :] = topY1.U[1][topDiagIdx, :] / diagCount
    for idx, med in enumerate(tensorInfo['med']):
        topMedIdx = tensorInfo['medHier'][med]
        medCount = tensorInfo['medHierCount'][topMedIdx]
        Umed[idx, :] = topY1.U[2][topMedIdx, :] / medCount

    Mtop = ktensor.ktensor(np.ones(R), [topY1.U[0].copy(), Udiag, Umed])
    # Fix: the original passed the undefined name X1 here and never used the
    # regX parameter; the second factorization runs on regX.
    Y1, ystats, mstats = CP_APR.cp_apr(regX,
                                       R,
                                       Minit=Mtop,
                                       maxiters=regIters,
                                       maxinner=regInner)
    return Y1, topY1, top1stats, top1mstats, ystats, mstats
예제 #2
0
    def evaluatePredictionAUC_1(self,experCount):
        """Factorize self.X and dump raw and factor-based features to text files.

        Runs CP_APR on ``self.X`` with rank ``self.R`` (maxiters=1,
        maxinner=7), rescales the mode-0 factor matrix so that every row sums
        to one, and writes four files under
        ``self.data_dir + 'experiment_runprecess/'``.

        Parameters:
        experCount - value embedded (via ``str``) in the output file names

        NOTE(review): ``run``, ``sumBaseAUC`` and ``sumCprAUC`` are assigned
        but never read in the lines visible here, and nothing is returned;
        the method may continue beyond this excerpt.
        """
        run = 0
        sumBaseAUC=0.0
        sumCprAUC=0.0
        MCPR, cpstats, mstats = CP_APR.cp_apr(self.X, self.R, maxiters=1, maxinner=7)

        #MCPR.normalize_sort(1)
        # presumably moves the component weights into mode 0 -- TODO confirm
        MCPR.redistribute(0)
        ## scale by summing across the rows
        totWeight = np.sum(MCPR.U[0], axis=1)
        # rows whose total weight is (numerically) zero cannot be divided
        zeroIdx = np.where(totWeight < 1e-100)[0]
        if len(zeroIdx) > 0:
            # for the zero ones we're going to evenly distribute
            evenDist = np.repeat(1.0 / self.R, len(zeroIdx)*self.R)
            MCPR.U[0][zeroIdx, :] = evenDist.reshape((len(zeroIdx), self.R))
            # recompute the row sums now that the zero rows were replaced
            totWeight = np.sum(MCPR.U[0], axis=1)
        # expand the row sums to the factor matrix shape for the division;
        # the reshape requires MCPR.U[0] to have self.X.shape[0] rows
        twMat = np.repeat(totWeight, self.R).reshape(self.X.shape[0], self.R)
        MCPR.U[0] = MCPR.U[0] / twMat

        # plain string concatenation: self.data_dir must already end with a
        # path separator for these paths to be valid
        rawXfile=self.data_dir+'experiment_runprecess/rawdataX_'+str(experCount)+'.csv'
        rawYfile=self.data_dir+'experiment_runprecess/rawdataY_'+str(experCount)+'.csv'
        cprXfile=self.data_dir+'experiment_runprecess/cprdataX_'+str(experCount)+'.csv'
        cprYfile=self.data_dir+'experiment_runprecess/cprdataY_'+str(experCount)+'.csv'
        # self.Y is written twice, once next to each feature matrix
        np.savetxt(rawXfile,self.rawFeatures)
        np.savetxt(rawYfile,self.Y)
        np.savetxt(cprXfile, MCPR.U[0])
        np.savetxt(cprYfile,self.Y)
예제 #3
0
def factorTensor(X):
    """Run CP_APR on X and return the normalized, thresholded factors.

    The NumPy random generator is re-seeded with the module-level ``seed``
    before the call so repeated runs use the same seed. Rank, tolerance,
    iteration limits and the zeroing threshold are read from the module-level
    names ``R``, ``tol``, ``outerIters``, ``innerIters`` and ``zeroThr``.
    """
    np.random.seed(seed)
    factors, _iterStats, _modelStats = CP_APR.cp_apr(
        X,
        R,
        tol=tol,
        maxiters=outerIters,
        maxinner=innerIters,
    )
    factors.normalize_sort(1)
    return decompTools.zeroSmallFactors(factors, zeroThr=zeroThr)
예제 #4
0
def factorTensor(X):
    """Factorize X with CP_APR under a fixed seed and clean up the result.

    Uses the module-level settings ``seed``, ``R``, ``tol``, ``outerIters``,
    ``innerIters`` and ``zeroThr``. The per-iteration and model statistics
    returned by ``cp_apr`` are discarded; only the factorization, after
    ``normalize_sort(1)`` and ``decompTools.zeroSmallFactors``, is returned.
    """
    # seed first so the factorization is initialized the same way every call
    np.random.seed(seed)
    solverArgs = dict(tol=tol, maxiters=outerIters, maxinner=innerIters)
    result, _unusedIterStats, _unusedModelStats = CP_APR.cp_apr(
        X, R, **solverArgs)
    result.normalize_sort(1)
    result = decompTools.zeroSmallFactors(result, zeroThr=zeroThr)
    return result
예제 #5
0
 def findFactors(self, trainX, zeroThr=1e-4):
     """Factorize trainX and wrap the factor matrices in a KLProjection.

     CP_APR is run with rank ``self.R`` and the iteration limits
     ``self.outerIter`` / ``self.innerIter``. After ``normalize_sort(1)``,
     every factor-matrix entry below ``zeroThr`` is set to 0 in place.
     """
     factors, _cpstats, _mstats = CP_APR.cp_apr(
         trainX,
         R=self.R,
         maxiters=self.outerIter,
         maxinner=self.innerIter)
     factors.normalize_sort(1)
     # suppress the small entries of each mode's factor matrix
     for mode in range(factors.ndims()):
         modeMatrix = factors.U[mode]
         modeMatrix[modeMatrix < zeroThr] = 0
     return KLProjection.KLProjection(factors.U, self.R)
예제 #6
0
 def findFactors(self, trainX, zeroThr=1e-4):
     """Build the projection basis for trainX from a CP_APR factorization.

     The factorization uses ``self.R``, ``self.outerIter`` and
     ``self.innerIter``. It is passed through ``normalize_sort(1)``, and
     entries smaller than ``zeroThr`` are zeroed in every mode before the
     factor matrices are handed to ``KLProjection.KLProjection``.
     """
     solverArgs = dict(R=self.R,
                       maxiters=self.outerIter,
                       maxinner=self.innerIter)
     model, _iterInfo, _modelInfo = CP_APR.cp_apr(trainX, **solverArgs)
     model.normalize_sort(1)
     for modeIdx in range(model.ndims()):
         # boolean mask of the entries considered negligible in this mode
         tooSmall = model.U[modeIdx] < zeroThr
         model.U[modeIdx][tooSmall] = 0
     return KLProjection.KLProjection(model.U, self.R)
예제 #7
0
def findFactors(X, R=100, outerIter=70, innerIter=10, zeroThr=1e-4):
    """Factorize X with CP_APR and build a KLProjection from the factors.

    Parameters:
    X - tensor to factorize
    R - rank passed to cp_apr and to KLProjection
    outerIter - value passed as ``maxiters``
    innerIter - value passed as ``maxinner``
    zeroThr - threshold handed to ``decompTools.zeroSmallFactors``

    Output:
    (projection, factorization, model statistics)
    """
    factors, _iterStats, modelStats = CP_APR.cp_apr(
        X, R=R, maxiters=outerIter, maxinner=innerIter)
    factors.normalize_sort(1)
    factors = decompTools.zeroSmallFactors(factors, zeroThr)
    projection = KLProjection.KLProjection(factors.U, R)
    return projection, factors, modelStats
예제 #8
0
def useHier(topX, regX, R, hierIters, hierInner, regIters, regInner, tensorInfo):
    """Run CP_APR on topX, then on regX initialized from the first result.

    The first factorization is passed through ``normalize_sort(1)`` and
    ``pmdTools.zeroSmallFactors`` (threshold 1e-4). Initial mode-1 and mode-2
    factor matrices for the second run are filled row by row: each entry of
    ``tensorInfo['diag']`` / ``tensorInfo['med']`` is mapped through
    ``tensorInfo['diagHier']`` / ``tensorInfo['medHier']`` to a row of the
    first result, which is divided by the matching ``diagHierCount`` /
    ``medHierCount`` value. The mode-0 factors are copied as they are.

    Parameters:
    topX - tensor for the first factorization
    regX - tensor for the second factorization
    R - rank of both factorizations
    hierIters, hierInner - maxiters / maxinner of the first cp_apr call
    regIters, regInner - maxiters / maxinner of the second cp_apr call
    tensorInfo - dict providing 'diag', 'med', 'diagHier', 'medHier',
                 'diagHierCount' and 'medHierCount'

    Output:
    (Y1, topY1, top1stats, top1mstats, ystats, mstats)
    """
    topY1, top1stats, top1mstats = CP_APR.cp_apr(topX, R, maxiters=hierIters, maxinner=hierInner)
    # normalize/sort the first result and zero its small entries
    topY1.normalize_sort(1)
    topY1 = pmdTools.zeroSmallFactors(topY1, 1e-4)

    ### Populate the initial factors of the second run from the first result
    Udiag = np.zeros((len(tensorInfo['diag']), R))
    Umed = np.zeros((len(tensorInfo['med']), R))
    for idx, diag in enumerate(tensorInfo['diag']):
        topDiagIdx = tensorInfo['diagHier'][diag]
        diagCount = tensorInfo['diagHierCount'][topDiagIdx]
        # presumably the count is the number of entries under this parent, so
        # its weight is shared evenly -- TODO confirm
        Udiag[idx, :] = topY1.U[1][topDiagIdx, :] / diagCount
    for idx, med in enumerate(tensorInfo['med']):
        topMedIdx = tensorInfo['medHier'][med]
        medCount = tensorInfo['medHierCount'][topMedIdx]
        Umed[idx, :] = topY1.U[2][topMedIdx, :] / medCount

    ### Mode-0 factors stay the same (copied so topY1 is not shared)
    Mtop = ktensor.ktensor(np.ones(R), [topY1.U[0].copy(), Udiag, Umed])
    # Fix: the original referenced the undefined name X1 and ignored regX.
    Y1, ystats, mstats = CP_APR.cp_apr(regX, R, Minit=Mtop, maxiters=regIters, maxinner=regInner)
    return Y1, topY1, top1stats, top1mstats, ystats, mstats
예제 #9
0
def decomposeCountTensor(filename, R, outerIters=20, innerIters=10, convergeTol=1e-2, zeroTol=1e-4):
    """
    Load a sparse tensor from a file and decompose it using CP_APR
    with the specified rank.

    Parameters:
    filename - the file passed to sptensor.loadTensor
    R - the rank of the decomposition
    outerIters - the maximum number of outer iterations
    innerIters - the maximum number of inner iterations
    convergeTol - the convergence tolerance
    zeroTol - the threshold handed to zeroSmallFactors

    Output:
    (factors, iterStats, modelStats) - the factorization after
    normalize_sort(1) and zeroSmallFactors, followed by the two statistics
    objects returned by cp_apr
    """
    tensorData = sptensor.loadTensor(filename)
    factors, iterStats, modelStats = CP_APR.cp_apr(
        tensorData,
        R,
        tol=convergeTol,
        maxiters=outerIters,
        maxinner=innerIters)
    # 1-norm normalization and sorting, then suppress the small entries
    factors.normalize_sort(1)
    factors = zeroSmallFactors(factors, zeroThr=zeroTol)
    return factors, iterStats, modelStats
예제 #10
0
# NOTE(review): this excerpt contains a bare `return`, so it presumably sits
# inside a function whose header is outside the visible lines. exptID, seed,
# X, R, tol, outerIters, innerIters and yaxis all come from that outer scope.
client = MongoClient()
db = client.gravel
exptDB = db.factor

## verify the experimentID is okay
# abort when the lookup for this id reports a non-zero count
if exptDB.find({"id": exptID}).count():
    print "Experiment ID already exists, select another"
    return

print "Starting Tensor Factorization with ID:{0}".format(exptID)
# seed NumPy's generator before the factorization so runs are repeatable
np.random.seed(seed)

## factorize using CP_APR (this is the original)
Y, iterStats, modelStats = CP_APR.cp_apr(X,
                                         R,
                                         tol=tol,
                                         maxiters=outerIters,
                                         maxinner=innerIters)

##

Y.writeRawFile("results/apr-raw-{0}.dat".format(exptID))
# NOTE(review): `iter` is not assigned in the visible lines; unless the
# enclosing scope binds it, this formats the builtin `iter` into the name.
Youtfile = "results/apr-db-{0}-{1}.csv".format(exptID, iter)
Ysqlfile = "results/apr-sql-{0}.sql".format(exptID)
# save the decomposition into the format
Yout = decompTools.getDBOutput(Y, yaxis)
# prepend the experiment id as the first column of every output row
Yout = np.column_stack((np.repeat(exptID, Yout.shape[0]), Yout))
np.savetxt(Youtfile, Yout, fmt="%s", delimiter="|")

# `file` is the Python 2 builtin; no close() is visible in this excerpt,
# which ends in the middle of the next statement
sqlOut = file(Ysqlfile, "w")
sqlOut.write(
예제 #11
0
# Collects one timing record per processed train/test split.
compOut = []

# `n`, `nSample`, `ttss`, `X`, `R`, `seed`, `outerIter`, `zeroThr` and
# `exptID` are defined outside this excerpt.
for train, test in ttss:
    # skip splits (advancing the counter) until the counter equals nSample
    if n != nSample:
        n = n + 1
        continue
    else:
        trainShape = list(X.shape)
        # NOTE(review): this overwrites the first element of `train`, while
        # `trainShape` is passed on unchanged from X.shape. It looks like
        # `trainShape[0] = len(train)` may have been intended -- confirm.
        train[0] = len(train)
        trainX = predictionModel.tensorSubset(X, train, trainShape)

        ## Do the tensor factorization
        np.random.seed(seed)
        startTime = time.time()
        M, cpstats, mstats = CP_APR.cp_apr(trainX,
                                           R,
                                           maxiters=outerIter,
                                           maxinner=10)
        M.normalize_sort(1)
        # zero out the small factors
        # NOTE(review): range(1, 2) visits only index 1, and the loop variable
        # reuses the name `n`, so the split counter checked at the top of the
        # outer loop is left at 1 afterwards -- confirm this is intended.
        for n in range(1, 2):
            zeroIdx = np.where(M.U[n] < zeroThr)
            M.U[n][zeroIdx] = 0
        # elapsed time covers factorization, normalize_sort and the zeroing
        elapsed = time.time() - startTime
        compOut.append({
            "expt": exptID,
            "R": R,
            "Outer": outerIter,
            "Model": "Limestone",
            "Comp": elapsed
        })
예제 #12
0
## connection to mongo-db
# NOTE(review): the bare `return` below means this excerpt presumably sits
# inside a function whose header is outside the visible lines.
client = MongoClient()
db = client.gravel
exptDB = db.factor

## verify the experimentID is okay
# abort when the lookup for this id reports a non-zero count
if exptDB.find({"id": exptID}).count():
	print "Experiment ID already exists, select another"
	return

print "Starting Tensor Factorization with ID:{0}".format(exptID)
# seed NumPy's generator before the factorization so runs are repeatable
np.random.seed(seed)

## factorize using CP_APR (this is the original)
Y, iterStats, modelStats = CP_APR.cp_apr(X, R, tol=tol, maxiters=outerIters, maxinner=innerIters)

##


Y.writeRawFile("results/apr-raw-{0}.dat".format(exptID))
# NOTE(review): `iter` is not assigned in the visible lines; unless the
# enclosing scope binds it, this formats the builtin `iter` into the name.
Youtfile = "results/apr-db-{0}-{1}.csv".format(exptID, iter)
Ysqlfile = "results/apr-sql-{0}.sql".format(exptID)
# save the decomposition into the format
Yout = decompTools.getDBOutput(Y, yaxis)
# prepend the experiment id as the first column of every output row
Yout = np.column_stack((np.repeat(exptID, Yout.shape[0]), Yout))
np.savetxt(Youtfile, Yout, fmt="%s", delimiter="|")

# Write a two-statement SQL script: a bulk load of the CSV written above and
# an insert describing this run. `file` is the Python 2 builtin and no
# close() is visible in this excerpt.
sqlOut = file(Ysqlfile, "w")
sqlOut.write("load data local infile '/home/joyce/workspace//Health/analysis/tensor/{0}' into table tensor_factors fields terminated by '|'  ;\n".format(Youtfile))
# NOTE(review): the statistics were bound to `modelStats` above, but this line
# reads `mstats`; likewise `innerIter` vs. `innerIters` and `iter`. Unless the
# enclosing scope defines those names, this raises NameError or writes the
# wrong values -- confirm. exptDesc is interpolated into the SQL unescaped.
sqlOut.write("insert into tensor_models(expt_ID, label_ID, description, rank, iterations, inner_iterations, seed, least_squares, log_likelihood, kkt_violation) values({0}, {1}, \'{2}\', {3}, {4}, {5}, {6}, {7}, {8}, {9});\n".format(exptID, labelID, exptDesc, R, iter, innerIter, seed, mstats['LS'], mstats['LL'], mstats['KKT']))
예제 #13
0
    # Compare three approaches on the same tensor X against the reference TM:
    # SP_NTF ("Marble"), plain CP_APR, and CP_APR followed by hard
    # thresholding ("Limestone"). X, TM, R, alpha, gamma, seed, INNER_ITER,
    # MAX_ITER and calculateValues come from outside this excerpt.
    startTime = time.time()
    spntf = SP_NTF.SP_NTF(X,
                          R=R,
                          alpha=alpha,
                          maxinner=INNER_ITER,
                          maxiters=MAX_ITER)
    Yinfo = spntf.computeDecomp(gamma=gamma)
    ## calculate all the request entries
    marbleElapse = time.time() - startTime
    marbleFMS, marbleFOS, marbleNNZ = calculateValues(
        TM, spntf.M[SP_NTF.REG_LOCATION])

    # re-seed so CP_APR starts from the same generator state
    np.random.seed(seed)
    startTime = time.time()
    YCP, ycpstats, mstats = CP_APR.cp_apr(X,
                                          R=R,
                                          maxinner=INNER_ITER,
                                          maxiters=MAX_ITER)
    cpaprElapse = time.time() - startTime
    cpaprFMS, cpaprFOS, cpaprNNZ = calculateValues(TM, YCP)

    # threshold the CP_APR factors per mode (YCP's factor list is updated, so
    # the CP_APR scores had to be taken before this loop)
    for n in range(YCP.ndims()):
        YCP.U[n] = tensorTools.hardThresholdMatrix(YCP.U[n], gamma[n])
    limestoneFMS, limestoneFOS, limestoneNNZ = calculateValues(TM, YCP)

    # every list follows the order given under "Order"
    # NOTE(review): the Limestone "CompTime" entry reuses cpaprElapse; the
    # thresholding step itself is not timed -- confirm this is intended.
    sampleResult = {
        "Order": ["Marble", "CPAPR", "Limestone"],
        "FMS": [marbleFMS, cpaprFMS, limestoneFMS],
        "FOS": [marbleFOS, cpaprFOS, limestoneFOS],
        "CompTime": [marbleElapse, cpaprElapse, cpaprElapse],
        "NNZ": [marbleNNZ, cpaprNNZ, limestoneNNZ]
    }
예제 #14
0
	return fms, fos, nnz

# Ten repetitions, each with its own seed, comparing SP_NTF ("Marble"),
# plain CP_APR and thresholded CP_APR ("Limestone") on X against TM.
# X, TM, R, alpha, gamma, INNER_ITER, MAX_ITER and `data` are defined
# outside this excerpt.
for sample in range(10):
	seed = sample*1000
	np.random.seed(seed)
	## solve the solution
	startTime = time.time()
	spntf = SP_NTF.SP_NTF(X, R=R, alpha=alpha, maxinner=INNER_ITER, maxiters=MAX_ITER)
	Yinfo = spntf.computeDecomp(gamma=gamma)
	## calculate all the request entries
	marbleElapse = time.time() - startTime
	marbleFMS, marbleFOS, marbleNNZ = calculateValues(TM, spntf.M[SP_NTF.REG_LOCATION])

	# re-seed so CP_APR starts from the same generator state as SP_NTF did
	np.random.seed(seed)
	startTime = time.time()
	YCP, ycpstats, mstats = CP_APR.cp_apr(X, R=R, maxinner=INNER_ITER, maxiters=MAX_ITER)
	cpaprElapse = time.time() - startTime
	cpaprFMS, cpaprFOS, cpaprNNZ = calculateValues(TM, YCP)

	# threshold the CP_APR factors per mode; the CP_APR scores above were
	# taken before YCP's factor list is updated here
	for n in range(YCP.ndims()):
		YCP.U[n] = tensorTools.hardThresholdMatrix(YCP.U[n], gamma[n])
	limestoneFMS, limestoneFOS, limestoneNNZ = calculateValues(TM, YCP)

	# every list follows the order given under "Order"
	# NOTE(review): the Limestone "CompTime" entry reuses cpaprElapse; the
	# thresholding step itself is not timed -- confirm this is intended.
	sampleResult = {
	"Order": ["Marble", "CPAPR", "Limestone"],
	"FMS":[marbleFMS, cpaprFMS, limestoneFMS],
	"FOS":[marbleFOS, cpaprFOS, limestoneFOS],
	"CompTime": [marbleElapse, cpaprElapse, cpaprElapse],
	"NNZ": [marbleNNZ, cpaprNNZ, limestoneNNZ]
	}
	# results are keyed by the sample number as a string
	data[str(sample)] = sampleResult
예제 #15
0
import CP_APR
import ktensor
# NOTE(review): `np`, `sptensor` and `tensor` are used below but are not
# imported in the visible lines -- confirm they are imported elsewhere.
""" 
Test file associated with the CP decomposition using APR
"""
""" Test factorization of sparse matrix """
# Five nonzero entries of a 5x5x2 sparse tensor: one subscript triple per row
# of `subs`, with the matching value in `vals`.
subs = np.array([[0, 3, 1], [1, 0, 1], [1, 2, 1], [1, 3, 1], [3, 0, 0]])
vals = np.array([[1], [1], [1], [1], [3]])
siz = np.array([5, 5, 2])  # 5x5x2 tensor
X = sptensor.sptensor(subs, vals, siz)
# Fixed rank-4 initial factor matrices (5x4, 5x4, 2x4), one per mode, so the
# factorization does not depend on random initialization.
U0 = np.array([[0.7689, 0.8843, 0.7487, 0.0900],
               [0.1673, 0.5880, 0.8256, 0.1117],
               [0.8620, 0.1548, 0.7900, 0.1363],
               [0.9899, 0.1999, 0.3185, 0.6787],
               [0.5144, 0.4070, 0.5341, 0.4952]])
U1 = np.array([[0.1897, 0.5606, 0.8790, 0.9900],
               [0.4950, 0.9296, 0.9889, 0.5277],
               [0.1476, 0.6967, 0.0006, 0.4795],
               [0.0550, 0.5828, 0.8654, 0.8013],
               [0.8507, 0.8154, 0.6126, 0.2278]])
U2 = np.array([[0.4981, 0.5747, 0.7386, 0.2467],
               [0.9009, 0.8452, 0.5860, 0.6664]])
Minit = ktensor.ktensor(np.ones(4), [U0, U1, U2])
# score of the initial guess against itself; the value is not used again in
# the visible lines
fms = Minit.fms(Minit)

Y, cpstats, modelStats = CP_APR.cp_apr(X, 4, Minit=Minit, maxiters=100)
Y.normalize_sort(1)
""" Test factorization of regular matrix """
# dense 3x4x2 tensor holding the values 1..24, factorized with default settings
X = tensor.tensor(range(1, 25), [3, 4, 2])
print CP_APR.cp_apr(X, 4)
예제 #16
0
import numpy as np;
import CP_APR
import ktensor
import KLProjection

# NOTE(review): `sptensor` is used below but is not imported in the visible
# lines -- confirm it is imported elsewhere.
""" 
Test file associated with the CP decomposition using APR
"""

""" Test factorization of sparse matrix """
# Five nonzero entries of a 5x5x2 sparse tensor: one subscript triple per row
# of `subs`, with the matching value in `vals`.
subs = np.array([[0,3,1], [1,0,1], [1,2,1], [1,3,1], [3,0,0]]);
vals = np.array([[1],[1],[1],[1],[3]]);
siz = np.array([5,5,2]) # 5x5x2 tensor
X = sptensor.sptensor(subs, vals, siz)
# Fixed rank-4 initial factor matrices (5x4, 5x4, 2x4), one per mode.
U0 = np.array([[0.7689, 0.8843, 0.7487, 0.0900], [0.1673, 0.5880, 0.8256, 0.1117], [0.8620, 0.1548, 0.7900, 0.1363], [0.9899, 0.1999, 0.3185, 0.6787], [0.5144, 0.4070, 0.5341, 0.4952]])
U1 = np.array([[0.1897, 0.5606, 0.8790, 0.9900], [0.4950, 0.9296, 0.9889, 0.5277], [0.1476, 0.6967, 0.0006, 0.4795], [0.0550, 0.5828, 0.8654, 0.8013], [0.8507, 0.8154, 0.6126, 0.2278]])
U2 = np.array([[0.4981, 0.5747, 0.7386, 0.2467], [0.9009, 0.8452, 0.5860, 0.6664]])
Minit = ktensor.ktensor(np.ones(4), [U0, U1, U2])
# score of the initial guess against itself; the value is not used again in
# the visible lines
fms = Minit.fms(Minit)

Y, cpstats, modelStats = CP_APR.cp_apr(X,4, Minit=Minit, maxiters=100);
Y.normalize_sort(1)

# Second, smaller sparse tensor (2x5x2, two nonzeros) to project onto the
# factors learned above; its last two dimensions match those of X.
subs2 = np.array([[0,3,1], [1,2,0]])
vals2 = np.array([[1], [1]])
siz2 = np.array([2,5,2])
Xhat = sptensor.sptensor(subs2, vals2, siz2)

klproj = KLProjection.KLProjection(Y.U, 4)
# seed before projecting -- presumably projectSlice initializes randomly
np.random.seed(10)
klproj.projectSlice(Xhat, 0)
예제 #17
0
    diagIdx = idx / 470
    medIdx = idx % 470
    return axisList[1][diagIdx] + axisList[2][medIdx]
    
# Factorize the training part of one selected split, export the factors and
# a SQL script, then fit/evaluate a classifier on the projected features.
# `n`, `nSample`, `ttss`, `X`, `Y`, `R`, `seed`, `outerIter`, `innerIter`,
# `exptID`, `labelID`, `exptDesc`, `yaxis`, `factorFile`, `Youtfile`,
# `Ysqlfile` and `predModel` are defined outside this excerpt.
for train, test in ttss:
    # skip splits (advancing the counter) until the counter equals nSample
    if n != nSample:
        n = n + 1
        continue
    else:
        trainShape = list(X.shape)
        # NOTE(review): this overwrites the first element of `train`, while
        # `trainShape` is passed on unchanged from X.shape. It looks like
        # `trainShape[0] = len(train)` may have been intended -- confirm.
        # `train` is also used for indexing further down.
        train[0] = len(train)
        trainX = predictionModel.tensorSubset(X, train, trainShape)
        
        ## Do the tensor factorization
        np.random.seed(seed)
        # NOTE(review): maxinner is the literal 10 here, while the SQL row
        # below records `innerIter` -- confirm the two agree.
        M, cpstats, mstats = CP_APR.cp_apr(trainX, R, maxiters=outerIter, maxinner=10)
        M.normalize_sort(1)
        M.writeRawFile(factorFile)
        Yout = decompTools.getDBOutput(M, yaxis)
        # prepend the experiment id as the first column of every output row
        Yout = np.column_stack((np.repeat(exptID, Yout.shape[0]), Yout))
        np.savetxt(Youtfile, Yout, fmt="%s", delimiter="|")
        # `file` is the Python 2 builtin; no close() is visible in this
        # excerpt. exptDesc is interpolated into the SQL text unescaped.
        sqlOut = file(Ysqlfile, "w")
        sqlOut.write("load data local infile '/home/joyce/workspace/Health/analysis/tensor/{0}' into table tensor_factors fields terminated by '|'  ;\n".format(Youtfile))
        sqlOut.write("insert into tensor_models(expt_ID, label_ID, description, rank, iterations, inner_iterations, seed, least_squares, log_likelihood, kkt_violation) values({0}, {1}, \'{2}\', {3}, {4}, {5}, {6}, {7}, {8}, {9});\n".format(exptID, labelID, exptDesc, R, outerIter, innerIter, seed, mstats['LS'], mstats['LL'], mstats['KKT']))

        # project the full tensor X (not only trainX) along mode 0, then
        # split the projected rows by the train/test indices
        klp = KLProjection.KLProjection(M.U, M.R)
        ptfFeat = klp.projectSlice(X, 0)
        trainY = Y[train]
        predModel.fit(ptfFeat[train, :], trainY)
        ptfPred = predModel.predict_proba(ptfFeat[test,:])
        # column 1 of predict_proba is used as the score for pos_label=1
        fpr, tpr, thresholds = metrics.roc_curve(Y[test], ptfPred[:, 1], pos_label=1)
예제 #18
0
## calculate diagnosis-medication combination
# Every [a, b] pair from the Cartesian product of yaxis[1] and yaxis[2], in
# itertools.product order (the yaxis[2] element varies fastest).
diagMed = [[a, b] for a, b in itertools.product(yaxis[1], yaxis[2])] 

def getDBEntry(featureName, m):
    """Flatten the nonzero entries of m into labelled output rows.

    For each column r in range(R), every row index with a nonzero value
    produces one record made of the matching ``diagMed`` pair, the column
    number r and the value. Each record is prefixed with ``exptID`` and
    ``featureName``. ``R``, ``diagMed`` and ``exptID`` are module-level names.

    Parameters:
    featureName - label written into the second column of every row
    m - 2-D array indexed as m[row, column]; rows are looked up in diagMed

    Output:
    array with one row per nonzero entry and six columns:
    exptID, featureName, the two diagMed values, the column number, the value
    """
    # loop-invariant: convert the pair list once instead of once per column
    pairLookup = np.array(diagMed)
    # The 1x4 zero row seeds the stack so the result has the same shape/dtype
    # handling as before even when nothing is nonzero; it is dropped below.
    pieces = [np.zeros((1, 4))]
    for r in range(R):
        # get the nonzero indices
        idx = np.flatnonzero(m[:, r])
        pieces.append(
            np.column_stack((pairLookup[idx], np.repeat(r, len(idx)), m[idx, r])))
    # stack once at the end rather than re-copying the growing array each pass
    output = np.delete(np.vstack(pieces), (0), axis=0)
    nRows = output.shape[0]
    return np.column_stack(
        (np.repeat(exptID, nRows), np.repeat(featureName, nRows), output))

# Factorize X with CP_APR and with NMF on its mode-0 matricization.
# `seed`, `X`, `R`, `iters`, `innerIter` and `modeThr` are defined outside
# this excerpt.
np.random.seed(seed)
M, cpstats, mstats = CP_APR.cp_apr(X, R, maxiters=iters, maxinner=innerIter)
M.normalize_sort(1)
## Threshold the values
# NOTE(review): range(1,2) visits only n == 1, so M.U[2] enters the product
# below unthresholded -- confirm this is intended.
for n in range(1,2):
    zeroIdx = np.where(M.U[n] < modeThr)
    M.U[n][zeroIdx] = 0
## Get the diagnosis-medication matrix
ptfMatrix = khatrirao.khatrirao(M.U[1], M.U[2])
dbOutput = getDBEntry("CP-APR", ptfMatrix)

flatX = sptenmat.sptenmat(X, [0]).tocsrmat() # matricize along the first mode
nmfModel = nimfa.mf(flatX, method="nmf", max_iter=iters, rank=R)
nmfResult = nimfa.mf_run(nmfModel)
# transpose the coefficient matrix, then L1-normalize along axis 0
nmfBasis = nmfResult.coef().transpose()
nmfBasis = preprocessing.normalize(nmfBasis, norm="l1", axis=0)
# toarray() implies the normalized basis is a sparse matrix at this point
nmfBasis = nmfBasis.toarray()
예제 #19
0
# Time NMF, PCA and CP_APR `samples` times each and write one row per run.
# `xprime`, `samples`, `iters`, `R`, `pn` and `pcaModel` are defined outside
# this excerpt.
flatX = sptenmat.sptenmat(xprime, [0]).tocsrmat() # matricize along the first mode
# placeholder first row so vstack has something to append to; removed below
stats = np.zeros((1,6))

## NMF Timing
for k in range(samples):
    startTime = time.time()
    nmfModel = nimfa.mf(flatX, method="nmf", max_iter=iters, rank=R)
    nmfResult = nimfa.mf_run(nmfModel)
    elapsed = time.time() - startTime
    # the string label makes np.array build a string-typed row
    stats = np.vstack((stats, np.array([R, iters, pn, k, "NMF", elapsed])))
    
## PCA Timing
for k in range(samples):
    startTime = time.time()
    pcaModel.fit(flatX)
    elapsed = time.time() - startTime
    stats = np.vstack((stats, np.array([R, iters, pn, k, "PCA", elapsed])))

## Tensor factorization timing
for k in range(samples):
    startTime = time.time()
    # result is discarded; only the run time matters here
    CP_APR.cp_apr(xprime, R, maxiters=iters)
    elapsed = time.time() - startTime
    stats = np.vstack((stats, np.array([R, iters, pn, k, "CP_APR", elapsed])))
    
# drop the placeholder row added at the top
stats = np.delete(stats, (0), axis=0)

outFile = "results/patient-cpu-{0}.csv".format(pn)
np.savetxt(outFile, stats,  fmt="%s", delimiter="|")
# print (not execute) the SQL statement that would load the file just written
print "load data local infile '/home/joyce/workspace/Health/analysis/tensor/{0}' into table comp_metrics fields terminated by '|'  ;\n".format(outFile)
예제 #20
0
def findFactors(X, R=100, outerIter=70, innerIter=10, zeroThr=1e-4):
    """Compute a CP_APR factor basis for X and the projection built on it.

    Parameters:
    X - tensor to factorize
    R - rank used for cp_apr and for the KLProjection
    outerIter - passed to cp_apr as ``maxiters``
    innerIter - passed to cp_apr as ``maxinner``
    zeroThr - threshold given to ``decompTools.zeroSmallFactors``

    Output:
    (KLProjection object, processed factorization, model statistics)
    """
    solverArgs = dict(R=R, maxiters=outerIter, maxinner=innerIter)
    basis, _iterInfo, modelInfo = CP_APR.cp_apr(X, **solverArgs)
    basis.normalize_sort(1)
    basis = decompTools.zeroSmallFactors(basis, zeroThr)
    return KLProjection.KLProjection(basis.U, R), basis, modelInfo