예제 #1
0
    def evaluatePredictionAUC(self):
        """Evaluate predictive AUC of the raw baseline features versus the
        PTF-HT reduced features across the stored train/test splits.

        For each split in self.ttss: factorize the training subtensor,
        project all data points onto the factors, then score both feature
        sets with self.predModel via predictionTools.getAUC.

        Returns
        -------
        (float, float)
            Mean baseline AUC and mean reduced-feature AUC over all runs;
            (0.0, 0.0) if self.ttss yields no splits (avoids dividing by
            zero, which the original code did not guard against).
        """
        run = 0
        sumBaseAUC = 0.0
        sumCprAUC = 0.0
        for train, test in self.ttss:
            print("Evaluating Run:{0}".format(run))
            # Build the training tensor restricted to the training rows.
            trainShape = list(self.X.shape)
            trainShape[0] = len(train)
            trainX = tensorSubset(self.X, train, trainShape)
            # Find the tensor factors for PTF-HT on the training subset.
            klp = self.findFactors(trainX)
            # Project every data point onto the factors -> reduced features.
            ptfFeat = klp.projectSlice(self.X, 0)
            # Score baseline and reduced features with logistic regression.
            baseAUC, basePred = predictionTools.getAUC(self.predModel, self.rawFeatures, self.Y, train, test)
            cprAUC, cprPred = predictionTools.getAUC(self.predModel, ptfFeat, self.Y, train, test)
            sumBaseAUC += baseAUC
            sumCprAUC += cprAUC
            run += 1
        if run == 0:
            # No splits at all; nothing to average.
            return 0.0, 0.0
        print(sumBaseAUC / run)
        print('**************************************')
        print(sumCprAUC / run)
        return sumBaseAUC / run, sumCprAUC / run
예제 #2
0
    def evaluatePredictionAUC_2(self, experCount, Demog):
        """Evaluate predictive AUC of raw features versus demographic-aware
        CP-APR factor features, saving both feature sets to CSV.

        Parameters
        ----------
        experCount : experiment index used to name the output CSV files.
        Demog : demographic matrix passed to cp_apr_demog.cp_apr; assumes
            one row per patient — TODO confirm against caller.

        Returns
        -------
        (float, float)
            Mean baseline AUC and mean CP-APR AUC over all splits;
            (0.0, 0.0) if self.ttss yields no splits.
        """
        run = 0
        sumBaseAUC = 0.0
        sumCprAUC = 0.0
        lambda1 = 1
        lambda4 = 1
        # Random initialization for the demographic factor matrix.
        DemoU = np.random.rand(self.R, Demog.shape[1])
        MCPR, cpstats, mstats = cp_apr_demog.cp_apr(self.X, self.R, Demog, DemoU, lambda1, lambda4, maxiters=40, maxinner=self.innerIter)
        MCPR.normalize_sort(1)

        ## Scale the first-mode factor so each row sums to 1.
        totWeight = np.sum(MCPR.U[0], axis=1)
        zeroIdx = np.where(totWeight < 1e-100)[0]
        if len(zeroIdx) > 0:
            # Rows with ~zero total weight get an even distribution so the
            # subsequent division cannot blow up.
            evenDist = np.repeat(1.0 / self.R, len(zeroIdx) * self.R)
            MCPR.U[0][zeroIdx, :] = evenDist.reshape((len(zeroIdx), self.R))
            totWeight = np.sum(MCPR.U[0], axis=1)
        twMat = np.repeat(totWeight, self.R).reshape(self.X.shape[0], self.R)
        MCPR.U[0] = MCPR.U[0] / twMat

        ## Persist raw and factor features (plus labels) for this experiment.
        rawXfile = self.data_dir + 'experimentDemo/rawdataX_' + str(experCount) + '.csv'
        rawYfile = self.data_dir + 'experimentDemo/rawdataY_' + str(experCount) + '.csv'
        cprXfile = self.data_dir + 'experimentDemo/cprdataX_' + str(experCount) + '.csv'
        cprYfile = self.data_dir + 'experimentDemo/cprdataY_' + str(experCount) + '.csv'
        np.savetxt(rawXfile, self.rawFeatures)
        np.savetxt(rawYfile, self.Y)
        np.savetxt(cprXfile, MCPR.U[0])
        np.savetxt(cprYfile, self.Y)

        for train, test in self.ttss:
            print("Evaluating Run:{0}".format(run))
            # NOTE(review): the original also built an unused training
            # subtensor here (tensorSubset + trainY); removed as dead code.
            ## Evaluate both feature sets using logistic regression.
            baseAUC, basePred = predictionTools.getAUC(self.predModel, self.rawFeatures, self.Y, train, test)
            cprAUC, cprPred = predictionTools.getAUC(self.predModel, MCPR.U[0], self.Y, train, test)
            sumBaseAUC += baseAUC
            sumCprAUC += cprAUC
            print('base:' + str(baseAUC))
            print('apr:' + str(cprAUC))
            run += 1
        if run == 0:
            # No splits at all; nothing to average.
            return 0.0, 0.0
        print('**************************************')
        print(sumBaseAUC / run)
        print(sumCprAUC / run)
        return sumBaseAUC / run, sumCprAUC / run
예제 #3
0
"""
Baseline experiment: predict the class label from the flat (matricized)
tensor features using L1-regularized logistic regression, writing one
JSON line of results per CV split per seed.
"""
import json
import numpy as np
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.linear_model import LogisticRegression
import sys
sys.path.append("..")

import sptenmat
import tensorIO
import predictionTools

X, axisDict, classDict = tensorIO.loadSingleTensor("data/cms-tensor-{0}.dat")
# list() keeps this correct on both Python 2 (list) and 3 (dict view).
Y = np.array(list(classDict.values()), dtype='int')
# Very large C => effectively unregularized L1 logistic regression.
predModel = LogisticRegression(C=990000, penalty='l1', tol=1e-6)
flatX = sptenmat.sptenmat(X, [0]).tocsrmat()  # matricize along the first mode
testSize = 0.5

# 'with' guarantees the results file is closed even if a run raises.
with open("results/baseline-results.json", 'w') as outfile:
    for seed in range(0, 1000, 100):
        ttss = StratifiedShuffleSplit(Y,
                                      n_iter=1,
                                      test_size=testSize,
                                      random_state=seed)
        for train, test in ttss:
            baseAUC, basePred = predictionTools.getAUC(predModel, flatX, Y,
                                                       train, test)
            output = {"type": "baseline", "seed": seed, "auc": baseAUC}
            outfile.write(json.dumps(output) + '\n')
예제 #4
0
    ## Persist the Marble factorization (factors + bias) for this experiment.
    MFact[0].writeRawFile("results/pred-raw-marble-{0}.dat".format(exptID))
    MFact[1].writeRawFile(
        "results/pred-raw-bias-marble-{0}.dat".format(exptID))

    ## Compare against the traditional non-negative CP-APR factorization,
    ## timing how long the decomposition takes.
    startTime = time.time()
    MCPR, cpstats, mstats = CP_APR.cp_apr(trainX,
                                          R,
                                          maxiters=outerIter,
                                          maxinner=innerIter)
    cpaprElapse = time.time() - startTime
    MCPR.writeRawFile("results/pred-raw-cpapr-{0}.dat".format(exptID))
    MCPR.normalize_sort(1)
    # Project every slice of X onto the CP-APR factors (KL projection)
    # to obtain the reduced feature matrix used for prediction below.
    klp = KLProjection.KLProjection(MCPR.U, MCPR.R)
    cprFeat = klp.projectSlice(X, 0)

    ## Prediction: AUC for baseline (raw), Marble, and CP-APR features.
    baseAUC, basePred = predictionTools.getAUC(predModel, rawFeatures, Y,
                                               train, test)
    marbleAUC, marblePred = predictionTools.getAUC(predModel, pftMat, Y, train,
                                                   test)
    cprAUC, cprPred = predictionTools.getAUC(predModel, cprFeat, Y, train,
                                             test)

    # NOTE(review): baseline time is recorded as 0 — presumably raw features
    # need no factorization step; confirm against the full script.
    output['time'] = [0, cpaprElapse, marbleElapse]
    output['auc'] = [baseAUC, cprAUC, marbleAUC]
    output['order'] = ['Baseline', 'CP-APR', 'Marble']

# Write the (last) result dict; 'with' closes the file on any exception.
with open("results/pred-{0}.json".format(exptID), 'w') as outfile:
    json.dump(output, outfile)
예제 #5
0
"""
Experiment to compute the baseline predictive model using flat features
"""
import json
import numpy as np
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.linear_model import LogisticRegression
import sys
sys.path.append("..")

import sptenmat
import tensorIO
import predictionTools

X, axisDict, classDict = tensorIO.loadSingleTensor("data/cms-tensor-{0}.dat")
Y = np.array(classDict.values(), dtype='int')
predModel = LogisticRegression(C=990000, penalty='l1', tol=1e-6)
flatX =  sptenmat.sptenmat(X, [0]).tocsrmat() # matricize along the first mode
testSize = 0.5

outfile = open("results/baseline-results.json", 'w')
for seed in range(0, 1000, 100):
	ttss = StratifiedShuffleSplit(Y, n_iter=1, test_size=testSize, random_state=seed)
	for train, test in ttss:
		trainY = Y[train]
		baseAUC, basePred = predictionTools.getAUC(predModel, flatX, Y, train, test)
		output = {"type": "baseline", "seed": seed, "auc": baseAUC }
		outfile.write(json.dumps(output) + '\n')

outfile.close()
예제 #6
0
	## Build the baseline (raw) feature matrix directly from the tensor.
	rawFeatures = predictionTools.createRawFeatures(X)
	# Fit the Marble sparse non-negative factorization, timing the run.
	startTime = time.time()
	MFact, Minfo = SP_NTF.sp_ntf(trainX, R=R, alpha=alpha, gamma=gamma, maxiters = outerIter, maxinner=innerIter)
	marbleElapse = time.time() - startTime
	pftMat, pftBias = SP_NTF.projectTensor(X, MFact, 0, maxinner=innerIter)
	
	## Persist the Marble factors and bias for this experiment ID.
	MFact[0].writeRawFile("results/pred-raw-marble-{0}.dat".format(exptID))
	MFact[1].writeRawFile("results/pred-raw-bias-marble-{0}.dat".format(exptID))

	## Compare against the traditional non-negative CP-APR factorization,
	## timing how long the decomposition takes.
	startTime = time.time()
	MCPR, cpstats, mstats = CP_APR.cp_apr(trainX, R, maxiters=outerIter, maxinner=innerIter)
	cpaprElapse = time.time() - startTime
	MCPR.writeRawFile("results/pred-raw-cpapr-{0}.dat".format(exptID))
	MCPR.normalize_sort(1)
	# KL-projection of X onto the CP-APR factors yields the reduced
	# feature matrix used for prediction below.
	klp = KLProjection.KLProjection(MCPR.U, MCPR.R)
	cprFeat = klp.projectSlice(X, 0)

	## Prediction: AUC for baseline (raw), Marble, and CP-APR features.
	baseAUC, basePred = predictionTools.getAUC(predModel, rawFeatures, Y, train, test)
	marbleAUC, marblePred = predictionTools.getAUC(predModel, pftMat, Y, train, test)
	cprAUC, cprPred = predictionTools.getAUC(predModel, cprFeat, Y, train, test)

	# NOTE(review): baseline time is recorded as 0 — presumably raw features
	# need no factorization step; confirm against the full script.
	output['time'] = [0, cpaprElapse, marbleElapse]
	output['auc'] = [baseAUC, cprAUC, marbleAUC]
	output['order'] = ['Baseline', 'CP-APR', 'Marble']

# Write the (last) result dict; 'with' closes the file on any exception.
with open("results/pred-{0}.json".format(exptID), 'w') as outfile:
	json.dump(output, outfile)