Code example #1
0
"""
Experiment to compute the baseline predictive model using flat features
"""
import json
import numpy as np
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.linear_model import LogisticRegression
import sys
sys.path.append("..")

import sptenmat
import tensorIO
import predictionTools

# Load the sparse tensor along with its axis-label dictionary and the
# per-patient class-label dictionary produced by the preprocessing step.
X, axisDict, classDict = tensorIO.loadSingleTensor("data/cms-tensor-{0}.dat")
# NOTE(review): relies on classDict.values() ordering matching the tensor's
# first (patient) mode -- confirm that tensorIO guarantees this.
Y = np.array(classDict.values(), dtype='int')
# Very large C means the L1 penalty is effectively negligible
# (C is the inverse regularization strength in scikit-learn).
predModel = LogisticRegression(C=990000, penalty='l1', tol=1e-6)
flatX = sptenmat.sptenmat(X, [0]).tocsrmat()  # matricize along the first mode
testSize = 0.5

# One stratified 50/50 split per seed; each seed yields one AUC measurement.
outfile = open("results/baseline-results.json", 'w')
for seed in range(0, 1000, 100):
    ttss = StratifiedShuffleSplit(Y,
                                  n_iter=1,
                                  test_size=testSize,
                                  random_state=seed)
    for train, test in ttss:
        trainY = Y[train]
        # AUC of the baseline model trained on the flattened features.
        baseAUC, basePred = predictionTools.getAUC(predModel, flatX, Y, train,
                                                   test)
        # Result record for this seed (loop body continues past this chunk).
        output = {"type": "baseline", "seed": seed, "auc": baseAUC}
Code example #2
0
# Extract the nonzero entries (indexed by the tuple of index arrays `nnz`,
# defined earlier in the script) as a column vector.
data_values = nparr_data_by_pt[nnz].flatten()
data_values = np.reshape(data_values, (len(data_values), 1))
# Build the subscript matrix with an integer dtype from the start.
# BUG FIX: the original created a float64 array and then assigned
# `nonzero_subs.dtype = 'int'`, which *reinterprets* the underlying bytes
# instead of casting; on platforms where the default int is 32-bit this
# silently doubles the number of columns. np.zeros(..., dtype=int) allocates
# the correct integer buffer directly.
nonzero_subs = np.zeros((len(data_values), num_dims), dtype=int)
for n in range(num_dims):
    nonzero_subs[:, n] = nnz[n]
sparse_tensor_first_10_ruid = sptensor.sptensor(nonzero_subs, data_values)


#save the tensor
# Persist the tensor together with its axis labels and the per-patient class
# dictionary; the "{0}" placeholder is presumably filled in by
# saveSingleTensor for its component files -- TODO confirm.
tensorIO.saveSingleTensor(sparse_tensor_first_10_ruid, axisDict, od_patClass_first_10_ruid, "htn-first10-tensor-{0}.dat") #

### LEFT OFF HERE: june 25, 6pm ##################################################################

## load the tensor #######
# Round-trip: reload what was just saved (continuation / sanity-check point).
loaded_X, loaded_axisDict, loaded_classDict = tensorIO.loadSingleTensor("htn-first10-tensor-{0}.dat")

## do the decomposition ######
#store the data in "data"
# Experiment metadata record; exptID, MSize, AFill, R, alpha and gamma are
# defined earlier in the script (not visible in this chunk).
data = {'exptID': exptID, 'size': MSize, 'sparsity': AFill, "rank": R, "alpha": alpha, "gamma": gamma}

def calculateValues(TM, M):
    """Return the (fms, fos, nnz) comparison metrics for tensor M.

    fms and fos are computed by TM's greedy_fms / greedy_fos methods;
    nnz is the nonzero count reported by tensorTools.countTensorNNZ.
    """
    return (TM.greedy_fms(M),
            TM.greedy_fos(M),
            tensorTools.countTensorNNZ(M))
##raw features
#rawFeatures = predictionTools.createRawFeatures(X)
# Wall-clock start, used to time the factorization below.
startTime = time.time()#start time -- to time it
##factorization
# Sparse nonnegative tensor factorization of the reloaded tensor; R (rank)
# and alpha are defined earlier in the script, outside this chunk.
spntf_htn_first_10_ruid = SP_NTF.SP_NTF(loaded_X, R=R, alpha=alpha)
Code example #3
0
                    default=0.5)
parser.add_argument("-g", '--gamma', nargs='+', type=float, help="gamma")
parser.add_argument("-s", "--seed", type=int, help="random seed", default=0)
args = parser.parse_args()

# Unpack parsed arguments into experiment settings (the parser and the
# remaining add_argument calls are defined earlier, outside this chunk).
inputFile = args.infile
exptID = args.eid
testSize = args.testSize
innerIter = 10  # fixed inner-iteration count, not exposed on the CLI
outerIter = args.iter
R = args.rank
gamma = args.gamma
alpha = args.alpha
seed = args.seed

# Load the tensor, its axis labels, and the per-patient class labels.
X, axisDict, classDict = tensorIO.loadSingleTensor(inputFile)
# NOTE(review): assumes classDict.values() order matches the tensor's first
# mode -- confirm in tensorIO.
Y = np.array(classDict.values(), dtype='int')
# Single stratified shuffle split, reproducible via the CLI seed.
ttss = StratifiedShuffleSplit(Y,
                              n_iter=1,
                              test_size=testSize,
                              random_state=seed)
predModel = LogisticRegression(C=1.0, penalty='l1', tol=1e-6)

# Result record for this run (the dict literal continues past this chunk).
output = {
    "expt": exptID,
    "iters": outerIter,
    "inner": innerIter,
    "R": R,
    "gamma": gamma,
    "alpha": alpha,
    "seed": seed
Code example #4
0
"""
Experiment to compute the baseline predictive model using flat features
"""
import json
import numpy as np
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.linear_model import LogisticRegression
import sys
sys.path.append("..")

import sptenmat
import tensorIO
import predictionTools

# Load the sparse tensor along with its axis-label dictionary and the
# per-patient class-label dictionary produced by the preprocessing step.
X, axisDict, classDict = tensorIO.loadSingleTensor("data/cms-tensor-{0}.dat")
# NOTE(review): relies on classDict.values() ordering matching the tensor's
# first (patient) mode -- confirm that tensorIO guarantees this.
Y = np.array(classDict.values(), dtype='int')
# Very large C means the L1 penalty is effectively negligible
# (C is the inverse regularization strength in scikit-learn).
predModel = LogisticRegression(C=990000, penalty='l1', tol=1e-6)
flatX = sptenmat.sptenmat(X, [0]).tocsrmat()  # matricize along the first mode
testSize = 0.5

# One stratified 50/50 split per seed; each result is written as one JSON
# line. FIX: use a context manager so the results file is closed even if an
# iteration raises (the original left the handle open on error). The unused
# local `trainY` was also dropped.
with open("results/baseline-results.json", 'w') as outfile:
    for seed in range(0, 1000, 100):
        ttss = StratifiedShuffleSplit(Y, n_iter=1, test_size=testSize,
                                      random_state=seed)
        for train, test in ttss:
            # AUC of the baseline model trained on the flattened features.
            baseAUC, basePred = predictionTools.getAUC(predModel, flatX, Y,
                                                       train, test)
            output = {"type": "baseline", "seed": seed, "auc": baseAUC}
            outfile.write(json.dumps(output) + '\n')
Code example #5
0
## Command-line arguments (the parser is constructed earlier in the file).
parser.add_argument("-t", "--testSize", type=float, help="test size",
                    default=0.5)
parser.add_argument("-g", '--gamma', nargs='+', type=float, help="gamma")
parser.add_argument("-s", "--seed", type=int, help="random seed", default=0)
args = parser.parse_args()

## Unpack the parsed arguments into module-level experiment settings.
inputFile = args.infile
exptID = args.eid
R = args.rank
alpha = args.alpha
gamma = args.gamma
seed = args.seed
testSize = args.testSize
outerIter = args.iter
innerIter = 10  # fixed inner-iteration count, not exposed on the CLI

## Load the tensor and set up the label vector, splitter, and classifier.
X, axisDict, classDict = tensorIO.loadSingleTensor(inputFile)
Y = np.array(classDict.values(), dtype='int')
ttss = StratifiedShuffleSplit(Y,
                              n_iter=1,
                              test_size=testSize,
                              random_state=seed)
predModel = LogisticRegression(C=1.0, penalty='l1', tol=1e-6)

## Result record for this run; presumably extended further below.
output = {
    "expt": exptID,
    "iters": outerIter,
    "inner": innerIter,
    "R": R,
    "gamma": gamma,
    "alpha": alpha,
    "seed": seed
}
# For each stratified train/test split: restrict the tensor's first
# (patient) mode to the training indices and pull the matching labels.
# (The loop body continues beyond this chunk.)
for train, test in ttss:
	trainShape = list(X.shape)
	trainShape[0] = len(train)
	## take the subset for training
	trainX = predictionTools.tensorSubset(X, train, trainShape)
	trainY = Y[train]

	## create the raw features
Code example #6
0
# Extract the nonzero entries (indexed by the tuple of index arrays `nnz`,
# defined earlier in the script) as a column vector.
data_values = nparr_data_by_pt[nnz].flatten()
data_values = np.reshape(data_values, (len(data_values), 1))
# Build the subscript matrix with an integer dtype from the start.
# BUG FIX: the original created a float64 array and then assigned
# `nonzero_subs.dtype = 'int'`, which *reinterprets* the underlying bytes
# instead of casting; on platforms where the default int is 32-bit this
# silently doubles the number of columns. np.zeros(..., dtype=int) allocates
# the correct integer buffer directly.
nonzero_subs = np.zeros((len(data_values), num_dims), dtype=int)
for n in range(num_dims):
    nonzero_subs[:, n] = nnz[n]
sparse_tensor_all_finite = sptensor.sptensor(nonzero_subs, data_values)


#save the tensor
# Persist the tensor together with its axis labels and the per-patient class
# dictionary; the "{0}" placeholder is presumably filled in by
# saveSingleTensor for its component files -- TODO confirm.
tensorIO.saveSingleTensor(sparse_tensor_all_finite, axisDict, od_patClass_all_finite, "htn-allfinite-tensor-{0}.dat") #

### LEFT OFF HERE: june 25, 6pm ##################################################################

## load the tensor #######
# Round-trip: reload what was just saved (continuation / sanity-check point).
loaded_X, loaded_axisDict, loaded_classDict = tensorIO.loadSingleTensor("htn-allfinite-tensor-{0}.dat")

## do the decomposition ######
#store the data in "data"
# Experiment metadata record; exptID, MSize, AFill, R, alpha and gamma are
# defined earlier in the script (not visible in this chunk).
data = {'exptID': exptID, 'size': MSize, 'sparsity': AFill, "rank": R, "alpha": alpha, "gamma": gamma}

def calculateValues(TM, M):
    """Return the (fms, fos, nnz) comparison metrics for tensor M.

    fms and fos are computed by TM's greedy_fms / greedy_fos methods;
    nnz is the nonzero count reported by tensorTools.countTensorNNZ.
    """
    return (TM.greedy_fms(M),
            TM.greedy_fos(M),
            tensorTools.countTensorNNZ(M))
##raw features
#rawFeatures = predictionTools.createRawFeatures(X)
# Wall-clock start, used to time the factorization below.
startTime = time.time()#start time -- to time it
##factorization
# Sparse nonnegative tensor factorization of the reloaded tensor; R (rank)
# and alpha are defined earlier in the script, outside this chunk.
spntf_htn_all_finite = SP_NTF.SP_NTF(loaded_X, R=R, alpha=alpha)