예제 #1
0
"""

import numpy as np
from util import read_file
import ffs
import tags
from logistic_regression import LogisticRegression


"""Load the small sample dataset, pick one example at random, print its
inputs and then print the ``As`` attribute produced by ``calcAs``."""

data_sample, labels_sample = read_file('sample')

lr = LogisticRegression(method="collins", max_iters=1)

labels_proc = lr.preproclabels(labels_sample)

# np.random.rand() is in [0, 1), so the scaled value always truncates to a
# valid index into data_sample.
i = int(np.random.rand() * len(data_sample))
x = data_sample[i]
y = labels_proc[i]
n = len(x)
# Random weight vector with one entry per ffs.numJ; only consumed by the
# calcgis call below, which is currently disabled.
ws = np.random.rand(ffs.numJ)

#lr.calcgis(ws, x, n)

# Single-argument print(...) calls emit the same text under Python 2 and 3,
# unlike the multi-argument print statement, which is Python-2-only syntax.
print(x)
print("%s %s" % (labels_sample[i], y))
print(ws)

lr.calcAs(x, n)
print("As %s" % (lr.As,))
예제 #2
0
    # read data and split training data into training and validation sets
    data_train, labels_train = read_file('training')
        
    #assert len(data_train[0]) == len(labels_train[0])
    #assert len(data_train[200]) == len(labels_train[200])

    data_test, labels_test = read_file('test')
            
    #assert len(data_test[0]) == len(labels_test[0])
    #assert len(data_test[200]) == len(labels_test[200])
    
    return data_train, data_test, labels_train, labels_test
    
def runML(meth, itrs, data_train, data_test, labels_train, labels_test):
    """Fit a LogisticRegression model and print its accuracy on the test set.

    A timestamp is printed before fitting, after fitting and after scoring,
    so the duration of each phase can be read off the output.

    Args:
        meth: Passed to ``LogisticRegression`` as ``method``.
        itrs: Passed to ``LogisticRegression`` as ``max_iters``.
        data_train: Training inputs handed to ``model.fit``.
        data_test: Inputs handed to ``model.predict``.
        labels_train: Training labels handed to ``model.fit``.
        labels_test: Reference labels compared against the predictions by
            ``LogisticRegression.tagAccuracy``.

    Returns:
        None. All results are written to stdout.
    """
    # Single-argument print(...) calls produce the same output on Python 2
    # and Python 3; the multi-argument print statement is Python-2-only.
    print("{} {}".format(meth, datetime.now().time()))

    model = LogisticRegression(method=meth, max_iters=itrs)
    model.fit(data_train, labels_train)
    print(datetime.now().time())

    prediction = model.predict(data_test)
    tagscores = LogisticRegression.tagAccuracy(labels_test, prediction)
    score = np.mean(tagscores)

    print("  score tags: mean: {}, max: {}, min: {}".format(score,max(tagscores),min(tagscores)))
    # Error rate is reported as the complement of the mean score.
    print("  error rate: {}".format(1 - score))
    print(datetime.now().time())

if __name__ == "__main__":
    # Load both splits once; every method below is run on the same data.
    train_x, test_x, train_y, test_y = importData()

    # Only the test labels go through preproclabels here; the training
    # labels are handed to runML exactly as importData returned them.
    test_y = LogisticRegression.preproclabels(test_y)

    # Same iteration budget for each method, run in this order.
    iterations = 10
    for method_name in ("collins", "cd"):
        runML(method_name, iterations, train_x, test_x, train_y, test_y)