def __init__(self, pca=False):
    """Load the PollutedSpambase train/test splits, optionally PCA-reduced.

    Args:
        pca: When truthy, a 100-component PCA model is fitted on the
            training data and both the training and testing data are
            replaced by their transformed versions. When falsy, the data
            is stored exactly as returned by the dataset.
    """
    source = PollutedSpambase()
    self.train_data, self.train_labels = source.training()
    self.test_data, self.test_labels = source.testing()

    if not pca:
        return

    # Fit on the training split only, then apply that same fitted
    # projection to both splits.
    reducer = PCA(n_components=100)
    reducer.fit(self.train_data)
    self.train_data = reducer.transform(self.train_data)
    self.test_data = reducer.transform(self.test_data)
from polluted import PollutedSpambase
from evaluator import Evaluator
from descent import GradientDescent


def main():
    """Train stochastic logistic regression on PollutedSpambase and evaluate it."""
    # Load the training and testing splits.
    spambase = PollutedSpambase()
    train_data, train_labels = spambase.training()
    test_data, test_labels = spambase.testing()

    # Fit logistic regression with the stochastic gradient-descent routine.
    # Original author's note: 200,000 iterations gives ~85% accuracy.
    descent = GradientDescent(train_data, train_labels)
    weights = descent.logreg_stoch(it=200001)

    # Evaluator takes lists of data sets, label sets and solutions; a single
    # entry of each is passed here. accuracy() is invoked for its effect only,
    # its return value is not used.
    scorer = Evaluator([test_data], [test_labels], [weights])
    scorer.accuracy()


if __name__ == "__main__":
    main()