def sk():
    """Train and score a wrapped scikit-learn logistic regression.

    Loads the comma-delimited ``ex2data1.txt`` file into a ``Dataset``,
    wraps a default-constructed ``LogisticRegression`` estimator in a
    ``SklearnModel`` named 'Sklearn Logistic', then trains and scores it
    on that same dataset. The value returned by ``score`` is discarded,
    so this function returns ``None``.
    """
    dataset = Dataset.fromDataFile('./sutil/datasets/ex2data1.txt', ',')
    wrapped = SklearnModel('Sklearn Logistic', LogisticRegression())
    wrapped.trainModel(dataset)
    # Scored against the data it was trained on; there is no hold-out set.
    wrapped.score(dataset.X, dataset.y)
def test_normalize_features(self):
    """
    Load the ex1data1 dataset from file and print its shape.

    NOTE(review): despite the test name, nothing is normalized here;
    confirm whether a normalization call was intended.
    """
    dataset = Dataset.fromDataFile('./sutil/datasets/ex1data1.txt', ',')
    print(dataset.shape)
def simple():
    """Train and score a RegularizedLogisticRegression on ex2data1.

    The model starts from an all-zero parameter column with
    ``dataset.n + 1`` rows and is trained and scored on the same
    dataset. The value returned by ``score`` is discarded.
    """
    dataset = Dataset.fromDataFile('./sutil/datasets/ex2data1.txt', ',')
    initial_theta = np.zeros((dataset.n + 1, 1))
    # Presumably 0.03 is the learning rate and 0 the regularization
    # strength -- TODO confirm against the model's constructor.
    model = RegularizedLogisticRegression(initial_theta, 0.03, 0)
    model.trainModel(dataset)
    model.score(dataset.X, dataset.y)
def test_save(self):
    """Plot the ex1data1 dataset, save it twice, and print its shape.

    ``save`` is called once with no argument and once with 'test'.
    """
    banner = "=" * 20
    print(banner)
    print("Testing save")
    dataset = Dataset.fromDataFile('./sutil/datasets/ex1data1.txt', ',')
    dataset.plotDataRegression('example')
    # First the no-argument form, then the form taking an explicit name.
    dataset.save()
    dataset.save('test')
    print(dataset.shape)
def test_biased_x(self):
    """Print the shape, first row, normalization result and biased X.

    The calls run in the same order as they are printed:
    ``normalizeFeatures()`` is invoked before ``getBiasedX()``.
    """
    banner = "=" * 20
    print(banner)
    print("Testing biased x")
    dataset = Dataset.fromDataFile('./sutil/datasets/ex1data1.txt', ',')
    print(dataset.shape)
    first_row = dataset.X[0]
    print(first_row)
    normalized = dataset.normalizeFeatures()
    print(normalized)
    biased = dataset.getBiasedX()
    print(biased)
def test_load_data(self):
    """
    Load the four example data files and print or plot each one.

    The first dataset is plotted with ``plotDataRegression`` and every
    (X[i], y[i]) pair is printed; the second only has its shape printed;
    the third and fourth have their shape printed and are then plotted
    with ``plotData``.
    """
    regression_set = Dataset.fromDataFile('./sutil/datasets/ex1data1.txt', ',')
    regression_set.plotDataRegression('example', False)
    print(regression_set.shape)
    # Index-based on purpose: rows are fetched with X[i] / y[i] rather than
    # by iterating the containers directly.
    for row in range(len(regression_set.X)):
        print('{!s} -->{!s}'.format(regression_set.X[row],
                                    regression_set.y[row]))

    multi_feature_set = Dataset.fromDataFile(
        './sutil/datasets/ex1data2.txt', ',')
    print(multi_feature_set.shape)

    third_set = Dataset.fromDataFile('./sutil/datasets/ex2data1.txt', ',')
    print(third_set.shape)
    third_set.plotData('example3')

    fourth_set = Dataset.fromDataFile('./sutil/datasets/ex2data2.txt', ',')
    print(fourth_set.shape)
    fourth_set.plotData('example4')
def test_split(self):
    """Split ex1data1 two ways and print shapes, sizes and ratios.

    ``split(0.8, 0.2)`` is unpacked into three parts and ``split(0.8, 0)``
    into two. After each split the shape of the source dataset is
    printed again.
    """
    banner = "=" * 20
    print(banner)
    print("Testing split")
    full = Dataset.fromDataFile('./sutil/datasets/ex1data1.txt', ',')
    full.plotDataRegression('example', True)
    print(full.shape)

    # Three-way split; the unpacking requires exactly three results.
    train_part, validation_part, test_part = full.split(0.8, 0.2)
    three_way = (train_part, validation_part, test_part)
    print(*[part.shape for part in three_way])
    print(*[part.m for part in three_way])
    print(*[part.m / full.m for part in three_way])
    print(full.shape)

    # Two-way split; the unpacking requires exactly two results.
    train_only, test_only = full.split(0.8, 0)
    two_way = (train_only, test_only)
    print(*[part.shape for part in two_way])
    print(*[part.m for part in two_way])
    print(*[part.m / full.m for part in two_way])
    print(full.shape)
# -*- coding: utf-8 -*- import numpy as np from sutil.base.Dataset import Dataset from sutil.models.RegularizedLogisticRegression import RegularizedLogisticRegression from sutil.metrics.ModelROC import ModelROC datafile = './sutil/datasets/ex2data1.txt' d = Dataset.fromDataFile(datafile, ',') theta = np.zeros((d.n + 1, 1)) alpha = 0.03 l = 0 lr = RegularizedLogisticRegression(theta, alpha, l) lr.trainModel(d) m = ModelROC(lr, d.getBiasedX(), d.y, legend='Example of Model ROC usage') m.plot() m.zoom((0, 0.4), (0.5, 1.0))
def fromDataFile(cls, datafile, delimeter, alpha=0.1, l=0.1):
    """Build an instance of ``cls`` from a delimited data file.

    The file is loaded through ``Dataset.fromDataFile`` and the initial
    parameters are drawn with ``np.random.random`` as a 1-D array whose
    length is the number of columns of ``X`` plus one.

    Args:
        datafile: Path of the file handed to ``Dataset.fromDataFile``.
        delimeter: Field separator handed to ``Dataset.fromDataFile``.
        alpha: Forwarded unchanged as the third argument of ``cls``.
        l: Forwarded unchanged as the fourth argument of ``cls``.

    Returns:
        The object produced by ``cls(dataset, theta, alpha, l)``.
    """
    dataset = Dataset.fromDataFile(datafile, delimeter)
    parameter_count = dataset.X.shape[1] + 1
    initial_theta = np.random.random(parameter_count)
    return cls(dataset, initial_theta, alpha, l)