Beispiel #1
0
def run_svm (trainDataFile, trainData, testData):
	s = SVM();
	#param = modelSelection.Param(s, 'C', [0.0001,0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000])
	#m = modelSelection.ModelSelector(param, measure='roc');
	#m.train(trainData);
	#m.save("trainModel.pyml");
	s.train(trainData);
	s.save("trainModel.pyml");
	new_svm = SVM();
	new_svm.load("trainModel.pyml", trainData);
	r = new_svm.test(testData);
	return r
def BuildModelAndTest(trainData, testData, modelFile, buildModel):

	if (buildModel == 'true'):
		print "* *** BUILDING MODEL *****"
		Cs = [ 10**x for x in xrange( -10, 5 ) ]
		param = modelSelection.Param(svm.SVM(), 'C', Cs)

		m = modelSelection.ModelSelector(param, measure ='roc')
		m.train(trainData)
		results = m.test(testData); #combined HuR, TTP test data
		m.save(modelFile);
		print "**** MODEL OUTPUT **** "
		print m.classifier.C
		print m.log
	else:
		s = SVM();
		s.load(modelFile, trainData);
		results = s.test(testData)

	return results;
def BuildModelAndTest(trainData, testData, modelFile, buildModel):

    if (buildModel == 'true'):
        print "* *** BUILDING MODEL *****"
        Cs = [10**x for x in xrange(-10, 5)]
        param = modelSelection.Param(svm.SVM(), 'C', Cs)

        m = modelSelection.ModelSelector(param, measure='roc')
        m.train(trainData)
        results = m.test(testData)
        #combined HuR, TTP test data
        m.save(modelFile)
        print "**** MODEL OUTPUT **** "
        print m.classifier.C
        print m.log
    else:
        s = SVM()
        s.load(modelFile, trainData)
        results = s.test(testData)

    return results
def test(component='svm', **args):

    if 'container' in args:
        container = args['container']
    else:
        container = 'SparseDataSet'

    try:
        DataSet = getattr(vectorDatasets, container)
    except:
        raise ValueError, 'wrong container ' + container

    s = svm.SVM()

    results = {}

    comp = 'general'
    if component == 'all' or component == comp:
        d = DataSet(heartdatafile, labelsColumn=0)
        s.train(d)
        s.test(d)
        s = svm.SVM()
        s.stratifiedCV(d)
        d2 = Aggregate([d, d])
        #r = s.stratifiedCV(d2)
        #r = s.loo(d)
        d.attachKernel('polynomial')
        s.cv(d)
        d.attachKernel('linear')
        s = svm.SVM()
        s.train(d)

        d = DataSet(numpy.random.randn(100, 10))
        d = DataSet([[1, 2], [2, 3]])
        #d = SequenceData(['asa', 'ben', 'hur'])

    comp = 'svm'
    if component == 'all' or component == comp:
        d = DataSet(heartdatafile, labelsColumn=0)
        results[comp] = []
        d.attachKernel('polynomial')
        results[comp].append(s.cv(d, saveSpace=True))
        d.attachKernel('linear')
        results[comp].append(s.cv(d))
        results[comp].append(
            SVM(optimizer='liblinear', loss='l1', C=1).stratifiedCV(d, seed=1))
        results[comp].append(
            SVM(optimizer='liblinear', loss='l2', C=1).stratifiedCV(d, seed=1))

    comp = 'kernelData'
    if component == 'all' or component == comp:
        d = DataSet(heartdatafile, labelsColumn=0)
        results[comp] = []
        kdata = KernelData('heart.kernel', gistFormat=True)
        kdata.attachLabels(d.labels)
        s = svm.SVM()
        results[comp].append(s.cv(kdata))
        kdata.attachKernel('gaussian', gamma=0.1)
        results[comp].append(s.cv(kdata))

    comp = 'normalization'
    if component == 'all' or component == comp:
        results[comp] = []
        data = DataSet(heartdatafile, labelsColumn=0)
        data.attachKernel('polynomial', degree=4, normalization='dices')
        s = svm.SVM()
        results[comp].append(s.cv(data))

    comp = 'svr'
    if component == 'all' or component == comp:
        d = DataSet(heartdatafile, labelsColumn=0, numericLabels=True)
        results[comp] = []
        s = svm.SVR()
        #results[comp].append(
        #    s.cv(d, saveSpace = True))
        #results[comp].append(
        #    s.trainTest(d, range(150), range(151, 250)))
        results[comp].append(s.cv(d))

    comp = 'save'
    if component == 'all' or component == comp:
        results[comp] = []
        s = svm.SVM()
        data = DataSet(heartdatafile, labelsColumn=0)
        import tempfile
        tmpfile = tempfile.mktemp()
        r = s.cv(data)
        r.save(tmpfile)
        r = resultsObjects.loadResults(tmpfile)
        results['save'].append(r)

        r = s.nCV(data)
        r.save(tmpfile)
        results['save'].append(resultsObjects.loadResults(tmpfile))

        r = {}
        for i in range(10):
            r[i] = s.cv(data)

        resultsObjects.saveResultObjects(r, tmpfile)
        r = resultsObjects.loadResults(tmpfile)

    comp = 'classifiers'
    if component == 'all' or component == comp:
        d = DataSet(heartdatafile, labelsColumn=0)
        results[comp] = []
        cl = knn.KNN()
        results[comp].append(cl.stratifiedCV(d))
        print 'testing ridge regression'
        ridge = ridgeRegression.RidgeRegression()
        #results[comp].append(
        #    ridge.cv(d))

    comp = 'platt'
    if component == 'all' or component == comp:
        results[comp] = []
        d = DataSet(heartdatafile, labelsColumn=0)
        p = platt.Platt2(s)
        results[comp].append(p.stratifiedCV(d))

    comp = 'save'
    if component == 'all' or component == comp:
        data = DataSet(heartdatafile, labelsColumn=0)
        s = SVM()
        s.train(data)
        s.save('model.pyml')
        s2 = SVM()
        s2.load('model.pyml', data)
        results[comp].append(s2.test(data))

        d = DataSet(heartdatafile, labelsColumn=0)
        kdata = KernelData('heart.kernel', gistFormat=True)
        kdata.attachLabels(d.labels)
        s = svm.SVM()
        s.train(data)
        s.save('model2.pyml')
        s2 = SVM()
        s2.load('model2.pyml', kdata)
        results[comp].append(s2.test(kdata))

        data = DataSet(irisdatafile, labelsColumn=-1)
        mc = multi.OneAgainstRest(SVM())
        mc.train(data)
        mc.save('iris.pyml')
        mc = multi.OneAgainstRest(SVM())
        mc.load('iris.pyml', data)
        results[comp].append(mc.test(data))

    comp = 'multi'
    if component == 'all' or component == comp:
        results[comp] = []
        d = DataSet(irisdatafile, labelsColumn=-1)

        mc = multi.OneAgainstOne(svm.SVM())
        results[comp].append(mc.cv(d))

        d = DataSet(irisdatafile, labelsColumn=-1)

        mc = multi.OneAgainstRest(svm.SVM())
        results[comp].append(mc.cv(d))

        mc = multi.OneAgainstRest(svm.SVM())
        d.attachKernel('poly')
        results[comp].append(mc.cv(d))
        d.attachKernel('linear')
        mc = multi.OneAgainstRest(svm.SVM())
        #kdata = datafunc.KernelData('iris.linear.kernel',
        #                            labelsFile = 'irisY.csv', labelsColumn = 0, gistFormat = True)
        #results[comp].append(mc.cv(kdata))

    comp = 'featsel'
    if component == 'all' or component == comp:
        results[comp] = []

        s = svm.SVM()
        d = DataSet(yeastdatafile, labelsColumn=0)
        d2 = labels.oneAgainstRest(d, '2')
        results[comp].append(s.stratifiedCV(d2, seed=1))

        # feature selection
        m = composite.FeatureSelect(s, featsel.RFE())
        results[comp].append(m.stratifiedCV(d2, seed=1))

        d = DataSet(yeastdatafile, labelsColumn=0)
        d2 = labels.oneAgainstRest(d, '2')
        fs = featsel.FeatureScore('golub')
        f = featsel.Filter(fs, sigma=2)
        m = composite.FeatureSelect(s, f)
        results[comp].append(m.stratifiedCV(d2, seed=1))

        # same thing but with a Chain:
        c = composite.Chain([f, s])
        #r = c.stratifiedCV (d2)

    comp = 'modelSelection'
    if component == 'all' or component == comp:
        results[comp] = []
        s = svm.SVM()
        d = DataSet(heartdatafile, labelsColumn=0)
        p = modelSelection.ParamGrid(svm.SVM(ker.Polynomial()), 'C',
                                     [0.1, 1, 10, 100], 'kernel.degree',
                                     [2, 3, 4])
        p = modelSelection.ParamGrid(svm.SVM(ker.Gaussian()), 'C',
                                     [0.1, 1, 10, 100], 'kernel.gamma',
                                     [0.01, 0.1, 1])
        #p = modelSelection.Param(svm.SVM(), 'C', [0.1, 1, 10, 100])

        m = modelSelection.ModelSelector(p, measure='roc', foldsToPerform=2)
        m = modelSelection.ModelSelector(p)
        #m = modelSelection.SVMselect()
        results[comp].append(m.cv(d))

    comp = 'preproc'
    if component == 'all' or component == comp:
        results[comp] = []

        s = svm.SVM()
        d = DataSet(yeastdatafile, labelsColumn=0)
        d2 = labels.oneAgainstRest(d, '2')
        results[comp].append(s.stratifiedCV(d2))
        p = preproc.Standardizer()
        p.train(d2)
        results[comp].append(s.stratifiedCV(d2))
        print p.scale
        print p.translation

    return results
Beispiel #5
0
def inDomainTest(trainModelFile, trainSpectrumData, testSpectrumData, resultsFile):
	new_svm = SVM()
	new_svm.load(trainModelFile, trainSpectrumData)
	results = new_svm.test(testSpectrumData)

	demo_utils.print_results(results, resultsFile)
Beispiel #6
0
def test (component = 'svm', **args) :

    if 'container' in args :
        container = args['container']
    else :
        container = 'SparseDataSet'

    try :
        DataSet = getattr(vectorDatasets, container)
    except :
        raise ValueError, 'wrong container ' + container

    s = svm.SVM()

    results = {}

    comp = 'general'
    if component == 'all' or component == comp :
        d = DataSet (heartdatafile, labelsColumn = 0)
        s.train(d)
        s.test(d)
        s = svm.SVM()
        s.stratifiedCV(d)
        d2 = Aggregate([d,d])
        #r = s.stratifiedCV(d2)
        #r = s.loo(d)
        d.attachKernel('polynomial')
        s.cv(d)
        d.attachKernel('linear')
        s = svm.SVM()
        s.train(d)

        d = DataSet(numpy.random.randn(100,10))
        d = DataSet([[1,2], [2,3]])
        #d = SequenceData(['asa', 'ben', 'hur'])
        
    comp = 'svm'
    if component == 'all' or component == comp :
        d = DataSet (heartdatafile, labelsColumn = 0)
        results[comp] = []
        d.attachKernel('polynomial')
        results[comp].append(
            s.cv(d, saveSpace = True))
        d.attachKernel('linear')
        results[comp].append(
            s.cv(d))
        results[comp].append(SVM(optimizer = 'liblinear', loss = 'l1', C=1).stratifiedCV(d, seed = 1))
        results[comp].append(SVM(optimizer = 'liblinear', loss = 'l2', C=1).stratifiedCV(d, seed = 1))
        
    comp = 'kernelData'
    if component == 'all' or component == comp :
        d = DataSet (heartdatafile, labelsColumn = 0)
        results[comp] = []
        kdata = KernelData('heart.kernel', gistFormat = True)
        kdata.attachLabels(d.labels)
        s=svm.SVM()
        results[comp].append(
            s.cv(kdata))
        kdata.attachKernel('gaussian', gamma = 0.1)
        results[comp].append(
            s.cv(kdata))

    comp = 'normalization'
    if component == 'all' or component == comp :
        results[comp] = []
        data = DataSet (heartdatafile, labelsColumn = 0)
        data.attachKernel('polynomial', degree = 4, normalization = 'dices')
        s=svm.SVM()
        results[comp].append(
            s.cv(data))

    comp = 'svr'
    if component == 'all' or component == comp :
        d = DataSet (heartdatafile, labelsColumn = 0, numericLabels = True)
        results[comp] = []
        s = svm.SVR()
        #results[comp].append(
        #    s.cv(d, saveSpace = True))
        #results[comp].append(
        #    s.trainTest(d, range(150), range(151, 250)))
        results[comp].append( s.cv(d) )

    comp = 'save'
    if component == 'all' or component == comp :
        results[comp] = []
        s = svm.SVM()
        data = DataSet (heartdatafile, labelsColumn = 0)
        import tempfile
        tmpfile = tempfile.mktemp()
        r = s.cv(data)
        r.save(tmpfile)
        r = resultsObjects.loadResults(tmpfile)
        results['save'].append(r)


        r = s.nCV(data)
        r.save(tmpfile)
        results['save'].append(resultsObjects.loadResults(tmpfile))

        r = {}
        for i in range(10) :
            r[i] = s.cv(data)

        resultsObjects.saveResultObjects(r, tmpfile)
        r = resultsObjects.loadResults(tmpfile)
        
    comp = 'classifiers'
    if component == 'all' or component == comp :
        d = DataSet (heartdatafile, labelsColumn = 0)
        results[comp] = []
        cl = knn.KNN()
        results[comp].append(
            cl.stratifiedCV(d))
        print 'testing ridge regression'
        ridge = ridgeRegression.RidgeRegression()
        #results[comp].append(
        #    ridge.cv(d))

    comp = 'platt'
    if component == 'all' or component == comp :
        results[comp] = []
        d = DataSet (heartdatafile, labelsColumn = 0)
        p = platt.Platt2(s)
        results[comp].append(p.stratifiedCV(d))

    comp = 'save'
    if component == 'all' or component == comp :        
        data = DataSet (heartdatafile, labelsColumn = 0)
        s = SVM()
        s.train(data)
        s.save('model.pyml')
        s2 = SVM()
        s2.load('model.pyml', data)
        results[comp].append(s2.test(data))

        d = DataSet (heartdatafile, labelsColumn = 0)
        kdata = KernelData('heart.kernel', gistFormat = True)
        kdata.attachLabels(d.labels)
        s=svm.SVM()
        s.train(data)
        s.save('model2.pyml')
        s2 = SVM()
        s2.load('model2.pyml', kdata)
        results[comp].append(s2.test(kdata))

        data = DataSet(irisdatafile, labelsColumn = -1)
        mc = multi.OneAgainstRest(SVM())
        mc.train(data)
        mc.save('iris.pyml')
        mc = multi.OneAgainstRest(SVM())
        mc.load('iris.pyml', data)
        results[comp].append(mc.test(data))

    comp = 'multi'
    if component == 'all' or component == comp :
        results[comp] = []
        d = DataSet(irisdatafile, labelsColumn = -1)

        mc = multi.OneAgainstOne (svm.SVM())
        results[comp].append(
            mc.cv(d))

        d = DataSet(irisdatafile, labelsColumn = -1)
        
        mc = multi.OneAgainstRest (svm.SVM())
        results[comp].append(
            mc.cv(d))

        mc = multi.OneAgainstRest (svm.SVM())
        d.attachKernel('poly')
        results[comp].append(
            mc.cv(d))
        d.attachKernel('linear')
        mc = multi.OneAgainstRest (svm.SVM())
        #kdata = datafunc.KernelData('iris.linear.kernel',
        #                            labelsFile = 'irisY.csv', labelsColumn = 0, gistFormat = True)
        #results[comp].append(mc.cv(kdata))

        
    comp = 'featsel'
    if component == 'all' or component == comp :
        results[comp] = []
        
        s = svm.SVM()
        d = DataSet (yeastdatafile, labelsColumn = 0)
        d2 = labels.oneAgainstRest(d, '2')
        results[comp].append(
            s.stratifiedCV(d2, seed = 1))

        # feature selection
        m = composite.FeatureSelect (s, featsel.RFE())
        results[comp].append(
            m.stratifiedCV(d2, seed = 1))

        d = DataSet (yeastdatafile, labelsColumn = 0)
        d2 = labels.oneAgainstRest(d, '2')
        fs = featsel.FeatureScore ('golub')
        f = featsel.Filter (fs, sigma = 2)
        m = composite.FeatureSelect (s, f)
        results[comp].append(
            m.stratifiedCV(d2, seed = 1))

        # same thing but with a Chain:
        c = composite.Chain ([f,s]) 
        #r = c.stratifiedCV (d2)

    comp = 'modelSelection'
    if component == 'all' or component == comp :
        results[comp] = []
        s = svm.SVM()
        d = DataSet (heartdatafile, labelsColumn = 0)
        p = modelSelection.ParamGrid(svm.SVM(ker.Polynomial()), 'C', [0.1, 1, 10, 100],
                                     'kernel.degree', [2, 3, 4])
        p = modelSelection.ParamGrid(svm.SVM(ker.Gaussian()), 'C', [0.1, 1, 10, 100],
                                     'kernel.gamma', [0.01, 0.1, 1])
        #p = modelSelection.Param(svm.SVM(), 'C', [0.1, 1, 10, 100])

        m = modelSelection.ModelSelector(p, measure = 'roc', foldsToPerform = 2)
        m = modelSelection.ModelSelector(p)
        #m = modelSelection.SVMselect()
        results[comp].append(
            m.cv(d))

    comp = 'preproc'
    if component == 'all' or component == comp :
        results[comp] = []
        
        s = svm.SVM()
        d = DataSet (yeastdatafile, labelsColumn = 0)
        d2 = labels.oneAgainstRest(d, '2')
        results[comp].append(
            s.stratifiedCV(d2))
        p = preproc.Standardizer()
        p.train(d2)
        results[comp].append(
            s.stratifiedCV(d2))
        print p.scale
        print p.translation

    return results