def main(): DS = ClassificationDataSet.loadFromFile("ClassifierDataSet") print DS.calculateStatistics() print type(DS) rates = measuredLearning(DS)
# Hidden layer class is sigmoid by default.
all_data = ClassificationDataSet.loadFromFile("nn-data")

# Hold out 25% of the samples for testing, then split the remaining 75%
# evenly between training and validation.
tstdata_temp, partdata_temp = all_data.splitWithProportion( 0.25 )
trndata_temp, validata_temp = partdata_temp.splitWithProportion(0.50)

# Copy each split sample-by-sample into fresh ClassificationDataSet
# objects (200 inputs, 1 target, 2 classes) — presumably because the
# split results lose the classification-specific type; verify against
# the pybrain version in use.
tstdata = ClassificationDataSet(200, 1, nb_classes=2)
for idx in xrange(0, tstdata_temp.getLength()):
    tstdata.addSample(tstdata_temp.getSample(idx)[0],
                      tstdata_temp.getSample(idx)[1])

trndata = ClassificationDataSet(200, 1, nb_classes=2)
for idx in xrange(0, trndata_temp.getLength()):
    trndata.addSample(trndata_temp.getSample(idx)[0],
                      trndata_temp.getSample(idx)[1])

# NOTE(review): validata is created here but no copy loop from
# validata_temp is visible in this chunk — confirm it follows.
validata = ClassificationDataSet(200, 1, nb_classes=2)
# --- imports: stdlib first, then third-party ------------------------------
import pickle

from numpy import array_equal
from numpy.random import multivariate_normal
from scipy import diag, arange, meshgrid, where
from pylab import ion, ioff, figure, draw, contourf, clf, show, hold, plot

from pybrain.structure import RecurrentNetwork, FeedForwardNetwork
from pybrain.structure import LinearLayer, SigmoidLayer, TanhLayer
from pybrain.structure import FullConnection
from pybrain.structure.modules import SoftmaxLayer, BiasUnit
from pybrain.datasets import SupervisedDataSet, ClassificationDataSet
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer

# Load every stored dataset variant: raw, whitened and normalised, each in
# both supervised and classification form.
DSSuperRaw = SupervisedDataSet.loadFromFile("Data/DSSuperRaw")
DSClassRaw = ClassificationDataSet.loadFromFile("Data/DSClassRaw")
DSSuperWhiten = SupervisedDataSet.loadFromFile("Data/DSSuperWhiten")
DSClassWhiten = ClassificationDataSet.loadFromFile("Data/DSClassWhiten")
DSSuperNorm = SupervisedDataSet.loadFromFile("Data/DSSuperNorm")
DSClassNorm = ClassificationDataSet.loadFromFile("Data/DSClassNorm")

# Recurrent 14-14-8 network: tanh hidden units, softmax output, biases on
# hidden and output layers.
layers = (14, 14, 8)
net = buildNetwork(*layers,
                   hiddenclass=TanhLayer,
                   bias=True,
                   outputbias=True,
                   outclass=SoftmaxLayer,
                   recurrent=True)

# 70% of the normalised supervised data for training, the rest for testing.
# (One-of-many target conversion was deliberately left disabled here.)
TrainDS, TestDS = DSSuperNorm.splitWithProportion(0.7)
from pybrain.datasets import ClassificationDataSet print "Reading data set..." DS = ClassificationDataSet.loadFromFile('dataset.csv') #Split validation set TestDS, TrainDS = DS.splitWithProportion( 0.25 ) #train svm from svm import svm_problem, svm_parameter, libsvm, gen_svm_nodearray #define problem with data from the pybrain dataset. # best python explanation for libsvm is here: https://github.com/arnaudsj/libsvm/tree/master/python #we have to convert the data to ints and lists because of the low-level c interface prob = svm_problem([int(t) for t in TrainDS['target']],[list(i) for i in TrainDS['input']]) param = svm_parameter() # option: -t 0: linear kernel. Best for classification. # option: -c 0.01: regularization parameter. smaller is more regularization # see below for all options param.parse_options('-t 0 -c 0.01') print "Training svm..." model = libsvm.svm_train(prob,param) print "Testing svm with three random inputs" from random import randrange for j in range(3): i = randrange(0,len(TestDS)) #again some conversion needed because of low level interface x0,m_idx = gen_svm_nodearray(list(TestDS['input'][i])) prediction = libsvm.svm_predict(model, x0) print("Target:{0}, prediction:{1}".format(TestDS['target'][i],prediction))
def genet(lst):
    """Train a network for one parameter combination and save it as XML.

    `lst` unpacks to (n, lg, nhide, nhide1, epo, wd): run index, dataset
    label, two hidden-layer sizes, epoch count and weight decay. The
    dataset is read from 'r10000_l<lg>.dat', trained via trainet2(), and
    the resulting network written to '<basename>.xml'.
    """
    n, lg, nhide, nhide1, epo, wd = lst
    dataset = ClassificationDataSet.loadFromFile('r10000_l' + str(lg) + '.dat')
    base_name = fname1(n, lg, nhide, nhide1, epo, wd)
    trained_net = trainet2(dataset, nhide, nhide1, epo, wd, fn=base_name)
    NetworkWriter.writeToFile(trained_net, filename=base_name + '.xml')
from pybrain.datasets import ClassificationDataSet print "Reading data set..." DS = ClassificationDataSet.loadFromFile('dataset.csv') #Split validation set TestDS, TrainDS = DS.splitWithProportion(0.25) #train svm from svm import svm_problem, svm_parameter, libsvm, gen_svm_nodearray #define problem with data from the pybrain dataset. # best python explanation for libsvm is here: https://github.com/arnaudsj/libsvm/tree/master/python #we have to convert the data to ints and lists because of the low-level c interface prob = svm_problem([int(t) for t in TrainDS['target']], [list(i) for i in TrainDS['input']]) param = svm_parameter() # option: -t 0: linear kernel. Best for classification. # option: -c 0.01: regularization parameter. smaller is more regularization # see below for all options param.parse_options('-t 0 -c 0.01') print "Training svm..." model = libsvm.svm_train(prob, param) print "Testing svm with three random inputs" from random import randrange for j in range(3): i = randrange(0, len(TestDS)) #again some conversion needed because of low level interface x0, m_idx = gen_svm_nodearray(list(TestDS['input'][i])) prediction = libsvm.svm_predict(model, x0)