Example #1
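These snippets are excerpts collected from different projects, so most of them omit their imports. A typical preamble covering the PyBrain calls they share (stated here as an assumption about each source file, not part of any one example) is:

import numpy as np
from pybrain.datasets import ClassificationDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.utilities import percentError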
def big_training(np_data, num_nets=1, num_epoch=20, net_builder=net_full, train_size=.1, testing=False):
    sss = cross_validation.StratifiedShuffleSplit(np_data[:,:1].ravel(), n_iter=num_nets , test_size=1-train_size, random_state=3476)
    nets=[None for net_ind in range(num_nets)]
    trainaccu=[[0 for i in range(num_epoch)] for net_ind in range(num_nets)]
    testaccu=[[0 for i in range(num_epoch)] for net_ind in range(num_nets)]
    net_ind=0
    for train_index, test_index in sss:
        print ('%s Building %d. network.' %(time.ctime(), net_ind+1))
        #print("TRAIN:", len(train_index), "TEST:", len(test_index))
        trainset = ClassificationDataSet(np_data.shape[1] - 1, 1)
        trainset.setField('input', np_data[train_index,1:]/100-.6)
        trainset.setField('target', np_data[train_index,:1])
        trainset._convertToOneOfMany( )
        trainlabels = trainset['class'].ravel().tolist()
        if testing:
            testset = ClassificationDataSet(np_data.shape[1] - 1, 1)
            testset.setField('input', np_data[test_index,1:]/100-.6)
            testset.setField('target', np_data[test_index,:1])
            testset._convertToOneOfMany( )
            testlabels = testset['class'].ravel().tolist()
        nets[net_ind] = net_builder()
        trainer = BackpropTrainer(nets[net_ind], trainset)
        for i in range(num_epoch):
            for ii in range(3):
                err = trainer.train()
            print ('%s Epoch %d: Network trained with error %f.' %(time.ctime(), i+1, err))
            trainaccu[net_ind][i]=accuracy_score(trainlabels,trainer.testOnClassData())
            print ('%s Epoch %d: Train accuracy is %f' %(time.ctime(), i+1, trainaccu[net_ind][i]))
            print ([sum([trainaccu[y][i]>tres for y in range(net_ind+1)]) for tres in [0,.1,.2,.3,.4,.5,.6]])
            if testing:
                testaccu[net_ind][i]=accuracy_score(testlabels,trainer.testOnClassData(testset))
                print ('%s Epoch %d: Test accuracy is %f' %(time.ctime(), i+1, testaccu[net_ind][i]))
        NetworkWriter.writeToFile(nets[net_ind], 'nets/'+net_builder.__name__+str(net_ind)+'.xml')
        net_ind +=1
    return [nets, trainaccu, testaccu]
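A hypothetical invocation of big_training (assuming np_data is a NumPy array with the integer class label in column 0 and features, scaled roughly to 0-100, in the remaining columns):

# nets, trainaccu, testaccu = big_training(np_data, num_nets=3, num_epoch=20, testing=True)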
Example #2
def ann(training_filename , testing_filename,itr,epoch,model_type):
    training_start_time = "The generation of data set and training started at :%s" % datetime.datetime.now()
    training_dataset = np.genfromtxt(training_filename, skip_header=0, dtype="int", delimiter='\t')
    data = ClassificationDataSet(len(training_dataset[0])-1, 2, nb_classes=2)
    for aSample in training_dataset:
        data.addSample(aSample[0:len(aSample)-1],[aSample[len(aSample)-1]] );
        
    data._convertToOneOfMany( )

    fann = buildNetwork(314,2,outclass=SoftmaxLayer);
    trainer = BackpropTrainer( fann, dataset=data, momentum=0.1, verbose=False, weightdecay=0.01)
    counter = 0;
    print training_start_time
    while(counter < itr):
        trainer.trainEpochs( epoch );
        counter = counter + 1;
    
    trnresult = percentError( trainer.testOnClassData(),data['class'] )
    trained_result_log = "epoch: %4d" % trainer.totalepochs + "  train error: %5.2f%%" % trnresult
    
    
    training_time_end = "The training and result logging ended at %s :" % datetime.datetime.now()
    
    filename = working_dir + "\models\\"+model_type + ".obj"
    save_trained_model(fann, filename)
    
    log_file.write("\n" + training_start_time+"\n")
    log_file.write(str(trained_result_log)+"\n")
    log_file.write(training_time_end+"\n")
def classify(Xtrain, Ytrain, n_hidden=5):
    """ Use entirety of provided X, Y to predict

    Arguments
    Xtrain -- Training data
    Ytrain -- Training prediction

    Returns
    classifier -- a classifier fitted to Xtrain and Ytrain
    """

    # PyBrain expects data in its DataSet format
    trndata = ClassificationDataSet(Xtrain.shape[1], nb_classes=2)
    trndata.setField('input', Xtrain)
    # Apparently, arrays don't work here as they try to access the second dimension's size...
    trndata.setField('target', mat(Ytrain).transpose())

    trndata._convertToOneOfMany() # one output neuron per class

    # build neural net and train it
    net = buildNetwork(trndata.indim, n_hidden, trndata.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)

    trainer.trainUntilConvergence()
    #trainer.trainEpochs(5)

    print "trained"
    #trainer.trainEpochs(5)

    # Return a functor that wraps calling predict
    return NeuralNetworkClassifier(trainer)
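NeuralNetworkClassifier is defined elsewhere in this snippet's source project. A minimal sketch consistent with how it is used here, assuming it simply wraps the trained BackpropTrainer, might be:

class NeuralNetworkClassifier(object):
    def __init__(self, trainer):
        self.trainer = trainer

    def predict(self, dataset):
        # delegate to the PyBrain trainer, which returns predicted class indices
        return self.trainer.testOnClassData(dataset=dataset)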
Example #4
def run_nn_fold(training_data, test_data):
    test_features, ignore, featureMap, labels, labelMap = fs.mutualinfo(training_data)

    input_len = len(test_features[0])
    num_classes = len(labelMap.keys())
    train_ds = ClassificationDataSet(input_len, 1,nb_classes=num_classes)
    for i in range(len(test_features)):
        train_ds.addSample(tuple(test_features[i]), (labels[i]))
    train_ds._convertToOneOfMany()
    net = buildNetwork(train_ds.indim, 2, train_ds.outdim, bias=True, hiddenclass=TanhLayer, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, train_ds, verbose=True)
    print "training until convergence..."
    trainer.trainUntilConvergence(maxEpochs=100)
    print "done. testing..."


    test_ds = ClassificationDataSet(input_len, 1,nb_classes=num_classes)  

    labels = []
    for tweetinfo in test_data:
        featuresFound = tweetinfo["Features"]
        label = tweetinfo["Answer"]
        labels.append(label)
        features = [0]*len(featureMap.keys())
        for feat in featuresFound:
            if feat in featureMap:
                features[ featureMap[feat] ] = 1
        test_ds.addSample(tuple(features), (labelMap[label]))

    test_ds._convertToOneOfMany()
    tstresult = percentError( trainer.testOnClassData(
            dataset=test_ds ), test_ds['class'] )
    print tstresult
def prepare_dataset():
    # Prepare output coding. "-" is 1 "." is 0
    d_morse_array = '100' # ( 1, 0, 0 ) # D -.. - 100
    g_morse_array = '110' # ( 1, 1, 0 ) # G --. - 110
    k_morse_array = '101' # ( 1, 0, 1 ) # K -.- - 101
    o_morse_array = '111' # ( 1, 1, 1 ) # O --- - 111
    r_morse_array = '010' # ( 0, 1, 0 ) # R .-. - 010
    s_morse_array = '000' # ( 0, 0, 0 ) # S ... - 000
    u_morse_array = '001' # ( 0, 0, 1 ) # U ..- - 001
    w_morse_array = '011' # ( 0, 1, 1 ) # W .-- - 011
    # Load learning data
    d_array = read_array( "d" )
    g_array = read_array( "g" )
    k_array = read_array( "k" )
    o_array = read_array( "o" )
    r_array = read_array( "r" )
    s_array = read_array( "s" )
    u_array = read_array( "u" )
    w_array = read_array( "w" )
    # Create dataset
    dataset = ClassificationDataSet( 1600, nb_classes=8, class_labels=[d_morse_array,g_morse_array,k_morse_array,o_morse_array,r_morse_array,s_morse_array,u_morse_array,w_morse_array] )
    # add all samples to dataset
    dataset.addSample( d_array, [0] )
    dataset.addSample( g_array, [1] )
    dataset.addSample( k_array, [2] )
    dataset.addSample( o_array, [3] )
    dataset.addSample( r_array, [4] )
    dataset.addSample( s_array, [5] )
    dataset.addSample( u_array, [6] )
    dataset.addSample( w_array, [7] )
    dataset._convertToOneOfMany( )
    return dataset
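A minimal training sketch for the dataset returned above (assumes the snippet's read_array helper is available and follows the standard PyBrain pattern used throughout these examples; the 40-unit hidden layer is arbitrary):

# ds = prepare_dataset()
# net = buildNetwork(ds.indim, 40, ds.outdim, outclass=SoftmaxLayer)
# trainer = BackpropTrainer(net, dataset=ds)
# for _ in range(20):
#     trainer.train()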
def simpleNeuralNetworkTrain(fileName, numFeatures, numClasses, possibleOutputs, numHiddenNodes, numTrainingEpochs):

    data = np.genfromtxt(fileName)
    trnIn = data[:, 0:5]
    trnOut = data[:, 6]
    trnOut = [int(val) for val in trnOut]

    normalizeData(trnIn, numFeatures)
    trndata = ClassificationDataSet(numFeatures, possibleOutputs, nb_classes=numClasses)
    for row in range(0, len(trnIn)):
        tempListOut = []
        tempListIn = []
        tempListOut.append(int(trnOut[row]))
        for i in range(0, numFeatures):
            tempListIn.append(trnIn[row][i])
        trndata.addSample(tempListIn, tempListOut)

    trndata._convertToOneOfMany()

    #  When running for the first time
    myNetwork = buildNetwork(numFeatures, numHiddenNodes, numClasses, outclass=SoftmaxLayer, bias=True, recurrent=False)

    # Read from file after the first try.
    #  myNetwork = NetworkReader.readFrom('firstTime.xml')    # Use saved results.
    trainer = BackpropTrainer(myNetwork, dataset=trndata, momentum=0.0, verbose=True, weightdecay=0.0)
    for i in range(numTrainingEpochs):
        trainer.trainOnDataset(dataset=trndata)
Example #7
class neuralNetwork():

	def __init__( self, n_classes ):
		self.n_classes = n_classes

	def fit( self, X, Y ):
		n_features = X.shape[1]
		self.train_ds = ClassificationDataSet( n_features, 1, nb_classes = self.n_classes )
		for train, target in zip( X, Y ):
			self.train_ds.addSample( train, [target] )

		self.train_ds._convertToOneOfMany( )

		self.net = buildNetwork( self.train_ds.indim, 2*n_features, self.train_ds.outdim, outclass = SoftmaxLayer )
		self.trainer = BackpropTrainer( self.net, self.train_ds )

	def predict( self, X ):
		n_features = X.shape[1]
		self.test_ds = ClassificationDataSet( n_features, 1, nb_classes = self.n_classes )
		for test in X:
			self.test_ds.addSample( test, [1] )

		self.test_ds._convertToOneOfMany( )

		for i in range( 100 ):
			self.trainer.trainEpochs( 5 )
			self.labels = self.net.activateOnDataset( self.test_ds )
			self.labels = self.labels.argmax(axis=1)
		return self.labels
Example #8
class EightBitBrain(object):
    
    def __init__(self, dataset, inNodes, outNodes, hiddenNodes, classes):
        self.__dataset = ClassificationDataSet(inNodes, classes-1)
        for element in dataset:
            self.addDatasetSample(self._binaryList(element[0]), element[1])
        self.__dataset._convertToOneOfMany()
        self.__network = buildNetwork(inNodes, hiddenNodes, self.__dataset.outdim, recurrent=True)
        self.__trainer = BackpropTrainer(self.__network, learningrate = 0.01, momentum = 0.99, verbose = True)
        self.__trainer.setData(self.__dataset)

    def _binaryList(self, n):
        return [int(c) for c in "{0:08b}".format(n)]
    
    def addDatasetSample(self, argument, target):
        self.__dataset.addSample(argument, target)

    def train(self, epochs):
        self.__trainer.trainEpochs(epochs)
    
    def activate(self, information):
        result = self.__network.activate(self._binaryList(information))
        highest = (0,0)
        for resultClass in range(len(result)):
            if result[resultClass] > highest[0]:
                highest = (result[resultClass], resultClass)
        return highest[1]
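A hypothetical use of EightBitBrain (the (value, target) pairs are invented for illustration; values must fit in 8 bits for _binaryList):

# brain = EightBitBrain([(3, [0]), (250, [1])], inNodes=8, outNodes=1, hiddenNodes=4, classes=2)
# brain.train(epochs=10)
# print brain.activate(3)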
	def test(self,filename,classes,trainer,net):
		testLabels = []

		#load test data
		tstdata = ClassificationDataSet(103, 1, nb_classes=classes)
		tstdata = self.loaddata(filename, classes)

		testLabels = tstdata['target'];

		# convert targets to one-of-many (one-hot) encoding, which the softmax output expects
		tstdata._convertToOneOfMany()
		
		# using numpy array
		output = np.array([net.activate(x) for x, _ in tstdata])
		output = output.argmax(axis=1)
		print(output)
		print("on test data",percentError( output, tstdata['class'] ))

		for i, l in enumerate(output):
			print l, '->', testLabels[i][0]

		# alternate version - using activateOnDataset function
		out = net.activateOnDataset(tstdata).argmax(axis=1)
		print out
		return percentError( out, tstdata['class'])
Example #10
def build_sample_nn():
	means = [(-1,0),(2,4),(3,1)]
	cov = [diag([1,1]), diag([0.5,1.2]), diag([1.5,0.7])]
	alldata = ClassificationDataSet(2, 1, nb_classes=3)
	for n in xrange(400):
		for klass in range(3):
			input = multivariate_normal(means[klass],cov[klass])
			alldata.addSample(input, [klass])

	tstdata_temp, trndata_temp = alldata.splitWithProportion(0.25)

	tstdata = ClassificationDataSet(2, 1, nb_classes=3)
	for n in xrange(0, tstdata_temp.getLength()):
	    tstdata.addSample( tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1] )

	trndata = ClassificationDataSet(2, 1, nb_classes=3)
	for n in xrange(0, trndata_temp.getLength()):
	    trndata.addSample( trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1] )

	trndata._convertToOneOfMany( )
	tstdata._convertToOneOfMany( )

	fnn = buildNetwork( trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer )


	trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)

	return trainer, fnn, tstdata
def get_ds_for_pybrain(X,y):
	ds = ClassificationDataSet(2127,nb_classes=5)
	tuples_X = [tuple(map(float,tuple(x))) for x in X.values]
	tuples_y = [tuple(map(float,(y,))) for y in y.values]
	for X,y in zip(tuples_X,tuples_y):
		ds.addSample(X,y)
	ds._convertToOneOfMany()
	return ds
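A hypothetical call, assuming X is a pandas DataFrame with 2127 feature columns and y a Series of integer class labels 0-4:

# ds = get_ds_for_pybrain(X, y)
# net = buildNetwork(ds.indim, 50, ds.outdim, outclass=SoftmaxLayer)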
def main():
    random.seed(50)
    data, digit = read_data(DATAFILE)
    
#     ds = ClassificationDataSet(64, 1, nb_classes=10)
#     
#     
#     for i in xrange(len(data)):
#         ds.addSample(data[i], [digit[i]])
#     ds._convertToOneOfMany()
#     
#     simple_network(data, digit, ds)
#     one_hidden_layer(data, digit, ds)
    n_folds = 5
    perms = np.array_split(np.arange(len(data)), n_folds)
    simple_results = []
    one_hl_results = []
    creative_results = []
    for i in xrange(n_folds):
        train_ds = ClassificationDataSet(64, 1, nb_classes = 10)
        test_ds = ClassificationDataSet(64, 1, nb_classes = 10)
        
        train_perms_idxs = range(n_folds)
        train_perms_idxs.pop(i)
        temp_list = []
        for train_perms_idx in train_perms_idxs:
            temp_list.append(perms[ train_perms_idx ])
        train_idxs = np.concatenate(temp_list)
        
        for idx in train_idxs:
            train_ds.addSample(data[idx], [digit[idx]])
        train_ds._convertToOneOfMany()

        # determine test indices
        test_idxs = perms[i]
        for idx in test_idxs:
            test_ds.addSample(data[idx], [digit[idx]])
        test_ds._convertToOneOfMany()
        
        simple_results.append(simple_network(data, digit, train_ds, test_ds))
        one_hl_results.append(one_hidden_layer(data, digit, train_ds, test_ds))
        creative_results.append(creative_network(data, digit, train_ds, test_ds))
        
    for i in xrange(len(simple_results)):
        print 'Simple %f : Hidden %f : Creative %f' % (simple_results[i],
                                                    one_hl_results[i],
                                                    creative_results[i])
    print 'Simple mean: %f' % np.mean(simple_results)
    print 'One hidden layer mean: %f' % np.mean(one_hl_results)
    print 'Creative mean : %f' % np.mean(creative_results)
    
     
    print "Simple vs onehl"
    paired_t_test(simple_results, one_hl_results)
    print "simple vs creative"
    paired_t_test(simple_results, creative_results)
    print "onehl vs creative"
    paired_t_test(one_hl_results, creative_results)
Example #13
def nn_classify():
    # train_X,Y = load_svmlight_file('data/train_metrix')
    # rows = pd.read_csv('data/log_test2.csv',index_col=0).sort_index().index.unique()
    # train_X = pd.read_csv('data/train_tfidf.csv',index_col=0)
    # test_X = pd.read_csv('data/test_tfidf.csv',index_col=0)
    # select = SelectPercentile(f_classif, percentile=50)
    # select.fit(train_X,Y)
    # train_X = select.transform(train_X)
    # test_X = select.transform(test_X)
    # print 'dump train...'
    # dump_svmlight_file(train_X,Y,'data/train_last')
    # test_Y = [0]*(test_X.shape[0])
    # print 'dump test...'
    # dump_svmlight_file(test_X,test_Y,'data/test_last')

    train_X,Y = load_svmlight_file('data/train_last')
    test_X,test_Y = load_svmlight_file('data/test_last')
    train_X = train_X.toarray()
    test_X = test_X.toarray()
    Y = [int(y)-1 for y in Y]
    print 'Y:',len(Y)
    rows = pd.read_csv('data/log_test2.csv',index_col=0).sort_index().index.unique()
    train_n = train_X.shape[0]
    m = train_X.shape[1]
    test_n = test_X.shape[0]
    print train_n,m,#test_n
    train_data = ClassificationDataSet(m,1,nb_classes=12)
    test_data = ClassificationDataSet(m,1,nb_classes=12)
    # test_data = ClassificationDataSet(test_n,m,nb_classes=12)
    for i in range(train_n):
        train_data.addSample(np.ravel(train_X[i]),Y[i])
    for i in range(test_n):
        test_data.addSample(test_X[i],Y[i])
    trndata = train_data
    # tstdata = train_data

    trndata._convertToOneOfMany()
    # tstdata._convertToOneOfMany()
    test_data._convertToOneOfMany()

    # First train all the classifiers on the training set
    print 'train classify...'
    fnn = buildNetwork( trndata.indim, 400 , trndata.outdim, outclass=SoftmaxLayer )
    trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1, learningrate=0.01 , verbose=True, weightdecay=0.01)
    trainer.trainEpochs(3)
    # print 'Percent Error on Test dataset: ' , percentError( trainer.testOnClassData (
    #            dataset=tstdata )
    #            , )
    print 'end train classify'
    pre_y = trainer.testOnClassData(dataset=trndata)
    print metrics.classification_report(Y,pre_y)
    pre_y = trainer.testOnClassData(dataset=test_data)
    print 'write result...'
    print 'before:',pre_y[:100]
    pre_y = [int(y)+1 for y in pre_y]
    print 'after:',pre_y[:100]
    DataFrame(pre_y,index=rows).to_csv('data/info_test2.csv', header=False)
    print 'end...'
Example #14
    def __init__(self, data, targets, cv_data, cv_targets, extra, layers, epochs=1, smoothing=1, new=True, filename_in=False):
        
        if len(cv_data) != len(cv_targets): raise Exception("Number of CV data and CV targets must be equal")
        if len(data) != len(targets): raise Exception("Number of data and targets must be equal")

        if new:
            class_tr_targets = [str(int(t[0]) - 1) for t in targets] # for pybrain's classification dataset
            print "...training the DNNRegressor"
            if len(layers) > 2: # TODO testing only
                net = DNNRegressor(data, extra, class_tr_targets, layers, hidden_layer="TanhLayer", final_layer="SoftmaxLayer", compression_epochs=epochs, bias=True, autoencoding_only=False)
                print "...running net.fit()"
                net = net.fit()
            elif len(layers) == 2:
                net = buildNetwork(layers[0], layers[-1], outclass=SoftmaxLayer, bias=True)

            ds = ClassificationDataSet(len(data[0]), 1, nb_classes=9)
            bag = 1
            noisy, _ = self.dropout(data, noise=0.0, bag=bag, debug=True)
            bagged_targets = []
            for t in class_tr_targets:
                for b in range(bag):
                    bagged_targets.append(t)
            for i,d in enumerate(noisy):
                t = bagged_targets[i]
                ds.addSample(d, t)
            ds._convertToOneOfMany()

            print "...smoothing for epochs: ", smoothing
            self.model = net
            preds = [self.predict(d) for d in cv_data]
            cv = score(preds, cv_targets, debug=False)
            preds = [self.predict(d) for d in data]
            tr = score(preds, targets, debug=False)
            trainer = BackpropTrainer(net, ds, verbose=True, learningrate=0.0008, momentum=0.04, weightdecay=0.05) # best score 0.398 after 50 compression epochs and 200 epochs with lr=0.0008, weightdecay=0.05, momentum=0.04. Used dropout of 0.2 in compression, 0.5 in softmax pretraining, and no dropout in smoothing.
            print "Train score before training: ", tr
            print "CV score before training: ", cv
            for i in range(smoothing):
                trainer.train()
                self.model = net
                preds = [self.predict(d) for d in cv_data]
                cv = score(preds, cv_targets, debug=False)
                preds = [self.predict(d) for d in data]
                tr = score(preds, targets, debug=False)
                print "Train/CV score at epoch ", (i+1), ': ', tr, '/', cv
                #if i == 1:
                    #print "...saving the model"
                    #save("data/1000_ex_4_hidden/net_epoch_1.txt", net)
                #elif i == 3:
                    #print "...saving the model"
                    #save("data/1000_ex_4_hidden/net_epoch_3.txt", net)
                #elif i == 5:
                    #print "...saving the model"
                    #save("data/1000_ex_4_hidden/net_epoch_5.txt", net)
            print "...saving the model"
            #save("data/1000_ex_4_hidden/net_epoch_10.txt", net)
        else:
            model = load(filename_in)
            self.model = model
 def createDataset(self, inputData):
     data = ClassificationDataSet(100,nb_classes=len(inputData.keys()), class_labels=inputData.keys())
     allTheLetters = string.uppercase
     for i in range(300):
         for letter in inputData.keys():
             data.addSample(inputData[letter], allTheLetters.index(letter)) 
     
     data._convertToOneOfMany([0,1])
     return data
Example #16
def makeClassificationDataSet(X, Y, nb_classes=12):
    """ dim(X) = c(n,m)
             dim(Y) = c(n,1)
             the class of Y must be 0 ,1, 2 ..., where its label starts with 0
        """
    alldata = ClassificationDataSet(inp=X.shape[1], target=1, nb_classes=nb_classes)
    [alldata.addSample(X[row, :], [Y[row]]) for row in range(X.shape[0])]
    alldata._convertToOneOfMany()
    return alldata
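A usage sketch with synthetic data (shapes and class count are illustrative assumptions):

# X = np.random.rand(100, 20)        # 100 samples, 20 features
# Y = np.random.randint(0, 12, 100)  # labels start at 0, as the docstring requires
# alldata = makeClassificationDataSet(X, Y, nb_classes=12)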
Example #17
def classificationDataSet(subjects=['a2','b','c1','c2'], segClass=0, db=None, seg_width=10, usePCA=True, n_components=5, isTrainingData=False):
	if not db:
		db = gyroWalkingData()

	if usePCA:
		DS = ClassificationDataSet(n_components*3, nb_classes=2)
	else:
		DS = ClassificationDataSet(21*3, nb_classes=2)
	
	for subject in subjects:
		# Initialise data
		if usePCA:
			raw = db.pca_dict(n_components=n_components, whiten=False)[subject]
		else:
			raw = db.data[subject][:,2:]
		gradients, standardDeviations = summaryStatistics(raw, std_window=seg_width)

		# Initialise segments
		if 0 <= segClass < 4:
			segs = [s for s,c in db.manual_gait_segments[subject] if c == segClass]
		else:
			segs = db.segments[subject]

		# Add data
		for i in range(0,len(raw)):

			"""
			# Look for segments in window, including those of other classes
			hasSeg = 0
			hasOtherSeg = False
			for j in range(seg_width):
				if i+j in segs:
					hasSeg = 1
				else:
					if i+j in zip(*db.manual_gait_segments[subject])[0]:
						hasOtherSeg = True
				if hasOtherSeg:
					hasSeg = 0

			# Add segments to classifier, duplicating rare classes if it is training data
			for j in range(seg_width):
				if i+j < len(raw):
					DS.appendLinked( np.concatenate( [raw[i+j],gradients[i+j],standardDeviations[i+j]] ), [hasSeg] )
					if isTrainingData and (hasSeg or hasOtherSeg):
						for i in range(0):
							DS.appendLinked( np.concatenate( [raw[i+j],gradients[i+j],standardDeviations[i+j]] ), [hasSeg] )
			"""

			hasSeg = 0
			if i in segs:
				hasSeg = 1
			DS.appendLinked( np.concatenate( [raw[i],gradients[i],standardDeviations[i]] ), [hasSeg] )
	
	DS._convertToOneOfMany()
	if isTrainingData:
		DS = balanceClassRatios(DS)
	return DS
	def livetest(self,data):
		trainer, net = self.unpickleModel()
		testData = ClassificationDataSet(103, 1, nb_classes=9)
		testData.addSample(data[0],1);
		testData._convertToOneOfMany()
		out = net.activateOnDataset(testData).argmax(axis=1)
		percentError(out, testData['class'])
		print self.labelToLetter[str(out[0])]
		return self.labelToLetter[str(out[0])]
def _createDataSet(X, Y, one_based):
    labels = np.unique(Y)
    alldata = ClassificationDataSet(X.shape[1], nb_classes = labels.shape[0], class_labels = labels)
    shift = 1 if one_based else 0
    for i in range(X.shape[0]):
        alldata.addSample(X[i], Y[i] - shift)
    
    alldata._convertToOneOfMany()
    return alldata
Example #20
def createDataset():
    data = ClassificationDataSet(100,nb_classes=len(lettersDict.keys()), class_labels=lettersDict.keys())
    allTheLetters = string.uppercase
    for letter in lettersDict.keys():
        data.addSample(lettersDict[letter], allTheLetters.index(letter)) 
    
    data._convertToOneOfMany(bounds=[0, 1])
    print data.calculateStatistics()

    return data
def buildDataset(labels, data):
	'''
	builds and returns a classification dataset from user (image, label) mappings
	'''
	DS = ClassificationDataSet(len(data[0][0].ravel()), 1, nb_classes=len(labels), class_labels=labels)

	for img, label in data:
		DS.addSample(img.ravel(), [label])
	DS._convertToOneOfMany()
	return DS
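A hypothetical call, assuming data is a list of (image array, label index) pairs built elsewhere in the project:

# labels = ['cat', 'dog']
# DS = buildDataset(labels, [(img_a, 0), (img_b, 1)])  # img_a, img_b: 2-D numpy arrays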
def conv2DS(Xv,yv = None) :
    if yv is None :
        yv =  np.asmatrix( np.ones( (Xv.shape[0],1) ) )
        for j in range(len(classNames)) : yv[j] = j

    C = len(unique(yv.flatten().tolist()[0]))
    DS = ClassificationDataSet(M, 1, nb_classes=C)
    for i in range(Xv.shape[0]) : DS.appendLinked(Xv[i,:].tolist()[0], [yv[i].A[0][0]])
    DS._convertToOneOfMany( )
    return DS
def main():

	in_data=np.genfromtxt('logit-train.csv', delimiter = ',')
	out_data = np.genfromtxt('logit-test.csv', delimiter = ',')

	#getting in the data from csv files and making it suitable for further action.
	in_data=in_data[~np.isnan(in_data).any(1)]
	t=len(in_data[0,:])
	y_train=np.array(in_data[0:,t-1])
	x_train=np.array(in_data[0:,:t-1])

	scaler = preprocessing.StandardScaler().fit(x_train) #standardization plays an important role in all NN algos

	x_train=scaler.transform(x_train) #final x_train

	out_data=out_data[~np.isnan(out_data).any(1)]
	t=len(out_data[0,:])
	y_test=np.array(out_data[0:,t-1])
	x_test=np.array(out_data[0:,:t-1])

	x_test=scaler.transform(x_test) # final x_test

	alltraindata=ClassificationDataSet(t-1,1,nb_classes=2)
	for count in range(len((in_data))):
		alltraindata.addSample(x_train[count],[y_train[count]])

	alltraindata._convertToOneOfMany(bounds=[0,1])

	alltestdata=ClassificationDataSet(t-1,1,nb_classes=2)
	for count in range(len((out_data))):
		alltestdata.addSample(x_test[count],[y_test[count]])

	alltestdata._convertToOneOfMany(bounds=[0,1])
	
	numRBFCenters = 10 #the 'h' value
	
	rbf=RBFNN(alltraindata.indim, alltraindata.outdim, numRBFCenters)

	rbf.train(alltraindata['input'],alltraindata['target'])
	
	testdata_target=rbf.test(alltestdata['input']) #values obtained after testing, T is a 'n x outdim' matrix
	testdata_target = testdata_target.argmax(axis=1)  # the highest output activation gives the class. Selects the class predicted
  	#testdata_target = testdata_target.reshape(len(in_data),1)	

	#compare to y_test to obtain the accuracy.
	
	# count=0
	# for x in range(len(y_test)):
	# 	if testdata_target[x] == y_test[x]:
	# 		count+=1
	# tstresult2=float(count)/float(len(y_test)) * 100

	tstresult = percentError(testdata_target,alltestdata['class'])

	print "Accuracy on test data is: %5.2f%%," % (100-tstresult)
Example #24
def trainNN(data: list, targets: list, seed):
    """
    Trains a neural network
    """
    X_tweet_counts = count_vect.fit_transform(data)

    # Compute term frequencies and store in X_train_tf
    # Compute tfidf feature values and store in X_train_tfidf
    X_train_tfidf = tfidf_transformer.fit_transform(X_tweet_counts)
    arr = X_train_tfidf.toarray()

    trainingdata = arr[:int(.75 * len(arr))]
    testdata = arr[int(.75 * len(arr)):]
    trainingtargets = targets[:int(.75 * len(targets))]
    testtargets = targets[int(.75 * len(targets)):]

    trainingds = ClassificationDataSet(len(arr[0]), 1, nb_classes=2)
    testds = ClassificationDataSet(len(arr[0]), 1, nb_classes=2)

    for index, data in enumerate(trainingdata):
        trainingds.addSample(data, trainingtargets[index])
    for index, data in enumerate(testdata):
        testds.addSample(data, testtargets[index])

    trainingds._convertToOneOfMany()
    testds._convertToOneOfMany()

    net = buildNetwork(trainingds.indim, 10, 10, 10, trainingds.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, dataset=trainingds, learningrate=.65, momentum=.1)

    besttrain = 99.9
    besttest = 99.9
    bestresults = []
    bestclass = []

    for i in range(20):
        trainer.trainEpochs(1)
        trainresult = percentError(trainer.testOnClassData(), trainingds['class'])
        teststuff = trainer.testOnClassData(dataset=testds)
        testresult = percentError(teststuff, testds['class'])
        if testresult < besttest:
            besttest = testresult
            besttrain = trainresult
            bestresults = teststuff
            bestclass = testds['class']

        print("epoch: %2d" % trainer.totalepochs)
        print("train error: %2.2f%%" % trainresult)
        print("test error: %2.2f%%" % testresult)
    print("Best test error accuracy: {:.2f}%".format(besttest))
    print("Best test error f1 score: {:.4f}%".format(f1_score(bestclass, bestresults, average='macro')))
    print("Confusion Matrix:")
    print(confusion_matrix(bestclass, bestresults))

    return besttest
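A hypothetical call, assuming count_vect and tfidf_transformer are module-level sklearn CountVectorizer/TfidfTransformer objects in the source project, and texts/labels are parallel lists:

# best_test_error = trainNN(texts, labels, seed=42)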
def main():
    means = [(-1,0),(2,4),(3,1)]
    cov = [diag([1,1]), diag([0.5,1.2]), diag([1.5,0.7])]
    alldata = ClassificationDataSet(2, 1, nb_classes=3)
    for n in xrange(400):
        for klass in range(3):
            input = multivariate_normal(means[klass],cov[klass])
            alldata.addSample(input, [klass])
    tstdata, trndata = alldata.splitWithProportion( 0.25 )
    trndata._convertToOneOfMany( )
    tstdata._convertToOneOfMany( )
    print "Number of training patterns: ", len(trndata)
    print "Input and output dimensions: ", trndata.indim, trndata.outdim
    print "First sample (input, target, class):"
    print trndata['input'][0], trndata['target'][0], trndata['class'][0]
    
    fnn = buildNetwork( trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer )
    trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
    ticks = arange(-3.,6.,0.2)
    X, Y = meshgrid(ticks, ticks)
    # need column vectors in dataset, not arrays
    griddata = ClassificationDataSet(2,1, nb_classes=3)
    for i in xrange(X.size):
        griddata.addSample([X.ravel()[i],Y.ravel()[i]], [0])
    griddata._convertToOneOfMany()  # this is still needed to make the fnn feel comfy
    
    for i in range(20):
        trainer.trainEpochs(1)
    
        trnresult = percentError( trainer.testOnClassData(),
                                  trndata['class'] )
        tstresult = percentError( trainer.testOnClassData(
               dataset=tstdata ), tstdata['class'] )
    
        print "epoch: %4d" % trainer.totalepochs, \
              "  train error: %5.2f%%" % trnresult, \
              "  test error: %5.2f%%" % tstresult
        
        out = fnn.activateOnDataset(griddata)
        out = out.argmax(axis=1)  # the highest output activation gives the class
        out = out.reshape(X.shape)
        figure(1)
        ioff()  # interactive graphics off
        clf()   # clear the plot
        hold(True) # overplot on
        for c in [0,1,2]:
            here, _ = where(tstdata['class']==c)
            plot(tstdata['input'][here,0],tstdata['input'][here,1],'o')
        if out.max()!=out.min():  # safety check against flat field
            contourf(X, Y, out)   # plot the contour
        ion()   # interactive graphics on
        draw()  # update the plot
        
    ioff()
    show()
def mlpClassifier(X,y,train_indices, test_indices, mom=0.1,weightd=0.01, epo=5):
    X_train, y_train, X_test, y_test = X[train_indices],y[train_indices], X[test_indices], y[test_indices]

    #Converting the data into a dataset which is easily understood by PyBrain. 
    tstdata = ClassificationDataSet(X.shape[1],target=1,nb_classes=8)
    trndata = ClassificationDataSet(X.shape[1],target=1,nb_classes=8)
 #   print "shape of X_train & y_train: " + str(X_train.shape) + str(y_train.shape)
    for i in range(y_train.shape[0]):
        trndata.addSample(X_train[i,:], y_train[i])
    for i in range(y_test.shape[0]):
        tstdata.addSample(X_test[i,:], y_test[i])
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()

    #printing the specs of data
#    print "Number of training patterns: ", len(trndata)
#    print "Input and output dimensions: ", trndata.indim, trndata.outdim
#    print "First sample (input, target, class):"
#    print trndata['input'][0], trndata['target'][0], trndata['class'][0]

    #The neural-network used
 #   print "Building Network..."
    #input layer, hidden layer of size 10(very small), output layer
    ANNc = FeedForwardNetwork()
    inLayer = LinearLayer(trndata.indim, name="ip")
    hLayer1 = TanhLayer(100, name = "h1")
    hLayer2 = SigmoidLayer(100, name = "h2")
    outLayer = SoftmaxLayer(trndata.outdim, name = "op")

    ANNc.addInputModule(inLayer)
    ANNc.addModule(hLayer1)
    ANNc.addModule(hLayer2)
    ANNc.addOutputModule(outLayer)

    ip_to_h1 = FullConnection(inLayer, hLayer1, name = "ip->h1")
    h1_to_h2 = FullConnection(hLayer1, hLayer2, name = "h1->h2")
    h2_to_op = FullConnection(hLayer2, outLayer, name = "h2->op")

    ANNc.addConnection(ip_to_h1)
    ANNc.addConnection(h1_to_h2)
    ANNc.addConnection(h2_to_op)
    ANNc.sortModules()

#    print "Done. Training the network."

    #The trainer used, in our case Back-propagation trainer
    trainer = BackpropTrainer( ANNc, dataset=trndata, momentum=mom, verbose=True, weightdecay=weightd)
    trainer.trainEpochs( epo )

    #The error
    trnresult = percentError( trainer.testOnClassData(dataset=trndata), trndata['class'] )
    tstresult = percentError( trainer.testOnClassData(dataset=tstdata ), tstdata['class'] )
 #   print "Done."
    return ANNc, trainer.totalepochs, (100 - trnresult), (100 - tstresult) 
def EvaluateArtificialNeuralNetwork(training_data, Input_features, Output_feature, NUMBER_CLASSES, HIDDEN_NEURONS, NUMBER_LAYERS, dataset_name, ParameterVal):

	X = training_data[Input_features]
	Y = training_data[Output_feature]

	ds = ClassificationDataSet(X.shape[1], nb_classes=NUMBER_CLASSES)

	for k in xrange(len(X)): 
		ds.addSample((X.ix[k,:]), Y.ix[k,:])

	tstdata_temp, trndata_temp = ds.splitWithProportion(.25)

	tstdata = ClassificationDataSet(X.shape[1], nb_classes=NUMBER_CLASSES)
	for n in xrange(0, tstdata_temp.getLength()):
		tstdata.addSample( tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1] )

	trndata = ClassificationDataSet(X.shape[1], nb_classes=NUMBER_CLASSES)
	for n in xrange(0, trndata_temp.getLength()):
		trndata.addSample( trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1] )

	if NUMBER_CLASSES > 1:
		trndata._convertToOneOfMany( )
		tstdata._convertToOneOfMany( )

	'''*****Actual computation with one layer and HIDDEN_NEURONS number of neurons********'''

	fnn = buildNetwork( trndata.indim, HIDDEN_NEURONS , trndata.outdim, outclass=SoftmaxLayer )

	trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1, verbose=False, weightdecay=0.01)

	trainer.trainUntilConvergence(maxEpochs=3)

	trnresult = percentError( trainer.testOnClassData(), trndata['class'] )
	tstresult = percentError( trainer.testOnClassData(dataset=tstdata ), tstdata['class'] )

	print ("Accuracy with Artificial Neural Network: epoch: " + str(trainer.totalepochs) + "  TrainingSet:" + str(1-trnresult/100) + "  TestSet:" + str(1-tstresult/100))

	'''****** Graphical Representation*****'''

	'''tot_hidden_tests, X_train, X_test, Y_train, Y_test, training_error, test_error = InitiateErrorCalcData(ParameterVal, training_data[Input_features], training_data[Output_feature])

	for  hidden_unit in tot_hidden_tests:
		print ("Computing hidden unit :" + str(hidden_unit))
		model = buildNetwork( trndata.indim, hidden_unit , trndata.outdim, outclass=SoftmaxLayer )
		temp_trainer = BackpropTrainer( model, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
		temp_trainer.trainUntilConvergence(maxEpochs=3)
		training_error.append(MSE( temp_trainer.testOnClassData(), trndata['class'] ))
		test_error.append(MSE( temp_trainer.testOnClassData(dataset=tstdata ), tstdata['class'] ))

	PlotErrors(tot_hidden_tests, training_error, test_error, dataset_name, "Number of Hidden Units for single layer ANN", "MSE")'''

	'''*****Graphical representation with multiple layers and HIDDEN_NEURONS number of neurons********'''

	'''ffn = FeedForwardNetwork()
Example #28
def create_dataset(filename):
    dataset = ClassificationDataSet(13, 1, class_labels=['0', '1', '2'])
    football_data = FootballDataCsv(filename)
    total_min = football_data.total_min()
    total_max = football_data.total_max()
    for data in football_data:
        normalized_features = [normalize(x, min_value=total_min, max_value=total_max) for x in data.to_list()]
        dataset.addSample(normalized_features, [data.binarized_output])
    dataset.assignClasses()
    dataset._convertToOneOfMany()
    return dataset
def create_network(X,Y,testx,testy):
    numOfFeature=X.shape[1]
    numOfExample=X.shape[0]
    alldata = ClassificationDataSet(numOfFeature, 1, nb_classes=10)   # create the classification dataset
    for i in range(0,numOfExample):
        alldata.addSample(X[i], Y[i])
    alldata._convertToOneOfMany()

    numOfFeature1=testx.shape[1]
    numOfExample1=testx.shape[0]
    testdata = ClassificationDataSet(numOfFeature1, 1, nb_classes=10)   # create the classification dataset
    for i in range(0,numOfExample1):
        testdata.addSample(testx[i],testy[i])
    testdata._convertToOneOfMany()

    print alldata.indim
    print alldata.outdim
    net = FeedForwardNetwork()
    inLayer = LinearLayer(alldata.indim)
    hiddenLayer1 = SigmoidLayer(60)      # layer sizes are your choice, but judging by training results, more layers and nodes are not always better
    hiddenLayer2 = SigmoidLayer(60) 
    outLayer = SoftmaxLayer(alldata.outdim)
    #bias = BiasUnit('bias')
    net.addInputModule(inLayer)
    net.addModule(hiddenLayer1)
    net.addModule(hiddenLayer2)
    net.addOutputModule(outLayer)
    #net.addModule(bias)
    in_to_hidden = FullConnection(inLayer, hiddenLayer1)
    hidden_to_out = FullConnection(hiddenLayer2, outLayer)
    hidden_to_hidden = FullConnection(hiddenLayer1, hiddenLayer2)
    net.addConnection(in_to_hidden)
    net.addConnection(hidden_to_hidden)
    net.addConnection(hidden_to_out)
    net.sortModules()

    #fnn = buildNetwork( alldata.indim, 100, alldata.outdim, outclass=SoftmaxLayer )
    trainer = BackpropTrainer( net, dataset=alldata, momentum=0.1, verbose=True, weightdecay=0.01)
    for i in range(0,20):
        print i
        trainer.trainEpochs( 1 )     #将数据训练一次
        print "train finish...."
        outtrain = net.activateOnDataset(alldata)   
        outtrain = outtrain.argmax(axis=1)  # the highest output activation gives the class,每个样本取最大概率的类 out=[[1],[2],[3],[2]...]
        outtest = net.activateOnDataset(testdata)   
        outtest = outtest.argmax(axis=1)  # the highest output activation gives the class,每个样本取最大概率的类 out=[[1],[2],[3],[2]...]
        trnresult = percentError( outtrain,alldata['class'] )
        tstresult = percentError( outtest,testdata['class'] )
        #trnresult = percentError( trainer.testOnClassData(dataset=alldata),alldata['class'] )
        #tstresult = percentError( trainer.testOnClassData(dataset=testdata),testdata['class'] )
        print "epoch: %4d" % trainer.totalepochs,"  train error: %5.2f%%" % trnresult,"  test error: %5.2f%%" % tstresult

    return net
 def predict(self,place,timestamp):
     sample=self.__prepare_features(place,timestamp)
     griddata = ClassificationDataSet(2,1, nb_classes=self.num_of_places)
     griddata.addSample(sample, [0])
     griddata._convertToOneOfMany()  
     out = self.fnn.activateOnDataset(griddata)
     index=out.argmax(axis=1)
     result=None
     for key in self.places_indexes:
         if self.places_indexes[key]==index:
             result=key
     return result
target = (y == 1) * 1
# target = y + 1
# target = y

for i in xrange(N_train):
    if y[i] != 0:
        train_data.addSample(X_new[i, ], [target[i]])

for i in xrange(N_train + 1, N_test_end):
    if y[i] != 0:
        test_data.addSample(X_new[i, ], [target[i]])

for i in xrange(X_new.shape[0]):
    all_data.addSample(X_new[i, ], [target[i]])

train_data._convertToOneOfMany()
test_data._convertToOneOfMany()
all_data._convertToOneOfMany()

print("building")
fnn = buildNetwork(train_data.indim,
                   6,
                   train_data.outdim,
                   fast=True,
                   outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn,
                          dataset=train_data,
                          momentum=0.2,
                          verbose=True,
                          learningrate=0.05,
                          lrdecay=1.0)
Example #32
test_data = ClassificationDataSet(2,
                                  1,
                                  nb_classes=3,
                                  class_labels=['jovem', 'adulto', 'idoso'])
for n in range(test_data_temp.getLength()):
    test_data.addSample(
        test_data_temp.getSample(n)[0],
        test_data_temp.getSample(n)[1])

val_data = ClassificationDataSet(2,
                                 1,
                                 nb_classes=3,
                                 class_labels=['jovem', 'adulto', 'idoso'])
for n in range(val_data_temp.getLength()):
    val_data.addSample(
        val_data_temp.getSample(n)[0],
        val_data_temp.getSample(n)[1])

train_data._convertToOneOfMany(bounds=[0, 1])
test_data._convertToOneOfMany(bounds=[0, 1])
val_data._convertToOneOfMany(bounds=[0, 1])

from pybrain.structure.modules import SoftmaxLayer
from pybrain.utilities import percentError

net = buildNetwork(train_data.indim,
                   5,
                   train_data.outdim,
                   outclass=SoftmaxLayer)


def show_weights(net):
    for mod in net.modules:
        for conn in net.connections[mod]:
Example #33
        p[2] = 0
    if all(board == r[0]):
        p[3] = 0
#    p /= p.sum()
#    return random.choice(arange(4), p = p)
    return p.argmax()

if __name__ == '__main__':
    tr_x = load('rec_board.npy')
    tr_y = load('rec_move.npy')

    tr_x = con1(tr_x.T)
    
    print tr_x.shape
    print tr_y.shape

    data = ClassificationDataSet(tr_x.shape[1], 1, nb_classes = 4)
    for ind, ele in enumerate(tr_x):
        data.addSample(ele, tr_y[ind])
    data._convertToOneOfMany()
    print data.outdim

    fnn = buildNetwork(data.indim, 10, 10, data.outdim, hiddenclass=SigmoidLayer, outclass=SoftmaxLayer )
    trainer = BackpropTrainer( fnn, dataset=data)#, momentum=0.1, verbose=True, weightdecay=0.01)   
    for i in xrange(3):
        print trainer.train()
    #trainer.trainUntilConvergence()

    game = _2048(length = 4)
    game.mul_test(100, lambda a, b, c, d, e: softmax_dec(a, b, c, d, e, f = fnn.activate), addition_arg = True)


if __name__ == "__main__":
    # First obtain our training and testing data
    # Training has 50K samples, Testing 100K
    Xt, Yt, Xv = load_validation_data()

    # Run Neural Network over training data
    classifier = classify(Xt, Yt)

    # Prepare validation data and predict
    tstdata = ClassificationDataSet(Xv.shape[1], 1, nb_classes=2)
    tstdata.setField('input', Xv)
    tstdata._convertToOneOfMany()  # one output neuron per class

    predictions = classifier.predict(tstdata)

    # Write prediction to file
    write_test_prediction("out_nn.txt", np.array(predictions))
# [set Data]

#CSV_TRAIN = "dataset/train_na2zero.csv"
#CSV_TEST = "dataset/test_na2zero.csv"
CSV_TRAIN = "dataset/train_zero_60x60.csv"
CSV_TEST = "dataset/test_zero_60x60.csv"

df_train = pd.read_csv(CSV_TRAIN)
Y = df_train.y
Y = Y -1 # in order to make target in the range of [0, 1, 2, 3, ...., 11]
X = df_train.iloc[:, 1:].values

alldata = ClassificationDataSet(inp=X.shape[1], target=1, nb_classes=12)
for i in range(X.shape[0]):
    alldata.addSample(X[i, :], [Y[i]])
alldata._convertToOneOfMany()

df_test = pd.read_csv(CSV_TEST)
test_X = df_test.iloc[:, 1:].values

print "Number of training patterns: ", len(alldata)
print "Input and output dimensions: ", alldata.indim, alldata.outdim
print "First sample (input, target, class):"
print alldata['input'][0], alldata['target'][0], alldata['class'][0]

#############################################################################
# fnn
n = buildNetwork(alldata.indim, 1000, 1000, 1000, alldata.outdim, outclass=SoftmaxLayer, bias=True)
print("\n[ Network Structure]\n",n)

#############################################################################
for i in range(len(Y)):
    y = 0
    if Y['好瓜_是'][i] == 1:
        y = 1
        ds.appendLinked(X.ix[i], y)
ds.calculateStatistics()  # returns a class histogram? not sure what this is doing

# Step 4: split into test and training sets
testdata = ClassificationDataSet(19, 1, nb_classes=2, class_labels=labels)
testdata_temp, traindata_temp = ds.splitWithProportion(0.25)
for n in range(testdata_temp.getLength()):
    testdata.appendLinked(
        testdata_temp.getSample(n)[0],
        testdata_temp.getSample(n)[1])
print(testdata)
testdata._convertToOneOfMany()
print(testdata)
traindata = ClassificationDataSet(19, 1, nb_classes=2, class_labels=labels)
for n in range(traindata_temp.getLength()):
    traindata.appendLinked(
        traindata_temp.getSample(n)[0],
        traindata_temp.getSample(n)[1])
traindata._convertToOneOfMany()
'''
# Using sklearn's OneHotEncoder
# Drawback: it only operates on one column at a time, so the results have to be recombined afterwards; cumbersome
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
a = LabelEncoder().fit_transform(df[df.columns[0]])
# dataset_One = OneHotEncoder.fit(df.values[])
# print(df['色泽']) # a standalone Series?
Example #37
def exec_algo(xml_file, output_location):
                
        rootObj=ml.parse(xml_file)

        #Getting the root element so that we get the subclasses and its members and member function

        file=open(rootObj.MachineLearning.classification.datafile)

        var_inp=rootObj.MachineLearning.classification.input
        var_out=rootObj.MachineLearning.classification.output
        classes=rootObj.MachineLearning.classification.classes

        DS=ClassificationDataSet(var_inp,var_out,nb_classes=classes)

        for line in file.readlines():
                data=[float(x) for x in line.strip().split(',') if x != '']
                inp=tuple(data[:var_inp])
                output=tuple(data[var_inp:])
                DS.addSample(inp,output)

        split=rootObj.MachineLearning.classification.split
        tstdata,trndata=DS.splitWithProportion(split)
        trdata=ClassificationDataSet(trndata.indim,var_out,nb_classes=classes)
        tsdata=ClassificationDataSet(tstdata.indim,var_out,nb_classes=classes)

        for i in xrange(trndata.getLength()):
            trdata.addSample(trndata.getSample(i)[0],trndata.getSample(i)[1])

        for i in xrange(tstdata.getLength()):
            tsdata.addSample(tstdata.getSample(i)[0],tstdata.getSample(i)[1])


        trdata._convertToOneOfMany()
        tsdata._convertToOneOfMany()

        hiddenNeurons=rootObj.MachineLearning.classification.algorithm.RadialBasisFunctionNetwork.hiddenNeurons
        fnn=FeedForwardNetwork()
        inputLayer=LinearLayer(trdata.indim)
        hiddenLayer=GaussianLayer(hiddenNeurons)
        outputLayer=LinearLayer(trdata.outdim)

        fnn.addInputModule(inputLayer)
        fnn.addModule(hiddenLayer)
        fnn.addOutputModule(outputLayer)

        in_to_hidden=FullConnection(inputLayer,hiddenLayer)
        hidden_to_outputLayer=FullConnection(hiddenLayer,outputLayer)

        fnn.addConnection(in_to_hidden)
        fnn.addConnection(hidden_to_outputLayer)

        fnn.sortModules()
        learningrate=rootObj.MachineLearning.classification.algorithm.RadialBasisFunctionNetwork.learningRate
        momentum=rootObj.MachineLearning.classification.algorithm.RadialBasisFunctionNetwork.momentum
        epochs=rootObj.MachineLearning.classification.algorithm.RadialBasisFunctionNetwork.epochs
        trainer=BackpropTrainer(fnn,dataset=trdata, verbose=True, learningrate=learningrate, momentum=momentum)
        trainer.trainEpochs(epochs=epochs)
        #trainer.train()
        #trainer.trainUntilConvergence(dataset=trdata, maxEpochs=500, verbose=True, continueEpochs=10, validationProportion=0.25)

        trresult=percentError(trainer.testOnClassData(),trdata['class'])

        #testingResult=percentError(trainer.testOnClassData(dataset=tsdata),tsdata['class'])

        #print "Training accuracy : %f , Testing Accuracy: %f" % (100-trresult,100-testingResult)

        print "Training accuracy : %f " % (100-trresult)
        ts=time.time()
        directory = output_location + sep + str(int(ts)) ;
        makedirs(directory)
        fileObject=open(output_location + sep + str(int(ts)) + sep + 'pybrain_RBF','w')
        pickle.dump(trainer,fileObject)
        pickle.dump(fnn,fileObject)
        fileObject.close()
Example #38
    def model_net(self, fields, datas=None):
        # Normalize the data to be processed so that large values do not swamp small ones
        # https://www.jianshu.com/p/682c24aef525 Data analysis with Python, part 4: basic DataFrame operations in pandas
        # Normalization: https://www.zhihu.com/question/57509028
        # What is the difference between standardization and normalization? https://www.zhihu.com/question/20467170
        # The difference between sklearn's preprocessing fit_transform() and transform(): http://blog.csdn.net/quiet_girl/article/details/72517053
        # Worth understanding how these are actually implemented
        from sklearn.preprocessing import MinMaxScaler
        from pybrain.structure import SoftmaxLayer
        from pybrain.datasets import ClassificationDataSet
        from pybrain.tools.shortcuts import buildNetwork
        from pybrain.supervised.trainers import BackpropTrainer
        from pybrain.utilities import percentError
        from pybrain.structure import TanhLayer

        scaler = MinMaxScaler()
        datas[fields] = scaler.fit_transform(datas[fields])

        tran_data = datas[fields].values
        tran_target = datas['Flag'].values
        tran_label = ['Sell', 'Hold', 'Buy']

        class_datas = ClassificationDataSet(6,
                                            1,
                                            nb_classes=3,
                                            class_labels=tran_label)
        print(type(tran_target))
        print(tran_target)
        for i in range(len(tran_data)):
            class_datas.appendLinked(tran_data[i], tran_target[i])

        tstdata_temp, trndata_temp = class_datas.splitWithProportion(0.25)

        print(len(tstdata_temp), len(trndata_temp))

        tstdata = ClassificationDataSet(6,
                                        1,
                                        nb_classes=3,
                                        class_labels=tran_label)
        trndata = ClassificationDataSet(6,
                                        1,
                                        nb_classes=3,
                                        class_labels=tran_label)

        for n in range(0, trndata_temp.getLength()):
            trndata.appendLinked(
                trndata_temp.getSample(n)[0],
                trndata_temp.getSample(n)[1])

        for n in range(0, tstdata_temp.getLength()):
            tstdata.appendLinked(
                tstdata_temp.getSample(n)[0],
                tstdata_temp.getSample(n)[1])

        tstdata._convertToOneOfMany()
        trndata._convertToOneOfMany()

        tnet = buildNetwork(trndata.indim,
                            5,
                            trndata.outdim,
                            hiddenclass=TanhLayer,
                            outclass=SoftmaxLayer)
        trainer = BackpropTrainer(tnet,
                                  dataset=trndata,
                                  batchlearning=True,
                                  momentum=0.1,
                                  verbose=True,
                                  weightdecay=0.01)

        for i in range(5000):
            trainer.trainEpochs(20)
            trnresult = percentError(trainer.testOnClassData(),
                                     trndata['class'])
            testResult = percentError(trainer.testOnClassData(dataset=tstdata),
                                      tstdata['class'])
            print("epoch: %4d" % trainer.totalepochs, \
                  "  train error: %5.2f%%" % trnresult, \
                  "  test error: %5.2f%%" % testResult)

        return trainer, class_datas
Example #39
all_data.setField('input', raw_inputs)
all_data.setField('target', raw_target)
all_data.setField('class', raw_target)

test_data_temp, training_data_temp = all_data.splitWithProportion(0.33)

test_data = ClassificationDataSet(9, 1, nb_classes=2, class_labels=['Benign','Malignant'])
for n in xrange(0, test_data_temp.getLength()):
    test_data.addSample(test_data_temp.getSample(n)[0], test_data_temp.getSample(n)[1])

training_data = ClassificationDataSet(9, 1, nb_classes=2, class_labels=['Benign','Malignant'])
for n in xrange(0, training_data_temp.getLength()):
    training_data.addSample(training_data_temp.getSample(n)[0], training_data_temp.getSample(n)[1])

training_data._convertToOneOfMany()
test_data._convertToOneOfMany()

#********************End of Data Preparation***************************

#********************NN With BackPropagation***************************
fnn_backprop = buildNetwork(training_data.indim, 2, training_data.outdim, bias=True, outclass=SoftmaxLayer)

trainer = BackpropTrainer(fnn_backprop, dataset=training_data, momentum=0.1, verbose=True, weightdecay=0.01)

epochs = 10
epoch_v = []
trnerr_backprop = []
tsterr_backprop = []
for i in xrange(epochs):
    # If you set the 'verbose' trainer flag, this will print the total error as it goes.
def get_training_object(train_list, feature, duration, delta_bool, delta2_bool,
                        base_path):
    syl_list = []
    for t in train_list:
        syl_obj = Utility.load_obj(t)
        syl_list += syl_obj.syllables_list
    syllable_management_object = SyllableDatabaseManagement(
        syllable_list=syl_list)
    Y, names, tone, stress, syllable_short_long_type, syllalbe_position, phoneme, syllable_type = syllable_management_object.get_GP_LVM_training_data(
        feature_key=feature,
        dur_position=duration,
        # dur_position=[],
        delta_bool=delta_bool,
        delta2_bool=delta2_bool,
        num_sampling=50,
        get_only_stress='1')

    tone = np.array(tone)

    Y = np.array(Y)
    for r in range(len(Y[0])):
        Y[:, r] = preprocessing.normalize(Y[:, r])

    arr = np.arange(len(Y))
    np.random.shuffle(arr)

    label_feature = tone
    alldata = ClassificationDataSet(len(Y[0]),
                                    1,
                                    nb_classes=len(set(label_feature)))
    for a in arr:
        alldata.addSample(Y[a], label_feature[a])

    alldata._convertToOneOfMany()

    if Utility.is_file_exist('{}/GP_model.npy'.format(base_path)):

        model = Utility.load_obj('{}/GP_model.npy'.format(base_path))
        input_sensitivity = model.input_sensitivity()

        latent_data = np.array(
            Utility.load_obj('{}/GP_model.npy'.format(base_path)).X.mean)
        name_index = np.array(
            Utility.load_obj('{}/name_index.npy'.format(base_path)))

        latent_Y = []
        for n in names:
            ind = np.where(name_index == n)
            # print latent_data[ind][0].shape
            latent_Y.append(latent_data[ind][0])

        latent_Y = np.array(latent_Y)
        print latent_Y.shape

        for r in range(len(latent_Y[0])):
            # latent_Y[:,r] = preprocessing.normalize(latent_Y[:,r])
            latent_Y[:, r] = preprocessing.normalize(latent_Y[:, r] *
                                                     input_sensitivity[r])

        lat_data = ClassificationDataSet(len(latent_Y[0]),
                                         1,
                                         nb_classes=len(set(label_feature)))
        for a in arr:
            # print latent_Y[a], a
            lat_data.addSample(latent_Y[a], label_feature[a])

    else:
        lat_data = ClassificationDataSet(len(latent_Y[0]),
                                         1,
                                         nb_classes=len(set(label_feature)))

    lat_data._convertToOneOfMany()
    return (alldata, lat_data)
Example #41
def hillclimb(domain,costf):
    # Create a random solution
    sol=[random.randint(domain[i][0],domain[i][1]) for i in range(len(domain))]
    
    # Main loop
    while 1:
        # Create list of neighboring solutions
        neighbors=[]
        for j in range(len(domain)):
            # One away in each direction, staying inside the domain bounds
            if sol[j]>domain[j][0]:
                neighbors.append(sol[0:j]+[sol[j]-1]+sol[j+1:])
            if sol[j]<domain[j][1]:
                neighbors.append(sol[0:j]+[sol[j]+1]+sol[j+1:])

        # See what the best solution amongst the neighbors is
        current=costf(sol)
        best=current
        for j in range(len(neighbors)):
            cost=costf(neighbors[j])
            if cost<best:
                best=cost
                sol=neighbors[j]
            # If there's no improvement, then we've reached the top
            if best==current:
                break
    return sol
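
# A quick usage sketch of hillclimb (the cost function below is a made-up toy,
# not part of the original example); it minimizes the squared distance of an
# integer vector from (5, 5, 5) over the box [0, 10]^3. Assumes `random` is
# imported.
def toy_cost(sol):
    return sum((v - 5) ** 2 for v in sol)

print hillclimb([(0, 10)] * 3, toy_cost)  # reaches [5, 5, 5] from a random start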

def plot_learning_curve(x, training_error, test_error, graph_title, graph_xlabel, graph_ylabel, ylim=None, xlim=None):

    plt.figure()
    plt.title(graph_title)
    if ylim is not None:
        plt.ylim(*ylim)
    if xlim is not None:
        plt.xlim(*xlim)
    plt.xlabel(graph_xlabel)
    plt.ylabel(graph_ylabel)

    # Convert to arrays so the +/- std shading below also works on plain lists
    x = np.asarray(x)
    training_error = np.asarray(training_error)
    test_error = np.asarray(test_error)

    train_error_std = np.std(training_error)
    test_error_std = np.std(test_error)

    plt.grid()

    plt.fill_between(x, training_error - train_error_std,
                     training_error + train_error_std, alpha=0.1,
                     color="r")
    plt.fill_between(x, test_error - test_error_std,
                     test_error + test_error_std, alpha=0.1, color="g")
    plt.plot(x, training_error, 'o-', color="r", label="Training score")
    plt.plot(x, test_error, 'o-', color="g", label="Test score")

    plt.legend(loc="best")
    plt.savefig('plots/'+graph_title+'.png')
    plt.close()
    #plt.show()
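
# A toy call illustrating the signature (the values below are invented); like
# the real calls later in the script, it assumes a 'plots/' directory exists.
demo_x = np.arange(1, 11)
demo_trn = np.linspace(30.0, 5.0, 10)
demo_tst = np.linspace(35.0, 12.0, 10)
plot_learning_curve(demo_x, demo_trn, demo_tst,
                    "Toy learning curve", "Epochs", "Error %", ylim=(0, 40))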
#************************End of Functions**************************************************

#************************Start Data Prep********************************************
raw_data = np.genfromtxt('BreastCancerWisconsinDataset_modified.txt', delimiter=",", skip_header=1)
raw_inputs = raw_data[:,0:-1]
raw_target = raw_data[:,9:]

assert (raw_inputs.shape[0] == raw_target.shape[0]),"Inputs count and target count do not match"

all_data = ClassificationDataSet(9, 1, nb_classes=2, class_labels=['Benign','Malignant'])

all_data.setField('input', raw_inputs)
all_data.setField('target', raw_target)
all_data.setField('class', raw_target)

test_data_temp, training_data_temp = all_data.splitWithProportion(0.33)
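
# Note: splitWithProportion returns plain SupervisedDataSets and drops the
# class bookkeeping, which is why the samples are copied into fresh
# ClassificationDataSets below before the one-of-many conversion.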

test_data = ClassificationDataSet(9, 1, nb_classes=2, class_labels=['Benign','Malignant'])
for n in xrange(0, test_data_temp.getLength()):
    test_data.addSample(test_data_temp.getSample(n)[0], test_data_temp.getSample(n)[1])

training_data = ClassificationDataSet(9, 1, nb_classes=2, class_labels=['Benign','Malignant'])
for n in xrange(0, training_data_temp.getLength()):
    training_data.addSample(training_data_temp.getSample(n)[0], training_data_temp.getSample(n)[1])

training_data._convertToOneOfMany()
test_data._convertToOneOfMany()

#********************End of Data Preparation***************************

#********************NN With GA***************************
def fitFunction (net, dataset=training_data, targetClass=training_data['class']):
    # Evaluate the dataset that was actually passed in (the original always
    # scored training_data, which made the reported test error meaningless).
    error = percentError(testOnClassData_custom(net, dataset=dataset), targetClass)
    return error
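
# testOnClassData_custom and geneticoptimize are used below but not defined in
# this excerpt. Minimal sketches of what they presumably do follow; both are
# assumptions, not the original implementations. testOnClassData_custom is
# assumed to mirror BackpropTrainer.testOnClassData for an arbitrary network:
def testOnClassData_custom(net, dataset):
    out = net.activateOnDataset(dataset)
    return out.argmax(axis=1).tolist()  # winner-take-all class per sample

# geneticoptimize is assumed to follow the classic "Programming Collective
# Intelligence" recipe, loading each candidate weight vector into the network
# before scoring it with costf (requires `random` to be imported):
def geneticoptimize(maxiter, domain, net, costf, popsize=100, step=1,
                    mutprob=0.2, elite=0.2):
    def mutate(vec):
        i = random.randint(0, len(domain) - 1)
        delta = -step if random.random() < 0.5 else step
        return vec[0:i] + [vec[i] + delta] + vec[i + 1:]

    def crossover(r1, r2):
        i = random.randint(1, len(domain) - 2)
        return r1[0:i] + r2[i:]

    # random initial population of weight vectors
    pop = [[random.uniform(d[0], d[1]) for d in domain]
           for _ in range(popsize)]
    topelite = int(elite * popsize)

    for _ in range(maxiter):
        scores = []
        for v in pop:
            net.params[:] = v          # load candidate weights into the net
            scores.append((costf(net), v))
        scores.sort()
        ranked = [v for (s, v) in scores]
        pop = ranked[0:topelite]       # elitism: keep the best candidates
        while len(pop) < popsize:      # refill by mutation and crossover
            if random.random() < mutprob:
                pop.append(mutate(ranked[random.randint(0, topelite - 1)]))
            else:
                pop.append(crossover(ranked[random.randint(0, topelite - 1)],
                                     ranked[random.randint(0, topelite - 1)]))
    return scores[0][1]                # best weight vector found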

stepSize = [.05, .5, 1]
for s in stepSize:
    fnn_ga = buildNetwork(training_data.indim, 2, training_data.outdim, bias=True, outclass=SoftmaxLayer)

    domain = [(-1,1)]*len(fnn_ga.params)
    #print domain
    epochs = 20
    epoch_v = []
    trnerr_ga = []
    tsterr_ga = []
    iteration = 5
    for i in xrange(epochs):
        winner = geneticoptimize(iteration,domain,fnn_ga,fitFunction,popsize=100,step=s, mutprob=0.2,elite=0.2)
        fnn_ga.params[:] = winner[:]
        training_error = fitFunction(fnn_ga, dataset=training_data, targetClass=training_data['class'])
        test_error = fitFunction(fnn_ga, dataset=test_data, targetClass=test_data['class'])
        epoch_v.append(i*iteration)
        trnerr_ga.append(training_error)
        tsterr_ga.append(test_error)
        print ("This is the training and test error at the epoch: ", training_error, test_error, i*iteration)


    ylim = (0, 70)
    print ("This is epoch_value",epoch_v)
    print ("This is training ga",trnerr_ga)
    print ("This is test ga",tsterr_ga)
    plot_learning_curve(epoch_v, trnerr_ga, tsterr_ga, "Neural Network With GA_step_"+str(s), "Epochs", "Error %", ylim, xlim=None)

#*****************End of GA NN*******************************

print ("This is the length of the training and test data, respectively", len(training_data), len(test_data))
print (training_data.indim, training_data.outdim)
print ("This is the shape of the input", all_data['input'].shape)
print ("This is the shape of the target", all_data['target'].shape)
print ("This is the shape of the class", all_data['class'].shape)
print ("This is count of classes", all_data.nClasses)
print ("Here is the statistics on the class", all_data.calculateStatistics())
print ("Here the linked fields", all_data.link)
print ("This is the shape of the input in training", training_data['input'].shape)
print ("This is the shape of the target in training", training_data['target'].shape)
print ("This is the shape of the class in training", training_data['class'].shape)
print ("This is the shape of the input in training", test_data['input'].shape)
print ("This is the shape of the target in training", test_data['target'].shape)
print ("This is the shape of the class in training", test_data['class'].shape)
fnn = buildNetwork(trndata.indim,
                   10,
                   trndata.outdim,
                   hiddenclass=TanhLayer,
                   outclass=SoftmaxLayer)

trainer = BackpropTrainer(fnn,
                          dataset=trndata,
                          momentum=0.05,
                          verbose=True,
                          weightdecay=0.01)

predictdata = ClassificationDataSet(5400, 1, nb_classes=29)
for i in range(0, len(norm_test_X)):
    predictdata.addSample(norm_test_X[i], [0])

predictdata._convertToOneOfMany(
)  # this is still needed to make the fnn feel comfy

for i in range(2000):
    trainer.trainEpochs(1)
    trnresult = percentError(trainer.testOnClassData(), trndata['class'])
    tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
                             tstdata['class'])

    print "epoch: %4d" % trainer.totalepochs, \
          "  train error: %5.2f%%" % trnresult, \
          "  test error: %5.2f%%" % tstresult

    out = fnn.activateOnDataset(predictdata)
    out = out.argmax(axis=1)  # the highest output activation gives the class
    result = [labels[e] for e in out]
print result
def NNBackPropCustom(trainInputs,
                     trainTarget,
                     testInputs,
                     testTarget,
                     inputDim,
                     targetDim,
                     numClass,
                     classLabels,
                     bias=True,
                     numHiddenLayers=2,  # despite the name: the hidden *unit* count passed to buildNetwork
                     numEpoch=10,
                     momentum=0.1,
                     weightdecay=0.01):
    #NN Data Preparation
    assert (
        trainInputs.shape[0] == trainTarget.shape[0]
    ), "Inputs count and target count for your training data do not match for NN Analysis"
    assert (
        testInputs.shape[0] == testTarget.shape[0]
    ), "Inputs count and target count for your test data do not match for NN Analysis"

    training_data = ClassificationDataSet(inputDim,
                                          targetDim,
                                          nb_classes=numClass,
                                          class_labels=classLabels)
    test_data = ClassificationDataSet(inputDim,
                                      targetDim,
                                      nb_classes=numClass,
                                      class_labels=classLabels)

    training_data.setField('input', trainInputs)
    training_data.setField('target', trainTarget)
    training_data.setField('class', trainTarget)

    test_data.setField('input', testInputs)
    test_data.setField('target', testTarget)
    test_data.setField('class', testTarget)

    training_data._convertToOneOfMany()
    test_data._convertToOneOfMany()

    # NN With BackPropagation
    fnn_backprop = buildNetwork(training_data.indim,
                                numHiddenLayers,
                                training_data.outdim,
                                bias=bias,
                                outclass=SoftmaxLayer)

    trainer = BackpropTrainer(fnn_backprop,
                              dataset=training_data,
                              momentum=momentum,
                              verbose=True,
                              weightdecay=weightdecay)

    epochs = numEpoch
    epoch_v = []
    trnerr_backprop = []
    tsterr_backprop = []
    for i in xrange(epochs):
        # If you set the 'verbose' trainer flag, this will print the total error as it goes.
        trainer.trainEpochs(1)

        trnresult = percentError(trainer.testOnClassData(),
                                 training_data['class'])
        tstresult = percentError(trainer.testOnClassData(dataset=test_data),
                                 test_data['class'])
        print("epoch: %4d" % trainer.totalepochs,
              " train error: %5.2f%%" % trnresult,
              " test error: %5.2f%%" % tstresult)
        epoch_v.append(trainer.totalepochs)
        trnerr_backprop.append(trnresult)
        tsterr_backprop.append(tstresult)

    return epoch_v, trnerr_backprop, tsterr_backprop
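
# A usage sketch on synthetic data (every value below is a made-up toy input,
# not part of the original experiment). Assumes numpy is imported as np:
rng = np.random.RandomState(0)
toyXtr = rng.rand(100, 4)
toyYtr = (toyXtr.sum(axis=1) > 2).astype(int).reshape(-1, 1)
toyXte = rng.rand(40, 4)
toyYte = (toyXte.sum(axis=1) > 2).astype(int).reshape(-1, 1)
curve = NNBackPropCustom(toyXtr, toyYtr, toyXte, toyYte, inputDim=4,
                         targetDim=1, numClass=2, classLabels=['neg', 'pos'],
                         numEpoch=5)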
Exemple #44
0
def main():

	in_data=np.genfromtxt('logit-train.csv', delimiter = ',')
	out_data = np.genfromtxt('logit-test.csv', delimiter = ',')

	#getting in the data from csv files and making it suitable for further action.
	in_data=in_data[~np.isnan(in_data).any(1)]
	t=len(in_data[0,:])
	y_train=np.array(in_data[0:,t-1])
	x_train=np.array(in_data[0:,:t-1])

	scaler = preprocessing.StandardScaler().fit(x_train) #standardization plays an important role in all NN algos

	x_train=scaler.transform(x_train) #final x_train

	out_data=out_data[~np.isnan(out_data).any(1)]
	t=len(out_data[0,:])
	y_test=np.array(out_data[0:,t-1])
	x_test=np.array(out_data[0:,:t-1])

	x_test=scaler.transform(x_test) # final x_test

	alltraindata=ClassificationDataSet(t-1,1,nb_classes=2)
	for count in range(len((in_data))):
		alltraindata.addSample(x_train[count],[y_train[count]])

	alltraindata._convertToOneOfMany(bounds=[0,1])

	alltestdata=ClassificationDataSet(t-1,1,nb_classes=2)
	for count in range(len((out_data))):
		alltestdata.addSample(x_test[count],[y_test[count]])

	alltestdata._convertToOneOfMany(bounds=[0,1])
	
	numRBFCenters = 10 #the 'h' value
	
	rbf=RBFNN(alltraindata.indim, alltraindata.outdim, numRBFCenters)

	rbf.train(alltraindata['input'],alltraindata['target'])
	
	testdata_target=rbf.test(alltestdata['input']) #values obtained after testing, T is a 'n x outdim' matrix
	testdata_target = testdata_target.argmax(axis=1)  # the highest output activation gives the class. Selects the class predicted
  	#testdata_target = testdata_target.reshape(len(in_data),1)	

	#compare to y_test to obtain the accuracy.
	
	# count=0
	# for x in range(len(y_test)):
	# 	if testdata_target[x] == y_test[x]:
	# 		count+=1
	# tstresult2=float(count)/float(len(y_test)) * 100

	tstresult = percentError(testdata_target,alltestdata['class'])

	print "Accuracy on test data is: %5.2f%%," % (100-tstresult)

	# any() on a scalar raises a TypeError here; a plain truthiness test
	# binarizes the labels as intended
	for x in range(len(y_test)):
		if y_test[x]:
			y_test[x] = 1
		else:
			y_test[x] = 0

	average_label = ['micro','macro','weighted']
	for label in average_label: 
		f1 = f1_score(y_test, testdata_target, average=label)
		print "f1 score (%s)" %label, "is ", f1
Exemple #45
0
def main():

    in_data = np.genfromtxt('logit-train.csv', delimiter=',')
    out_data = np.genfromtxt('logit-test.csv', delimiter=',')

    #getting in the data from csv files and making it suitable for further action.
    in_data = in_data[~np.isnan(in_data).any(1)]
    t = len(in_data[0, :])
    y_train = np.array(in_data[0:, t - 1])
    x_train = np.array(in_data[0:, :t - 1])

    scaler = preprocessing.StandardScaler().fit(
        x_train)  #standardization plays an important role in all NN algos

    x_train = scaler.transform(x_train)  #final x_train

    out_data = out_data[~np.isnan(out_data).any(1)]
    t = len(out_data[0, :])
    y_test = np.array(out_data[0:, t - 1])
    x_test = np.array(out_data[0:, :t - 1])

    x_test = scaler.transform(x_test)  # final x_test

    alltraindata = ClassificationDataSet(t - 1, 1, nb_classes=2)
    for count in range(len((in_data))):
        alltraindata.addSample(x_train[count], [y_train[count]])

    alltraindata._convertToOneOfMany(bounds=[0, 1])

    alltestdata = ClassificationDataSet(t - 1, 1, nb_classes=2)
    for count in range(len((out_data))):
        alltestdata.addSample(x_test[count], [y_test[count]])

    alltestdata._convertToOneOfMany(bounds=[0, 1])

    numRBFCenters = 50

    kmeans = KMeans(n_clusters=numRBFCenters
                    )  # KMeans to find the centroids for the RBF neurons.
    kmeans.fit(alltraindata['input'])
    centers = kmeans.cluster_centers_
    #centers.shape = (numRBFCenters, n_features)
    cluster_distance = kmeans.transform(alltraindata['input'])
    #cluster_distance.shape = (n_samples, numRBFCenters); kmeans.labels_.shape = (n_samples,)

    # Calculating the sigma/smoothness parameter of each Radial Basis Function
    # It is the variance/standard deviation of the points of each cluster, thus giving a value for each RBFcenter
    distance_std = []
    for lab in range(numRBFCenters):
        # reset per cluster; the original accumulated distances across all
        # clusters, so every sigma after the first mixed data from other clusters
        distance_within_cluster = []
        for x, label in enumerate(kmeans.labels_):
            if label == lab:
                distance_within_cluster.append(cluster_distance[x][label])
        distance_std.append(np.std(distance_within_cluster))

    rbf = RBFNN(
        alltraindata.indim, alltraindata.outdim, numRBFCenters, centers,
        distance_std)  # Passing the centers array for RBFNN initialization

    rbf.train(alltraindata['input'], alltraindata['target'])

    testdata_target = rbf.test(
        alltestdata['input']
    )  #values obtained after testing, T is a 'n x outdim' matrix
    testdata_target = testdata_target.argmax(
        axis=1
    )  # the highest output activation gives the class. Selects the class predicted

    traindata_target = rbf.test(alltraindata['input'])
    traindata_target = traindata_target.argmax(
        axis=1
    )  # the highest output activation gives the class. Selects the class predicted

    #compare to y_test to obtain the accuracy.

    # count=0
    # for x in range(len(y_test)):
    # 	if testdata_target[x] == y_test[x]:
    # 		count+=1
    # tstresult2=float(count)/float(len(y_test)) * 100

    trnresult = percentError(traindata_target, alltraindata['class'])
    tstresult = percentError(testdata_target, alltestdata['class'])

    print "Accuracy on train data is: %5.2f%%," % (100 - trnresult)
    print "Accuracy on test data is: %5.2f%%," % (100 - tstresult)

    # any() on a scalar raises a TypeError here; a plain truthiness test
    # binarizes the labels as intended
    for x in range(len(y_test)):
        if y_test[x]:
            y_test[x] = 1
        else:
            y_test[x] = 0

    average_label = ['micro', 'macro', 'weighted']
    for label in average_label:
        f1 = f1_score(y_test, testdata_target, average=label)
        print "f1 score (%s)" % label, "is ", f1
Exemple #46
0
def main():
    """
    CLI Arguments allowed:
        --display_graphs       Displays graphs
        --retrain              Trains a new model
        --cross-validate       Runs cross validation to fine tune the model
        --test=validation_set  Tests the latest trained model against the validation set
        --test=test_set        Tests the latest trained model against the test set
    """

    global trainer, classifier
    inputs_train, targets_train, inputs_valid, targets_valid, inputs_test, targets_test = load_parsed_data()

    if '--display_graphs' in sys.argv:
        display_graphs = True

    print('using {} percent of all data in corpus'.format(PERCENTAGE_DATA_SET_TO_USE*100))
    print('using {} most common words as features'.format(NUM_FEATURES))

    if not trained_model_exists() or '--retrain' in sys.argv:
        # slice indices must be integers, so the fractional counts are truncated
        train_features, valid_features, test_features = extract_features(
            inputs_train[:int(len(inputs_train)*PERCENTAGE_DATA_SET_TO_USE)],
            targets_train[:int(len(targets_train)*PERCENTAGE_DATA_SET_TO_USE)],
            inputs_valid[:int(len(inputs_valid)*PERCENTAGE_DATA_SET_TO_USE)],
            targets_valid[:int(len(targets_valid)*PERCENTAGE_DATA_SET_TO_USE)],
            inputs_test[:int(len(inputs_test)*PERCENTAGE_DATA_SET_TO_USE)],
            targets_test[:int(len(targets_test)*PERCENTAGE_DATA_SET_TO_USE)]
        )

        save_features(train_features, valid_features, test_features)
        pca = RandomizedPCA(n_components=N_COMPONENTS, whiten=False).fit(train_features)
        save_pca(pca)
        print ("Saved PCA")

        X_train = pca.transform(train_features)
        X_valid = pca.transform(valid_features)
        pca = None
        print ("Created PCAd features")

        valid_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_valid)):
            valid_data.addSample(X_valid[i], targets_valid[i])  # was targets_test: a label mismatch
        valid_data._convertToOneOfMany()
        X_valid = None

        train_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_train)):
            train_data.addSample( X_train[i], targets_train[i])
        train_data._convertToOneOfMany()
        X_train = None

        classifier = buildNetwork( train_data.indim, N_HIDDEN, train_data.outdim, outclass=SoftmaxLayer)
        trainer = BackpropTrainer( classifier, dataset=train_data, momentum=0.1, learningrate=0.01 , verbose=True)
        train_model(train_data, valid_data)

        save_model(classifier)
        train_data = None
        valid_data = None

    else:
        train_features, valid_features, test_features = load_features()
        pca = load_pca()
        X_train = pca.transform(train_features)

        pca = None
        print ("Created PCAd features")

        train_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_train)):
            train_data.addSample( X_train[i], targets_train[i])
        train_data._convertToOneOfMany()
        X_train = None

        classifier = load_trained_model()
        trainer = BackpropTrainer( classifier, dataset=train_data, momentum=0.1, learningrate=0.01 , verbose=True)


    if '--test=validation_set' in sys.argv:
        print ("Running against validation set")
        pca = load_pca()
        X_valid = pca.transform(valid_features)
        pca = None
        valid_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_valid)):
            valid_data.addSample( X_valid[i], targets_valid[i])  # was targets_test: a label mismatch
        valid_data._convertToOneOfMany()
        X_valid = None

        make_prediction(valid_data)


    if '--test=test_set' in sys.argv:
        print ("Running against test set")
        pca = load_pca()
        X_test = pca.transform(test_features)
        pca = None
        test_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_test)):
            test_data.addSample( X_test[i], targets_test[i])
        test_data._convertToOneOfMany()
        y_pred = trainer.testOnClassData(dataset=test_data)
        plot_precision_and_recall(y_pred, targets_test[:int(len(targets_test) * PERCENTAGE_DATA_SET_TO_USE)])
        X_test = None

        make_prediction(test_data)
Exemple #47
0
class Brain():
    """
    Constructor.
    Input: hidden_nodes - number of hidden nodes used in the neuralnetwork
    """
    def __init__(self, hidden_nodes=30):
        """
        parameters to buildNetwork are inputs, hidden layers, output
        bias = true allows for a bias unit to be added in each neural net layer
        hiddenclass represents the method used by the hidden layer
        """
        # Regression

        # self.classifier_neural_net = buildNetwork(12, hidden_nodes, 1, bias=True, hiddenclass=TanhLayer)
        # # Initializing dataset for supervised regression training
        # self.data_sets = SupervisedDataSet(12, 1)
        # # classification_trainer uses backpropagation supervised training method for training the newural network
        # self.classification_trainer = BackpropTrainer(self.classifier_neural_net, self.data_sets)

        # Classification
        self.classifier_neural_net = buildNetwork(12,
                                                  hidden_nodes,
                                                  3,
                                                  outclass=SoftmaxLayer,
                                                  hiddenclass=TanhLayer)
        self.data_sets = ClassificationDataSet(12, 1, nb_classes=3)
        self.classification_trainer = BackpropTrainer(
            self.classifier_neural_net,
            self.data_sets,
            momentum=0.1,
            verbose=True,
            weightdecay=0.01)

    """
    Method to add a sample image to the datasets for training the classifier
    """

    def add_image_to_train(self, image_file, group_id):
        tto = io.twelve_tone(image_file)
        print(tto)
        # regression
        # self.data_sets.addSample(tto, (group_id,))

        # classification
        self.data_sets.addSample(tto, [group_id])

    def train(self):
        #classification_trainer.trainUntilConvergence()
        #this will take forever (possibly literally in the pathological case)

        # classification
        self.data_sets._convertToOneOfMany()

        # self.classification_trainer.trainEpochs(30)
        print("Converging...This is going to take long!")
        self.classification_trainer.trainUntilConvergence()

    def save(self, file_name="classifier.brain"):
        with open(get_path(file_name), 'wb') as file_pointer:
            pickle.dump(self.classifier_neural_net, file_pointer)

    def load(self, file_name="classifier.brain"):
        with open(get_path(file_name), 'rb') as file_pointer:
            self.classifier_neural_net = pickle.load(file_pointer)

    def accuracy(self):
        if len(self.data_sets) == 0:
            print "No data_sets found. Maybe you loaded the classifier from a file?"
            return

        # regression
        # tstresult = self.classifier_neural_net.activateOnDataset(self.data_sets)
        # print self.data_sets['target']
        # tstresult = mean_squared_error(self.data_sets['target'], tstresult)
        # print "epoch: %4d" % self.classification_trainer.totalepochs, \
        #       "trainer error: %5.2f%%" % tstresult, \
        #       "trainer accuracy: %5.2f%%" % (100-tstresult)

        # classification
        tstresult = percentError(
            self.classification_trainer.testOnClassData(
                dataset=self.data_sets), self.data_sets['class'])

        print "epoch: %4d" % self.classification_trainer.totalepochs, \
              "trainer error: %5.2f%%" % tstresult, \
              "trainer accuracy: %5.2f%%" % (100-tstresult)

    def classify(self, image_file):
        score = self.classifier_neural_net.activate(io.twelve_tone(image_file))
        print(score)

        # regression
        # score = round(score)

        # classification
        score = max(xrange(len(score)), key=score.__getitem__)

        print(score)
        if score == 0:
            return "chick-peas"
        elif score == 1:
            return "green-peas"
        else:
            return "rice"
def ANN(X_train, Y_train, X_test, Y_test, *args):
    """
    An Artificial Neural Network, based on the python library pybrain. In the future this function
    should be modified to use the SkyNet ANN code instead.
    
    INPUTS:
    X_train - An array containing the features of the training set, of size (N_samples, N_features)
    Y_train - An array containing the class labels of the training set, of size (N_samples,)
    X_test - An array containing the features of the testing set, of size (N_samples, N_features)
    Y_test - An array containing the class labels of the testing set, of size (N_samples,)
    *args - Currently unused. In the future could specify the network architecture and activation
                functions at each node.
    
    OUTPUTS:
    probs - an array containing the probabilities for each class for each member of the testing set,
                of size (N_samples, N_classes)
    """
    
    Y_train_copy = Y_train.copy()
    Y_test_copy = Y_test.copy()

    #Convert class labels from 1,2,3 to 0,1,2 as _convertToOneOfMany requires this
    Y_train_copy[(Y_train_copy==1)]=0
    Y_train_copy[(Y_train_copy==2)]=1
    Y_train_copy[(Y_train_copy==3)]=2

    Y_test_copy[(Y_test_copy==1)]=0
    Y_test_copy[(Y_test_copy==2)]=1
    Y_test_copy[(Y_test_copy==3)]=2
    
    #Put all the data in datasets as required by pybrain
    Y_train_copy = np.expand_dims(Y_train_copy, axis=1)
    Y_test_copy = np.expand_dims(Y_test_copy, axis=1)
    traindata = ClassificationDataSet(X_train.shape[1], nb_classes = len(np.unique(Y_train_copy))) #Preallocate dataset
    traindata.setField('input', X_train) #Add named fields
    traindata.setField('target', Y_train_copy) 
    traindata._convertToOneOfMany() #Convert classes 0, 1, 2 to 001, 010, 100

    testdata = ClassificationDataSet(X_test.shape[1], nb_classes=len(np.unique(Y_test_copy)))
    testdata.setField('input', X_test)
    testdata.setField('target', Y_test_copy)
    testdata._convertToOneOfMany()

    #Create ANN with n_features inputs, n_classes outputs and HL_size nodes in hidden layers
    N = pb.FeedForwardNetwork()
    HL_size1 = X_train.shape[1]*2+2
    HL_size2 = X_train.shape[1]*2+2
    
    #Create layers and connections
    in_layer = LinearLayer(X_train.shape[1])
    hidden_layer1 = SigmoidLayer(HL_size1)
    hidden_layer2 = SigmoidLayer(HL_size2)
    out_layer = SoftmaxLayer(len(np.unique(Y_test_copy))) #Normalizes output so as to sum to 1

    in_to_hidden1 = FullConnection(in_layer, hidden_layer1)
    hidden1_to_hidden2 = FullConnection(hidden_layer1, hidden_layer2)
    hidden2_to_out = FullConnection(hidden_layer2, out_layer)

    #Connect them up
    N.addInputModule(in_layer)
    N.addModule(hidden_layer1)
    N.addModule(hidden_layer2)
    N.addOutputModule(out_layer)
    N.addConnection(in_to_hidden1)
    N.addConnection(hidden1_to_hidden2)
    N.addConnection(hidden2_to_out)

    N.sortModules()

    #Create the backpropagation object
    trainer = BackpropTrainer(N, dataset=traindata,  momentum=0.1, verbose=False, weightdecay=0.01)

    #Train the network on the data for some number of epochs
    for counter in np.arange(40):
        trainer.train()

    #Run the network on testing data
    probs = N.activate(X_test[0, :])
    probs = np.expand_dims(probs, axis=0)

    for counter in np.arange(X_test.shape[0]-1):
        next_probs = N.activate(X_test[counter+1, :])
        next_probs = np.expand_dims(next_probs, axis=0)
        probs = np.append(probs, next_probs, axis=0)
    
    return probs
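
# A toy call with random data (shapes and values invented; labels drawn from
# {1, 2, 3}, which the relabelling at the top of ANN expects). Assumes numpy
# is imported as np:
rng42 = np.random.RandomState(42)
toy_probs = ANN(rng42.rand(90, 6), rng42.randint(1, 4, size=90),
                rng42.rand(30, 6), rng42.randint(1, 4, size=30))
print toy_probs.shape  # (30, 3): one probability per class per test sample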
ds.calculateStatistics()

# split of training and testing dataset
tstdata_temp, trndata_temp = ds.splitWithProportion(0.5)
tstdata = ClassificationDataSet(30, 1, nb_classes=2)
for n in range(0, tstdata_temp.getLength()):
    tstdata.appendLinked(
        tstdata_temp.getSample(n)[0],
        tstdata_temp.getSample(n)[1])

trndata = ClassificationDataSet(30, 1, nb_classes=2)
for n in range(0, trndata_temp.getLength()):
    trndata.appendLinked(
        trndata_temp.getSample(n)[0],
        trndata_temp.getSample(n)[1])
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

##### build net and training
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import SoftmaxLayer
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.utilities import percentError

n_hidden = 500
bp_nn = buildNetwork(trndata.indim,
                     n_hidden,
                     trndata.outdim,
                     outclass=SoftmaxLayer)
trainer = BackpropTrainer(bp_nn,
                          dataset=trndata,
                          # the call was truncated at this point in the source;
                          # the remaining arguments are assumed, following the
                          # pattern used in the other examples
                          momentum=0.1,
                          verbose=True,
                          weightdecay=0.01)
Exemple #50
0
features_train = features_pd.iloc[:train_count]
# print(features_train.describe())
features_test = features_pd.iloc[train_count:]
# print(features_test.describe())
x_train, x_test, y_train, y_test = train_test_split(features_train,
                                                    labels,
                                                    test_size=0.2,
                                                    random_state=1)
X = (x_train, x_test, y_train, y_test)
print(y_train)

dsTrain = ClassificationDataSet(18, 1, nb_classes=2)
rows = len(x_train)
for row in range(rows):
    dsTrain.addSample(tuple(x_train.iloc[row]), y_train.iloc[row])
dsTrain._convertToOneOfMany()

dsTest = ClassificationDataSet(18, 1, nb_classes=2)
rows = len(x_test)
for row in range(rows):
    dsTest.addSample(tuple(x_test.iloc[row]), y_test.iloc[row])
dsTest._convertToOneOfMany()

if True:
    fnn = buildNetwork(18, 20, 20, 2, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(fnn,
                              dataset=dsTrain,
                              momentum=0.1,
                              verbose=True,
                              weightdecay=0.01)
Exemple #51
0
        trnresult = percentError(trainer.testOnClassData(), trndata['class'])
        tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
                                 tstdata['class'])
        print "epoch: %4d" % trainer.totalepochs, \
              "  train error: %5.2f%%" % trnresult, \
              "  test error: %5.2f%%" % tstresult
        if tstresult < previous_error:
            fnn = try_fnn
            previous_error = tstresult

    NetworkWriter.writeToFile(fnn, 'nn.xml')

    log.warning('Activating NeuralNetwork...')
    nginx_log = ClassificationDataSet(len(dictionary), 1, nb_classes=2)
    add_samples_to_training_set(nginx_log, options.log_file, 0)
    nginx_log._convertToOneOfMany(
    )  # this is still needed to make the fnn feel comfy

    out = fnn.activateOnDataset(nginx_log)
    out = out.argmax(axis=1)  # the highest output activation gives the class

    with open(options.log_file) as log_file:
        cnt = 0
        for line in log_file:
            try:
                entry = LogEntry(*nginx_log_re.match(line).groups())
                if out[cnt]:
                    print "BOT:  ",
                else:
                    print "GOOD: ",
                print "{0}".format(entry)
                cnt += 1
            except AttributeError:
                # the original except clause was cut off in this excerpt; lines
                # that do not match the nginx regex are assumed to be skipped
                pass
Exemple #52
0
def trainet2(data, nhide=8, nhide1=8, epo=10, wd=.1, fn=''):

    alldata = data
    tstdata_temp, trndata_temp = alldata.splitWithProportion(0.5)

    tstdata = ClassificationDataSet(alldata.indim, nb_classes=alldata.nClasses)
    for n in range(0, tstdata_temp.getLength()):
        tstdata.addSample(
            tstdata_temp.getSample(n)[0],
            tstdata_temp.getSample(n)[1])

    trndata = ClassificationDataSet(alldata.indim, nb_classes=alldata.nClasses)
    for n in range(0, trndata_temp.getLength()):
        trndata.addSample(
            trndata_temp.getSample(n)[0],
            trndata_temp.getSample(n)[1])

    tstdata._convertToOneOfMany()
    trndata._convertToOneOfMany()

    net = FeedForwardNetwork()
    inLayer = LinearLayer(trndata.indim)
    hiddenLayer = TanhLayer(nhide)
    hiddenLayer1 = TanhLayer(nhide1)
    outLayer = LinearLayer(trndata.outdim)

    net.addInputModule(inLayer)
    net.addModule(hiddenLayer)
    net.addModule(hiddenLayer1)
    net.addOutputModule(outLayer)

    in_to_hidden = FullConnection(inLayer, hiddenLayer)
    hidden_to_hidden = FullConnection(hiddenLayer, hiddenLayer1)
    hidden_to_out = FullConnection(hiddenLayer1, outLayer)

    net.addConnection(in_to_hidden)
    net.addConnection(hidden_to_hidden)
    net.addConnection(hidden_to_out)

    net.sortModules()
    # note: setting .bias after construction has no effect in pybrain; bias
    # units have to be added as BiasUnit modules with their own connections
    net.bias = True

    trainer = BackpropTrainer(net,
                              dataset=trndata,
                              verbose=True,
                              weightdecay=wd,
                              momentum=0.1)
    edata = []
    msedata = []
    for i in range(epo):
        trainer.trainEpochs(1)
        trnresult = percentError(trainer.testOnClassData(), trndata['class'])
        tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
                                 tstdata['class'])
        tod = trainer.testOnData(verbose=False)
        print("epoch: %4d" % trainer.totalepochs,
              "  train error: %5.2f%%" % trnresult,
              "  test error: %5.2f%%" % tstresult, "  layers: ", nhide1,
              "  N_tourn: ", alldata.indim / 2)
        edata.append([trnresult, tstresult])
        msedata.append([i, tod])
    with open(fn + ".dta", 'w') as fp:
        json.dump(edata, fp)
    with open(fn + ".mse", 'w') as fp:
        json.dump(msedata, fp)
    return net
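
# A usage sketch on synthetic data; 'toy_run' only controls the output file
# names (toy_run.dta / toy_run.mse). Assumes numpy is imported as np:
toydata = ClassificationDataSet(4, 1, nb_classes=2)
toy_rng = np.random.RandomState(1)
for _ in range(200):
    xrow = toy_rng.rand(4)
    toydata.addSample(xrow, [int(xrow.sum() > 2)])
toynet = trainet2(toydata, nhide=8, nhide1=8, epo=3, wd=.1, fn='toy_run')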
Exemple #53
0
lin_clf = svm.LinearSVC()  # creates a linear svm.
lin_clf.fit(x_train, y_train)  # trains the svm.
y_hat['svm'] = lin_clf.predict(x_test)

############## ANN ################
print '\nTraining Artificial Neural Network'
trndata = ClassificationDataSet(n_components)
for i in range(0, y_train.size):
    # add data to the pybrain structure.
    trndata.addSample(x_train[i], y_train[i])

tstdata = ClassificationDataSet(n_components)
for i in range(0, y_test.size):
    tstdata.addSample(x_test[i], y_test[i])

trndata._convertToOneOfMany()  # convert the label to multidimension label.
tstdata._convertToOneOfMany()

n = FeedForwardNetwork()
inLayer = LinearLayer(trndata.indim)
hiddenLayer = SigmoidLayer(15)
outLayer = LinearLayer(trndata.outdim)

n.addInputModule(inLayer)
n.addModule(hiddenLayer)
n.addOutputModule(outLayer)

in_to_hidden = FullConnection(inLayer, hiddenLayer)
hidden_to_out = FullConnection(hiddenLayer, outLayer)

n.addConnection(in_to_hidden)
# (the hand-built network above is left unfinished and unused: hidden_to_out
# is never added and sortModules is never called)
# build the network quickly with the buildNetwork shortcut instead
fnn = buildNetwork(trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer)
# set up the trainer
trainer = BackpropTrainer(fnn,
                          dataset=trndata,
                          momentum=0.1,
                          verbose=True,
                          weightdecay=0.01)
# generate a grid of data points for plotting the decision regions
ticks = arange(-3., 6., 0.2)
X, Y = meshgrid(ticks, ticks)
# the dataset needs a column vector, without pointers into the grid arrays
griddata = ClassificationDataSet(2, 1, nb_classes=3)
for i in xrange(X.size):
    griddata.addSample([X.ravel()[i], Y.ravel()[i]], [0])
griddata._convertToOneOfMany()  # still needed to make the net behave reliably
# start the training iterations
for i in range(20):
    trainer.trainEpochs(1)
    trnresult = percentError(trainer.testOnClassData(), trndata['class'])
    tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
                             tstdata['class'])

    print "epoch: %4d" % trainer.totalepochs, \
        "  train error: %5.2f%%" % trnresult, \
        "  test error: %5.2f%%" % tstresult
    out = fnn.activateOnDataset(griddata)
    out = out.argmax(axis=1)  # the highest output activation gives the class
    out = out.reshape(X.shape)
    figure(1)
    ioff()  # interactive graphics off
dataFile1.close()
dataFile2.close()
dataFile3.close()
dataFile4.close()
dataFile5.close()

images = np.append(data1['data'], [data2['data'], data3['data'], data4['data'], data5['data']])
labels = np.append(data1['labels'], [data2['labels'], data3['labels'], data4['labels'], data5['labels']])

# Construct the classification data set for learning
print 'Constructing the Data Set'
dataSet = ClassificationDataSet(3072, 1, nb_classes = 10)

for index in range(0, labels.size):
    dataSet.addSample(images[index], labels[index])
dataSet._convertToOneOfMany()


# Train the neural network
print 'Training the Neural Network'
trainer = BackpropTrainer(network, dataset = dataSet, momentum = 0.1, verbose = True, weightdecay = 0.01)
trainer.trainEpochs(5)

# Save the neural network to a file for later use
print 'Saving to File'
networkFile = open('trainedNet1.cpkl', 'w')
cPickle.dump(network, networkFile)
networkFile.close()

print 'Finished Training Network'
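
# Reloading the pickled network later (a sketch reusing the file name saved
# above):
networkFile = open('trainedNet1.cpkl', 'r')
network = cPickle.load(networkFile)
networkFile.close()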
Exemple #56
0
    variations = [0 for y in range(n_classes)]
    #print(statistics.pvariance(points[x]))
    #variations[x] = results[x][1] / results[x][0]

    variations = [calculate_variance(point,center) for point,center in zip(points,centers)]

    print(centers,variations)
    entries = pseudo_samples(data)
    
    
    train_data = ClassificationDataSet(n_classes, 1,nb_classes=n_output)
    
    for n in range(0, len(entries)):
        train_data.addSample( entries[n], [data[n][-1]])

    train_data._convertToOneOfMany( )
    
    
    for epochs in range(6):
        rights = 0
        cont = 0
        for i in range(len(train_data["input"])):
            #print("<")
            results = []
            for j in range(len(output_weights)):
                add_bias = [1]
                add_bias.extend(train_data["input"][i])
                #print(add_bias)
                total = 0
                
                for x in range(len(add_bias)):  # computing each output
Exemple #57
0
tstdata_temp, trndata_temp = alldata.splitWithProportion(0.25)

tstdata = ClassificationDataSet(2, 1, nb_classes=3)
for n in xrange(0, tstdata_temp.getLength()):
    tstdata.addSample(
        tstdata_temp.getSample(n)[0],
        tstdata_temp.getSample(n)[1])

trndata = ClassificationDataSet(2, 1, nb_classes=3)
for n in xrange(0, trndata_temp.getLength()):
    trndata.addSample(
        trndata_temp.getSample(n)[0],
        trndata_temp.getSample(n)[1])

trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]

fnn = buildNetwork(trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer)

trainer = BackpropTrainer(fnn,
                          dataset=trndata,
                          momentum=0.1,
                          verbose=True,
                          weightdecay=0.01)
Exemple #58
0
for line in inputFile.readlines():
    data = [float(x) for x in line.strip().split() if x != '']
    indata = tuple(data[:7])
    outdata = tuple(data[7:])
    ds.addSample(indata,outdata)
    k +=1
    if (k == size):
        testdata, traindata = ds.splitWithProportion( PorcDivTest )
        ds.clear()
        k = 0
        for inp,targ in testdata:
            testSet.appendLinked(inp,targ-1)
        for inp,targ in traindata:
            trainSet.appendLinked(inp,targ-1)

trainSet._convertToOneOfMany(bounds=[0, 1])
testSet._convertToOneOfMany(bounds=[0, 1])

if(camada2==0):
    net = buildNetwork(trainSet.indim,camada1,trainSet.outdim, recurrent = True)
else :
    net = buildNetwork(trainSet.indim,camada1,camada2,trainSet.outdim, recurrent = True)
trainer = BackpropTrainer(net,dataset = trainSet,learningrate = Learning,momentum = Momentum, verbose = True)
trainer.trainOnDataset(trainSet,Ciclos)

out = net.activateOnDataset(testSet)
out = out.argmax(axis=1)

acerto = total = i = 0
for data in testSet:
	if data[1][0] == 1 and out[i] == 0:
Exemple #59
0
class2vec2=np.reshape(class2im2,np.size(class2im1))
class2vec3=np.reshape(class2im3,np.size(class2im1))
class2vec4=np.reshape(class2im4,np.size(class2im1))
class2vec5=np.reshape(class2im5,np.size(class2im1))
trainData1=np.array([class1vec1,class1vec2,class1vec3,class1vec4,class1vec5,class2vec1,class2vec2,class2vec3,class2vec4,class2vec5])
ncomponents=9
pca = PCA(n_components=ncomponents)
pca.fit(trainData1)
trainData=pca.transform(trainData1)
trainLabels=np.array([1,1,1,1,1,2,2,2,2,2])
trnData = ClassificationDataSet(ncomponents, 1, nb_classes=2)
for i in range(len(trainLabels)):
    trnData.addSample(trainData[i,:], trainLabels[i]-1)
tstdata, trndata = trnData.splitWithProportion( 0.40 )
trnData._convertToOneOfMany( )
fnn = buildNetwork( trnData.indim, 20, trnData.outdim, outclass=SoftmaxLayer )
trainer = BackpropTrainer( fnn, dataset=trnData, momentum=0.1, verbose=True, weightdecay=0.01)
for i in range(20):
    trainer.trainEpochs(5)
    trnresult=percentError(trainer.testOnClassData(),trnData['class'])
    print "epoch: %4d" % trainer.totalepochs, \
          " train error: %5.2f%%" % trnresult
outTrain=fnn.activateOnDataset(trnData)
outTrainLabels=outTrain.argmax(axis=1)+1
numErrTrain=sum(outTrainLabels!=trainLabels)
accTrain=1-numErrTrain/float(len(trainLabels))
from __future__ import division
import numpy as np
from pybrain.datasets import ClassificationDataSet
from pybrain.utilities import percentError
Exemple #60
0
# reconvert to fix class issue
testingData = ClassificationDataSet(64 * 64 * 3, nb_classes=2)
for n in xrange(0, testingDataTemp.getLength()):
    testingData.addSample(
        testingDataTemp.getSample(n)[0],
        testingDataTemp.getSample(n)[1])

trainingData = ClassificationDataSet(64 * 64 * 3, nb_classes=2)
for n in xrange(0, trainingDataTemp.getLength()):
    trainingData.addSample(
        trainingDataTemp.getSample(n)[0],
        trainingDataTemp.getSample(n)[1])

# reencode outputs, necessary for training accurately
testingData._convertToOneOfMany()
trainingData._convertToOneOfMany()

##### BUILD ANN #####
# build feed-forward multi-layer perceptron ANN
fnn = FeedForwardNetwork()

# create layers: 64*64*3 input nodes (one per RGB pixel value) plus a bias unit, 64*64*3/2 hidden nodes, 2 output nodes
bias = BiasUnit(name='bias unit')
input_layer = LinearLayer(64 * 64 * 3, name='input layer')
hidden_layer = SigmoidLayer(64 * 64 * 3 / 2, name='hidden layer')
output_layer = SigmoidLayer(2, name='output layer')

# create connections with full connectivity between layers
bias_to_hidden = FullConnection(bias, hidden_layer, name='bias-hid')
bias_to_output = FullConnection(bias, output_layer, name='bias-out')