def splitWithProportion(self, proportion=0.7):
    """Randomly split this dataset into two new ClassificationDataSets.

    The sample indices are shuffled with ``random.permutation``; the first
    ``int(len(self) * proportion)`` shuffled indices select the samples of
    the first dataset and the remaining indices those of the second.

    :param proportion: fraction of the samples that goes into the first
        returned dataset.
    :return: tuple ``(leftDs, rightDs)`` built from copies of the selected
        'input' and 'target' rows.
    """
    shuffled = random.permutation(len(self))
    cut = int(len(self) * proportion)

    def subset(picked):
        # Copy the selected rows so the new dataset gets its own arrays.
        return ClassificationDataSet(inp=self['input'][picked].copy(),
                                     target=self['target'][picked].copy())

    first_part = subset(shuffled[:cut])
    second_part = subset(shuffled[cut:])
    return first_part, second_part
def createTrainingSupervisedDataSet(self, msrcImages, scale, keepClassDistTrain): print "\tSplitting MSRC data into train, test, valid data sets." splitData = pomio.splitInputDataset_msrcData(msrcImages, scale, keepClassDistTrain) print "\tNow generating features for each training image." trainData = FeatureGenerator.processLabeledImageData(splitData[0], ignoreVoid=True) features = trainData[0] numDataPoints = np.shape(features)[0] numFeatures = np.shape(features)[1] labels = trainData[1] numLabels = np.size(labels) #!!error! nb unique labels, or max label assert numDataPoints == numLabels, "Number of feature data points and number of labels not equal!" dataSetTrain = ClassificationDataSet(numFeatures, numClasses) print "\tNow adding all data points to the ClassificationDataSet..." for idx in range(0, numDataPoints): feature = trainData[0][idx] label = trainData[1][idx] binaryLabels = np.zeros(numClasses) # to cope with the removal of void class (idx 13) if label < voidClass: binaryLabels[label] = 1 else: binaryLabels[label - 1] = 1 dataSetTrain.addSample(feature, binaryLabels) print "\tAdded", np.size(trainData), " labeled data points to DataSet." return dataSetTrain
def buildXor(self):
    """Prepare the XOR dataset and a randomized 2-3-1 feed-forward network.

    Stores the dataset as both ``self.trn_data`` and ``self.tst_data`` (and
    in the module-level ``trn_data``), records the dataset name in
    ``self.params``, and keeps the network in ``self.nn`` with a string dump
    of its connections in ``self.net_settings``.
    """
    global trn_data

    self.params['dataset'] = 'XOR'

    xor_set = ClassificationDataSet(2)
    xor_samples = (
        ([0., 0.], [0.]),
        ([0., 1.], [1.]),
        ([1., 0.], [1.]),
        ([1., 1.], [0.]),
    )
    for inputs, target in xor_samples:
        xor_set.addSample(inputs, target)
    xor_set.setField('class', [[0.], [1.], [1.], [0.]])

    # The same dataset object serves for training and testing.
    self.trn_data = xor_set
    self.tst_data = xor_set
    trn_data = self.trn_data

    net = FeedForwardNetwork()
    input_layer = TanhLayer(2, name='in')
    hidden_layer = TanhLayer(3, name='hidden0')
    output_layer = ThresholdLayer(1, name='out')
    net.addInputModule(input_layer)
    net.addModule(hidden_layer)
    net.addOutputModule(output_layer)

    input_link = FullConnection(input_layer, hidden_layer)
    output_link = FullConnection(hidden_layer, output_layer)
    for link in (input_link, output_link):
        net.addConnection(link)

    net.sortModules()
    net.randomize()

    self.net_settings = str(net.connections)
    self.nn = net
def nntester(tx, ty, rx, ry, iterations): """ builds, tests, and graphs a neural network over a series of trials as it is constructed """ resultst = [] resultsr = [] positions = range(iterations) network = buildNetwork(100, 50, 1, bias=True) ds = ClassificationDataSet(100, 1, class_labels=["valley", "hill"]) for i in xrange(len(tx)): ds.addSample(tx[i], [ty[i]]) trainer = BackpropTrainer(network, ds, learningrate=0.01) for i in positions: print trainer.train() resultst.append( sum((np.array([round(network.activate(test)) for test in tx]) - ty)**2) / float(len(ty))) resultsr.append( sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2) / float(len(ry))) print i, resultst[i], resultsr[i] NetworkWriter.writeToFile(network, "network.xml") plt.plot(positions, resultst, 'ro', positions, resultsr, 'bo') plt.axis([0, iterations, 0, 1]) plt.ylabel("Percent Error") plt.xlabel("Network Epoch") plt.title("Neural Network Error") plt.savefig('3Lnn.png', dpi=300)
def nn(tx, ty, rx, ry, add="", iterations=250): """ trains and plots a neural network on the data we have """ resultst = [] resultsr = [] positions = range(iterations) network = buildNetwork(tx[1].size, 5, 1, bias=True) ds = ClassificationDataSet(tx[1].size, 1) for i in xrange(len(tx)): ds.addSample(tx[i], [ty[i]]) trainer = BackpropTrainer(network, ds, learningrate=0.01) train = zip(tx, ty) test = zip(rx, ry) for i in positions: trainer.train() resultst.append( sum( np.array([(round(network.activate(t_x)) - t_y)**2 for t_x, t_y in train]) / float(len(train)))) resultsr.append( sum( np.array([(round(network.activate(t_x)) - t_y)**2 for t_x, t_y in test]) / float(len(test)))) # resultsr.append(sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2)/float(len(ry))) print i, resultst[-1], resultsr[-1] plot([0, iterations, 0, 1], (positions, resultst, "ro", positions, resultsr, "bo"), "Network Epoch", "Percent Error", "Neural Network Error", "NN" + add)
def xorDataSet():
    """Return a ClassificationDataSet holding the four XOR samples.

    Each sample has two inputs and one target; the 'class' field is set to
    the same 0/1/1/0 pattern as the targets.
    """
    xor_samples = (
        ([0., 0.], [0.]),
        ([0., 1.], [1.]),
        ([1., 0.], [1.]),
        ([1., 1.], [0.]),
    )
    xor_set = ClassificationDataSet(2)
    for inputs, target in xor_samples:
        xor_set.addSample(inputs, target)
    xor_set.setField('class', [[0.], [1.], [1.], [0.]])
    return xor_set
def main(): print "Calculating mfcc...." mfcc_coeff_vectors_dict = {} for i in range(1, 201): extractor = FeatureExtractor( '/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Happiness/HappinessAudios/' + str(i) + '.wav') mfcc_coeff_vectors = extractor.calculate_mfcc() mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])}) for i in range(201, 401): extractor = FeatureExtractor( '/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Sadness/SadnessAudios/' + str(i - 200) + '.wav') mfcc_coeff_vectors = extractor.calculate_mfcc() mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])}) audio_with_min_frames, min_frames = get_min_frames_audio( mfcc_coeff_vectors_dict) processed_mfcc_coeff = preprocess_input_vectors( mfcc_coeff_vectors_dict, min_frames) # frames = min_frames # print frames # print len(processed_mfcc_coeff['1']) # for each_vector in processed_mfcc_coeff['1']: # print len(each_vector) print "mffcc found..." classes = ["happiness", "sadness"] training_data = ClassificationDataSet( 26, target=1, nb_classes=2, class_labels=classes) # training_data = SupervisedDataSet(13, 1) try: network = NetworkReader.readFrom( 'network_state_frame_level_new2_no_pp1.xml') except: for i in range(1, 51): mfcc_coeff_vectors = processed_mfcc_coeff[str(i)] for each_vector in mfcc_coeff_vectors: training_data.appendLinked(each_vector, [1]) for i in range(201, 251): mfcc_coeff_vectors = processed_mfcc_coeff[str(i)] for each_vector in mfcc_coeff_vectors: training_data.appendLinked(each_vector, [0]) training_data._convertToOneOfMany() print "prepared training data.." 
print training_data.indim, training_data.outdim network = buildNetwork( training_data.indim, 5, training_data.outdim, fast=True) trainer = BackpropTrainer(network, learningrate=0.01, momentum=0.99) print "Before training...", trainer.testOnData(training_data) trainer.trainOnDataset(training_data, 1000) print "After training...", trainer.testOnData(training_data) NetworkWriter.writeToFile( network, "network_state_frame_level_new2_no_pp.xml")
def nn(tx, ty, rx, ry, iterations):
    """Train a 14-5-5-1 network on (tx, ty) and return its error on (rx, ry).

    The network is trained for ``iterations`` epochs, written to
    "network.xml", and scored as the summed squared difference between the
    rounded activations and ``ry`` divided by ``len(ry)``.
    """
    network = buildNetwork(14, 5, 5, 1)
    ds = ClassificationDataSet(14, 1, class_labels=["<50K", ">=50K"])
    for row in xrange(len(tx)):
        ds.addSample(tx[row], [ty[row]])

    trainer = BackpropTrainer(network, ds)
    trainer.trainOnDataset(ds, iterations)
    NetworkWriter.writeToFile(network, "network.xml")

    rounded = np.array([round(network.activate(sample)) for sample in rx])
    squared_total = sum((rounded - ry)**2)
    return squared_total / float(len(ry))
def cifar_nn(offset=None):
    """Load CIFAR and attach a ClassificationDataSet of its training samples.

    :param offset: number of leading training samples to add; when falsy,
        every training sample is added.
    :return: the dict returned by ``cifar`` with the dataset stored under
        the 'train_nn' key.
    """
    data_ = cifar(one_hot=True, ten_percent=False)
    train = data_['train']

    x_dim = len(train['data'][0])
    data = ClassificationDataSet(x_dim, 10)

    max_sample = offset if offset else len(train['data'])
    for row in xrange(max_sample):
        data.addSample(train['data'][row], train['labels'][row])

    data_['train_nn'] = data
    return data_
def createTrainingSetFromMatrix( self, dataMat, labelsVec=None ): assert labelsVec==None or dataMat.shape[0] == len(labelsVec) #nbFtrs = dataMat.shape[1] #nbClasses = np.max(labelsVec) + 1 if labelsVec != None and np.unique(labelsVec) != range(self.nbClasses): print 'WARNING: class labels only contain these values %s ' % (str( np.unique(labelsVec) )) dataSetTrain = ClassificationDataSet(self.nbFeatures, numClasses) for i in range(dataMat.shape[0]): binaryLabels = np.zeros(numClasses) if labelsVec != None: binaryLabels[labelsVec[i]] = 1 dataSetTrain.addSample( dataMat[i,:], binaryLabels ) return dataSetTrain
def sentiment_nn(bag_size=100, offset=None):
    """Load the sentiment data and attach a ClassificationDataSet to it.

    :param bag_size: forwarded to ``sentiment``.
    :param offset: number of leading training samples to add; when falsy,
        every training sample is added.
    :return: the dict returned by ``sentiment`` with the dataset stored
        under the 'train_nn' key.  Each label is wrapped in a one-element
        list to form the target.
    """
    data_ = sentiment(bag_size)
    train = data_['train']

    x_dim = len(train['data'][0])
    data = ClassificationDataSet(x_dim, 1)

    max_sample = offset if offset else len(train['data'])
    for row in xrange(max_sample):
        data.addSample(train['data'][row], [train['labels'][row]])

    data_['train_nn'] = data
    return data_
def main(): print "Calculating mfcc...." mfcc_coeff_vectors_dict = {} for i in range(1, 201): extractor = FeatureExtractor('/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Happiness/HappinessAudios/' + str(i) + '.wav') mfcc_coeff_vectors = extractor.calculate_mfcc() mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])}) for i in range(201, 401): extractor = FeatureExtractor('/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Sadness/SadnessAudios/' + str(i - 200) + '.wav') mfcc_coeff_vectors = extractor.calculate_mfcc() mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])}) audio_with_min_frames, min_frames = get_min_frames_audio(mfcc_coeff_vectors_dict) processed_mfcc_coeff = preprocess_input_vectors(mfcc_coeff_vectors_dict, min_frames) frames = min_frames print "mfcc found...." classes = ["happiness", "sadness"] try: network = NetworkReader.readFrom('network_state_new_.xml') except: # Create new network and start Training training_data = ClassificationDataSet(frames * 26, target=1, nb_classes=2, class_labels=classes) # training_data = SupervisedDataSet(frames * 39, 1) for i in range(1, 151): mfcc_coeff_vectors = processed_mfcc_coeff[str(i)] training_data.appendLinked(mfcc_coeff_vectors.ravel(), [1]) # training_data.addSample(mfcc_coeff_vectors.ravel(), [1]) for i in range(201, 351): mfcc_coeff_vectors = processed_mfcc_coeff[str(i)] training_data.appendLinked(mfcc_coeff_vectors.ravel(), [0]) # training_data.addSample(mfcc_coeff_vectors.ravel(), [0]) training_data._convertToOneOfMany() network = buildNetwork(training_data.indim, 5, training_data.outdim) trainer = BackpropTrainer(network, learningrate=0.01, momentum=0.99) print "Before training...", trainer.testOnData(training_data) trainer.trainOnDataset(training_data, 1000) print "After training...", trainer.testOnData(training_data) NetworkWriter.writeToFile(network, "network_state_new_.xml") print "*" * 30 , "Happiness Detection", "*" * 30 
for i in range(151, 201): output = network.activate(processed_mfcc_coeff[str(i)].ravel()) # print output, # if output > 0.7: # print "happiness" class_index = max(xrange(len(output)), key=output.__getitem__) class_name = classes[class_index] print class_name
def cvnntester(tx, ty, rx, ry, iterations, folds): network = buildNetwork(100, 50, 1, bias=True) ds = ClassificationDataSet(100, 1, class_labels=["valley", "hill"]) for i in xrange(len(tx)): ds.addSample(tx[i], [ty[i]]) trainer = BackpropTrainer(network, ds, learningrate=0.005) cv = CrossValidator(trainer, ds, n_folds=folds, max_epochs=iterations, verbosity=True) print cv.validate() print sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2) / float(len(ry))
def pybrainData(split, data=None):
    """Build one-of-many encoded train/test datasets from pattern records.

    :param split: proportion passed to ``splitWithProportion``; the first
        part of that split becomes the test set.
    :param data: pattern records; when falsy, ``cat1 + cat2 + cat3`` is used.
        Element 0 of a record is the input and element 2 the target.
    :return: tuple ``(trndata, tstdata)``.
    """
    # taken from iris data set at machine learning repository
    patterns = data if data else cat1 + cat2 + cat3

    alldata = ClassificationDataSet(4, 1, nb_classes=3,
                                    class_labels=['set', 'vers', 'virg'])
    for record in patterns:
        target = record[2]
        alldata.addSample(record[0], target)

    tstdata, trndata = alldata.splitWithProportion(split)
    for part in (trndata, tstdata):
        part._convertToOneOfMany()
    return trndata, tstdata
def train(network_file, input_length, output_length, training_data_file, learning_rate, momentum, stop_on_convergence, epochs, classify): n = get_network(network_file) if classify: ds = ClassificationDataSet(int(input_length), int(output_length) * 2) ds._convertToOneOfMany() else: ds = SupervisedDataSet(int(input_length), int(output_length)) training_data = get_training_data(training_data_file) NetworkManager.last_training_set_length = 0 for line in training_data: data = [float(x) for x in line.strip().split(',') if x != ''] input_data = tuple(data[:(int(input_length))]) output_data = tuple(data[(int(input_length)):]) ds.addSample(input_data, output_data) NetworkManager.last_training_set_length += 1 t = BackpropTrainer(n, learningrate=learning_rate, momentum=momentum, verbose=True) print "training network " + network_storage_path + network_file if stop_on_convergence: t.trainUntilConvergence(ds, epochs) else: if classify: t.trainOnDataset(ds['class'], epochs) else: t.trainOnDataset(ds, epochs) error = t.testOnData() print "training done" if not math.isnan(error): save_network(n, network_file) else: print "error occured, network not saved" print "network saved" return error
def montaDatasetConvertido(dadosTemporario):
    """
    Copy every sample of ``dadosTemporario`` into a fresh dataset.

    A new ClassificationDataSet(4, 1) is created and, for each index up to
    ``dadosTemporario.getLength()``, elements 0 and 1 of the sample at that
    index are added to it as input and target.

    :return: the newly filled dataset
    """
    dataset = ClassificationDataSet(4, 1)
    total = dadosTemporario.getLength()
    for indice in range(total):
        entrada = dadosTemporario.getSample(indice)[0]
        saida = dadosTemporario.getSample(indice)[1]
        dataset.addSample(entrada, saida)
    return dataset
def test_ann(self): from pybrain.datasets.classification import ClassificationDataSet # below line can be replaced with the algorithm of choice e.g. # from pybrain.optimization.hillclimber import HillClimber from pybrain.optimization.populationbased.ga import GA from pybrain.tools.shortcuts import buildNetwork # create XOR dataset d = ClassificationDataSet(2) d.addSample([181, 80], [1]) d.addSample([177, 70], [1]) d.addSample([160, 60], [0]) d.addSample([154, 54], [0]) d.setField('class', [[0.], [1.], [1.], [0.]]) nn = buildNetwork(2, 3, 1) # d.evaluateModuleMSE takes nn as its first and only argument ga = GA(d.evaluateModuleMSE, nn, minimize=True) for i in range(100): nn = ga.learn(0)[0] print nn.activate([181, 80])
def montaDataset():
    """
    Build a ClassificationDataSet from the scikit-learn iris data.

    :return: dataset with 4 inputs, 1 target and 3 classes, holding one
        sample per iris record
    """
    # Load the iris data through scikit-learn.
    iris = datasets.load_iris()
    dadosEntrada = iris.data
    dadosSaida = iris.target

    # Inputs are arrays of length 4, the output has length 1 and there are
    # 3 classes to tell apart.
    dataset = ClassificationDataSet(4, 1, nb_classes=3)
    for entrada, saida in zip(dadosEntrada, dadosSaida):
        dataset.addSample(entrada, saida)
    return dataset
from pybrain.datasets.classification import ClassificationDataSet
from pybrain.optimization.populationbased.ga import GA
from pybrain.tools.shortcuts import buildNetwork

# create XOR dataset: four 2-input samples, one target value each
d = ClassificationDataSet(2)
d.addSample([0., 0.], [0.])
d.addSample([0., 1.], [1.])
d.addSample([1., 0.], [1.])
d.addSample([1., 1.], [0.])
# the 'class' field repeats the target pattern
d.setField('class', [ [0.],[1.],[1.],[0.]])

nn = buildNetwork(2, 3, 1)
# the GA minimises the dataset's evaluateModuleMSE, starting from nn
ga = GA(d.evaluateModuleMSE, nn, minimize=True)
for i in range(100):
    # keep the first element of each learn(0) result as the current network
    nn = ga.learn(0)[0]

# test results after the above script
# In [68]: nn.activate([0,0])
# Out[68]: array([-0.07944574])
# In [69]: nn.activate([1,0])
# Out[69]: array([ 0.97635635])
# In [70]: nn.activate([0,1])
# Out[70]: array([ 1.0216745])
# In [71]: nn.activate([1,1])
# Out[71]: array([ 0.03604205])
from sklearn import datasets
from pybrain.datasets.classification import ClassificationDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer

# Load the iris data: x holds the inputs, y the targets.
iris = datasets.load_iris()
x, y = iris.data, iris.target
print(len(x))

# One sample per iris record: 4 inputs, 1 target, 3 classes.
dataset = ClassificationDataSet(4, 1, nb_classes=3)
for i in range(len(x)):
    dataset.addSample(x[i], y[i])

# Split with proportion 0.6, then split the second part again in half.
train_data, part_data = dataset.splitWithProportion(0.6)
test_data, val_data = part_data.splitWithProportion(0.5)

# Network sized from the dataset, with a 3-unit hidden layer.
net = buildNetwork(dataset.indim, 3, dataset.outdim)
trainer = BackpropTrainer(net, dataset=train_data, learningrate=0.01, momentum=0.1, verbose=True)
# Train on the training split for at most 100 epochs.
train_errors, val_errors = trainer.trainUntilConvergence(dataset=train_data, maxEpochs=100)
# Bare expression: only displays a value in an interactive session.
trainer.totalepochs
from sklearn import datasets
from pybrain.datasets.classification import ClassificationDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer

# Load the iris data: x holds the inputs, y the targets.
iris = datasets.load_iris()
x, y = iris.data, iris.target

# One sample per iris record: 4 inputs, 1 target, 3 classes.
dataset = ClassificationDataSet(4, 1, nb_classes=3)
for i in range(len(x)):
    dataset.addSample(x[i], y[i])

# Split with proportion 0.6, then split the second part again in half.
train_data_temp, part_data_temp = dataset.splitWithProportion(0.6)
test_data_temp, val_data_temp = part_data_temp.splitWithProportion(0.5)

# Copy each split sample-by-sample into a fresh ClassificationDataSet.
train_data = ClassificationDataSet(4, 1, nb_classes=3)
for n in range(train_data_temp.getLength()):
    train_data.addSample(
        train_data_temp.getSample(n)[0], train_data_temp.getSample(n)[1])

test_data = ClassificationDataSet(4, 1, nb_classes=3)
for n in range(test_data_temp.getLength()):
    # Test samples go into test_data; they were previously appended to
    # train_data, leaving test_data empty.
    test_data.addSample(
        test_data_temp.getSample(n)[0], test_data_temp.getSample(n)[1])

val_data = ClassificationDataSet(4, 1, nb_classes=3)
for n in range(val_data_temp.getLength()):
    val_data.addSample(
        val_data_temp.getSample(n)[0], val_data_temp.getSample(n)[1])
def _get_classification_dataset():
    """Return an empty ClassificationDataSet sized by the module constants.

    Uses INPUT and OUTPUT as the two positional dimensions and CLASSES as
    the number of classes.
    """
    empty_set = ClassificationDataSet(INPUT, OUTPUT, nb_classes=CLASSES)
    return empty_set
@author: Leonardo
"""

# Load the Iris dataset with scikit-learn
from sklearn import datasets
iris = datasets.load_iris()

# Get the inputs and outputs
X, y = iris.data, iris.target
print(len(X))
print(len(y))

from pybrain.datasets.classification import ClassificationDataSet
# NOTE: from here on the name `datasets` refers to this dataset object and
# no longer to the sklearn module imported above.
datasets = ClassificationDataSet(4, 1, nb_classes=3)  # nb_classes = number of output classes

# add the samples
for i in range(len(X)):
    datasets.addSample(X[i], y[i])

len(datasets)

'''
print(datasets['input'])
print(datasets['target'])
'''

# partition the data for training
train_data, part_data = datasets.splitWithProportion(
    0.6)  # split with proportion 0.6; the first part is used for training
print('Quantidade para treino: %d' % len(train_data))
# Assemble the network: input -> hidden -> output, fully connected.
n.addInputModule(inLayer)
n.addModule(hiddenLayer)
n.addOutputModule(outLayer)
in_to_hidden = FullConnection(inLayer,hiddenLayer)
hidden_to_out = FullConnection(hiddenLayer,outLayer)
n.addConnection(in_to_hidden)
n.addConnection(hidden_to_out)
n.sortModules()

print 'build set'
# Two-class dataset with `dim` inputs and a single target per sample.
alldata = ClassificationDataSet(dim, 1, nb_classes=2)
(data,label,items) = BinReader.readData(ur'F:\AliRecommendHomeworkData\1212新版\train15_17.expand.samp.norm.bin')
#(train,label,data) = BinReader.readData(r'C:\data\small\norm\train1217.bin')
for i in range(len(data)):
    alldata.addSample(data[i],label[i])

# The first part of the 0.25 split is the test set, the rest is for training.
tstdata, trndata = alldata.splitWithProportion(0.25)

trainer = BackpropTrainer(n,trndata,momentum=0.1,verbose=True,weightdecay=0.01)
print 'start'
#trainer.trainEpochs(1)
trainer.trainUntilConvergence(maxEpochs=2)
# Compare the trainer's class predictions with the training set's 'class' field.
trnresult = percentError(trainer.testOnClassData(),trndata['class'])
# NOTE(review): `i` is defined before this excerpt; these three assignments
# presumably sit inside a loop over the rows of numdata -- confirm.
# Columns 10-12 are replaced by their dictionary lookups (whitespace stripped).
numdata[i][10] = qualidict[numdata[i][10].strip()]
numdata[i][11] = modedict[numdata[i][11].strip()]
numdata[i][12] = unidict[numdata[i][12].strip()]

# Dump the header and every row as comma-terminated fields; commas inside a
# value are replaced by spaces.  The list comprehensions are used only for
# their write side effects.
fobj = open('02 select_data_num.csv', 'wb')
[(fobj.write(item), fobj.write(',')) for item in header]
fobj.write('\n')
[([(fobj.write(str(it).replace(',', ' ')), fobj.write(',')) for it in item], fobj.write('\n')) for item in numdata]
fobj.close()

# Convert to a float array and standardise every column from index 2 onwards.
npdata = np.array(numdata, dtype=np.float)
npdata[:, 2:] = preprocessing.scale(npdata[:, 2:])
numdata = copy.deepcopy(npdata)

net = buildNetwork(14, 14, 1, bias=True, outclass=SoftmaxLayer)
# Column 1 is used as the target, columns 2.. as the input.
# NOTE(review): the dataset declares 14 inputs -- confirm that item[2:]
# really has 14 values.
ds = ClassificationDataSet(14, 1, nb_classes=2)
for item in numdata:
    ds.addSample(tuple(item[2:]), (item[1]))
dsTrain, dsTest = ds.splitWithProportion(0.8)

print('Trainging')
# NOTE(review): the trainer is given the full dataset `ds`, not `dsTrain`;
# verify this is intended.
trainer = BackpropTrainer(net, ds, momentum=0.1, verbose=True, weightdecay=0.01)
# trainer.train()
trainer.trainUntilConvergence(maxEpochs=20)
print('Finish training')
Traininp = dsTrain['input']
def _rounded_error(network, features, labels):
    """Return the summed squared gap between rounded activations and labels, divided by len(labels)."""
    rounded = np.array([np.round(network.activate(row)) for row in features])
    return sum((rounded - labels)**2) / float(len(labels))


def compare_l2_regularization():
    """Plot train/test error per epoch for two weight-decay settings.

    Two 30-16-1 networks are trained on the breast cancer data with
    ``weightdecay`` 0.0001 and 0.001.  Each trainer is first passed through
    a CrossValidator (result printed), then trained one epoch at a time for
    250 epochs while the rounded-output error on the training and test sets
    is recorded.  The four curves are saved to
    "nn_breast_cancer_weight_decay.png".
    """
    train_features, train_labels, test_features, test_labels = get_breast_cancer_data(
    )
    optimal_num_layers = 6
    num_neurons = [optimal_num_layers * [16]]
    train_accuracy1 = []
    test_accuracy1 = []
    train_accuracy2 = []
    test_accuracy2 = []
    iterations = range(250)

    nn1 = buildNetwork(30, 16, 1, bias=True)
    nn2 = buildNetwork(30, 16, 1, bias=True)
    dataset = ClassificationDataSet(len(train_features[0]),
                                    len(train_labels[0]),
                                    class_labels=["1", "2"])
    for instance in range(len(train_features)):
        dataset.addSample(train_features[instance], train_labels[instance])

    trainer1 = BackpropTrainer(nn1, dataset, weightdecay=0.0001)
    validator1 = CrossValidator(trainer1, dataset)
    print(validator1.validate())
    trainer2 = BackpropTrainer(nn2, dataset, weightdecay=0.001)
    validator2 = CrossValidator(trainer2, dataset)
    print(validator2.validate())

    for iteration in iterations:
        # Train one epoch per iteration.  Without this the recorded error
        # never changes, since cross-validation is understood to train
        # copies of the trainer rather than nn1/nn2 themselves.
        trainer1.train()
        trainer2.train()
        train_accuracy1.append(_rounded_error(nn1, train_features, train_labels))
        test_accuracy1.append(_rounded_error(nn1, test_features, test_labels))
        train_accuracy2.append(_rounded_error(nn2, train_features, train_labels))
        test_accuracy2.append(_rounded_error(nn2, test_features, test_labels))

    plt.plot(iterations, train_accuracy1)
    plt.plot(iterations, test_accuracy1)
    plt.plot(iterations, train_accuracy2)
    plt.plot(iterations, test_accuracy2)
    plt.legend([
        "Train Accuracy (0.0001)", "Test Accuracy (0.0001)",
        "Train Accuracy (0.001)", "Test Accuracy (0.001)"
    ])
    plt.xlabel("Num Epoch")
    plt.ylabel("Percent Error")
    plt.title("Neural Network on Breast Cancer Data with " +
              str(num_neurons) + " layers")
    plt.savefig("nn_breast_cancer_weight_decay.png")