예제 #1
0
 def buildXor(self):
     """Create the XOR dataset and a randomised 2-3-1 feed-forward network.

     The dataset is stored as both ``self.trn_data`` and ``self.tst_data``
     (one shared object) and is also published through the module-level
     ``trn_data`` global. The network ends up in ``self.nn`` and a string
     dump of its connections in ``self.net_settings``.
     """
     global trn_data
     self.params['dataset'] = 'XOR'

     # XOR truth table as (inputs, target) pairs.
     xor_ds = ClassificationDataSet(2)
     for inputs, target in (([0., 0.], [0.]),
                            ([0., 1.], [1.]),
                            ([1., 0.], [1.]),
                            ([1., 1.], [0.])):
         xor_ds.addSample(inputs, target)
     xor_ds.setField('class', [[0.], [1.], [1.], [0.]])
     self.trn_data = self.tst_data = xor_ds
     trn_data = self.trn_data

     # Tanh input and hidden layers feeding a single threshold output unit.
     net = FeedForwardNetwork()
     layer_in = TanhLayer(2, name='in')
     layer_hidden = TanhLayer(3, name='hidden0')
     layer_out = ThresholdLayer(1, name='out')
     net.addInputModule(layer_in)
     net.addModule(layer_hidden)
     net.addOutputModule(layer_out)
     for source, sink in ((layer_in, layer_hidden), (layer_hidden, layer_out)):
         net.addConnection(FullConnection(source, sink))
     net.sortModules()
     net.randomize()

     self.net_settings = str(net.connections)
     self.nn = net
예제 #2
0
    def createTrainingSupervisedDataSet(self, msrcImages, scale,
                                        keepClassDistTrain):
        print "\tSplitting MSRC data into train, test, valid data sets."
        splitData = pomio.splitInputDataset_msrcData(msrcImages, scale,
                                                     keepClassDistTrain)

        print "\tNow generating features for each training image."
        trainData = FeatureGenerator.processLabeledImageData(splitData[0],
                                                             ignoreVoid=True)
        features = trainData[0]
        numDataPoints = np.shape(features)[0]
        numFeatures = np.shape(features)[1]
        labels = trainData[1]
        numLabels = np.size(labels)  #!!error! nb unique labels, or max label
        assert numDataPoints == numLabels, "Number of feature data points and number of labels not equal!"

        dataSetTrain = ClassificationDataSet(numFeatures, numClasses)

        print "\tNow adding all data points to the ClassificationDataSet..."
        for idx in range(0, numDataPoints):
            feature = trainData[0][idx]
            label = trainData[1][idx]

            binaryLabels = np.zeros(numClasses)
            # to cope with the removal of void class (idx 13)
            if label < voidClass:
                binaryLabels[label] = 1
            else:
                binaryLabels[label - 1] = 1

            dataSetTrain.addSample(feature, binaryLabels)

        print "\tAdded", np.size(trainData), " labeled data points to DataSet."
        return dataSetTrain
예제 #3
0
def nntester(tx, ty, rx, ry, iterations):
    """
    builds, tests, and graphs a neural network over a series of trials as it is
    constructed
    """
    resultst = []
    resultsr = []
    positions = range(iterations)
    network = buildNetwork(100, 50, 1, bias=True)
    ds = ClassificationDataSet(100, 1, class_labels=["valley", "hill"])
    for i in xrange(len(tx)):
        ds.addSample(tx[i], [ty[i]])
    trainer = BackpropTrainer(network, ds, learningrate=0.01)
    for i in positions:
        print trainer.train()
        resultst.append(
            sum((np.array([round(network.activate(test))
                           for test in tx]) - ty)**2) / float(len(ty)))
        resultsr.append(
            sum((np.array([round(network.activate(test))
                           for test in rx]) - ry)**2) / float(len(ry)))
        print i, resultst[i], resultsr[i]
    NetworkWriter.writeToFile(network, "network.xml")
    plt.plot(positions, resultst, 'ro', positions, resultsr, 'bo')
    plt.axis([0, iterations, 0, 1])
    plt.ylabel("Percent Error")
    plt.xlabel("Network Epoch")
    plt.title("Neural Network Error")
    plt.savefig('3Lnn.png', dpi=300)
 def buildXor(self):
     """Create the XOR dataset and a randomised 2-3-1 feed-forward network.

     The dataset is stored as both ``self.trn_data`` and ``self.tst_data``
     (one shared object) and is also published through the module-level
     ``trn_data`` global. The network ends up in ``self.nn`` and a string
     dump of its connections in ``self.net_settings``.
     """
     global trn_data
     self.params['dataset'] = 'XOR'

     # XOR truth table as (inputs, target) pairs.
     xor_ds = ClassificationDataSet(2)
     for inputs, target in (([0., 0.], [0.]),
                            ([0., 1.], [1.]),
                            ([1., 0.], [1.]),
                            ([1., 1.], [0.])):
         xor_ds.addSample(inputs, target)
     xor_ds.setField('class', [[0.], [1.], [1.], [0.]])
     self.trn_data = self.tst_data = xor_ds
     trn_data = self.trn_data

     # Tanh input and hidden layers feeding a single threshold output unit.
     net = FeedForwardNetwork()
     layer_in = TanhLayer(2, name='in')
     layer_hidden = TanhLayer(3, name='hidden0')
     layer_out = ThresholdLayer(1, name='out')
     net.addInputModule(layer_in)
     net.addModule(layer_hidden)
     net.addOutputModule(layer_out)
     for source, sink in ((layer_in, layer_hidden), (layer_hidden, layer_out)):
         net.addConnection(FullConnection(source, sink))
     net.sortModules()
     net.randomize()

     self.net_settings = str(net.connections)
     self.nn = net
예제 #5
0
 def createTrainingSupervisedDataSet(self,msrcImages , scale , keepClassDistTrain):
     print "\tSplitting MSRC data into train, test, valid data sets."
     splitData = pomio.splitInputDataset_msrcData(msrcImages, scale, keepClassDistTrain)
     
     print "\tNow generating features for each training image."
     trainData = FeatureGenerator.processLabeledImageData(splitData[0], ignoreVoid=True)
     features = trainData[0]
     numDataPoints = np.shape(features)[0]
     numFeatures = np.shape(features)[1]
     labels = trainData[1]
     numLabels = np.size(labels) #!!error! nb unique labels, or max label
     assert numDataPoints == numLabels , "Number of feature data points and number of labels not equal!"
     
     dataSetTrain = ClassificationDataSet(numFeatures , numClasses)
     
     print "\tNow adding all data points to the ClassificationDataSet..."
     for idx in range(0,numDataPoints):
         feature = trainData[0][idx]
         label =  trainData[1][idx]
         
         binaryLabels = np.zeros(numClasses)
         # to cope with the removal of void class (idx 13)
         if label < voidClass:
             binaryLabels[label] = 1
         else:
             binaryLabels[label-1] = 1
             
         dataSetTrain.addSample(feature , binaryLabels) 
 
     print "\tAdded" , np.size(trainData) , " labeled data points to DataSet."
     return dataSetTrain
def nn(tx, ty, rx, ry, add="", iterations=250):
    """
    trains and plots a neural network on the data we have
    """
    resultst = []
    resultsr = []
    positions = range(iterations)
    network = buildNetwork(tx[1].size, 5, 1, bias=True)
    ds = ClassificationDataSet(tx[1].size, 1)
    for i in xrange(len(tx)):
        ds.addSample(tx[i], [ty[i]])
    trainer = BackpropTrainer(network, ds, learningrate=0.01)
    train = zip(tx, ty)
    test = zip(rx, ry)
    for i in positions:
        trainer.train()
        resultst.append(
            sum(
                np.array([(round(network.activate(t_x)) - t_y)**2
                          for t_x, t_y in train]) / float(len(train))))
        resultsr.append(
            sum(
                np.array([(round(network.activate(t_x)) - t_y)**2
                          for t_x, t_y in test]) / float(len(test))))
        # resultsr.append(sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2)/float(len(ry)))
        print i, resultst[-1], resultsr[-1]
    plot([0, iterations, 0, 1],
         (positions, resultst, "ro", positions, resultsr, "bo"),
         "Network Epoch", "Percent Error", "Neural Network Error", "NN" + add)
예제 #7
0
def nntester(tx, ty, rx, ry, iterations):
    """
    builds, tests, and graphs a neural network over a series of trials as it is
    constructed
    """
    resultst = []
    resultsr = []
    positions = range(iterations)
    network = buildNetwork(100, 50, 1, bias=True)
    ds = ClassificationDataSet(100,1, class_labels=["valley", "hill"])
    for i in xrange(len(tx)):
        ds.addSample(tx[i], [ty[i]])
    trainer = BackpropTrainer(network, ds, learningrate=0.01)
    for i in positions:
        print trainer.train()
        resultst.append(sum((np.array([round(network.activate(test)) for test in tx]) - ty)**2)/float(len(ty)))
        resultsr.append(sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2)/float(len(ry)))
        print i, resultst[i], resultsr[i]
    NetworkWriter.writeToFile(network, "network.xml")
    plt.plot(positions, resultst, 'ro', positions, resultsr, 'bo')
    plt.axis([0, iterations, 0, 1])
    plt.ylabel("Percent Error")
    plt.xlabel("Network Epoch")
    plt.title("Neural Network Error")
    plt.savefig('3Lnn.png', dpi=300)
예제 #8
0
def nnTest(tx, ty, rx, ry, iterations):
    print "NN start"
    print strftime("%a, %d %b %Y %H:%M:%S", localtime())

    resultst = []
    resultsr = []
    positions = range(iterations)
    network = buildNetwork(16, 16, 1, bias=True)
    ds = ClassificationDataSet(16, 1, class_labels=["1", "0"])
    for i in xrange(len(tx)):
        ds.addSample(tx[i], [ty[i]])
    trainer = BackpropTrainer(network, ds, learningrate=0.05)
    validator = CrossValidator(trainer, ds, n_folds=10)
    print validator.validate()
    for i in positions:
        print trainer.train()
        resultst.append(sum((np.array([round(network.activate(test)) for test in tx]) - ty)**2)/float(len(ty)))
        resultsr.append(sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2)/float(len(ry)))
        print i, resultst[i], resultsr[i]
    plt.plot(positions, resultst, 'g-', positions, resultsr, 'r-')
    plt.axis([0, iterations, 0, 1])
    plt.ylabel("Percent Error")
    plt.xlabel("Network Epoch")
    plt.title("Neural Network Error")
    plt.savefig('nn.png', dpi=500)
    print "NN end"
    print strftime("%a, %d %b %Y %H:%M:%S", localtime())
def xorDataSet():
    """Return a two-input ClassificationDataSet holding the XOR truth table."""
    truth_table = (
        ([0., 0.], [0.]),
        ([0., 1.], [1.]),
        ([1., 0.], [1.]),
        ([1., 1.], [0.]),
    )
    dataset = ClassificationDataSet(2)
    for inputs, target in truth_table:
        dataset.addSample(inputs, target)
    # The 'class' field is set explicitly to the four XOR outputs.
    dataset.setField('class', [[0.], [1.], [1.], [0.]])
    return dataset
예제 #10
0
def xorDataSet():
    """Return a two-input ClassificationDataSet holding the XOR truth table."""
    truth_table = (
        ([0., 0.], [0.]),
        ([0., 1.], [1.]),
        ([1., 0.], [1.]),
        ([1., 1.], [0.]),
    )
    dataset = ClassificationDataSet(2)
    for inputs, target in truth_table:
        dataset.addSample(inputs, target)
    # The 'class' field is set explicitly to the four XOR outputs.
    dataset.setField('class', [[0.], [1.], [1.], [0.]])
    return dataset
예제 #11
0
def cvnntester(tx, ty, rx, ry, iterations, folds):
    network = buildNetwork(100, 50, 1, bias=True)
    ds = ClassificationDataSet(100,1, class_labels=["valley", "hill"])
    for i in xrange(len(tx)):
        ds.addSample(tx[i], [ty[i]])
    trainer = BackpropTrainer(network, ds, learningrate=0.005)
    cv = CrossValidator(trainer, ds, n_folds=folds, max_epochs=iterations, verbosity=True)
    print cv.validate()
    print sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2)/float(len(ry))
예제 #12
0
def train_network(X, y, hidden_units=3, learningrate=0.04, max_epochs=8, continue_epochs=2):
    """Train a single-output sigmoid network on (X, y) and return it.

    The input width is ``X.shape[1]``. Row ``i`` of ``X`` is paired with
    ``y[i]``. Training uses ``trainUntilConvergence`` with ``max_epochs``
    and ``continue_epochs`` passed straight through.
    """
    n_inputs = X.shape[1]
    net = buildNetwork(n_inputs, hidden_units, 1, outclass=SigmoidLayer)

    samples = ClassificationDataSet(n_inputs, 1)
    for idx, features in enumerate(X):
        samples.addSample(features, y[idx])

    backprop = BackpropTrainer(net, samples, learningrate=learningrate)
    backprop.trainUntilConvergence(maxEpochs=max_epochs,
                                   continueEpochs=continue_epochs)
    return net
    def initializeNetwork(self):
        """Build ``self.net`` (26-15-5, tanh hidden, softmax output) and train it.

        Every item of ``self.train`` contributes its ``frequency`` as input
        and ``self.encodingDict[item.lang]`` as class. The dataset is then
        converted to one-of-many targets and used for backprop training
        with at most 100 epochs.
        """
        # 15 hidden units: "just a mean" according to the original author.
        self.net = buildNetwork(26, 15, 5,
                                hiddenclass=TanhLayer,
                                outclass=SoftmaxLayer)

        samples = ClassificationDataSet(26, nb_classes=5)
        for item in self.train:
            samples.addSample(item.frequency, self.encodingDict[item.lang])
        samples._convertToOneOfMany()

        backprop = BackpropTrainer(self.net,
                                   dataset=samples,
                                   weightdecay=0.01,
                                   momentum=0.1,
                                   verbose=True)
        backprop.trainUntilConvergence(maxEpochs=100)
예제 #14
0
def nn(tx, ty, rx, ry, iterations):
    """Train a 14-5-5-1 network, save it, and return its test error.

    The network is trained on (tx, ty) for ``iterations`` epochs and written
    to "network.xml". The return value is the mean squared difference
    between the rounded outputs for ``rx`` and the labels ``ry``.
    """
    net = buildNetwork(14, 5, 5, 1)
    dataset = ClassificationDataSet(14, 1, class_labels=["<50K", ">=50K"])
    for row in xrange(len(tx)):
        dataset.addSample(tx[row], [ty[row]])
    BackpropTrainer(net, dataset).trainOnDataset(dataset, iterations)
    NetworkWriter.writeToFile(net, "network.xml")
    rounded = np.array([round(net.activate(sample)) for sample in rx])
    return sum((rounded - ry) ** 2) / float(len(ry))
예제 #15
0
def nn(tx, ty, rx, ry, iterations):
    """Train a 14-5-5-1 network, save it, and return its test error.

    The network is trained on (tx, ty) for ``iterations`` epochs and written
    to "network.xml". The return value is the mean squared difference
    between the rounded outputs for ``rx`` and the labels ``ry``.
    """
    net = buildNetwork(14, 5, 5, 1)
    dataset = ClassificationDataSet(14, 1, class_labels=["<50K", ">=50K"])
    for row in xrange(len(tx)):
        dataset.addSample(tx[row], [ty[row]])
    BackpropTrainer(net, dataset).trainOnDataset(dataset, iterations)
    NetworkWriter.writeToFile(net, "network.xml")
    rounded = np.array([round(net.activate(sample)) for sample in rx])
    return sum((rounded - ry) ** 2) / float(len(ry))
예제 #16
0
파일: __init__.py 프로젝트: iRapha/CS4641
def cifar_nn(offset=None):
    """Load the data via ``cifar`` and attach a PyBrain dataset of its training part.

    :param offset: number of leading training samples to copy; a falsy
        value (None or 0) means all of them.
    :return: the object returned by ``cifar`` with the new dataset stored
        under ``'train_nn'``.
    """
    bundle = cifar(one_hot=True, ten_percent=False)
    n_inputs = len(bundle['train']['data'][0])
    train_nn = ClassificationDataSet(n_inputs, 10)
    limit = offset if offset else len(bundle['train']['data'])
    for idx in xrange(limit):
        train_nn.addSample(bundle['train']['data'][idx],
                           bundle['train']['labels'][idx])
    bundle['train_nn'] = train_nn
    return bundle
예제 #17
0
파일: __init__.py 프로젝트: iRapha/CS4641
def sentiment_nn(bag_size=100, offset=None):
    """Load the data via ``sentiment`` and attach a PyBrain dataset of its training part.

    :param bag_size: forwarded to ``sentiment``.
    :param offset: number of leading training samples to copy; a falsy
        value (None or 0) means all of them.
    :return: the object returned by ``sentiment`` with the new dataset
        stored under ``'train_nn'``.
    """
    bundle = sentiment(bag_size)
    n_inputs = len(bundle['train']['data'][0])
    train_nn = ClassificationDataSet(n_inputs, 1)
    limit = offset if offset else len(bundle['train']['data'])
    for idx in xrange(limit):
        # Each label is wrapped in a one-element list as the target.
        train_nn.addSample(bundle['train']['data'][idx],
                           [bundle['train']['labels'][idx]])
    bundle['train_nn'] = train_nn
    return bundle
예제 #18
0
def cifar_nn(offset=None):
    """Load the data via ``cifar`` and attach a PyBrain dataset of its training part.

    :param offset: number of leading training samples to copy; a falsy
        value (None or 0) means all of them.
    :return: the object returned by ``cifar`` with the new dataset stored
        under ``'train_nn'``.
    """
    bundle = cifar(one_hot=True, ten_percent=False)
    n_inputs = len(bundle['train']['data'][0])
    train_nn = ClassificationDataSet(n_inputs, 10)
    limit = offset if offset else len(bundle['train']['data'])
    for idx in xrange(limit):
        train_nn.addSample(bundle['train']['data'][idx],
                           bundle['train']['labels'][idx])
    bundle['train_nn'] = train_nn
    return bundle
예제 #19
0
def sentiment_nn(bag_size=100, offset=None):
    """Load the data via ``sentiment`` and attach a PyBrain dataset of its training part.

    :param bag_size: forwarded to ``sentiment``.
    :param offset: number of leading training samples to copy; a falsy
        value (None or 0) means all of them.
    :return: the object returned by ``sentiment`` with the new dataset
        stored under ``'train_nn'``.
    """
    bundle = sentiment(bag_size)
    n_inputs = len(bundle['train']['data'][0])
    train_nn = ClassificationDataSet(n_inputs, 1)
    limit = offset if offset else len(bundle['train']['data'])
    for idx in xrange(limit):
        # Each label is wrapped in a one-element list as the target.
        train_nn.addSample(bundle['train']['data'][idx],
                           [bundle['train']['labels'][idx]])
    bundle['train_nn'] = train_nn
    return bundle
예제 #20
0
 def createTrainingSetFromMatrix( self, dataMat, labelsVec=None ):
     assert labelsVec==None or dataMat.shape[0] == len(labelsVec)
     #nbFtrs = dataMat.shape[1]
     #nbClasses = np.max(labelsVec) + 1
     if labelsVec != None and np.unique(labelsVec) != range(self.nbClasses):
         print 'WARNING: class labels only contain these values %s ' % (str( np.unique(labelsVec) ))
     dataSetTrain = ClassificationDataSet(self.nbFeatures, numClasses)
     for i in range(dataMat.shape[0]):
         binaryLabels = np.zeros(numClasses)
         if labelsVec != None:
             binaryLabels[labelsVec[i]] = 1
         dataSetTrain.addSample( dataMat[i,:], binaryLabels )
     return dataSetTrain
예제 #21
0
def cvnntester(tx, ty, rx, ry, iterations, folds):
    network = buildNetwork(100, 50, 1, bias=True)
    ds = ClassificationDataSet(100, 1, class_labels=["valley", "hill"])
    for i in xrange(len(tx)):
        ds.addSample(tx[i], [ty[i]])
    trainer = BackpropTrainer(network, ds, learningrate=0.005)
    cv = CrossValidator(trainer,
                        ds,
                        n_folds=folds,
                        max_epochs=iterations,
                        verbosity=True)
    print cv.validate()
    print sum((np.array([round(network.activate(test))
                         for test in rx]) - ry)**2) / float(len(ry))
예제 #22
0
def pybrainData(split, data=None):
    """Build one-of-many train and test datasets from pattern records.

    :param split: proportion handed to ``splitWithProportion``; the first
        part it returns is used as the test set.
    :param data: optional pattern list; when falsy, ``cat1 + cat2 + cat3``
        is used instead.
    :return: ``(trndata, tstdata)``, both converted with
        ``_convertToOneOfMany``.
    """
    # taken from iris data set at machine learning repository
    patterns = data if data else cat1 + cat2 + cat3
    alldata = ClassificationDataSet(
        4, 1, nb_classes=3, class_labels=['set', 'vers', 'virg'])
    for pattern in patterns:
        # Element 0 is the input vector, element 2 the target.
        alldata.addSample(pattern[0], pattern[2])
    tstdata, trndata = alldata.splitWithProportion(split)
    for part in (trndata, tstdata):
        part._convertToOneOfMany()
    return trndata, tstdata
    def testNetwork(self):
        correctAnswers = []
        for testItem in self.test:
            correctAnswers.append(self.encodingDict[testItem.lang])
        
        ds_test = ClassificationDataSet(26, nb_classes=5)
        for x in self.test:
            ds_test.addSample(x.frequency, self.encodingDict[x.lang])
        ds_test._convertToOneOfMany()

        sumCorrect = sum(self.net.activateOnDataset(ds_test).argmax(axis=1) == correctAnswers)
        
        print "\nNeural network: " + str(sumCorrect*100/float(len(self.test))) + "% efficiency"
        
        
예제 #24
0
def pybrainData(split, data=None):
    """Build one-of-many train and test datasets from pattern records.

    :param split: proportion handed to ``splitWithProportion``; the first
        part it returns is used as the test set.
    :param data: optional pattern list; when falsy, ``cat1 + cat2 + cat3``
        is used instead.
    :return: ``(trndata, tstdata)``, both converted with
        ``_convertToOneOfMany``.
    """
    # taken from iris data set at machine learning repository
    patterns = data if data else cat1 + cat2 + cat3
    alldata = ClassificationDataSet(
        4, 1, nb_classes=3, class_labels=['set', 'vers', 'virg'])
    for pattern in patterns:
        # Element 0 is the input vector, element 2 the target.
        alldata.addSample(pattern[0], pattern[2])
    tstdata, trndata = alldata.splitWithProportion(split)
    for part in (trndata, tstdata):
        part._convertToOneOfMany()
    return trndata, tstdata
예제 #25
0
def main():
    logger.debug('starting')
    print 'starting'
    #create the training & test sets, skipping the header row with [1:]
    dataset = genfromtxt(open(basepath + '/train.csv','r'), delimiter=',', dtype='f8')[1:]    
    logger.debug('opened dataset')
    target = [x[0] for x in dataset]
    train = [x[1:] for x in dataset]
    print target
    logger.debug('about to build data set')
    print 'building dataset'
    cds = ClassificationDataSet(784, target=10, nb_classes=10)
    for i in range(len(target)):
        targetvec = [0 for j in range(10)]
        targetnum = float(target[i])
        targetvec[int(float(target[i]))] = 1
        cds.addSample(train[i], targetvec)
        print i
        print 'adding sample: ' + str(targetnum)
        print targetvec
    logger.debug('about to build network')
    net = buildNetwork(784, 20, 10)
    logger.debug('about to build trainer')
    trainer = BackpropTrainer(net, dataset=cds, momentum=0.1, verbose=True, weightdecay=0.01)
    logger.debug('about to start training')
    print 'training'
    trainer.trainUntilConvergence()
    #save the net
    nfile = open(basepath + '/nn.pickle', 'w')
    pickle.dump(net, nfile)
    nfile.close()
    #run the real test
    logger.debug('opening test set')
    tests = genfromtxt(open(basepath + '/test.csv','r'), delimiter=',', dtype='f8')[1:]
    results = []
    print 'testing'
    for test in tests:
        logger.debug('activating net!')
        res = net.activate(test)
        logger.debug('result: ' + str(res))
        results.append(res)
        
    resultfile = open(basepath + '/nn.output', 'w')
    resultfile.write(str(results))
    print 'done'
예제 #26
0
def nn(tx, ty, rx, ry, add="", iterations=250):
    """
    trains and plots a neural network on the data we have
    """
    resultst = []
    resultsr = []
    positions = range(iterations)
    network = buildNetwork(tx[1].size, 5, 1, bias=True)
    ds = ClassificationDataSet(tx[1].size, 1)
    for i in xrange(len(tx)):
        ds.addSample(tx[i], [ty[i]])
    trainer = BackpropTrainer(network, ds, learningrate=0.01)
    for i in positions:
        trainer.train()
        resultst.append(sum((np.array([round(network.activate(test)) for test in tx]) - ty)**2)/float(len(ty)))
        resultsr.append(sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2)/float(len(ry)))
        print i
    plot([0, iterations, 0, 1], (positions, resultst, "ro", positions, resultsr, "bo"), "Network Epoch", "Percent Error", "Neural Network Error", "NN"+add)
예제 #27
0
    def train(network_file, input_length, output_length, training_data_file,
              learning_rate, momentum, stop_on_convergence, epochs, classify):
        n = get_network(network_file)
        if classify:
            ds = ClassificationDataSet(int(input_length),
                                       int(output_length) * 2)
            ds._convertToOneOfMany()
        else:
            ds = SupervisedDataSet(int(input_length), int(output_length))
        training_data = get_training_data(training_data_file)

        NetworkManager.last_training_set_length = 0
        for line in training_data:
            data = [float(x) for x in line.strip().split(',') if x != '']
            input_data = tuple(data[:(int(input_length))])
            output_data = tuple(data[(int(input_length)):])
            ds.addSample(input_data, output_data)
            NetworkManager.last_training_set_length += 1

        t = BackpropTrainer(n,
                            learningrate=learning_rate,
                            momentum=momentum,
                            verbose=True)
        print "training network " + network_storage_path + network_file

        if stop_on_convergence:
            t.trainUntilConvergence(ds, epochs)
        else:
            if classify:
                t.trainOnDataset(ds['class'], epochs)
            else:
                t.trainOnDataset(ds, epochs)

        error = t.testOnData()
        print "training done"
        if not math.isnan(error):
            save_network(n, network_file)
        else:
            print "error occured, network not saved"

        print "network saved"

        return error
예제 #28
0
def montaDatasetConvertido(dadosTemporario):
    """
    Copy every sample of ``dadosTemporario`` into a new ClassificationDataSet.

    Used for the training dataset as well as for the test and validation
    datasets.

    :param dadosTemporario: dataset exposing ``getLength()`` and
        ``getSample(i)``.
    :return: a ``ClassificationDataSet(4, 1)`` holding the same samples.
    """
    converted = ClassificationDataSet(4, 1)
    for position in range(dadosTemporario.getLength()):
        sample = dadosTemporario.getSample(position)
        # sample[0] is the input, sample[1] the target.
        converted.addSample(sample[0], sample[1])
    return converted
    def test_ann(self):
        from pybrain.datasets.classification import ClassificationDataSet
        # below line can be replaced with the algorithm of choice e.g.
        # from pybrain.optimization.hillclimber import HillClimber
        from pybrain.optimization.populationbased.ga import GA
        from pybrain.tools.shortcuts import buildNetwork

        # create XOR dataset
        d = ClassificationDataSet(2)
        d.addSample([181, 80], [1])
        d.addSample([177, 70], [1])
        d.addSample([160, 60], [0])
        d.addSample([154, 54], [0])
        d.setField('class', [ [0.],[1.],[1.],[0.]])

        nn = buildNetwork(2, 3, 1)
        # d.evaluateModuleMSE takes nn as its first and only argument
        ga = GA(d.evaluateModuleMSE, nn, minimize=True)
        for i in range(100):
            nn = ga.learn(0)[0]

        print nn.activate([181, 80])
예제 #30
0
    def test_ann(self):
        from pybrain.datasets.classification import ClassificationDataSet
        # below line can be replaced with the algorithm of choice e.g.
        # from pybrain.optimization.hillclimber import HillClimber
        from pybrain.optimization.populationbased.ga import GA
        from pybrain.tools.shortcuts import buildNetwork

        # create XOR dataset
        d = ClassificationDataSet(2)
        d.addSample([181, 80], [1])
        d.addSample([177, 70], [1])
        d.addSample([160, 60], [0])
        d.addSample([154, 54], [0])
        d.setField('class', [[0.], [1.], [1.], [0.]])

        nn = buildNetwork(2, 3, 1)
        # d.evaluateModuleMSE takes nn as its first and only argument
        ga = GA(d.evaluateModuleMSE, nn, minimize=True)
        for i in range(100):
            nn = ga.learn(0)[0]

        print nn.activate([181, 80])
예제 #31
0
def montaDataset():
    """
    Build the dataset from the iris data returned by ``datasets.load_iris()``.

    :return: ClassificationDataSet with 4 inputs, 1 target value and
        3 classes, holding one sample per iris row.
    """
    # Load the iris data (scikit-learn, according to the original comment).
    iris = datasets.load_iris()

    # 4 input values per sample, 1 output value, 3 classes to tell apart.
    dataset = ClassificationDataSet(4, 1, nb_classes=3)
    for features, label in zip(iris.data, iris.target):
        dataset.addSample(features, label)

    return dataset
예제 #32
0
def mlp():
    mlp = buildNetwork(26, 500, 3456, bias=True, outclass=SoftmaxLayer)
    #print net['in'], net['hidden0'],  net['out']
    ds = import_data()
    #http://stackoverflow.com/questions/27887936/attributeerror-using-pybrain-splitwithportion-object-type-changed
    tstdata_temp, trndata_temp = ds.splitWithProportion(0.25)

    tstdata = ClassificationDataSet(26, 1, nb_classes=3456)
    for n in xrange(0, tstdata_temp.getLength()):
        tstdata.addSample(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1])

    trndata = ClassificationDataSet(26, 1, nb_classes=3456)
    for n in xrange(0, trndata_temp.getLength()):
        trndata.addSample(trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1])

    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()

    print type(trndata['class'])


    print "Number of training patterns: ", len(trndata)
    print "Input and output dimensions: ", trndata.indim, trndata.outdim
    print "First sample (input, target, class):"
    print trndata['input'][0], trndata['target'][0], trndata['class'][0]


    trainer = BackpropTrainer(mlp, trndata, verbose = True, learningrate=0.01)
    trainer.trainUntilConvergence(maxEpochs=1000)

    trnresult = percentError( trainer.testOnClassData(),
                              trndata['class'] )
    tstresult = percentError( trainer.testOnClassData(
           dataset=tstdata ), tstdata['class'] )

    print "epoch: %4d" % trainer.totalepochs, \
          "  train error: %5.2f%%" % trnresult, \
          "  test error: %5.2f%%" % tstresult
예제 #33
0
def import_data(train_file_path='../data/train_trip.csv'):
    """Read JSON-per-line trip records into a ClassificationDataSet.

    Each line is parsed with ``json.loads``. The sample is assembled from
    fields 2, 1, 3 and 4 of the record and the class from field 5. Lines
    that fail to parse or convert are reported and skipped.

    :param train_file_path: path of the file to read.
    :return: ``ClassificationDataSet(26, 1, nb_classes=3456)`` with one
        sample per successfully parsed line.
    """
    dataset = ClassificationDataSet(26, 1, nb_classes=3456)
    # The with-block guarantees the file is closed; the original leaked it.
    with open(train_file_path, "r") as train_file:
        for line in train_file:
            try:
                datas = json.loads(line)
                data = [
                    # CALL_TYPE:         1
                    datas[2],
                    # TAXI_ID:           1
                    ord(datas[1].lower()) - ord('a'),
                ]
                # time embedding:    4
                data.extend(int(i) for i in datas[3])
                # trip:  10*2 =     20
                for point in datas[4]:
                    data.append(point[0])
                    data.append(point[1])
                dataset.addSample(data, [int(datas[5])])
            except Exception:
                # Still best-effort (bad lines are skipped), but no longer
                # swallows KeyboardInterrupt/SystemExit like a bare except.
                print 'error line:', line
    return dataset
예제 #34
0
# NOTE(review): fragment of a larger script - n, hiddenLayer, outLayer,
# in_to_hidden and dim are defined before the visible part.
hidden_to_out = FullConnection(hiddenLayer,outLayer)

# Wire the two connections into the network and finalise its structure.
n.addConnection(in_to_hidden)
n.addConnection(hidden_to_out)

n.sortModules()


print 'build set'

# Two-class dataset with `dim` inputs and a single target value.
alldata = ClassificationDataSet(dim, 1, nb_classes=2)

# Only `data` and `label` are used below; `items` is not referenced again
# in the visible part.
(data,label,items) = BinReader.readData(ur'F:\AliRecommendHomeworkData\1212新版\train15_17.expand.samp.norm.bin') 
#(train,label,data) = BinReader.readData(r'C:\data\small\norm\train1217.bin')
for i in range(len(data)):
    alldata.addSample(data[i],label[i])

# The first part returned (proportion 0.25) is used as the test set.
tstdata, trndata = alldata.splitWithProportion(0.25)

trainer = BackpropTrainer(n,trndata,momentum=0.1,verbose=True,weightdecay=0.01)

print 'start'
#trainer.trainEpochs(1)
trainer.trainUntilConvergence(maxEpochs=2)
# Percentage error on the training data, then on the held-out test data.
trnresult = percentError(trainer.testOnClassData(),trndata['class'])

tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])

print "epoch: %4d" % trainer.totalepochs, \
        "  train error: %5.2f%%" % trnresult, \
        "  test error: %5.2f%%" % tstresult
예제 #35
0
from sklearn import datasets
from pybrain.datasets.classification import ClassificationDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer

# Load the iris data: `x` holds the feature rows, `y` the class targets.
iris = datasets.load_iris()

x, y = iris.data, iris.target
print(len(x))

# 4 inputs per sample, 1 target value, 3 classes.
dataset = ClassificationDataSet(4, 1, nb_classes=3)

for i in range(len(x)):
    dataset.addSample(x[i], y[i])

# First split: proportion 0.6 goes to training, the remainder is split
# again in half into test and validation parts.
train_data, part_data = dataset.splitWithProportion(0.6)

test_data, val_data = part_data.splitWithProportion(0.5)

# Network sized from the dataset: indim inputs, 3 hidden units, outdim outputs.
net = buildNetwork(dataset.indim, 3, dataset.outdim)
trainer = BackpropTrainer(net,
                          dataset=train_data,
                          learningrate=0.01,
                          momentum=0.1,
                          verbose=True)

# Train for at most 100 epochs; the call returns two error sequences.
train_errors, val_errors = trainer.trainUntilConvergence(dataset=train_data,
                                                         maxEpochs=100)

# Bare expression: has no effect when run as a script (it only displays a
# value in an interactive session).
trainer.totalepochs
예제 #36
0
# To do the following you need to run command: pip install pybrain

from pybrain.datasets.classification import ClassificationDataSet
# below line can be replaced with the algorithm of choice e.g.
# from pybrain.optimization.hillclimber import HillClimber
from pybrain.optimization.populationbased.ga import GA
from pybrain.tools.shortcuts import buildNetwork

# Create a dataset of four two-value samples, each with one target value.
d = ClassificationDataSet(2)
d.addSample([181, 80], [1])
d.addSample([177, 70], [1])
d.addSample([160, 60], [0])
d.addSample([154, 54], [0])
# The 'class' field is then set to this fixed pattern.
d.setField('class', [[0.], [1.], [1.], [0.]])

# 2 inputs, 3 hidden units, 1 output.
nn = buildNetwork(2, 3, 1)

# d.evaluateModuleMSE takes nn as its first and only argument
ga = GA(d.evaluateModuleMSE, nn, minimize=True)

# Run 100 learning steps, keeping the first element of each result as the
# current network.
for i in range(100):
    nn = ga.learn(0)[0]

print(nn.activate([181, 80]))
예제 #37
0
import numpy as np
from sklearn import datasets
from pybrain.datasets.classification import ClassificationDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
import matplotlib.pyplot as plt

# Load the iris data: `entrada` holds the inputs, `saida` the targets.
iris = datasets.load_iris()

entrada, saida = iris.data, iris.target

# 4 inputs per sample, 1 target value, 3 classes.
dataset = ClassificationDataSet(4, 1, nb_classes=3)

# Add the samples to the dataset
for i in range(len(entrada)):
    dataset.addSample(entrada[i], saida[i])

# Take the portion of the data used to train the network
parteTreino, parteDados = dataset.splitWithProportion(0.6)
print("Quantidade para treinamento da rede : " + str(len(parteTreino)))

# Split the remaining data into a test part and a validation part
teste, validacao = parteDados.splitWithProportion(0.5)
print("Quantidade para teste da rede : " + str(len(teste)))
print("Quantidade para validação da rede : " + str(len(validacao)))

# Create the network
rede = buildNetwork(dataset.indim, 3, dataset.outdim)

#Realizando o treinamento e recuperando os erros
treinamento = BackpropTrainer(rede,
예제 #38
0
    # NOTE(review): this is the body of a loop whose header is outside the
    # visible fragment; `line` and `d` are defined there.
    # Convert one comma-separated record into a feature list plus a class.
    storageList = []
    # 100 is a placeholder that is replaced when a 'live'/'die' field is seen.
    classification = 100
    for i in line.split(','):
        if (i == 'live' or i == 'die'):
            # The outcome field becomes the class: live -> 1, die -> 0.
            if i == 'live':
                classification = 1
            else:
                classification = 0
        elif (i == 'True'):
            storageList.append(1)
        elif (i == 'False'):
            storageList.append(0)
        else:
            # Any other field is appended unchanged (still a string).
            storageList.append(i)

    d.addSample(storageList, [classification])
    print storageList

# create dataset
'''
d.addSample([181, 80], [1])
d.addSample([177, 70], [1])
d.addSample([160, 60], [0])
d.addSample([154, 54], [0])
'''

# NOTE(review): the four-entry 'class' field and the 2-input network below
# match the disabled four-sample example above - confirm they are still
# right for the records added in the loop.
d.setField('class', [[0.], [1.], [1.], [0.]])

nn = buildNetwork(2, 3, 1)

# d.evaluateModuleMSE takes nn as its first and only argument
#! /usr/bin/env python3

import matplotlib.pyplot as plt

from sklearn import datasets
from pybrain.datasets.classification import ClassificationDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised import BackpropTrainer

# Load the iris data: X holds the input features, y the class targets.
iris = datasets.load_iris()
X, y = iris.data, iris.target
# Data set declared with 4 inputs, 1 target column and 3 classes.
dataset = ClassificationDataSet(4, 1, nb_classes=3)

# Add every (features, target) pair to the dataset.
for sample_input, sample_output in zip(X, y):
    dataset.addSample(sample_input, sample_output)

# Partitioning data for training (proportion 0.6 goes to training_data)
training_data, partitioned_data = dataset.splitWithProportion(0.6)

# Splitting the remaining data in half for testing and validation.
# Neither testing_data nor validation_data is used in the code shown here.
testing_data, validation_data, = partitioned_data.splitWithProportion(0.5)

# Network sized from the dataset, with two hidden layers of 2 units each.
network = buildNetwork(dataset.indim, 2, 2, dataset.outdim)
trainer = BackpropTrainer(network,
                          dataset=training_data,
                          learningrate=0.01,
                          momentum=0.1,
                          verbose=True)

# Train on the training partition for at most 200 epochs; the call returns
# the training-error and validation-error histories.
training_errors, validation_errors = trainer.trainUntilConvergence(
    dataset=training_data, maxEpochs=200)
예제 #40
0
from sklearn import datasets
from pybrain.datasets.classification import ClassificationDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer

# Load the iris data: x holds the input features, y the class targets.
iris = datasets.load_iris()
x, y = iris.data, iris.target
# Data set declared with 4 inputs, 1 target column and 3 classes.
dataset = ClassificationDataSet(4, 1, nb_classes=3)

# Add every (features, target) pair to the dataset.
for i in range(len(x)):
    dataset.addSample(x[i], y[i])

# Split with proportion 0.6, then split the remainder in half.
train_data_temp, part_data_temp = dataset.splitWithProportion(0.6)
test_data_temp, val_data_temp = part_data_temp.splitWithProportion(0.5)

# Copy the training split sample by sample into a fresh ClassificationDataSet.
# NOTE(review): presumably done because the split results are not
# ClassificationDataSet instances in the PyBrain version used -- confirm.
train_data = ClassificationDataSet(4, 1, nb_classes=3)
for n in range(train_data_temp.getLength()):
    # getSample(n) yields the sample's fields; [0] is the input, [1] the target.
    train_data.addSample(
        train_data_temp.getSample(n)[0],
        train_data_temp.getSample(n)[1])

# Copy the test split sample by sample into its own ClassificationDataSet.
test_data = ClassificationDataSet(4, 1, nb_classes=3)
for n in range(test_data_temp.getLength()):
    # The samples must go into test_data: adding them to train_data would
    # leave test_data empty and leak the test samples into the training set.
    test_data.addSample(
        test_data_temp.getSample(n)[0],
        test_data_temp.getSample(n)[1])

val_data = ClassificationDataSet(4, 1, nb_classes=3)
for n in range(val_data_temp.getLength()):
    val_data.addSample(
        val_data_temp.getSample(n)[0],
예제 #41
0
from sklearn import datasets

iris = datasets.load_iris()
X, y = iris.data, iris.target

from pybrain.datasets.classification import ClassificationDataSet
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
#import numpy as np
import matplotlib.pyplot as pl

# Data set declared with 4 inputs, 1 target column and 3 classes.
ds = ClassificationDataSet(4, 1, nb_classes=3)
for features, label in zip(X, y):
    ds.addSample(features, label)

# Split the data into train, test and validation sets in 60/20/20 proportions.
trndata, partdata = ds.splitWithProportion(0.60)
tstdata, validdata = partdata.splitWithProportion(0.50)

# Encode the classes with one output neuron per class.
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()
validdata._convertToOneOfMany()

# The conversion stores the original target values in the 'class' field so
# they are preserved.  The parenthesised print form gives the same output on
# Python 2 and is valid on Python 3.
print(trndata['class'])
# New target values after the conversion.
print(trndata['target'])
# To do the following you need to run command: pip install pybrain

from pybrain.datasets.classification import ClassificationDataSet
# below line can be replaced with the algorithm of choice e.g.
# from pybrain.optimization.hillclimber import HillClimber
from pybrain.optimization.populationbased.ga import GA
from pybrain.tools.shortcuts import buildNetwork

# Create a tiny two-feature dataset with binary targets.
d = ClassificationDataSet(2)
d.addSample([181, 80], [1])
d.addSample([177, 70], [1])
d.addSample([160, 60], [0])
d.addSample([154, 54], [0])
# The 'class' field must mirror the targets added above (1, 1, 0, 0); the
# XOR pattern (0, 1, 1, 0) copied from the XOR example contradicted them.
d.setField('class', [[1.], [1.], [0.], [0.]])

# Network with layer sizes 2 (input), 3 (hidden), 1 (output).
nn = buildNetwork(2, 3, 1)

# d.evaluateModuleMSE takes nn as its first and only argument
ga = GA(d.evaluateModuleMSE, nn, minimize=True)

# Run 100 learning steps, keeping the best network returned by each step.
for _ in range(100):
    nn = ga.learn(0)[0]

# Parenthesised print: same output on Python 2, valid on Python 3.
print(nn.activate([181, 80]))
예제 #43
0
__author__ = 'QSG'
from pybrain.datasets.classification import ClassificationDataSet
from pybrain.optimization.populationbased.ga import GA
from pybrain.tools.shortcuts import buildNetwork

# Four three-input samples; the third input is always 0 and the target is the
# XOR of the first two inputs.
d = ClassificationDataSet(3)
d.addSample([0, 0, 0], [0.])
d.addSample([0, 1, 0], [1.])
d.addSample([1, 0, 0], [1.])
d.addSample([1, 1, 0], [0.])
d.setField('class', [[0.], [1.], [1.], [0.]])

# Network with layer sizes 3 (input), 3 (hidden), 1 (output).
nn = buildNetwork(3, 3, 1)

# Output of the untrained network, for comparison with the result below.
# Parenthesised print: same output on Python 2, valid on Python 3.
print(nn.activate([0, 1, 1]))

# Minimise the dataset's MSE with the genetic algorithm: 100 learning steps,
# keeping the best network returned by each step.
ga = GA(d.evaluateModuleMSE, nn, minimize=True)
for _ in range(100):
    nn = ga.learn(0)[0]

# Output of the optimised network for the same input.
print(nn.activate([0, 1, 1])[0])

# print nn
예제 #44
0
    from pybrain.datasets.classification import ClassificationDataSet
    from pybrain.optimization.populationbased.ga import GA
    from pybrain.tools.shortcuts import buildNetwork

    # create XOR dataset
    d = ClassificationDataSet(2)
    d.addSample([0., 0.], [0.])
    d.addSample([0., 1.], [1.])
    d.addSample([1., 0.], [1.])
    d.addSample([1., 1.], [0.])
    d.setField('class', [ [0.],[1.],[1.],[0.]])

    nn = buildNetwork(2, 3, 1)
    ga = GA(d.evaluateModuleMSE, nn, minimize=True)
    for i in range(100):
        nn = ga.learn(0)[0]

    # test results after the above script
    In [68]: nn.activate([0,0])
    Out[68]: array([-0.07944574])

    In [69]: nn.activate([1,0])
    Out[69]: array([ 0.97635635])

    In [70]: nn.activate([0,1])
    Out[70]: array([ 1.0216745])

    In [71]: nn.activate([1,1])
    Out[71]: array([ 0.03604205])
예제 #45
0
                                    target=self['target'][rightIndicies].copy())
        return leftDs, rightDs

# Load the iris data and keep the features and the targets separately.
irisData = datasets.load_iris()
dataFeatures = irisData.data
dataTargets = irisData.target

# Disabled debugging aids, kept from the original author:
#plt.matshow(irisData.images[11], cmap=cm.Greys_r)
#plt.show()
#print dataTargets[11]
#print dataFeatures.shape

# Data set declared with 4 inputs, 1 target column and 3 classes.
dataSet = ClassificationDataSet(4, 1 , nb_classes=3)

# Add every sample; np.ravel flattens the feature row before it is stored.
for i in range(len(dataFeatures)):
	dataSet.addSample(np.ravel(dataFeatures[i]), dataTargets[i])
	
# Uses the module-level splitWithProportion defined above (not the dataset
# method) with proportion 0.7 for the training part.
trainingData, testData = splitWithProportion(dataSet,0.7)

# Encode the classes with one output per class.
trainingData._convertToOneOfMany()
testData._convertToOneOfMany()

# Network sized from the converted training data, with 7 hidden units and a
# softmax output layer.
neuralNetwork = buildNetwork(trainingData.indim, 7, trainingData.outdim, outclass=SoftmaxLayer) 
trainer = BackpropTrainer(neuralNetwork, dataset=trainingData, momentum=0.01, learningrate=0.05, verbose=True)

# Train for 10000 epochs, then report the percentage error between the
# classes predicted on the test data and the stored 'class' labels.
trainer.trainEpochs(10000)
print('Error (test dataset): ' , percentError(trainer.testOnClassData(dataset=testData), testData['class']))

print('\n\n')
counter = 0
for input in dataFeatures:
예제 #46
0
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure import SoftmaxLayer
from pybrain.tools.customxml.networkwriter import NetworkWriter
from pybrain.tools.customxml.networkreader import NetworkReader
import os

# Downloading Dataset
# oData holds the per-sample feature data, oTarget the class targets.
olivetti = datasets.fetch_olivetti_faces()
oData, oTarget = olivetti.data, olivetti.target

# Initializing Dataset
# Declared with 4096 inputs, 1 target column and 40 classes.
# NOTE(review): presumably 4096 = flattened 64x64 face images -- confirm.
dataset = ClassificationDataSet(4096, 1, nb_classes=40)

# Add every sample; ravel flattens the feature row before it is stored.
for i in range(len(oData)):
	dataset.addSample(ravel(oData[i]), oTarget[i])

# Splitting dataset for 75% training data and 25% test data
# The first returned part (proportion 0.25) is the test data, the remainder
# is the training data.
testData, trainingData = dataset.splitWithProportion(0.25)

# Encode the classes with one output per class.
trainingData._convertToOneOfMany()
testData._convertToOneOfMany()

# Neural Network Construction
# Load previous training if it exists
if os.path.isfile('oliv.xml'):
	print('Loading Previous Training Data...')
	fnn = NetworkReader.readFrom('oliv.xml')
	print('Training Data Loaded!\n')
# Build fresh network if training does not exist
else:
예제 #47
0
파일: NNEye.py 프로젝트: vlall/EyeTrack
import EyeObject
from pybrain.datasets.classification import ClassificationDataSet
# below line can be replaced with the algorithm of choice e.g.
# from pybrain.optimization.hillclimber import HillClimber
from pybrain.optimization.populationbased.ga import GA
from pybrain.tools.shortcuts import buildNetwork
 
# Build the eye-tracking dataset (this is not the XOR example the snippet
# was copied from): every sample has N_FEATURES input values.
N_FEATURES = 113
d = ClassificationDataSet(N_FEATURES)
EyeTrack = EyeObject.ReadExcel("new")
EyeTrack.format_Array()
outter = EyeTrack.get_Outter()
outterLen = len(EyeTrack.get_Outter())

# The first N_FEATURES entries of a row are the inputs, the last entry is
# the target.
# NOTE(review): the range stops at outterLen - 1, so the final row is never
# added -- confirm whether that row is intentionally excluded.
for i in range(outterLen - 1):
    d.addSample(outter[i][0:N_FEATURES], outter[i][-1])

# Network with layer sizes N_FEATURES (input), 60 (hidden), 1 (output).
nn = buildNetwork(N_FEATURES, 60, 1)
# d.evaluateModuleMSE takes nn as its first and only argument
ga = GA(d.evaluateModuleMSE, nn, minimize=True)
# Run 100 learning steps, keeping the best network returned by each step.
for i in range(100):
    nn = ga.learn(0)[0]

# Probe sample: the recorded values followed by zero padding up to the
# network's input size.
measured = [
    148.8, 924.1, 161.0, 505.7, 667.3, 175.0, 553.7, 561.9, 219.0,
    880.4, 1056.5, 57.0, 806.7, 459.4, 67.0, 466.2, 450.2, 401.0,
    705.5, 456.9, 230.0, 391.2, 461.7, 525.0, 415.8, 469.9, 283.0,
    750.1, 465.7, 262.0, 843.5, 466.9, 460.0, 609.0, 495.7, 320.0,
    666.8, 1065.6, 50.0, 637.1, 617.4, 111.0, 466.5, 465.1, 186.0,
    422.4, 447.6, 354.0, 473.5, 424.1, 505.0, 594.4, 428.2, 246.0,
    674.1, 433.5, 546.0, 578.3, 455.3, 143.0, 402.9, 485.4, 2087.0,
    546.4, 498.3, 101.0, 626.2, 829.5, 62.0,
]
probe = measured + [0.0] * (N_FEATURES - len(measured))

# Parenthesised print: same output on Python 2, valid on Python 3.
print(round(nn.activate(probe)))
    numdata[i][12] = unidict[numdata[i][12].strip()]
# Write the header row and every data row to the CSV file, each field
# followed by a comma.  The `with` block closes the file even if a write
# fails, and plain loops replace the list comprehensions that were used only
# for their side effects.
# NOTE(review): mode 'wb' with str data works on Python 2 only; Python 3
# would need text mode -- confirm the target interpreter.
with open('02 select_data_num.csv', 'wb') as fobj:
    for item in header:
        fobj.write(item)
        fobj.write(',')
    fobj.write('\n')
    for item in numdata:
        for it in item:
            # Commas inside a value are replaced by spaces so they cannot
            # be mistaken for field separators.
            fobj.write(str(it).replace(',', ' '))
            fobj.write(',')
        fobj.write('\n')

# Convert the rows to a float array.  The builtin `float` is used because the
# `np.float` alias (which was just the builtin) no longer exists in current
# NumPy releases.
npdata = np.array(numdata, dtype=float)
# Standardise every column from index 2 onwards (the feature columns).
npdata[:, 2:] = preprocessing.scale(npdata[:, 2:])
numdata = copy.deepcopy(npdata)

# Network with layer sizes 14 (input), 14 (hidden), 1 (output), with bias.
# NOTE(review): a softmax layer with a single output unit always yields 1.0 --
# confirm whether two output units (one per class) were intended.
net = buildNetwork(14, 14, 1, bias=True, outclass=SoftmaxLayer)
ds = ClassificationDataSet(14, 1, nb_classes=2)
# Column 1 is the target; columns 2 onwards are the inputs.
for item in numdata:
    ds.addSample(tuple(item[2:]), (item[1]))
dsTrain, dsTest = ds.splitWithProportion(0.8)

print('Trainging')
# NOTE(review): the trainer is given the full data set `ds`, not `dsTrain`,
# so the samples in dsTest are also seen during training -- confirm intent.
trainer = BackpropTrainer(net,
                          ds,
                          momentum=0.1,
                          verbose=True,
                          weightdecay=0.01)
# trainer.train()
trainer.trainUntilConvergence(maxEpochs=20)
print('Finish training')

# Raw input/target arrays of the two partitions.
Traininp = dsTrain['input']
Traintar = dsTrain['target']
Testinp = dsTest['input']
예제 #49
0
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SigmoidLayer

import src.dataloaders as d
from src.utils2 import c


D = d.testset()

# Shuffled row indices.  The range is materialised as a list because
# random.shuffle needs a mutable sequence (a bare range fails on Python 3).
a = list(range(D.shape[0]))
random.shuffle(a)

num_train_rows = 10000
num_test_rows = 5000

# The first 10000 shuffled indices form the training rows, the next 5000 the
# test rows.
tr_rows = a[:num_train_rows]
ts_rows = a[num_train_rows : (num_train_rows + num_test_rows)]

features = ["V11", "sdE5", "E9"]

# Select the feature columns and the "IsAlert" target column for each
# partition; `c` resolves the column names.
X = D[tr_rows, c(*features)]
Y = D[tr_rows, c("IsAlert")]
Xt = D[ts_rows, c(*features)]
Yt = D[ts_rows, c("IsAlert")]

# Network with layer sizes 3 (input), 3 (hidden), 1 (output) and a sigmoid
# output layer, trained on the training rows only.
nn = buildNetwork(3, 3, 1, outclass=SigmoidLayer)
ds = ClassificationDataSet(3, 1)
for i, row in enumerate(X):
    ds.addSample(row, Y[i])
trainer = BackpropTrainer(nn, ds)
from pybrain.datasets.classification import ClassificationDataSet
# below line can be replaced with the algorithm of choice e.g.
# from pybrain.optimization.hillclimber import HillClimber
from pybrain.optimization.populationbased.ga import GA
from pybrain.tools.shortcuts import buildNetwork

# create XOR dataset
d = ClassificationDataSet(2)
d.addSample([0., 0.], [0.])
d.addSample([0., 1.], [1.])
d.addSample([1., 0.], [1.])
d.addSample([1., 1.], [0.])
# d.setField('class', [ [0.],[1.],[1.],[0.]])

# Network with 2 inputs, hidden layers of 3, 5, 9, 5 and 3 units, 1 output.
nn = buildNetwork(2, 3, 5, 9, 5, 3, 1)
# d.evaluateModuleMSE takes nn as its first and only argument
ga = GA(d.evaluateModuleMSE, nn, minimize=True)
# Run 500 learning steps, keeping the best network returned by each step.
for i in range(500):
    nn = ga.learn(0)[0]

# Show the optimised network's output for each XOR input.  Parenthesised
# print: same output on Python 2, valid on Python 3.
for sample in ([0, 0], [1, 0], [0, 1], [1, 1]):
    print(nn.activate(sample))
예제 #51
0
def compare_l2_regularization():
    """Compare two weight-decay (L2) settings on the breast cancer data.

    Two networks with one hidden layer of 16 units are trained with
    ``BackpropTrainer`` using ``weightdecay`` 0.0001 and 0.001.  For each
    trainer the cross-validation result is printed; then, for 250 epochs, the
    train and test error of both networks is recorded and the four curves are
    saved to ``nn_breast_cancer_weight_decay.png``.
    """
    train_features, train_labels, test_features, test_labels = (
        get_breast_cancer_data())
    optimal_num_layers = 6
    # Only used in the plot title below.
    num_neurons = [optimal_num_layers * [16]]
    iterations = range(250)

    # Size the input layer from the data so it always agrees with the dataset
    # built below.
    num_inputs = len(train_features[0])
    nn1 = buildNetwork(num_inputs, 16, 1, bias=True)
    nn2 = buildNetwork(num_inputs, 16, 1, bias=True)
    dataset = ClassificationDataSet(num_inputs,
                                    len(train_labels[0]),
                                    class_labels=["1", "2"])

    for features, labels in zip(train_features, train_labels):
        dataset.addSample(features, labels)

    trainer1 = BackpropTrainer(nn1, dataset, weightdecay=0.0001)
    validator1 = CrossValidator(trainer1, dataset)
    print(validator1.validate())

    trainer2 = BackpropTrainer(nn2, dataset, weightdecay=0.001)
    validator2 = CrossValidator(trainer2, dataset)
    print(validator2.validate())

    def mean_squared_error(network, features, labels):
        # Mean squared difference between the rounded outputs and the labels.
        predictions = np.array(
            [np.round(network.activate(sample)) for sample in features])
        return sum((predictions - labels)**2) / float(len(labels))

    train_accuracy1 = []
    test_accuracy1 = []
    train_accuracy2 = []
    test_accuracy2 = []
    for _ in iterations:
        train_accuracy1.append(
            mean_squared_error(nn1, train_features, train_labels))
        test_accuracy1.append(
            mean_squared_error(nn1, test_features, test_labels))
        train_accuracy2.append(
            mean_squared_error(nn2, train_features, train_labels))
        test_accuracy2.append(
            mean_squared_error(nn2, test_features, test_labels))
        # Train one epoch per iteration; without this the networks never
        # change and every plotted point is identical.
        trainer1.train()
        trainer2.train()

    plt.plot(iterations, train_accuracy1)
    plt.plot(iterations, test_accuracy1)
    plt.plot(iterations, train_accuracy2)
    plt.plot(iterations, test_accuracy2)
    plt.legend([
        "Train Accuracy (0.0001)", "Test Accuracy (0.0001)",
        "Train Accuracy (0.001)", "Test Accuracy (0.001)"
    ])
    plt.xlabel("Num Epoch")
    plt.ylabel("Percent Error")
    plt.title("Neural Network on Breast Cancer Data with " + str(num_neurons) +
              " layers")
    plt.savefig("nn_breast_cancer_weight_decay.png")