Code Example #1
File: neural_net.py Project: Sieru/Terrier
 def crossValidation(self, filename):
     trainer = BackpropTrainer(self.net)
     crossValidator = CrossValidator(trainer,
                                     self.createDataSetFromFile(filename),
                                     n_folds=10)
     result = crossValidator.validate()
     print result * 100, "%"
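Note: every snippet on this page follows the same pattern: build a network and a data set, wrap them in a BackpropTrainer, hand the trainer to CrossValidator, and call validate(). A minimal self-contained sketch of that pattern (toy data; the layer sizes, fold count, and max_epochs below are placeholders, not values from any project on this page):

from pybrain.datasets import ClassificationDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.shortcuts import buildNetwork
from pybrain.tools.validation import CrossValidator, ModuleValidator

# Toy two-class data set: 2 inputs, 1 target column.
ds = ClassificationDataSet(2, 1, class_labels=["neg", "pos"])
for _ in range(20):
    ds.addSample([-1, -1], [0])
    ds.addSample([1, 1], [1])

net = buildNetwork(2, 3, 1, bias=True)
trainer = BackpropTrainer(net, ds)

cv = CrossValidator(trainer, ds, n_folds=5, valfunc=ModuleValidator.MSE)
cv.setArgs(max_epochs=10)  # without max_epochs, each fold gets a single trainer.train() call
print cv.validate()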
Code Example #2
File: bank.py Project: mmanguno/machine-learning
def nnTest(tx, ty, rx, ry, iterations):
    print "NN start"
    print strftime("%a, %d %b %Y %H:%M:%S", localtime())

    resultst = []
    resultsr = []
    positions = range(iterations)
    network = buildNetwork(16, 16, 1, bias=True)
    ds = ClassificationDataSet(16, 1, class_labels=["1", "0"])
    for i in xrange(len(tx)):
        ds.addSample(tx[i], [ty[i]])
    trainer = BackpropTrainer(network, ds, learningrate=0.05)
    validator = CrossValidator(trainer, ds, n_folds=10)
    print validator.validate()
    for i in positions:
        print trainer.train()
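        # Mean squared error of the rounded outputs; with 0/1 labels this is
        # essentially the misclassification rate on train (tx) and test (rx).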
        resultst.append(sum((np.array([round(network.activate(test)) for test in tx]) - ty)**2)/float(len(ty)))
        resultsr.append(sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2)/float(len(ry)))
        print i, resultst[i], resultsr[i]
    plt.plot(positions, resultst, 'g-', positions, resultsr, 'r-')
    plt.axis([0, iterations, 0, 1])
    plt.ylabel("Percent Error")
    plt.xlabel("Network Epoch")
    plt.title("Neural Network Error")
    plt.savefig('nn.png', dpi=500)
    print "NN end"
    print strftime("%a, %d %b %Y %H:%M:%S", localtime())
Code Example #3
File: hills.py Project: iRapha/Machine-Learning
def cvnntester(tx, ty, rx, ry, iterations, folds):
    network = buildNetwork(100, 50, 1, bias=True)
    ds = ClassificationDataSet(100,1, class_labels=["valley", "hill"])
    for i in xrange(len(tx)):
        ds.addSample(tx[i], [ty[i]])
    trainer = BackpropTrainer(network, ds, learningrate=0.005)
    cv = CrossValidator(trainer, ds, n_folds=folds, max_epochs=iterations, verbosity=True)
    print cv.validate()
    print sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2)/float(len(ry))
Code Example #4
File: classifier.py Project: DajeRoma/clicc-flask
 def CrossValidation(self,n_fold=5,num_neuron=50):  
     data_set_this=self.data_set
     data_set_this._convertToOneOfMany()
     print "Training with number of neuron :",num_neuron
     network_this=buildNetwork(data_set_this.indim,num_neuron,data_set_this.outdim,bias=True,hiddenclass=SigmoidLayer)  
     trainer_this=BackpropTrainer(network_this,dataset=data_set_this,learningrate=0.001,momentum=0,verbose=True,weightdecay=0.1)
     CV=CrossValidator(trainer_this,data_set_this,n_folds=n_fold,max_epochs=3)
     perf_this=CV.validate()
     print "The performance of this network with CV is: ", perf_this
Code Example #5
File: neural_net.py Project: marsjoy/wesandersone
 def cross_validate(self, dataset=None):
     trainer = BackpropTrainer(self.neural_network,
                               dataset=dataset,
                               momentum=0.1,
                               verbose=True,
                               weightdecay=0.01)
     validator = CrossValidator(trainer=trainer,
                                dataset=dataset,
                                n_folds=10)
     mean_validation_result = validator.validate()
     self.cross_validation_result = mean_validation_result
     logger.info('cross val result: {result}'.format(
         result=self.cross_validation_result))
Code Example #6
def cvnntester(tx, ty, rx, ry, iterations, folds):
    network = buildNetwork(100, 50, 1, bias=True)
    ds = ClassificationDataSet(100, 1, class_labels=["valley", "hill"])
    for i in xrange(len(tx)):
        ds.addSample(tx[i], [ty[i]])
    trainer = BackpropTrainer(network, ds, learningrate=0.005)
    cv = CrossValidator(trainer,
                        ds,
                        n_folds=folds,
                        max_epochs=iterations,
                        verbosity=True)
    print cv.validate()
    print sum((np.array([round(network.activate(test))
                         for test in rx]) - ry)**2) / float(len(ry))
Code Example #7
File: gridsearch.py Project: xj361685640/molSimplify
 def _validate(self, params):
     """ The overridden validate function, that uses cross-validation in order
         to determine the params' performance value.
     """
     trainer = self._getTrainerForParams(params)
     return CrossValidator(trainer, self._dataset, self._n_folds,
                           **self._validator_kwargs).validate()
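The snippet above assumes a _getTrainerForParams helper supplied by the enclosing grid-search class. A hypothetical minimal version (an illustration, not the molSimplify source) might look like:

 def _getTrainerForParams(self, params):
     # Hypothetical: interpret params as (learningrate, weightdecay) and
     # build a fresh trainer so every grid point starts from scratch.
     lr, wd = params
     return BackpropTrainer(self._module, dataset=self._dataset,
                            learningrate=lr, weightdecay=wd)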
Code Example #8
    def _validate(self, params):
        """ See GridSearchCostGamma """
        glob_idx = tuple(params)
        perf = self._performances

        if glob_idx not in perf:
            trainer = self._getTrainerForParams(params)
            local_perf = CrossValidator(trainer, self._dataset, self._n_folds, **self._validator_kwargs).validate()
            perf[glob_idx] = local_perf
        else:
            local_perf = perf[glob_idx]
        return local_perf
Code Example #9
File: rep.py Project: kevcal69/thesis
def vali():
    from pybrain.tools.validation import ModuleValidator
    from pybrain.tools.validation import CrossValidator
    with open('new_data1.txt') as data_file:
        data = json.load(data_file)
    m = [d[0] for d in data]
    case = [min([a for a, s, d in m]), float(max([a for a, s, d in m])-min([a for a, s, d in m]))]
    week = [min([s for a, s, d in m]), float(max([s for a, s, d in m])-min([s for a, s, d in m]))]
    grid = [min([d for a, s, d in m]), float(max([d for a, s, d in m])-min([d for a, s, d in m]))]
    ds = SupervisedDataSet(3, 1)
    import random
    random.shuffle(data)
    print len(data)
    for i in xrange(0, len(data)):
        # print "Adding {}th data sample".format(i),
        x1 = float(data[i][0][0] - case[0])/case[1]
        x2 = float(data[i][0][1] - week[0])/week[1]
        x3 = float(data[i][0][2] - grid[0])/grid[1]
        input = (x1, x2, x3)
        output = data[i][1]
        ds.addSample(input, output)
        # print ":: Done"

    print "Train"
    net = buildNetwork(3, 3, 1, bias=True)
    tstdata, trndata = ds.splitWithProportion( 0.33 )
    trainer = BackpropTrainer(net, trndata)
    mse = []
    modval = ModuleValidator()
    for i in range(100):
        trainer.trainEpochs(1)
        trainer.trainOnDataset(dataset=trndata)
        cv = CrossValidator(trainer, trndata, n_folds=10, valfunc=modval.MSE)
        mse_val = cv.validate()
        print "MSE %f @ %i" % (mse_val, i)
        mse.append(mse_val)

    with open('cross_validation.json', 'w') as outfile:
        json.dump(mse, outfile, indent=4)
Code Example #10
File: classifier.py Project: DajeRoma/clicc-flask
    def CV_best_struct(self,n_fold=5):  

        data_set_this = self.data_set
        
        perf=[]
        for num_neuron in np.arange(200,4000,500):
            print "Training with number of neuron :",num_neuron
            
            network_this=buildNetwork(data_set_this.indim,num_neuron,data_set_this.outdim,bias=True,hiddenclass=SigmoidLayer,outclass=SoftmaxLayer)  
            trainer_this=BackpropTrainer(network_this,dataset=data_set_this,learningrate=0.001,momentum=0,verbose=False,weightdecay=0.1)
        
            '''here, the data set should be raw data instead of PCA data;
               do PCA after the data split (see the sketch after this example)'''
            CV=CrossValidator(trainer_this,data_set_this,n_folds=n_fold,max_epochs=3)
            perf_this=CV.validate()
            
            perf.append(perf_this)
            print "The performance of this network with CV is: ", perf_this
        
        print "All performance: ", perf
        output=open("CV_results_200to4000.csv",'wb')
        filewriter=csv.writer(output)
        filewriter.writerow(perf)
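The quoted comment above points at a real pitfall: PCA fitted on the full data set leaks test-fold statistics into the features. A numpy-only sketch of the fold-wise alternative (pca_fit / pca_apply are hypothetical helpers, not part of this project):

import numpy as np

def pca_fit(X, k):
    # Fit the projection on the training fold only.
    mean = X.mean(axis=0)
    _, _, vt = np.linalg.svd(X - mean, full_matrices=False)
    return mean, vt[:k].T

def pca_apply(X, mean, basis):
    return np.dot(X - mean, basis)

# Per fold: fit on the training rows, then project both splits.
# for test_idx in np.array_split(np.random.permutation(len(X)), n_folds):
#     train_idx = np.setdiff1d(np.arange(len(X)), test_idx)
#     mean, basis = pca_fit(X[train_idx], k=10)
#     X_train = pca_apply(X[train_idx], mean, basis)
#     X_test = pca_apply(X[test_idx], mean, basis)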
Code Example #11
 def train(self, args):
     if(self.data.ds == None):
         print("Can't train without loaded data")
         return
     if(args != [] and len(args) >= 2):
         self.net.epochs = int(args[1])
     if(self.net.trainingType == "gradient"):
         if(self.trainer == None):
             self.trainer, self.returnsNet = self.__getGradientTrainer();
         self.__train(self.trainer.trainEpochs, self.returnsNet)
     elif(self.net.trainingType == "optimization"):
         if(self.trainer == None):
             self.trainer, self.returnsNet = self.__getOptimizationTrainer();
         self.__train(self.trainer.learn, self.returnsNet)
         return
     elif(self.net.trainingType == "crossval"):
         if(self.trainer == None):
             self.trainer, self.returnsNet = self.__getGradientTrainer();
         # Pass the scoring function itself; CrossValidator calls valfunc(module, dataset) on each fold.
         evaluation = ModuleValidator.classificationPerformance
         validator = CrossValidator(trainer=self.trainer, dataset=self.trainer.ds, n_folds=5, valfunc=evaluation, verbose=True, max_epochs=1)
         print(validator.validate())
     else:
         raise Exception("Cannot create trainer, unknown training type: " + self.net.trainingType)
Code Example #12
translation = {
    'x': 0,
    'o': 1,
    'b': 2,
}


def row_preprocess(row):
    return [translation[x] for x in row]


if __name__ == "__main__":
    raw_data = list(csv.reader(open("tic-tac-toe.data")))
    targets = [1 if x[-1] == 'positive' else 0 for x in raw_data]
    inputs = [row_preprocess(x[:-1]) for x in raw_data]
    alldata = ClassificationDataSet(
        9, class_labels=['negative', 'positive'])

    for (i, t) in zip(inputs, targets):
        alldata.addSample(i, [t])

    network = buildNetwork(9, int(sys.argv[1]), 1,
                           hiddenclass=SigmoidLayer,
                           outclass=LinearLayer)

    trainer = BackpropTrainer(network, weightdecay=0.001)

    validator = CrossValidator(trainer, alldata, n_folds=2,
                               valfunc=ModuleValidator.MSE)
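    # setArgs stores per-fold training options; max_epochs caps how long each fold is trained.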
    validator.setArgs(max_epochs=500)
    ret = validator.validate()
    print(ret)
Code Example #13
File: main.py Project: yegle/homework-code-at-uga
from pybrain.tools.validation import CrossValidator, ModuleValidator

translation = {"x": 0, "o": 1, "b": 2}


def row_preprocess(row):
    return [translation[x] for x in row]


if __name__ == "__main__":
    raw_data = list(csv.reader(open("tic-tac-toe.data")))
    targets = [1 if x[-1] == "positive" else 0 for x in raw_data]
    inputs = [row_preprocess(x[:-1]) for x in raw_data]
    alldata = ClassificationDataSet(9, class_labels=["negative", "positive"])

    for (i, t) in zip(inputs, targets):
        alldata.addSample(i, [t])

    network = buildNetwork(9, 3, 1, hiddenclass=SigmoidLayer, outclass=LinearLayer)

    # validation_data, training_data = alldata.splitWithProportion(0.25)

    trainer = BackpropTrainer(network, verbose=True, weightdecay=0.001, learningrate=0.1)
    trainer.setData(alldata)

    trainer.trainUntilConvergence(maxEpochs=6000)

    validator = CrossValidator(trainer, alldata, n_folds=10, valfunc=ModuleValidator.MSE)
    ret = validator.validate()
    print(ret)
Code Example #14
File: neural_net.py Project: chprice/Terrier
 def crossValidation(self, filename):
     trainer = BackpropTrainer(self.net)
     crossValidator = CrossValidator(trainer, self.createDataSetFromFile(filename), n_folds=10)
     result = crossValidator.validate()
     print result*100, "%"
Code Example #15
File: neural_net.py Project: marsjoy/wesandersone
 def cross_validate(self, dataset=None):
     trainer = BackpropTrainer(self.neural_network, dataset=dataset, momentum=0.1, verbose=True, weightdecay=0.01)
     validator = CrossValidator(trainer=trainer, dataset=dataset, n_folds=10)
     mean_validation_result = validator.validate()
     self.cross_validation_result = mean_validation_result
     logger.info('cross val result: {result}'.format(result=self.cross_validation_result))
Code Example #16
alldata.addSample([-1, -1], [0])
alldata.addSample([-1, -1], [0])
alldata.addSample([-1, -1], [0])
alldata.addSample([-1, -1], [0])

alldata.addSample([1, 1], [1])
alldata.addSample([1, 1], [1])
alldata.addSample([1, 1], [1])
alldata.addSample([1, 1], [1])
alldata.addSample([1, 1], [1])

tstdata, trndata = alldata.splitWithProportion(0.25)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

# We can also examine the dataset
print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]

fnn     = buildNetwork( trndata.indim, 5, trndata.outdim, recurrent=False )
trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01 )

# I am not sure about this, I don't think my production code is implemented like this
modval = ModuleValidator()
trainer.trainEpochs(20)
trainer.trainOnDataset(dataset=trndata)
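# CrossValidator deep-copies the trainer for every fold, so the epochs
# trained above only set the starting weights that each fold inherits.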
cv = CrossValidator( trainer, trndata, n_folds=5, valfunc=modval.MSE )
print "MSE %f" %( cv.validate() )
Code Example #17
File: creditmlp.py Project: vinaych7/bitsandbytes
import pylab, numpy
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure import TanhLayer
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.validation import CrossValidator, ModuleValidator

results = pylab.loadtxt('credit.txt')
target = results[:, -1]
data = numpy.delete(results, -1, 1)

#print "data", tuple(data[0])
#print "target", (target[0],)

#net = buildNetwork(14, 10, 1)
net = buildNetwork(14, 10, 1, hiddenclass=TanhLayer)
#print net.activate([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])

ds = SupervisedDataSet(14, 1)

for i in range(len(data)):
    ds.addSample(tuple(data[i]), (target[i], ))

trainer = BackpropTrainer(net, ds)
evaluation = ModuleValidator()
validator = CrossValidator(trainer=trainer,
                           dataset=trainer.ds,
                           n_folds=5,
                           valfunc=evaluation.MSE)
print(validator.validate())
Code Example #18
File: ex_neural_net.py Project: Tskatom/Finance
alldata.addSample([1,1],[1])
alldata.addSample([1,1],[1])
alldata.addSample([1,1],[1])
alldata.addSample([1,1],[1])
alldata.addSample([1,1],[1])

tstdata, trndata = alldata.splitWithProportion( 0.25 )
trndata._convertToOneOfMany( )
tstdata._convertToOneOfMany( )
 
#We can also examine the dataset
print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]

fnn     = buildNetwork( trndata.indim, 5, trndata.outdim, recurrent=False )
trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01 )

# I am not sure about this, I don't think my production code is implemented like this
modval = ModuleValidator()
for i in range(1000):
      trainer.trainEpochs(1)
      trainer.trainOnDataset(dataset=trndata)
      cv = CrossValidator( trainer, trndata, n_folds=5, valfunc=modval.MSE )
      print "MSE %f @ %i" %( cv.validate(), i )

print tstdata
print ">", trainer.testOnClassData(dataset=tstdata)
Code Example #19
def get_pybrain_data_set(data, input_cols, target_cols=1):
    data_set = SupervisedDataSet(input_cols, target_cols)
    for row in data:
        # get first X columns for inputs
        # (technically "all indices less than X")
        # get last column as target
        data_set.addSample(tuple(row[:input_cols]), tuple([row[input_cols]]))

    return data_set


# normalize all values
p['wti_var'] = normalize(p['wti_var'])
p['wti_skew'] = normalize(p['wti_skew'])
p['wti_curt'] = normalize(p['wti_curt'])
p['i_entr'] = normalize(p['i_entr'])

# shuffle up data
p = p.reindex(np.random.permutation(p.index))  # reindex returns a copy; assign it back

trainer = BackpropTrainer(
    buildNetwork(4, 5, 1),  # 4 input nodes, 5 hidden nodes, 1 output node
    get_pybrain_data_set(p.as_matrix(), 4),
    verbose=True)

#print(trainer.train())
#evaluator = ModuleValidator.classificationPerformance(trainer.module, trainer.ds)
cv = CrossValidator(trainer, trainer.ds, n_folds=5)
cv.setArgs(max_epochs=2, verbose=True)
print(cv.validate())
Code Example #20
    trndata.addSample( trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1] )

trndata._convertToOneOfMany( )
tstdata._convertToOneOfMany( )

fnn = buildNetwork( trndata.indim,120,trndata.outdim, outclass=SoftmaxLayer )
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.00001)
modval = ModuleValidator()

# We define the number of iterations we want to train our model.
for i in range(100):
	trainer.trainEpochs(1)
	trnresult = percentError(trainer.testOnClassData(dataset=trndata),trndata['class'])
	print "epoch : " , trainer.totalepochs," train error: " , trnresult

# We validate our model by applying the n-folds technique and check the Mean Square Error
cv = CrossValidator( trainer, trndata, n_folds=5, valfunc=modval.MSE )
print "MSE %f at loop %i"%(cv.validate(),i)

# Finally we test our data on the model we built
perror = percentError(trainer.testOnClassData(dataset=tstdata),tstdata['class'])
print " Percent error on test data is - ",100.0 - perror

# We also take a dump of the model and the top features
fileObject2 = open('ANNDUMP', 'wb')
pickle.dump(fnn, fileObject2)
fileObject2.close()

fObject = open('VOCABDUMP','wb')
pickle.dump(vocab, fObject)
fObject.close()
Code Example #21
    language, num = g.split("/")[-1].split("_")
    languages.append(Language(io.open(g, "r+"), language))

n = Network(languages)
n.train()
n.trainer.verbose = True
n.trainer.trainUntilConvergence()


def correctValFunc(output, target):
    assert len(output) == len(target)

    n_correct = 0

    for idx, instance in enumerate(output):
        # This will find the maximum likelihood language
        classification = instance.argmax(axis=0)
        objective = target[idx].argmax(axis=0)
        if objective == classification:
            n_correct += 1

    return 1 - (float(n_correct) / float(len(output)))


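# Note: CrossValidator calls valfunc(module, dataset), so despite the parameter
# names below, "output" receives the module and "target" the dataset;
# ModuleValidator.validate then computes the network output and applies
# correctValFunc to the resulting (output, target) arrays.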
def correct(output, target):
    return ModuleValidator.validate(correctValFunc, output, target)


cv = CrossValidator(n.trainer, n.dataSet, valfunc=correct, n_folds=2)
print cv.validate()
Code Example #22
File: nn.py Project: peterlebrun/ml_project1
data_set = common.get_bc_data_for_nn()

test_means = []
test_std   = []
x_vals = [2, 3, 4, 5, 6, 7, 8]

for x in x_vals:
    means = []
    for i in range(20):
        trainer = BackpropTrainer(
                    buildNetwork(3, x, 1),
                    data_set,
                    verbose=True
        )

        print "%d %d" % (x, i)
        trainer.trainEpochs(3)
        cv = CrossValidator(trainer, trainer.ds, n_folds=5, valfunc=ModuleValidator.MSE)
        means.append(cv.validate())

    test_means.append(np.mean(means))
    test_std.append(np.std(means))

common.plot_nn_mse(
    title   = "Breast Cancer Survival Neural Network",
    x_label = "Number of hidden nodes",
    x_vals  = x_vals,
    y_means = np.array(test_means),
    y_std   = np.array(test_std)
)
Code Example #23
languages = []

for g in glob.glob("./data/*.txt"):
  language, num = g.split("/")[-1].split("_")
  languages.append(Language(io.open(g, 'r+'), language))

n = Network(languages)
n.train()
n.trainer.verbose = True
n.trainer.trainUntilConvergence()

def correctValFunc(output, target):
  assert len(output) == len(target)

  n_correct = 0

  for idx, instance in enumerate(output):
    # This will find the maximum likelihood language
    classification = instance.argmax(axis=0)
    objective = target[idx].argmax(axis=0)
    if objective == classification:
      n_correct += 1

  return 1 - (float(n_correct) / float(len(output)))

def correct(output, target):
  return ModuleValidator.validate(correctValFunc, output, target)

cv = CrossValidator(n.trainer, n.dataSet, valfunc=correct, n_folds=2)
print cv.validate()
Code Example #24
# Convert the classes into a five-way one-of-many encoding
dsTrain_test._convertToOneOfMany(bounds=[0, 1])
dsTest_test._convertToOneOfMany(bounds=[0, 1])

#print dsTrain_test['target']

# Split into training and test sets
dsTrain,dsTest = dsBuild(data)

# Train the neural network
netModel = netBuild(dsTrain_test)

modval = ModuleValidator()
netModel.trainEpochs(20)
netModel.trainUntilConvergence(maxEpochs=1000)
cv = CrossValidator(netModel, dsTrain_test, n_folds=5, valfunc=modval.MSE )
print "MSE %f" %( cv.validate() )


from sklearn.externals import joblib
joblib.dump(netModel, "train_model.m")
netModel =joblib.load("train_model.m")


# F1-score check

pred=[]
really =[]
yuanma = []
calma = []
Code Example #25
def compare_l2_regularization():
    train_features, train_labels, test_features, test_labels = get_breast_cancer_data()
    optimal_num_layers = 6
    num_neurons = [optimal_num_layers * [16]]
    start_time = datetime.now()
    train_accuracy1 = []
    test_accuracy1 = []
    train_accuracy2 = []
    test_accuracy2 = []
    iterations = range(250)
    nn1 = buildNetwork(30, 16, 1, bias=True)
    nn2 = buildNetwork(30, 16, 1, bias=True)
    dataset = ClassificationDataSet(len(train_features[0]),
                                    len(train_labels[0]),
                                    class_labels=["1", "2"])

    for instance in range(len(train_features)):
        dataset.addSample(train_features[instance], train_labels[instance])

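    # weightdecay is BackpropTrainer's L2 penalty coefficient; the two trainers
    # below differ only in its strength (1e-4 vs 1e-3).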
    trainer1 = BackpropTrainer(nn1, dataset, weightdecay=0.0001)
    validator1 = CrossValidator(trainer1, dataset)
    print(validator1.validate())

    trainer2 = BackpropTrainer(nn2, dataset, weightdecay=0.001)
    validator2 = CrossValidator(trainer2, dataset)
    print(validator2.validate())

    for iteration in iterations:
        train_accuracy1.append(
            sum((np.array(
                [np.round(nn1.activate(test))
                 for test in train_features]) - train_labels)**2) /
            float(len(train_labels)))
        test_accuracy1.append(
            sum((np.array(
                [np.round(nn1.activate(test))
                 for test in test_features]) - test_labels)**2) /
            float(len(test_labels)))
        train_accuracy2.append(
            sum((np.array(
                [np.round(nn2.activate(test))
                 for test in train_features]) - train_labels)**2) /
            float(len(train_labels)))
        test_accuracy2.append(
            sum((np.array(
                [np.round(nn2.activate(test))
                 for test in test_features]) - test_labels)**2) /
            float(len(test_labels)))

    plt.plot(iterations, train_accuracy1)
    plt.plot(iterations, test_accuracy1)
    plt.plot(iterations, train_accuracy2)
    plt.plot(iterations, test_accuracy2)
    plt.legend([
        "Train Accuracy (0.0001)", "Test Accuracy (0.0001)",
        "Train Accuracy (0.001)", "Test Accuracy (0.001"
    ])
    plt.xlabel("Num Epoch")
    plt.ylabel("Percent Error")
    plt.title("Neural Network on Breast Cancer Data with " + str(num_neurons) +
              " layers")
    plt.savefig("nn_breast_cancer_weight_decay.png")