Example 1
 def crossValidation(self, filename):
     trainer = BackpropTrainer(self.net)
     crossValidator = CrossValidator(trainer,
                                     self.createDataSetFromFile(filename),
                                     n_folds=10)
     result = crossValidator.validate()
     print result * 100, "%"
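For context: CrossValidator.validate() returns the average of its validation function over the folds, and the default valfunc is ModuleValidator.classificationPerformance (the fraction of samples classified correctly), which is why the snippet above multiplies by 100 before printing. A minimal self-contained sketch, with a made-up toy dataset standing in for createDataSetFromFile:

from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.validation import CrossValidator

# Illustrative toy data only: two inputs, one binary target.
ds = SupervisedDataSet(2, 1)
for _ in range(10):
    ds.addSample([0, 0], [0])
    ds.addSample([1, 1], [1])

net = buildNetwork(2, 3, 1, bias=True)
trainer = BackpropTrainer(net, ds)

# validate() trains a copy of the trainer on each fold and averages the
# default classification score over the held-out folds.
cv = CrossValidator(trainer, ds, n_folds=5)
print(cv.validate() * 100)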
Example 2
 def _validate(self, params):
     """ The overridden validate function, that uses cross-validation in order
         to determine the params' performance value.
     """
     trainer = self._getTrainerForParams(params)
     return CrossValidator(trainer, self._dataset, self._n_folds,
                           **self._validator_kwargs).validate()
Example 3
    def _validate(self, params):
        """ See GridSearchCostGamma """
        glob_idx = tuple(params)
        perf = self._performances

        if glob_idx not in perf:
            trainer = self._getTrainerForParams(params)
            local_perf = CrossValidator(trainer, self._dataset, self._n_folds, **self._validator_kwargs).validate()
            perf[glob_idx] = local_perf
        else:
            local_perf = perf[glob_idx]
        return local_perf
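The point of Example 3 is memoization: each parameter combination's cross-validation score is cached in self._performances, keyed by the parameter tuple, so a grid search never re-validates a grid point it has already scored. A hypothetical standalone sketch of the same pattern (performances and build_validator are illustrative names, not from the original):

performances = {}

def cached_validate(params, build_validator):
    # key on an immutable tuple so repeated grid points hit the cache
    key = tuple(params)
    if key not in performances:
        performances[key] = build_validator(params).validate()
    return performances[key]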
Example 4
 def cross_validate(self, dataset=None):
     trainer = BackpropTrainer(self.neural_network,
                               dataset=dataset,
                               momentum=0.1,
                               verbose=True,
                               weightdecay=0.01)
     validator = CrossValidator(trainer=trainer,
                                dataset=dataset,
                                n_folds=10)
     mean_validation_result = validator.validate()
     self.cross_validation_result = mean_validation_result
     logger.info('cross val result: {result}'.format(
         result=self.cross_validation_result))
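A note on Example 4, from my reading of the PyBrain API rather than this author's code: validate() copies the trainer for each fold and re-binds the copy to that fold's training split, so the dataset bound to BackpropTrainer here is not itself what gets validated. Spelling out the defaults the example relies on:

# classificationPerformance is CrossValidator's default valfunc; shown
# explicitly here for clarity (requires ModuleValidator to be imported).
from pybrain.tools.validation import CrossValidator, ModuleValidator

validator = CrossValidator(trainer=trainer, dataset=dataset, n_folds=10,
                           valfunc=ModuleValidator.classificationPerformance)
mean_validation_result = validator.validate()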
Example 5
def cvnntester(tx, ty, rx, ry, iterations, folds):
    network = buildNetwork(100, 50, 1, bias=True)
    ds = ClassificationDataSet(100, 1, class_labels=["valley", "hill"])
    for i in xrange(len(tx)):
        ds.addSample(tx[i], [ty[i]])
    trainer = BackpropTrainer(network, ds, learningrate=0.005)
    cv = CrossValidator(trainer,
                        ds,
                        n_folds=folds,
                        max_epochs=iterations,
                        verbose=True)  # 'verbose' matches the keyword the other examples pass; 'verbosity' appears to be silently ignored
    print cv.validate()
    print sum((np.array([round(network.activate(test))
                         for test in rx]) - ry)**2) / float(len(ry))
Example 6
 def train(self, args):
     if self.data.ds is None:
         print("Can't train without loaded data")
         return
     if len(args) >= 2:
         self.net.epochs = int(args[1])
     if self.net.trainingType == "gradient":
         if self.trainer is None:
             self.trainer, self.returnsNet = self.__getGradientTrainer()
         self.__train(self.trainer.trainEpochs, self.returnsNet)
     elif self.net.trainingType == "optimization":
         if self.trainer is None:
             self.trainer, self.returnsNet = self.__getOptimizationTrainer()
         self.__train(self.trainer.learn, self.returnsNet)
         return
     elif self.net.trainingType == "crossval":
         if self.trainer is None:
             self.trainer, self.returnsNet = self.__getGradientTrainer()
         # valfunc must be the scoring callable itself, not a precomputed score
         validator = CrossValidator(trainer=self.trainer, dataset=self.trainer.ds, n_folds=5,
                                    valfunc=ModuleValidator.classificationPerformance,
                                    verbose=True, max_epochs=1)
         print(validator.validate())
     else:
         raise Exception("Cannot create trainer, unknown training type: " + self.net.trainingType)
Example 7
def get_pybrain_data_set(data, input_cols, target_cols=1):
    data_set = SupervisedDataSet(input_cols, target_cols)
    for row in data:
        # get first X columns for inputs
        # (technically "all indices less than X")
        # get last column as target
        data_set.addSample(tuple(row[:input_cols]), tuple([row[input_cols]]))

    return data_set


# normalize all values
p['wti_var'] = normalize(p['wti_var'])
p['wti_skew'] = normalize(p['wti_skew'])
p['wti_curt'] = normalize(p['wti_curt'])
p['i_entr'] = normalize(p['i_entr'])

# shuffle up data
p = p.reindex(np.random.permutation(p.index))  # reassign: reindex returns a new frame

trainer = BackpropTrainer(
    buildNetwork(4, 5, 1),  # 4 input nodes, 5 hidden nodes, 1 output node
    get_pybrain_data_set(p.as_matrix(), 4),
    verbose=True)

#print(trainer.train())
#evaluator = ModuleValidator.classificationPerformance(trainer.module, trainer.ds)
cv = CrossValidator(trainer, trainer.ds, n_folds=5)
cv.setArgs(max_epochs=2, verbose=True)
print(cv.validate())
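setArgs is how CrossValidator receives per-fold training options: max_epochs bounds how long each fold's trainer copy trains, and verbose toggles progress output. As far as I can tell the constructor forwards extra keyword arguments to setArgs, so the two-step form above could be collapsed:

# Equivalent to constructing cv and then calling cv.setArgs(...):
cv = CrossValidator(trainer, trainer.ds, n_folds=5, max_epochs=2, verbose=True)
print(cv.validate())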
Example 8
alldata.addSample([-1, -1], [0])
alldata.addSample([-1, -1], [0])
alldata.addSample([-1, -1], [0])
alldata.addSample([-1, -1], [0])

alldata.addSample([1, 1], [1])
alldata.addSample([1, 1], [1])
alldata.addSample([1, 1], [1])
alldata.addSample([1, 1], [1])
alldata.addSample([1, 1], [1])

tstdata, trndata = alldata.splitWithProportion(0.25)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

# We can also examine the dataset
print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]

fnn     = buildNetwork( trndata.indim, 5, trndata.outdim, recurrent=False )
trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01 )

# I am not sure about this, I don't think my production code is implemented like this
modval = ModuleValidator()
trainer.trainEpochs(20)
trainer.trainOnDataset(dataset=trndata)
cv = CrossValidator( trainer, trndata, n_folds=5, valfunc=modval.MSE )
print "MSE %f" %( cv.validate() )
Example 9
# convert the class labels to 5-bit one-of-many encoding
dsTrain_test._convertToOneOfMany(bounds=[0, 1])
dsTest_test._convertToOneOfMany(bounds=[0, 1])

#print dsTrain_test['target']

# split into training and test sets
dsTrain,dsTest = dsBuild(data)

# train the neural network
netModel = netBuild(dsTrain_test)

modval = ModuleValidator()
netModel.trainEpochs(20)
netModel.trainUntilConvergence(maxEpochs=1000)
cv = CrossValidator(netModel, dsTrain_test, n_folds=5, valfunc=modval.MSE )
print "MSE %f" %( cv.validate() )


from sklearn.externals import joblib
joblib.dump(netModel, "train_model.m")
netModel = joblib.load("train_model.m")
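A side note on the persistence code above: sklearn.externals.joblib was deprecated and later removed from scikit-learn, so on current installs the standalone joblib package is the drop-in replacement:

import joblib  # pip install joblib; replaces sklearn.externals.joblib
joblib.dump(netModel, "train_model.m")
netModel = joblib.load("train_model.m")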


# F1-score check

pred=[]
really =[]
yuanma = []
calma = []
Example 10
languages = []

for g in glob.glob("./data/*.txt"):
  language, num = g.split("/")[-1].split("_")
  languages.append(Language(io.open(g, 'r+'), language))

n = Network(languages)
n.train()
n.trainer.verbose = True
n.trainer.trainUntilConvergence()

def correctValFunc(output, target):
  assert len(output) == len(target)

  n_correct = 0

  for idx, instance in enumerate(output):
    # This will find the maximum likelihood language
    classification = instance.argmax(axis=0)
    objective = target[idx].argmax(axis=0)
    if objective == classification:
      n_correct += 1

  # note: despite the function's name, this returns the error rate
  return 1 - (float(n_correct) / float(len(output)))

def correct(output, target):
  # CrossValidator actually passes (module, dataset) here; ModuleValidator.validate
  # runs the module over the dataset and applies correctValFunc to the results
  return ModuleValidator.validate(correctValFunc, output, target)

cv = CrossValidator(n.trainer, n.dataSet, valfunc=correct, n_folds=2)
print cv.validate()
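A custom valfunc handed to CrossValidator is called with the trained module and the held-out fold, not with output/target arrays; ModuleValidator.validate is the bridge that runs the module over the dataset and passes the collected outputs and targets to the inner function. The snippet above works, but its parameter names obscure this. A sketch with arguments named for what they actually receive (error_rate and valfunc are illustrative names):

def error_rate(output, target):
  # fraction of samples whose argmax class disagrees with the target
  wrong = sum(1 for idx, row in enumerate(output)
              if row.argmax(axis=0) != target[idx].argmax(axis=0))
  return float(wrong) / float(len(output))

def valfunc(module, dataset):
  # ModuleValidator.validate(valfunc, module, dataset) activates the module
  # on every sample and applies error_rate to the resulting arrays
  return ModuleValidator.validate(error_rate, module, dataset)

cv = CrossValidator(n.trainer, n.dataSet, valfunc=valfunc, n_folds=2)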
Example 11
data_set = common.get_bc_data_for_nn()

test_means = []
test_std   = []
x_vals = [2, 3, 4, 5, 6, 7, 8]

for x in x_vals:
    means = []
    for i in range(20):
        trainer = BackpropTrainer(
                    buildNetwork(3, x, 1),
                    data_set,
                    verbose=True
        )

        print "%d %d" % (x, i)
        trainer.trainEpochs(3)
        cv = CrossValidator(trainer, trainer.ds, n_folds=5, valfunc=ModuleValidator.MSE)
        means.append(cv.validate())

    test_means.append(np.mean(means))
    test_std.append(np.std(means))

common.plot_nn_mse(
    title   = "Breast Cancer Survival Neural Network",
    x_label = "Number of hidden nodes",
    x_vals  = x_vals,
    y_means = np.array(test_means),
    y_std   = np.array(test_std)
)
Example 12
import pylab, numpy
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure import TanhLayer
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.validation import CrossValidator, ModuleValidator

results = pylab.loadtxt('credit.txt')
target = results[:, -1]
data = numpy.delete(results, -1, 1)

#print "data", tuple(data[0])
#print "target", (target[0],)

#net = buildNetwork(14, 10, 1)
net = buildNetwork(14, 10, 1, hiddenclass=TanhLayer)
#print net.activate([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])

ds = SupervisedDataSet(14, 1)

for i in range(len(data)):
    ds.addSample(tuple(data[i]), (target[i], ))

trainer = BackpropTrainer(net, ds)
evaluation = ModuleValidator()
validator = CrossValidator(trainer=trainer,
                           dataset=trainer.ds,
                           n_folds=5,
                           valfunc=evaluation.MSE)
print(validator.validate())
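Example 12 scores each fold by MSE. For a 0/1 target like this credit data, the default valfunc may be easier to read, since it rounds the network output and reports the fraction classified correctly. A hedged alternative, assuming the last column of credit.txt really is a binary label:

# Same validator, but scored by classification accuracy instead of MSE.
validator = CrossValidator(trainer=trainer, dataset=trainer.ds, n_folds=5,
                           valfunc=ModuleValidator.classificationPerformance)
print(validator.validate())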
Example 13
def compare_l2_regularization():
    train_features, train_labels, test_features, test_labels = get_breast_cancer_data()
    optimal_num_layers = 6
    num_neurons = [optimal_num_layers * [16]]  # only used in the plot title below
    start_time = datetime.now()
    train_accuracy1 = []
    test_accuracy1 = []
    train_accuracy2 = []
    test_accuracy2 = []
    iterations = range(250)
    nn1 = buildNetwork(30, 16, 1, bias=True)
    nn2 = buildNetwork(30, 16, 1, bias=True)
    dataset = ClassificationDataSet(len(train_features[0]),
                                    len(train_labels[0]),
                                    class_labels=["1", "2"])

    for instance in range(len(train_features)):
        dataset.addSample(train_features[instance], train_labels[instance])

    trainer1 = BackpropTrainer(nn1, dataset, weightdecay=0.0001)
    validator1 = CrossValidator(trainer1, dataset)
    print(validator1.validate())

    trainer2 = BackpropTrainer(nn2, dataset, weightdecay=0.001)
    validator2 = CrossValidator(trainer2, dataset)
    print(validator2.validate())

    for iteration in iterations:
        # train one epoch per iteration so the error curves actually evolve
        trainer1.train()
        trainer2.train()
        train_accuracy1.append(
            sum((np.array(
                [np.round(nn1.activate(test))
                 for test in train_features]) - train_labels)**2) /
            float(len(train_labels)))
        test_accuracy1.append(
            sum((np.array(
                [np.round(nn1.activate(test))
                 for test in test_features]) - test_labels)**2) /
            float(len(test_labels)))
        train_accuracy2.append(
            sum((np.array(
                [np.round(nn2.activate(test))
                 for test in train_features]) - train_labels)**2) /
            float(len(train_labels)))
        test_accuracy2.append(
            sum((np.array(
                [np.round(nn2.activate(test))
                 for test in test_features]) - test_labels)**2) /
            float(len(test_labels)))

    plt.plot(iterations, train_accuracy1)
    plt.plot(iterations, test_accuracy1)
    plt.plot(iterations, train_accuracy2)
    plt.plot(iterations, test_accuracy2)
    plt.legend([
        "Train Error (0.0001)", "Test Error (0.0001)",
        "Train Error (0.001)", "Test Error (0.001)"
    ])
    plt.xlabel("Epoch")
    plt.ylabel("Error Rate")
    plt.title("Neural Network on Breast Cancer Data with " + str(num_neurons) +
              " layers")
    plt.savefig("nn_breast_cancer_weight_decay.png")
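Since weightdecay is BackpropTrainer's L2-regularization knob, the comparison above boils down to scoring two decay settings with the same default validator. A hypothetical condensed form of that core, reusing the dataset built in the function:

# Score each weight-decay setting with CrossValidator's defaults
# (n_folds=5, classification performance).
for decay in (0.0001, 0.001):
    nn = buildNetwork(30, 16, 1, bias=True)
    trainer = BackpropTrainer(nn, dataset, weightdecay=decay)
    print("weightdecay=%s -> %s" % (decay, CrossValidator(trainer, dataset).validate()))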