stimalldata.addSample(X_successful_stim[xnum,:],y_successful_stim[xnum]) # split the data into testing and training data tstdata_temp, trndata_temp = alldata.splitWithProportion(0.15) # small bug with _convertToOneOfMany function. This fixes that tstdata = ClassificationDataSet(num_features,1,nb_classes=2) for n in xrange(0, tstdata_temp.getLength()): tstdata.addSample(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1]) trndata = ClassificationDataSet(num_features,1,nb_classes=2) for n in xrange(0,trndata_temp.getLength()): trndata.addSample(trndata_temp.getSample(n)[0],trndata_temp.getSample(n)[1]) valdata = ClassificationDataSet(num_features,1,nb_classes=2) for n in xrange(0,stimalldata.getLength()): valdata.addSample(stimalldata.getSample(n)[0],stimalldata.getSample(n)[1]) # organizes dataset for pybrain trndata._convertToOneOfMany() tstdata._convertToOneOfMany() valdata._convertToOneOfMany() # sample printouts before running classifier print "Number of training patterns: ", len(trndata) print "Input and output dimensions: ", trndata.indim, trndata.outdim print "First sample (input, target, class):" print trndata['input'][0], trndata['target'][0], trndata['class'][0] # build the ANN
class Network(object):
    """Feed-forward neural-network wrapper around pybrain.

    Builds a fully connected network with a linear input layer, a linear
    output layer, and (number_of_layers - 2) hidden layers whose width is
    floor((input_size + output_size) / 2).

    NOTE(review): the ``size_of_hidden_layers`` and ``net_bias`` parameters
    are accepted but currently unused -- hidden width is derived from the
    input/output sizes and no bias units are wired in. They are kept for
    backward compatibility; confirm intent before removing.
    """

    def __init__(self, input_size, output_size, number_of_layers=3,
                 size_of_hidden_layers=3, type_of_hidden_layer='sigmoid',
                 net_bias=False, epochs=100):
        """Construct and sort the underlying FeedForwardNetwork.

        input_size           -- number of input features
        output_size          -- number of output units
        number_of_layers     -- total layer count, incl. input/output (>= 2)
        type_of_hidden_layer -- 'linear', 'sigmoid' or 'tanh'
        epochs               -- default epoch count used by train(False)

        Raises ValueError for an invalid layer count or hidden-layer type
        (the original code silently built a broken net and failed later
        with an IndexError at connection time).
        """
        if number_of_layers < 2:
            raise ValueError("number_of_layers must be at least 2 "
                             "(an input and an output layer)")
        hidden_types = {'linear': LinearLayer,
                        'sigmoid': SigmoidLayer,
                        'tanh': TanhLayer}
        if type_of_hidden_layer not in hidden_types:
            raise ValueError("type_of_hidden_layer must be one of "
                             "'linear', 'sigmoid', 'tanh'")

        self.net = FeedForwardNetwork()
        self.num_epochs = epochs

        # Floor division: identical to the original '/' under Python 2 ints,
        # but keeps the layer size an int if ever run under Python 3.
        hidden_size = (input_size + output_size) // 2

        # set up layers of the network, chaining each to the previous one
        layers = []
        for i in range(number_of_layers):
            if i == 0:
                layers.append(LinearLayer(input_size))
                self.net.addInputModule(layers[i])
            elif i == (number_of_layers - 1):
                layers.append(LinearLayer(output_size))
                self.net.addOutputModule(layers[i])
                self.net.addConnection(FullConnection(layers[i-1], layers[i]))
            else:
                layers.append(hidden_types[type_of_hidden_layer](hidden_size))
                self.net.addModule(layers[i])
                self.net.addConnection(FullConnection(layers[i-1], layers[i]))
        self.net.sortModules()

        self.input_size = input_size
        self.output_size = output_size

    def load(self, filedir):
        """Replace self.net with a network read from an XML file."""
        self.net = NetworkReader.readFrom(filedir)

    def save(self, filedir):
        """Write self.net to an XML file."""
        NetworkWriter.writeToFile(self.net, filedir)

    def prepare_trainer(self, filedir):
        """Load CSV training data and build a backpropagation trainer.

        Per row: column 0 is skipped (presumably a row id/label -- TODO
        confirm against the data files), the next input_size columns are
        the inputs and the following output_size columns are the targets.
        Sets self.ds and self.trainer.
        """
        # initialize the data set
        self.ds = SupervisedDataSet(self.input_size, self.output_size)
        with open(filedir, 'rt') as csvfile:
            reader = csv.reader(csvfile, delimiter=',')
            for row in reader:
                # format data: coerce the relevant CSV columns to floats
                input_data = tuple(map(float, row[1:(self.input_size + 1)]))
                output_data = tuple(map(float,
                    row[(self.input_size + 1):
                        (self.input_size + 1 + self.output_size)]))
                self.ds.addSample(input_data, output_data)
        # uses backpropagation to create a trainer
        self.trainer = BackpropTrainer(self.net, self.ds)

    def train(self, convergance):
        """Train until convergence (True) or for self.num_epochs epochs."""
        if convergance:
            self.trainer.trainUntilConvergence()
        else:
            self.trainer.trainEpochs(self.num_epochs)

    def query(self, input_data):
        """Return the network's activation for one input vector."""
        return self.net.activate(input_data)

    def cross_vaildate(self):
        """Run 5-fold cross-validation over self.ds; return the mean MSE.

        Requires prepare_trainer() to have been called first (uses
        self.ds and self.trainer). Each fold trains a deep copy of the
        trainer so the shared trainer/network state is left untouched.
        """
        n_folds = 5
        max_epochs = self.num_epochs
        l = self.ds.getLength()
        inp = self.ds.getField("input")
        tar = self.ds.getField("target")
        indim = self.ds.indim
        outdim = self.ds.outdim
        assert l > n_folds
        perms = array_split(permutation(l), n_folds)
        perf = 0.
        for i in range(n_folds):
            # determine train indices: every fold except fold i
            train_perms_idxs = list(range(n_folds))
            train_perms_idxs.pop(i)
            temp_list = []
            for train_perms_idx in train_perms_idxs:
                temp_list.append(perms[train_perms_idx])
            train_idxs = concatenate(temp_list)
            # determine test indices
            test_idxs = perms[i]
            # train on a deep-copied trainer so self.trainer is unchanged
            train_ds = SupervisedDataSet(indim, outdim)
            train_ds.setField("input", inp[train_idxs])
            train_ds.setField("target", tar[train_idxs])
            temp_trainer = copy.deepcopy(self.trainer)
            temp_trainer.setData(train_ds)
            if not max_epochs:
                temp_trainer.train()
            else:
                temp_trainer.trainEpochs(max_epochs)
            # test on the held-out fold
            test_ds = SupervisedDataSet(indim, outdim)
            test_ds.setField("input", inp[test_idxs])
            test_ds.setField("target", tar[test_idxs])
            perf += self.myCalculatePerformance(temp_trainer, test_ds)
        perf /= n_folds
        return perf

    # Correctly spelled alias; the original misspelled name is kept so
    # existing callers keep working.
    cross_validate = cross_vaildate

    def myCalculatePerformance(self, trainer, dataset):
        """Return the mean square error of trainer's net over dataset."""
        # compute outputs
        output = []
        for row in array(dataset.getField('input')):
            output.append(trainer.module.activate(row))
        target = array(dataset.getField('target'))
        # compute and return the mean square error
        return Validator.MSE(output=output, target=target)
error2 = metrics.rmse(actualA, predictedA)
graph.append((i, error, error2))
with open('results/graphs/'+filename, 'w') as fp:
    a = csv.writer(fp, delimiter=',')
    a.writerows(graph)'''
# NOTE(review): everything above (through the closing ''') is the tail of a
# triple-quoted commented-out block that opens before this chunk -- it is
# dead code kept as a string literal, not executed.
#
# Write the output of the final network
#
n_folds=5
inp = DS.getField("input")
tar = DS.getField("target")
# shuffle all sample indices, then split them into n_folds groups
perms = array_split(permutation(DS.getLength()), n_folds)
performances = 0
for i in range(n_folds):
    # determine train indices
    # NOTE(review): range() returns a list under Python 2, so .pop() is
    # valid here; this would break under Python 3.
    train_perms_idxs = range(n_folds)
    train_perms_idxs.pop(i)
    temp_list = []
    for train_perms_idx in train_perms_idxs:
        temp_list.append(perms[ train_perms_idx ])
    train_idxs = concatenate(temp_list)
    # determine test indices
    test_idxs = perms[i]
    train_ds = SupervisedDataSet(nFeatures, nOutput)
    train_ds.setField("input" , inp[train_idxs])
    # NOTE(review): the loop body continues past this chunk (target
    # setField, training, evaluation presumably follow).