error = metrics.rmse(actualA, predictedA) predictedA, actualA = predict(n, tstdata['input'], tstdata['target']) error2 = metrics.rmse(actualA, predictedA) graph.append((i, error, error2)) with open('results/graphs/'+filename, 'w') as fp: a = csv.writer(fp, delimiter=',') a.writerows(graph)'''
#
# Write the output of the final network
#
# NOTE(review): this fragment relies on names defined elsewhere in the
# file (DS, array_split, permutation, concatenate) -- it is not
# self-contained.
n_folds=5
inp = DS.getField("input")
tar = DS.getField("target")
# Randomly shuffle all sample indices, then split them into n_folds
# (roughly) equal folds.
perms = array_split(permutation(DS.getLength()), n_folds)
performances = 0
for i in range(n_folds):
    # determine train indices: every fold except the held-out fold i
    # (Python 2: range() returns a list, so .pop() is valid here)
    train_perms_idxs = range(n_folds)
    train_perms_idxs.pop(i)
    temp_list = []
    for train_perms_idx in train_perms_idxs:
        temp_list.append(perms[ train_perms_idx ])
    train_idxs = concatenate(temp_list)
    # determine test indices: the held-out fold i
    test_idxs = perms[i]
    # NOTE(review): the loop body appears truncated in this view -- the
    # computed train_idxs/test_idxs are never used here; confirm against
    # the full file.
class Network(object):
    """Feed-forward neural network wrapper around PyBrain.

    Builds a fully connected feed-forward network with a linear input
    layer, (number_of_layers - 2) hidden layers, and a linear output
    layer, plus helpers for training, persistence, querying and 5-fold
    cross-validation.
    """

    def __init__(self, input_size, output_size, number_of_layers=3,
                 size_of_hidden_layers=3, type_of_hidden_layer='sigmoid',
                 net_bias=False, epochs=100):
        """Construct the layered network.

        input_size / output_size: neuron counts for the input and output
            layers.
        number_of_layers: total layer count, including the input and
            output layers; must be at least 2.
        size_of_hidden_layers, net_bias: accepted for interface
            compatibility; not used by the construction code below.
        type_of_hidden_layer: one of 'linear', 'sigmoid', 'tanh'.
        epochs: default epoch count used by train() when not training
            until convergence.

        Raises ValueError for an invalid layer count or hidden-layer type
        (previously an unknown type crashed with an obscure IndexError).
        """
        if number_of_layers < 2:
            raise ValueError("number_of_layers must be at least 2")
        hidden_types = {
            'linear': LinearLayer,
            'sigmoid': SigmoidLayer,
            'tanh': TanhLayer,
        }
        if type_of_hidden_layer not in hidden_types:
            raise ValueError("unknown type_of_hidden_layer: %r"
                             % (type_of_hidden_layer,))
        self.net = FeedForwardNetwork()
        self.num_epochs = epochs
        # Hidden layers are sized to the mean of input and output sizes.
        # Floor division keeps the size an int on Python 3 as well as
        # Python 2 (a bare '/' would produce a float layer size on 3.x).
        hidden_size = (input_size + output_size) // 2
        layers = []
        for i in range(number_of_layers):
            if i == 0:
                layers.append(LinearLayer(input_size))
                self.net.addInputModule(layers[i])
            elif i == (number_of_layers - 1):
                layers.append(LinearLayer(output_size))
                self.net.addOutputModule(layers[i])
                self.net.addConnection(FullConnection(layers[i - 1], layers[i]))
            else:
                layers.append(hidden_types[type_of_hidden_layer](hidden_size))
                self.net.addModule(layers[i])
                self.net.addConnection(FullConnection(layers[i - 1], layers[i]))
        # Finalize module ordering so the network can be activated.
        self.net.sortModules()
        self.input_size = input_size
        self.output_size = output_size

    def load(self, filedir):
        """Replace the current network with one deserialized from *filedir*."""
        self.net = NetworkReader.readFrom(filedir)

    def save(self, filedir):
        """Serialize the current network to *filedir*."""
        NetworkWriter.writeToFile(self.net, filedir)

    def prepare_trainer(self, filedir):
        """Load a CSV training file and build a backpropagation trainer.

        Each CSV row is expected as: one leading column (skipped, e.g. an
        id), then input_size input values, then output_size target values.
        Sets self.ds and self.trainer as side effects.
        """
        self.ds = SupervisedDataSet(self.input_size, self.output_size)
        with open(filedir, 'rt') as csvfile:
            reader = csv.reader(csvfile, delimiter=',')
            for row in reader:
                # Column 0 is skipped; then inputs, then targets.
                input_data = tuple(map(float, row[1:(self.input_size + 1)]))
                output_data = tuple(map(
                    float,
                    row[(self.input_size + 1):(self.input_size + 1 + self.output_size)]))
                self.ds.addSample(input_data, output_data)
        # Backpropagation trainer over the freshly loaded data set.
        self.trainer = BackpropTrainer(self.net, self.ds)

    def train(self, convergance):
        """Train the network.

        convergance: if truthy, train until validation error stops
            improving; otherwise run exactly self.num_epochs epochs.
            (Parameter name kept misspelled for interface compatibility.)
        """
        if convergance:
            self.trainer.trainUntilConvergence()
        else:
            self.trainer.trainEpochs(self.num_epochs)

    def query(self, input_data):
        """Activate the network on *input_data* and return its output."""
        return self.net.activate(input_data)

    def cross_vaildate(self):
        """Estimate generalization error via 5-fold cross-validation.

        Returns the mean MSE across the five held-out folds. The method
        name's typo is kept for backward compatibility; see the
        cross_validate alias below.

        Raises ValueError if the data set is too small to split into five
        folds (previously an assert, which is stripped under `python -O`).
        """
        n_folds = 5
        max_epochs = self.num_epochs
        l = self.ds.getLength()
        inp = self.ds.getField("input")
        tar = self.ds.getField("target")
        indim = self.ds.indim
        outdim = self.ds.outdim
        if l <= n_folds:
            raise ValueError("need more than %d samples for %d-fold CV"
                             % (n_folds, n_folds))
        # Random partition of the sample indices into n_folds folds.
        perms = array_split(permutation(l), n_folds)
        perf = 0.
        for i in range(n_folds):
            # Train indices: every fold except the held-out fold i.
            train_perms_idxs = list(range(n_folds))
            train_perms_idxs.pop(i)
            temp_list = []
            for train_perms_idx in train_perms_idxs:
                temp_list.append(perms[train_perms_idx])
            train_idxs = concatenate(temp_list)
            # Test indices: the held-out fold i.
            test_idxs = perms[i]
            # Train a deep copy of the trainer so every fold starts from
            # the same initial weights instead of accumulating training.
            train_ds = SupervisedDataSet(indim, outdim)
            train_ds.setField("input", inp[train_idxs])
            train_ds.setField("target", tar[train_idxs])
            temp_trainer = copy.deepcopy(self.trainer)
            temp_trainer.setData(train_ds)
            if not max_epochs:
                temp_trainer.train()
            else:
                temp_trainer.trainEpochs(max_epochs)
            # Evaluate on the held-out fold.
            test_ds = SupervisedDataSet(indim, outdim)
            test_ds.setField("input", inp[test_idxs])
            test_ds.setField("target", tar[test_idxs])
            perf += self.myCalculatePerformance(temp_trainer, test_ds)
        perf /= n_folds
        return perf

    # Correctly spelled alias; the misspelled name above is preserved so
    # existing callers keep working.
    cross_validate = cross_vaildate

    def myCalculatePerformance(self, trainer, dataset):
        """Return the mean squared error of *trainer*'s module on *dataset*."""
        output = []
        for row in array(dataset.getField('input')):
            output.append(trainer.module.activate(row))
        target = array(dataset.getField('target'))
        return Validator.MSE(output=output, target=target)
# NOTE(review): 'i', 'ds', 'np' and 'labels' are defined earlier in the
# file (outside this view); 'i' here is presumably a data file name --
# confirm against the enclosing code.
with open('data/' + i) as f:
    next(f)  # skip the header row
    for counter, line in enumerate(f):
        line = line.replace('\n','')
        line = line.strip()
        line = line.split(',')
        values = line[1:]
        # NOTE(review): the assignment above is immediately overwritten --
        # only the last 20 columns of each row are actually used.
        values = line[-20:]
        for j, value in enumerate(values):
            # Treat empty CSV cells as zero.
            if value == '':
                values[j] = 0.0
        values = np.array(values, dtype='float32')
        # Last column is the target; the preceding columns are inputs.
        ds.addSample(values[0:-1], values[-1])
#%%
# Per input column, count how many samples are non-zero vs zero.
l = len(ds.getField('input'))
w = len(ds.getField('input').transpose())
nonzero = {}
zero = {}
for i in xrange(w):
    nonzero[i] = np.count_nonzero(ds['input'].transpose()[i])
    zero[i] = len(ds.getField('input')) - np.count_nonzero(ds['input'].transpose()[i])
print nonzero
print zero
print labels
# Invert the mapping to (count, column) pairs and sort columns by
# zero-count, most zeros first.
d_view = [ (v,k) for k,v in zero.iteritems() ]
d_view.sort(reverse=True) # natively sort tuples by first element
i = 0
for v,k in d_view:
    i += 1
    # NOTE(review): the loop body appears truncated in this view -- the
    # bare 'print' below emits only a blank line per entry.
    print