# --- Ejemplo n.º 1 (scraped example marker; "0" vote-count artifact removed) ---
    error = metrics.rmse(actualA, predictedA)
    predictedA, actualA = predict(n, tstdata['input'], tstdata['target'])
    error2 = metrics.rmse(actualA, predictedA)

    graph.append((i, error, error2))

with open('results/graphs/'+filename, 'w') as fp:
    a = csv.writer(fp, delimiter=',')
    a.writerows(graph)'''


#
# k-fold cross-validation index setup on DS
# NOTE(review): the original header said "Write the output of the final
# network", which does not match the code below — confirm intent.
#
n_folds = 5
inp = DS.getField("input")
tar = DS.getField("target")
# Shuffle all sample indices, then split them into n_folds roughly equal folds.
perms = array_split(permutation(DS.getLength()), n_folds)
performances = 0
for i in range(n_folds):
    # determine train indices: every fold except fold i.
    # list(...) so .pop() also works under Python 3 (range objects have no
    # .pop); consistent with Network.cross_vaildate, which already does this.
    train_perms_idxs = list(range(n_folds))
    train_perms_idxs.pop(i)
    temp_list = []
    for train_perms_idx in train_perms_idxs:
        temp_list.append(perms[train_perms_idx])
    train_idxs = concatenate(temp_list)

    # determine test indices
    test_idxs = perms[i]
class Network(object):
  """Feed-forward neural network wrapper built on PyBrain.

  Builds a chain of fully connected layers: a LinearLayer input,
  (number_of_layers - 2) hidden layers, and a LinearLayer output.

  Args:
    input_size: number of input neurons.
    output_size: number of output neurons.
    number_of_layers: total layer count, including input and output layers.
    size_of_hidden_layers: currently unused — hidden width is derived from
      (input_size + output_size) // 2.  NOTE(review): confirm intent.
    type_of_hidden_layer: 'linear', 'sigmoid' or 'tanh'.
    net_bias: currently unused.  NOTE(review): confirm whether bias units
      were intended.
    epochs: default epoch count used by train().
  """

  def __init__(self, input_size, output_size, number_of_layers=3, size_of_hidden_layers=3, type_of_hidden_layer='sigmoid', net_bias=False, epochs=100):
    self.net = FeedForwardNetwork()
    self.num_epochs = epochs
    # set up layers of the network
    layers = []

    # Hidden layers are sized to the mean of input and output widths.
    # // (floor division) so the width is an int on both Python 2 and 3;
    # the previous `/` produced a float under Python 3 true division.
    hidden_size = (input_size + output_size) // 2

    for i in range(number_of_layers):
      if i == 0:
        layers.append(LinearLayer(input_size))
        self.net.addInputModule(layers[i])
      elif i == (number_of_layers - 1):
        layers.append(LinearLayer(output_size))
        self.net.addOutputModule(layers[i])
        self.net.addConnection(FullConnection(layers[i - 1], layers[i]))
      else:
        if type_of_hidden_layer == 'linear':
          layers.append(LinearLayer(hidden_size))
        elif type_of_hidden_layer == 'sigmoid':
          layers.append(SigmoidLayer(hidden_size))
        elif type_of_hidden_layer == 'tanh':
          layers.append(TanhLayer(hidden_size))
        self.net.addModule(layers[i])
        self.net.addConnection(FullConnection(layers[i - 1], layers[i]))

    self.net.sortModules()
    self.input_size = input_size
    self.output_size = output_size

  def load(self, filedir):
    """Replace the current network with one read from filedir."""
    self.net = NetworkReader.readFrom(filedir)

  def save(self, filedir):
    """Serialize the current network to filedir."""
    NetworkWriter.writeToFile(self.net, filedir)

  def prepare_trainer(self, filedir):
    """Load training samples from a CSV file and build a backprop trainer.

    Each row is expected to hold: an id/label column at index 0, then
    input_size input values, then output_size target values.
    Sets self.ds and self.trainer.
    """
    # initialize the data set
    self.ds = SupervisedDataSet(self.input_size, self.output_size)

    # train on data
    with open(filedir, 'rt') as csvfile:
      reader = csv.reader(csvfile, delimiter=',')

      for row in reader:
        # format data: column 0 is skipped, inputs precede targets
        input_data = tuple(map(float, row[1:(self.input_size + 1)]))
        output_data = tuple(map(float, row[(self.input_size + 1):(self.input_size + 1 + self.output_size)]))

        # add to dataset
        self.ds.addSample(input_data, output_data)

    # uses backpropagation to create a trainer
    self.trainer = BackpropTrainer(self.net, self.ds)

  def train(self, convergance):
    """Train until convergence if convergance is truthy, else for
    self.num_epochs epochs.  Requires prepare_trainer() first."""
    if convergance:
      self.trainer.trainUntilConvergence()
    else:
      self.trainer.trainEpochs(self.num_epochs)

  def query(self, input_data):
    """Activate the network on input_data and return its output."""
    return self.net.activate(input_data)

  def cross_vaildate(self):
    """Run 5-fold cross-validation and return the mean MSE across folds.

    NOTE(review): method name is a typo for "cross_validate"; kept as-is
    because external callers may depend on it.
    """
    n_folds = 5
    max_epochs = self.num_epochs
    l = self.ds.getLength()
    inp = self.ds.getField("input")
    tar = self.ds.getField("target")
    indim = self.ds.indim
    outdim = self.ds.outdim
    # need at least one sample per fold
    assert l > n_folds

    # shuffle sample indices and split them into n_folds groups
    perms = array_split(permutation(l), n_folds)

    perf = 0.
    for i in range(n_folds):
      # determine train indices: every fold except fold i
      train_perms_idxs = list(range(n_folds))
      train_perms_idxs.pop(i)
      temp_list = []
      for train_perms_idx in train_perms_idxs:
        temp_list.append(perms[train_perms_idx])
      train_idxs = concatenate(temp_list)

      # determine test indices
      test_idxs = perms[i]

      # train on a deep copy so each fold starts from the same weights
      train_ds = SupervisedDataSet(indim, outdim)
      train_ds.setField("input", inp[train_idxs])
      train_ds.setField("target", tar[train_idxs])
      temp_trainer = copy.deepcopy(self.trainer)
      temp_trainer.setData(train_ds)
      if not max_epochs:
        temp_trainer.train()
      else:
        temp_trainer.trainEpochs(max_epochs)

      # test on the held-out fold
      test_ds = SupervisedDataSet(indim, outdim)
      test_ds.setField("input", inp[test_idxs])
      test_ds.setField("target", tar[test_idxs])

      perf += self.myCalculatePerformance(temp_trainer, test_ds)

    # average performance over all folds
    perf /= n_folds
    return perf

  def myCalculatePerformance(self, trainer, dataset):
    """Return the mean squared error of trainer's network on dataset."""
    # compute outputs for every input row
    output = []
    for row in array(dataset.getField('input')):
      output.append(trainer.module.activate(row))
    target = array(dataset.getField('target'))

    # compute and return the mean square error
    return Validator.MSE(output=output, target=target)
# --- Ejemplo n.º 3 (scraped example marker; "0" vote-count artifact removed) ---
        with open('data/' + i) as f:
            next(f)
            for counter, line in enumerate(f):
                line = line.replace('\n','')
                line = line.strip()
                line = line.split(',')
                values = line[1:]
                values = line[-20:]
                for j, value in enumerate(values):
                    if value == '':
                        values[j] = 0.0
                values = np.array(values, dtype='float32')
                ds.addSample(values[0:-1], values[-1])
#%%   
  
l = len(ds.getField('input'))    
w =  len(ds.getField('input').transpose())
nonzero = {}
zero = {}
for i in xrange(w):
    nonzero[i] = np.count_nonzero(ds['input'].transpose()[i])
    zero[i] = len(ds.getField('input')) - np.count_nonzero(ds['input'].transpose()[i])
print nonzero
print zero
print labels
d_view = [ (v,k) for k,v in zero.iteritems() ]
d_view.sort(reverse=True) # natively sort tuples by first element
i = 0
for v,k in d_view:
    i += 1
    print