stimalldata.addSample(X_successful_stim[xnum,:],y_successful_stim[xnum])

# split the data into testing and training data
tstdata_temp, trndata_temp = alldata.splitWithProportion(0.15)

# small bug with _convertToOneOfMany function.  This fixes that
tstdata = ClassificationDataSet(num_features,1,nb_classes=2)
for n in xrange(0, tstdata_temp.getLength()):
    tstdata.addSample(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1])

trndata = ClassificationDataSet(num_features,1,nb_classes=2)
for n in xrange(0,trndata_temp.getLength()):
    trndata.addSample(trndata_temp.getSample(n)[0],trndata_temp.getSample(n)[1])

valdata = ClassificationDataSet(num_features,1,nb_classes=2)
for n in xrange(0,stimalldata.getLength()):
    valdata.addSample(stimalldata.getSample(n)[0],stimalldata.getSample(n)[1])

# organizes dataset for pybrain
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

valdata._convertToOneOfMany()

# sample printouts before running classifier
print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]

# build the ANN
class Network(object):
  """Feed-forward neural network wrapper around PyBrain.

  Builds a chain of ``number_of_layers`` fully-connected layers: a linear
  input layer, ``number_of_layers - 2`` hidden layers (each sized to the
  integer mean of the input and output sizes), and a linear output layer.
  Provides helpers for saving/loading, training from a CSV file,
  cross-validation and querying.
  """

  def __init__(self, input_size, output_size, number_of_layers=3, size_of_hidden_layers=3, type_of_hidden_layer='sigmoid', net_bias=False, epochs=100):
    """Construct and sort the network.

    input_size / output_size: number of input / output units.
    number_of_layers: total layer count including input and output.
    type_of_hidden_layer: 'linear', 'sigmoid' or 'tanh'.
    epochs: default epoch count used by train() when not converging.

    NOTE(review): size_of_hidden_layers and net_bias are currently
    unused — hidden size is computed from input/output sizes and no bias
    module is wired in.  Kept in the signature for caller compatibility.
    """
    self.net = FeedForwardNetwork()
    self.num_epochs = epochs

    # set up layers of the network
    layers = []
    # // keeps Python-2 integer-division behavior and stays an int on
    # Python 3 (layer sizes must be integers).
    hidden_size = (input_size + output_size) // 2

    for i in range(number_of_layers):
      if i == 0:
        layers.append(LinearLayer(input_size))
        self.net.addInputModule(layers[i])
      elif i == (number_of_layers - 1):
        layers.append(LinearLayer(output_size))
        self.net.addOutputModule(layers[i])
        self.net.addConnection(FullConnection(layers[i - 1], layers[i]))
      else:
        if type_of_hidden_layer == 'linear':
          layers.append(LinearLayer(hidden_size))
        elif type_of_hidden_layer == 'sigmoid':
          layers.append(SigmoidLayer(hidden_size))
        elif type_of_hidden_layer == 'tanh':
          layers.append(TanhLayer(hidden_size))
        else:
          # Previously an unknown type fell through silently and the
          # missing append crashed later with a confusing IndexError;
          # fail fast with a clear message instead.
          raise ValueError("unknown hidden layer type: %r" % (type_of_hidden_layer,))
        self.net.addModule(layers[i])
        self.net.addConnection(FullConnection(layers[i - 1], layers[i]))

    self.net.sortModules()
    self.input_size = input_size
    self.output_size = output_size

  def load(self, filedir):
    """Replace self.net with a network read from an XML file at filedir."""
    self.net = NetworkReader.readFrom(filedir)

  def save(self, filedir):
    """Write self.net to an XML file at filedir."""
    NetworkWriter.writeToFile(self.net, filedir)

  def prepare_trainer(self, filedir):
    """Load training samples from a CSV file and build a trainer.

    Each CSV row is split positionally: columns 1..input_size are the
    input vector, the following output_size columns are the target
    (column 0 is skipped — presumably a row id; verify against the data
    files).  Sets self.ds and self.trainer.
    """
    # initialize the data set
    self.ds = SupervisedDataSet(self.input_size, self.output_size)

    with open(filedir, 'rt') as csvfile:
      reader = csv.reader(csvfile, delimiter=',')
      for row in reader:
        # format data: positional slices, converted to float tuples
        input_data = tuple(map(float, row[1:(self.input_size + 1)]))
        output_data = tuple(map(float, row[(self.input_size + 1):(self.input_size + 1 + self.output_size)]))

        # add to dataset
        self.ds.addSample(input_data, output_data)

    # uses backpropagation to create a trainer
    self.trainer = BackpropTrainer(self.net, self.ds)

  def train(self, convergance):
    """Train until convergence when convergance is truthy, else for
    self.num_epochs epochs.  (Parameter name typo kept for callers.)"""
    if convergance:
      self.trainer.trainUntilConvergence()
    else:
      self.trainer.trainEpochs(self.num_epochs)

  def query(self, input_data):
    """Return the network's activation for a single input vector."""
    return self.net.activate(input_data)

  def cross_vaildate(self):
    """Run 5-fold cross-validation on self.ds and return the mean MSE.

    (Method name typo — 'vaildate' — kept for backward compatibility.)
    Requires prepare_trainer() to have been called first.
    """
    n_folds = 5
    max_epochs = self.num_epochs
    l = self.ds.getLength()
    inp = self.ds.getField("input")
    tar = self.ds.getField("target")
    indim = self.ds.indim
    outdim = self.ds.outdim
    assert l > n_folds

    # shuffle all sample indices, then split into n_folds index arrays
    perms = array_split(permutation(l), n_folds)

    perf = 0.
    for i in range(n_folds):
      # train on every fold except fold i
      train_perms_idxs = list(range(n_folds))
      train_perms_idxs.pop(i)
      temp_list = []
      for train_perms_idx in train_perms_idxs:
        temp_list.append(perms[train_perms_idx])
      train_idxs = concatenate(temp_list)

      # fold i is the held-out test set
      test_idxs = perms[i]

      # train a deep copy so each fold starts from the same weights
      train_ds = SupervisedDataSet(indim, outdim)
      train_ds.setField("input", inp[train_idxs])
      train_ds.setField("target", tar[train_idxs])
      temp_trainer = copy.deepcopy(self.trainer)
      temp_trainer.setData(train_ds)
      if not max_epochs:
        temp_trainer.train()
      else:
        temp_trainer.trainEpochs(max_epochs)

      # test
      test_ds = SupervisedDataSet(indim, outdim)
      test_ds.setField("input", inp[test_idxs])
      test_ds.setField("target", tar[test_idxs])

      perf += self.myCalculatePerformance(temp_trainer, test_ds)

    perf /= n_folds
    return perf

  def myCalculatePerformance(self, trainer, dataset):
    """Return the mean square error of trainer's network on dataset."""
    # compute outputs for every input row
    output = []
    for row in array(dataset.getField('input')):
      output.append(trainer.module.activate(row))
    target = array(dataset.getField('target'))

    # compute and return the mean square error
    return Validator.MSE(output=output, target=target)
# NOTE(review): scraper artifact removed from executable path —
# "Ejemplo n.º 3" / "0" were a snippet-separator left in the paste.
    error2 = metrics.rmse(actualA, predictedA)

    graph.append((i, error, error2))

with open('results/graphs/'+filename, 'w') as fp:
    a = csv.writer(fp, delimiter=',')
    a.writerows(graph)'''


#
# Write the output of the final network
#
n_folds=5
inp = DS.getField("input")
tar = DS.getField("target")
perms = array_split(permutation(DS.getLength()), n_folds)
performances = 0
for i in range(n_folds):
    # determine train indices
    train_perms_idxs = range(n_folds)
    train_perms_idxs.pop(i)
    temp_list = []
    for train_perms_idx in train_perms_idxs:
        temp_list.append(perms[ train_perms_idx ])
    train_idxs = concatenate(temp_list)

    # determine test indices
    test_idxs = perms[i]

    train_ds = SupervisedDataSet(nFeatures, nOutput)
    train_ds.setField("input" , inp[train_idxs])