Example #1
def entrenarSomnolencia(red):
    # Initialize the dataset
    ds = SupervisedDataSet(4096,1)

    """Se crea el dataset, para ello procesamos cada una de las imagenes obteniendo los rostros,
       luego se le asignan los valores deseados del resultado la red neuronal."""

    print "Somnolencia - cara"
    for i,c in enumerate(os.listdir(os.path.dirname('/home/taberu/Imágenes/img_tesis/somnoliento/'))):
        try:
            im = cv2.imread('/home/taberu/Imágenes/img_tesis/somnoliento/'+c)
            pim = pi.procesarImagen(im)
            cara = d.deteccionFacial(pim)
            if cara is None:
                print "No face found"
            else:
                print i
                ds.appendLinked(cara.flatten(),10)
        except:
            pass

    trainer = BackpropTrainer(red, ds)
    print "Entrenando hasta converger"
    trainer.trainUntilConvergence()
    NetworkWriter.writeToFile(red, 'rna_somnolencia.xml')
Example #2
class dataset:
	# Initialize the dataset with input and label size
	def __init__(self, inputsize, labelsize):
		self.inputsize = inputsize
		self.labelsize = labelsize
		self.DS = SupervisedDataSet(self.inputsize, self.labelsize)
	
	# Adds data to existing training dataset
	def addTrainingData(self,inputdata, labeldata):
		try:
			if inputdata.size == self.inputsize and labeldata.size == self.labelsize:
				self.DS.appendLinked(inputdata, labeldata)
				return 1
			return 0  # size mismatch
		except AttributeError:
			print "Input error."
			return 0
	
	def getTrainingDataset(self):
		return self.DS
	
	def generateDataSet(self):
		for line in fileinput.input(['data/inputdata']):
			x = line.split(':')
	
			self.addTrainingData(ft.feature.getImageFeatureVector(x[0]), np.array([int(x[1])]))
		return 1
Example #3
class dataset:
    # Initialize the dataset with input and label size
    def __init__(self, inputsize, labelsize):
        self.inputsize = inputsize
        self.labelsize = labelsize
        self.DS = SupervisedDataSet(self.inputsize, self.labelsize)
    
    # Adds data to existing training dataset
    def addTrainingData(self,inputdata, labeldata):
        try:
            if inputdata.size == self.inputsize and labeldata.size == self.labelsize:
                self.DS.appendLinked(inputdata, labeldata)
                return 1
            return 0  # size mismatch
        except AttributeError:
            print "Input error."
            return 0
    
    def getTrainingDataset(self):
        return self.DS
    
    def generateDataSet(self):
        for line in fileinput.input(['data/inputdata3.txt']):
            x = line.split(':')
#            print ft.feature.getImageFeatureVector(x[0]),np.array([int(x[1])])
            self.addTrainingData(ft.feature.getImageFeatureVector(x[0]),np.array([int(x[1])]))
        return 1
Example #4
 def neuralNetwork_eval_func(self, chromosome):
     node_num, learning_rate, window_size = self.decode_chromosome(chromosome)
     if self.check_log(node_num, learning_rate, window_size):
         return self.get_means_from_log(node_num, learning_rate, window_size)[0]
     folded_dataset = self.create_folded_dataset(window_size)
     indim = 21 * (2 * window_size + 1)
     mean_AUC = 0
     mean_decision_value = 0
     mean_mcc = 0
     sample_size_over_thousand_flag = False
     for test_fold in xrange(self.fold):
         test_labels, test_dataset, train_labels, train_dataset = folded_dataset.get_test_and_training_dataset(test_fold)
         if len(test_labels) + len(train_labels) > 1000:
             sample_size_over_thousand_flag = True
         ds = SupervisedDataSet(indim, 1)
         for i in xrange(len(train_labels)):
             ds.appendLinked(train_dataset[i], [train_labels[i]])
         net = buildNetwork(indim, node_num, 1, outclass=SigmoidLayer, bias=True)
         trainer = BackpropTrainer(net, ds, learningrate=learning_rate)
         trainer.trainUntilConvergence(maxEpochs=self.maxEpochs_for_trainer)
         decision_values = [net.activate(test_dataset[i]) for i in xrange(len(test_labels))]
         decision_values = map(lambda x: x[0], decision_values)
         AUC, decision_value_and_max_mcc = validate_performance.calculate_AUC(decision_values, test_labels)
         mean_AUC += AUC
         mean_decision_value += decision_value_and_max_mcc[0]
         mean_mcc += decision_value_and_max_mcc[1]
         if sample_size_over_thousand_flag:
             break
     if not sample_size_over_thousand_flag:
         mean_AUC /= self.fold
         mean_decision_value /= self.fold
         mean_mcc /= self.fold
     self.write_log(node_num, learning_rate, window_size, mean_AUC, mean_decision_value, mean_mcc)
     self.add_log(node_num, learning_rate, window_size, mean_AUC, mean_decision_value, mean_mcc)
     return mean_AUC
Example #5
def createDataset2(nInputs,inputSize,nOutputs):
    index = 1
    ds = SupervisedDataSet(inputSize,nOutputs)
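    # index 1 selects the value column of each candle tuple; once the window is full it slides one element per sample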
    i = 0
    j = 0
    pList = candleGen()
    print len(pList)
    input = []
    for sub in pList:
        if nInputs == j:
            break
        elif i < inputSize:
            input.append(sub[index])
            i = i+1
        elif i == inputSize:
            ds.appendLinked(input,sub[index])
            input.pop(0)
            input.append(sub[index])
            j = j + 1
            i = i + 1
        else:
            ds.appendLinked(input,sub[index])
            input.pop(0)
            input.append(sub[index])
            j = j + 1


    return ds
Example #6
def get_supervised_dataset(race_data, race_factors):

    race_bins = get_bins(race_data)
    race_bin_groups = pd.DataFrame.from_dict(race_bins).groupby('race_id')

    # Input, output
    data_set = SupervisedDataSet(6, 15)

    for race_id, race_bin in race_bin_groups:

        # Skip bins with fewer than 10% race population
        if not np.count_nonzero(race_bin.population_pct) > 10:
            continue

        race_factor = race_factors[race_factors.race_id == race_id]

        # If race has missing factor data then skip
        if race_factor.empty:
            continue

        input_factors = [first(race_factor.high_temp) / 100.0,
                         first(race_factor.low_temp) / 100.0,
                         first(race_factor.high_humidity) / 100.0,
                         first(race_factor.low_humidity) / 100.0,
                         first(race_factor.starting_elevation) / 10000.0,
                         first(race_factor.gross_elevation_gain) / 10000.0
                         ]

        output_factors = race_bin.population_pct.tolist()

        data_set.appendLinked(input_factors, output_factors)

    return data_set
Example #7
 def fit(self):
     trainds = SupervisedDataSet(self.INPUT_SIZE, 1)
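     # sliding window: the previous INPUT_SIZE values form the input, the current value is the target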
     for i in range(self.str_train, self.end_train):
         trainds.appendLinked(self.data[i-self.INPUT_SIZE:i],self.data[i])
     
     trainer = BackpropTrainer(self.net, trainds, learningrate=self.eta, weightdecay=self.lmda, momentum=0.1, shuffle=False)
     trainer.trainEpochs(self.epochs)
                 
     trainer = None
Example #8
 def fit(self):
     trainds = SupervisedDataSet(self.INPUT_SIZE, 1)
     for i in range(self.str_train, self.end_train):
         trainds.appendLinked(self.data[i-self.INPUT_SIZE:i],self.data[i])
     
     trainer = BackpropTrainer(self.net, trainds, learningrate=self.eta, weightdecay=self.lmda, momentum=0.1, shuffle=False)
     trainer.trainEpochs(self.epochs)
                 
     trainer = None
Example #9
def buildDataSet(fTrainSet):
    ds = SupervisedDataSet(8, 1)

    for row in fTrainSet:
        inVec = row[2:10]
        tarVec = row[10]

        ds.appendLinked(inVec, tarVec)

    return ds
Example #10
def main(T=10, load_brain=False, save_brain=False):
    singles = [room for room in rooms.allRooms if room.capacity == "Single"]
    preprocessed = preprocess_rooms(singles)
    all_vectors = [room_to_feature_vector(room, preprocessed) for room in singles]
    
    training_sequences = getLabeledRoomsFeaturesAndLabels(getRoomsMap(singles, all_vectors))
    
    input_units = len(all_vectors[0])

    if load_brain and "net" in brain_shelf:
        net = brain_shelf["net"]
        net.sorted = False
        net.sortModules()
    else:
        net = FeedForwardNetwork()
        layer_in = LinearLayer(input_units)
        layer_hidden = SigmoidLayer(1000)
        layer_hidden2 = SigmoidLayer(100)
        layer_out = LinearLayer(1)
        net.addInputModule(layer_in)
        net.addModule(layer_hidden)
        net.addModule(layer_hidden2)
        net.addOutputModule(layer_out)

        in_to_hidden = FullConnection(layer_in, layer_hidden)
        hidden_to_hidden = FullConnection(layer_hidden, layer_hidden2)
        hidden_to_out = FullConnection(layer_hidden2, layer_out)
        net.addConnection(in_to_hidden)
        net.addConnection(hidden_to_hidden)
        net.addConnection(hidden_to_out)

        net.sortModules()

        training_data = SupervisedDataSet(len(all_vectors[0]), 1)
        for training_seq in training_sequences: 
            training_data.appendLinked(training_seq[1], training_seq[2])
        trainer = BackpropTrainer(net, training_data)
        for i in xrange(T):
            error = trainer.train()
            print "Training iteration %d.  Error: %f" % (i + 1, error)

        if save_brain:
            brain_shelf["net"] = net
    
    labeled_rooms = []
    for i, vector in enumerate(all_vectors):
        labeled_rooms.append((singles[i], net.activate(vector)))
    
    available_rooms = available.get_available_rooms()

    labeled_rooms.sort(key=lambda x: -x[1])
    for room, label in labeled_rooms:
        if room.num in available_rooms:
            print "%16.12f: %s" % (label, room)
Example #11
 def do_evaluate(eval_data, folds_number, iter_number):
     eval_set = SupervisedDataSet(len(feats), 1)
     for inst in eval_data:
         eval_set.appendLinked(inst.features(), [inst.class_idx()])
     res = evaluate(net_placeholder[0], eval_set)
     with open(os.path.join("results", str(folds_number) + ".net." + str(iter_number) + ".obj"), "w") as f:
         pickle.dump(res, f)
     res = evaluate_base(eval_set)
     with open(os.path.join("results", str(folds_number) + ".base." + str(iter_number) + ".obj"), 'w') as f:
         pickle.dump(res, f)
     print res
Example #12
def load_from_file(filename):
    input_size = 9
    output_size = 1
    dataset = SupervisedDataSet(input_size, output_size)
    with open(filename, 'r') as datafile:
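        # each line: input_size input values followed by output_size target values, space-separated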
        for line in datafile:
            data = [float(v) for v in line.strip().split(' ')]
            dataset.appendLinked(
                tuple(data[:input_size]),
                tuple(data[-output_size:]))
    return dataset
Example #13
 def gettraining(self):
     DS = SupervisedDataSet(self.datainput, 8)
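     # each line of a training file names a sample and its label; fftfile() builds the input vector, tobit() encodes the label as 8 target bits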
     for trn in self.training:
         inf = open(trn,'r')
         for line in inf:
             val = line.split(' ', 2)
             index = self.fileindex[val[0]]
             if index>=10:
                 input=self.fftfile(val[0])
                 output=self.tobit(int(val[1]))
                 DS.appendLinked(input, output)
         inf.close()
     return DS
Example #14
def buildDataset(path,indexes):
    f = open(path)
    ds = SupervisedDataSet(len(indexes[0]),len(indexes[1]))
    indexin,indexout = indexes
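    # each line holds tab-separated floats; indexin/indexout select the input and target columns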
    for line in f.readlines():
        outline = [float(x) for x in line.split('\t')[:-1]]
        inpt,outpt = [],[]
        for i in indexin:
            inpt.append(outline[i])
        for i in indexout:
            outpt.append(outline[i])
        ds.appendLinked(inpt,outpt)
    f.close()
    return ds
Example #15
def create_NN_classifier(genes, positive_dataset, negative_dataset):
    maxEpochs_for_trainer = 60
    node_num, learning_rate, window_size = genes
    node_num, learning_rate, window_size = int(node_num), float(learning_rate), int(window_size)
    train_labels, train_dataset = create_train_labels_and_dataset(positive_dataset, negative_dataset) 
    indim = 21 * (2 * window_size + 1)
    ds = SupervisedDataSet(indim, 1)
    for i in xrange(len(train_labels)):
        ds.appendLinked(train_dataset[i], [train_labels[i]])
    net = buildNetwork(indim, node_num, 1, outclass=SigmoidLayer, bias=True)
    trainer = BackpropTrainer(net, ds, learningrate=learning_rate)
    trainer.trainUntilConvergence(maxEpochs=maxEpochs_for_trainer)
    return net
Example #16
    def _generate_Pybrain_DS(self):

        vect_stream = []
        for word in self.sent_stream:
            vect_stream.append(self._word_to_vec(word))
        
        to_conv = zip(vect_stream, vect_stream[1:])
        to_conv.append((vect_stream[-1], vect_stream[0])) #add wrap around

        DS = SupervisedDataSet(29,29)
        for inp, targ in to_conv:
            DS.appendLinked(inp,targ)

        return DS
Example #17
def create_int_dataset(n_input, n_output, codecs):
    ds = SupervisedDataSet(n_input, n_output)
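    # each sample's input concatenates a window of consecutive 3-value codecs; the target is the codec that follows the window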
    if n_input == 3 * 1:
        for i in range(0, len(codecs), 1):
            if i + 1 < len(codecs):
                ds.appendLinked(list(codecs[i]), list(codecs[i + 1]))
    elif n_input == 3 * 3:
        for i in range(0, len(codecs), 1):
            if i + 3 < len(codecs):
                ds.appendLinked(
                    list(codecs[i] + codecs[i + 1] + codecs[i + 2]),
                    list(codecs[i + 3]))
    elif n_input == 3 * 5:
        for i in range(0, len(codecs), 1):
            if i + 6 < len(codecs):
                ds.appendLinked(
                    list(codecs[i] + codecs[i + 1] + codecs[i + 2] +
                         codecs[i + 3] + codecs[i + 4]), list(codecs[i + 5]))

    elif n_input == 3 * 8:
        for i in range(0, len(codecs), 1):
            if i + 9 < len(codecs):
                ds.appendLinked(
                    list(codecs[i] + codecs[i + 1] + codecs[i + 2] +
                         codecs[i + 3] + codecs[i + 4] + codecs[i + 5] +
                         codecs[i + 6] + codecs[i + 7]), list(codecs[i + 8]))
    else:
        print 'not implemented yet'
        return
    return ds
Example #18
def trainNetwork(net, data):
    dimension = WINDOW_SIZE
    ds = SupervisedDataSet(dimension, dimension)
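    # autoencoder-style samples: the real FFT of each window is used as both input and target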
    num_windows = int(len(data) / WINDOW_SIZE)
    for offset in range(0, num_windows):
        lower = offset * WINDOW_SIZE
        upper = min(len(data), (offset + 1) * WINDOW_SIZE)
        test_input = rfft(np.copy(data[lower:upper]))
        test_input.shape = (1, WINDOW_SIZE)
        test_output = np.copy(test_input)
        ds.appendLinked(test_input, test_output)
    trainer = BackpropTrainer(net, dataset=ds)
    for i in range(10):
        print("epoch {}".format(i))
        trainer.trainEpochs(1)
Example #19
def getSeparateDataSets(testSize = 0.2):
    trnDs = ClassificationDataSet(len(feats), nb_classes=len(classes))
    tstDs = SupervisedDataSet(len(feats), 1)
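    # per class file, the first (1 - testSize) fraction of lines goes to training, the rest to testing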
    for c in classes:
        with codecs.open(os.path.join(data_root, c+".txt"), 'r', 'utf8') as f:
            lines = f.readlines()
            breakpoint = (1.0 - testSize) * len(lines)
            for i in range(len(lines)):
                r = Record("11", lines[i], c, "")
                if i < breakpoint:
                    trnDs.appendLinked(r.features(), [r.class_idx()])
                else:
                    tstDs.appendLinked(r.features(), [r.class_idx()])
    trnDs._convertToOneOfMany([0, 1])
    return trnDs, tstDs
Example #20
def create_NN_classifier(genes, positive_dataset, negative_dataset):
    maxEpochs_for_trainer = 60
    node_num, learning_rate, window_size = genes
    node_num, learning_rate, window_size = int(node_num), float(
        learning_rate), int(window_size)
    train_labels, train_dataset = create_train_labels_and_dataset(
        positive_dataset, negative_dataset)
    indim = 21 * (2 * window_size + 1)
    ds = SupervisedDataSet(indim, 1)
    for i in xrange(len(train_labels)):
        ds.appendLinked(train_dataset[i], [train_labels[i]])
    net = buildNetwork(indim, node_num, 1, outclass=SigmoidLayer, bias=True)
    trainer = BackpropTrainer(net, ds, learningrate=learning_rate)
    trainer.trainUntilConvergence(maxEpochs=maxEpochs_for_trainer)
    return net
Example #21
 def neuralNetwork_eval_func(self, chromosome):
     node_num, learning_rate, window_size = self.decode_chromosome(
         chromosome)
     if self.check_log(node_num, learning_rate, window_size):
         return self.get_means_from_log(node_num, learning_rate,
                                        window_size)[0]
     folded_dataset = self.create_folded_dataset(window_size)
     indim = 21 * (2 * window_size + 1)
     mean_AUC = 0
     mean_decision_value = 0
     mean_mcc = 0
     sample_size_over_thousand_flag = False
     for test_fold in xrange(self.fold):
         test_labels, test_dataset, train_labels, train_dataset = folded_dataset.get_test_and_training_dataset(
             test_fold)
         if len(test_labels) + len(train_labels) > 1000:
             sample_size_over_thousand_flag = True
         ds = SupervisedDataSet(indim, 1)
         for i in xrange(len(train_labels)):
             ds.appendLinked(train_dataset[i], [train_labels[i]])
         net = buildNetwork(indim,
                            node_num,
                            1,
                            outclass=SigmoidLayer,
                            bias=True)
         trainer = BackpropTrainer(net, ds, learningrate=learning_rate)
         trainer.trainUntilConvergence(maxEpochs=self.maxEpochs_for_trainer)
         decision_values = [
             net.activate(test_dataset[i]) for i in xrange(len(test_labels))
         ]
         decision_values = map(lambda x: x[0], decision_values)
         AUC, decision_value_and_max_mcc = validate_performance.calculate_AUC(
             decision_values, test_labels)
         mean_AUC += AUC
         mean_decision_value += decision_value_and_max_mcc[0]
         mean_mcc += decision_value_and_max_mcc[1]
         if sample_size_over_thousand_flag:
             break
     if not sample_size_over_thousand_flag:
         mean_AUC /= self.fold
         mean_decision_value /= self.fold
         mean_mcc /= self.fold
     self.write_log(node_num, learning_rate, window_size, mean_AUC,
                    mean_decision_value, mean_mcc)
     self.add_log(node_num, learning_rate, window_size, mean_AUC,
                  mean_decision_value, mean_mcc)
     return mean_AUC
Example #22
def trainer(dataTrain, dataTest, hiddenLayer, validationProportion, epochs):
    # create the datasets with 9 inputs and 1 output
    dsTrain = SupervisedDataSet(9, 1)
    dsTest = SupervisedDataSet(9, 1)

    # add the data to the training dataset
    for x in dataTrain:
        dsTrain.appendLinked(x[:9], x[9:])

    # add the data to the test dataset
    for x in dataTest:
        dsTest.appendLinked(x[:9], x[9:])

    # build the network
    net = buildNetwork(9, hiddenLayer, 1, bias=True)

    # train the neural network
    trainer = BackpropTrainer(net, dsTrain)
    trainer.train()
    trainer.trainUntilConvergence(verbose=False,
                                  validationProportion=validationProportion,
                                  maxEpochs=epochs)

    # test the network
    resTrain = net.activateOnDataset(dsTrain)
    resTest = net.activateOnDataset(dsTest)

    # check the results
    hitTrain = []
    for index, x in enumerate(resTrain):
        hitTrain.append(int(round(x[0], 0)) == dataTrain[index][9])

    hitTest = []
    for index, x in enumerate(resTest):
        hitTest.append(int(round(x[0], 0)) == dataTest[index][9])

    resultTrain = Counter(hitTrain)
    resultTest = Counter(hitTest)

    # float() avoids Python 2 integer division truncating the ratios to 0
    return Resultado(
        {
            'acertos': float(resultTrain[True]) / len(hitTrain),
            'erros': float(resultTrain[False]) / len(hitTrain)
        }, {
            'acertos': float(resultTest[True]) / len(hitTest),
            'erros': float(resultTest[False]) / len(hitTest)
        })
Example #23
 def NN_get_nextcase(self, NNcases):
     ds_e = SupervisedDataSet( self.NN_input_size, self.NN_target_size )
     nextcase = NNcases[0]
     evalue = None
     for NNcase in NNcases:
         ds_e.appendLinked( self.gen_NN_input(NNcase[1]), None )
         e = self.NN_net.activateOnDataset( ds_e )[0][0]
         if evalue is None: evalue = e
         if self.rank_attrs['sort'].lower()=='ascend':
             if e<evalue:
                 evalue = e
                 nextcase = NNcase
         elif self.rank_attrs['sort'].lower()=='descend':
             if e>evalue:
                 evalue = e
                 nextcase = NNcase
     return nextcase + (evalue, )
Example #24
 def NN_get_nextcase(self, NNcases):
     ds_e = SupervisedDataSet(self.NN_input_size, self.NN_target_size)
     nextcase = NNcases[0]
     evalue = None
     for NNcase in NNcases:
         ds_e.appendLinked(self.gen_NN_input(NNcase[1]), None)
         e = self.NN_net.activateOnDataset(ds_e)[0][0]
         if evalue is None: evalue = e
         if self.rank_attrs['sort'].lower() == 'ascend':
             if e < evalue:
                 evalue = e
                 nextcase = NNcase
         elif self.rank_attrs['sort'].lower() == 'descend':
             if e > evalue:
                 evalue = e
                 nextcase = NNcase
     return nextcase + (evalue, )
Example #25
def createDs(func):
    # declare globals before assigning them (assigning first raises a SyntaxWarning)
    global outputMax
    global outputMin
    outputMax = -np.inf
    outputMin = np.inf
    ds = SupervisedDataSet(2, 1)
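    # sample func on an N x N integer grid, tracking the output range for later denormalization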
    for j in range(N):
        for i in range(N):
            input = [i, j]
            output = func(j, i)  # math.sqrt(i**2+j**2)
            ds.appendLinked(input, output)
            if output > outputMax:
                outputMax = output
            if outputMin > output:
                outputMin = output
    ds.outputMax = outputMax
    ds.outputMin = outputMin
    return ds
Example #26
 def train(self, training_set):
     train_points = [self._get_x(t) for t in training_set]
     train_distances = get_elements(self._distances, training_set)
     
     predictor_points = train_distances  # [a + b for a, b in zip(train_distances, train_points)]
     #print(predictor_points)
     
     ensembles = get_elements(self._best_ensemble_by_time, training_set)
     ens_combinations = [self._ens_combinatinons[ens] for ens in ensembles]
     
     print(self._m_count + self.p_count, len(self._models_combinations))
     ds = SupervisedDataSet(self._m_count, len(self._models_combinations))  # + self.p_count, len(self._models_combinations))
     for input_data, target in list(zip(predictor_points, ens_combinations)):
         print(input_data, target)
         ds.appendLinked(input_data, target)
         
     trainer = BackpropTrainer(self._ann, ds)
     trainer.trainEpochs(100)
Example #27
def createDataset(nInputs,inputSize,nOutputs):
    index = 0
    ds = SupervisedDataSet(inputSize,nOutputs)
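    # consecutive windows of inputSize candle values; each window's target (the next value) also seeds the following window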
    i = 0
    j = 0
    pList =candleGen()
    input = []

    for sub in pList:
        if nInputs == j:
            break
        if i < inputSize:
            input.append(sub[index])
        else:
            ds.appendLinked(input,sub[index])
            input = []
            input.append(sub[index])
            i = 0
            j = j + 1
        i = i + 1
    return ds
Example #28
def buildDataSet(fTrainSet):
    ds = SupervisedDataSet(len(homeDict) + len(awayDict) + 8, 1)
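    # one-hot encode the home and away teams (the last slot catches unseen teams), then append the 8 numeric features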

    for row in fTrainSet:
        homeTeam = [0]*len(homeDict)
        if row[0] in homeDict.keys():
            homeTeam[homeDict[row[0]]] = 1
        else:
            homeTeam[-1] = 1 #"other"
        
        awayTeam = [0]*len(awayDict)
        if row[1] in awayDict.keys():
            awayTeam[awayDict[row[1]]] = 1
        else:
            awayTeam[-1] = 1 #"other"
        
        inVec = homeTeam + awayTeam + row[2:10]
        tarVec = row[10]
                
        ds.appendLinked(inVec, tarVec)
    
    return ds
Example #29
def createDataset3(pList, nInputs,inputSize,nOutputs):
    index = 1
    ds = SupervisedDataSet(inputSize,nOutputs)
    i = 0
    j = 0
    input = []
    for sub in pList:
        val = normalize(sub[index])
        if nInputs == j:
            break
        elif i < inputSize:
            input.append(val)
            i = i+1
        else:
            ds.appendLinked(input,val)
            input.pop(0)
            input.append(val)
            j = j + 1
    return ds
Example #30
def get_data(path_to_data):
	print 'Loading data from', path_to_data
	data_file = open(path_to_data, 'r+b')
	mmap_file = mmap.mmap(data_file.fileno(), 0)
	summary = 0
	header = [int(item) for item in mmap_file.readline().split(' ')]
	counter = 0
	pbar = progressbar.ProgressBar(maxval=header[0])
	pbar.start()
	line = mmap_file.readline()
	data = SupervisedDataSet(header[1], header[2])
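	# the file alternates input and target lines; header = [sample count, input dim, output dim]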
	while line != '':
		data_line = [float(item) for item in line.split(' ')]
		line = mmap_file.readline()
		result_line = [float(item) for item in line.split(' ')]
		line = mmap_file.readline()
		data.appendLinked(data_line, result_line)
		counter += 1
		pbar.update(counter)
	pbar.finish()
	print 'Data successfully loaded'
	return [header, data]
Example #31
def createDs(func):
    # declare globals before assigning them (assigning first raises a SyntaxWarning)
    global outputMax
    global outputMin
    outputMax = -np.inf
    outputMin = np.inf
    ds = SupervisedDataSet( 1, 1 )
    #for j in range(N):
    rs = []
    for i in range(N):
        rs.append(random.random())
    rs.sort()
    for r in rs:
        input = r
        output = nonLinearFunc(r)
        ds.appendLinked(input, output)
        if output > outputMax:
            outputMax = output
        if outputMin > output:
            outputMin = output
    ds.outputMax = outputMax
    ds.outputMin = outputMin
    return ds
Example #32
def entrenarO(red):
    # Initialize the dataset
    ds = SupervisedDataSet(4096,1)

    """Se crea el dataset, para ello procesamos cada una de las imagenes obteniendo las figuras,
       luego se le asignan los valores deseados del resultado la red neuronal."""

    print "O  - Figura"
    for i,c in enumerate(os.listdir(os.path.dirname('C:\\Users\\LuisD\\Desktop\\Reconocimiento\\prueba/'))):
        try:
            im = cv2.imread('C:\\Users\\LuisD\\Desktop\\Reconocimiento\\prueba/'+c)
            im = cv2.resize(im, (64, 64))  # keep the result; cv2.resize returns a new image
            pim = pi.ProcesarImagen(im)
            ds.appendLinked(pim.flatten(),10)
        except:
            pass

    print len(ds)
    print i,c

    trainer = BackpropTrainer(red, ds)
    print "Entrenando hasta converger"
    trainer.trainUntilConvergence()
    NetworkWriter.writeToFile(red, 'rna_o.xml')
Example #33
def trainMinError(trainer,dsV,minTrainer=None,batch_size=0,epochs=50,plotErr=False,i0=0):
    dsT = trainer.ds
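    # train for a number of epochs, optionally on random mini-batches, keeping a copy of the model with the lowest validation error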
    if minTrainer is None:
        minTrainer = deepcopy(trainer)
    for i in range(epochs):
        if batch_size == 0:
            ds = dsT
        else:
            ds = SupervisedDataSet(len(dsT['input'][0]),len(dsT['target'][0]))
            data = zip(dsT['input'],dsT['target'])
            shuffle(data)
            for k in range(batch_size):
                ds.appendLinked(data[k][0],data[k][1])
        trainer.ds = ds
        trainer.train()
        TE = trainer.testOnData(dsT)
        VE = trainer.testOnData(dsV)
        MVE = minTrainer.testOnData(dsV)
        if VE < MVE:
            minTrainer = BackpropTrainer(deepcopy(trainer.module),dsT)
        if plotErr:
            plotError(i+i0,TE,VE,MVE)
    trainer.ds = dsT
    return minTrainer
Example #34
    def fit(self, X, y):
        '''
        Train the model.

        :param X: list of numpy arrays representing the training samples.
        :param y: numpy array representing the training samples' true values.
        '''
        input_dimension = len(X[0])
        self.nnw = buildNetwork(input_dimension,
                                input_dimension * self.hidden_layer_multiplier,
                                1)

        if util.VERBOSE:
            print 'Generating neural network data set:'
            print '\tInput layer dimension: %d' % input_dimension
            print '\tHidden layer dimension: %d ' % (
                input_dimension * self.hidden_layer_multiplier)

        # Create a data set for training samples with input_dimension number of features, and outputs with dimension 1.
        data_set = SupervisedDataSet(input_dimension, 1)
        for i in xrange(len(y)):
            data_set.appendLinked(X[i], np.array(y[i]))

        if util.VERBOSE:
            print 'Finished generating neural network data set.'
            print 'Starting to train neural network.'

        # Train the neural network with backpropagation on the data set.
        self.trainer = BackpropTrainer(self.nnw, dataset=data_set)

        for i in xrange(3):
            error = self.trainer.train()
            print 'Iteration: %d\tError: %f' % (i, error)

        if util.VERBOSE:
            print 'Finished training neural network.'
Example #35
def create_int_dataset(n_input, n_output, codecs):
    ds = SupervisedDataSet(n_input, n_output)
    if n_input == 3 * 1:
        for i in range(0, len(codecs), 1):
            if i + 1 < len(codecs):
                ds.appendLinked(
                    list(codecs[i]),
                    list(codecs[i + 1])
                )
    elif n_input == 3 * 3:
        for i in range(0, len(codecs), 1):
            if i + 3 < len(codecs):
                ds.appendLinked(
                    list(codecs[i] + codecs[i + 1] + codecs[i + 2]),
                    list(codecs[i + 3])
                )
    elif n_input == 3 * 5:
        for i in range(0, len(codecs), 1):
            if i + 6 < len(codecs):
                ds.appendLinked(
                    list(codecs[i] + codecs[i + 1] + codecs[i + 2] + codecs[i + 3] + codecs[i + 4]),
                    list(codecs[i + 5])
                )

    elif n_input == 3 * 8:
        for i in range(0, len(codecs), 1):
            if i + 9 < len(codecs):
                ds.appendLinked(
                    list(codecs[i] + codecs[i + 1] + codecs[i + 2] + codecs[i + 3] + codecs[i + 4] + codecs[i + 5]
                         + codecs[i + 6] + codecs[i + 7]),
                    list(codecs[i + 8])
                )
    else:
        print 'not implemented yet'
        return
    return ds
Example #36
    print sorted_word_dict[-9:]
    plt.plot(occurrences)
    plt.xlabel('word indices')
    plt.ylabel('occurrences')
    plt.ylim([0, 5000])
    plt.show()

######## Build training set and save to file ############
print "Saving to file..."
#PyBrain has some nice classes to do all this.
from pybrain.datasets import SupervisedDataSet
import numpy as np

DS = SupervisedDataSet(dict_size, 1)

for m_list, target in [[spamlist, 1], [hamlist, 0]]:
    for mail in m_list:
        #each data point is a list (or vector) the size of the dictionary
        wordvector = np.zeros(dict_size)
        #now go through the email and put the occurrences of each word
        #in its respective spot (i.e. word_dict[word]) in the vector
        for word in mail:
            if word in word_dict:
                wordvector[word_dict[word]] += 1
        DS.appendLinked(np.log(wordvector + 1),
                        [target])  #put word occurrences on a log scale

#TODO: use MySQL instead of csv
DS.saveToFile('dataset.csv')
print "Done."
Example #37
# Pull out all indicators into a single pandas dataframe. Prefix all rows
# with "LTC"
ltc = d.ltc.combine("LTC")

# take our ltc dataframe, and get targets (prices in next 10 minutes)
# in the form of compound return prices (other options are "PRICES",
# which are raw price movements)
dataset, tgt = dtools.gen_ds(ltc, 1, ltc_opts, "CRT")

# initialize a pybrain dataset
DS = SupervisedDataSet(len(dataset.values[0]), np.size(tgt.values[0]))

# fill it
for i in xrange(len(dataset)):
    DS.appendLinked(dataset.values[i], [tgt.values[i]])

# split 70% for training, 30% for testing
train_set, test_set = DS.splitWithProportion(0.7)

# build our recurrent network with 10 hidden neurodes, one recurrent
# connection, using tanh activation functions
net = RecurrentNetwork()
hidden_neurodes = 10
net.addInputModule(LinearLayer(len(train_set["input"][0]), name="in"))
net.addModule(TanhLayer(hidden_neurodes, name="hidden1"))
net.addOutputModule(LinearLayer(len(train_set["target"][0]), name="out"))
net.addConnection(FullConnection(net["in"], net["hidden1"], name="c1"))
net.addConnection(FullConnection(net["hidden1"], net["out"], name="c2"))
net.addRecurrentConnection(FullConnection(net["out"], net["hidden1"], name="cout"))
net.sortModules()
Example #38
def main():
    try:
        while True:
            choice = input(
                "------------------------------------------\n1:All new\n2:Load all\n3:Add new data\n4:predict\n5:Print Data\n6:Recreate components\n7:Save\n8:Normalize\n9:print deNormKey\n10:print normalized data\n11:Generate Predicted Dataset\n12:Print generated DataSet\n13:plot generated DataSet\n14:2D plot\n15:To csv\n16:Exit\n-----------------------------------------\n"
            )
            if (choice == 1):
                global printDS
                printDS = SupervisedDataSet(2, 4)
                createNetwork()
                createDataSet()
                createTrainer()
                i = input("Enter the number of data lines you want to add: ")
                iterAppend(i)
                printDS = deepcopy(DS)
                normalize()
                print("printDS--------------------------------")
                print(printDS)
                print("DS-------------------------------------")
                print(DS)
                trainData()
                saveAll()

            if (choice == 2):
                loadAll()
                trainData()

            if (choice == 3):
                i = input("Enter the number of data lines you want to add: ")
                iterAppend(i)
                trainData()

            if (choice == 4):
                a = input("Enter the first input: ")
                b = input("Enter the second input: ")
                out = []
                out = predict(a, b)
                #remember to add code that can append the output data to the network

            if (choice == 5):
                printData()

            if (choice == 6):
                inChoice = input("1:New network\n2:New Dataset\n")
                if (inChoice == 1):
                    createNetwork()
                    trainData()
                if (inChoice == 2):
                    createDataSet()
                    i = input(
                        "Enter the number of data lines you want to add: ")
                    iterAppend(i)
                    trainData()

            if (choice == 7):
                saveAll()

            if (choice == 8):
                normalize()

            if (choice == 9):
                printDeNormKey()

            if (choice == 10):
                printNormalizedData()

            if (choice == 11):

                global generatedDataSet
                generatedDataSet = SupervisedDataSet(2, 4)

                ll1 = input("Enter the lower limit of the first input")
                hl1 = input("Enter the higher limit of the first input")
                ll2 = input("Enter the lower limit of the second input")
                hl2 = input("Enter the higher limit of the second input")

                for i1 in np.arange(ll1, hl1, 0.02):
                    for i2 in np.arange(ll2, hl2, 0.5):
                        out = []
                        out = predict(i1, i2)
                        generatedDataSet.appendLinked(
                            [i1, i2], [out[0], out[1], out[2], out[3]])

            if (choice == 12):
                print(generatedDataSet)

            if (choice == 13):
                plotData()

            if (choice == 14):
                plot2D()

            if (choice == 15):
                to_csv()

            if (choice == 16):
                break

    except Exception as e:
        print(str(e))
Example #39
def graphNN(ticker, date, epochs, verbose):
    """
    The function builds a data set of stock prices, normalizes that data set, builds a linked data set to
    train the neural network, generates a neural network, trains the network, makes predictions, analyzes the
    predictions against testing data to generate statistics for comparison, and uses the statistics to
    generate graphs as a png file.
    :param ticker: the stock sticker to train and predict on
    :param date: the date to split the data on to create training and testing
    :param epochs: the number of times to train the network
    :param verbose: boolean value for verbose output
    :return tomorrowPrice: the price prediction for tomorrow
    :return totalTime: the total time in seconds it took to train the network on the data set
    :return averageTimePerEpoch: the average time per training run
    :return averagePercentError: the average percent error of the predictions and the testing data
    :return minPercentError: the minimum percent error of the predictions and the testing data
    """
    # request stock prices and split by the specified date to create training and testing data sets
    if verbose: print 'Requesting data...'
    data = getStockPrices(ticker, frequency="daily", update=True)
    trainData, testData = splitByDate(data, date)
    xTrain, yTrain = preprocessStocks(trainData)
    xTest, yTest = preprocessStocks(testData)
    # allocate space for predictions and error values
    futurePredictions = []
    trainingPredictions = []
    percentError = []
    if verbose: print 'complete.'

    if verbose: print 'Normalizing data...'
    # normalize the values to a percentage of their max values to increase network training speed
    xTrain, yTrain, xTest, yTest, priceScaleFactor, timeScaleFactor = normalize(
        xTrain, yTrain, xTest, yTest)
    if verbose: print 'complete.'

    if verbose: print 'Building dataset...'
    # build a linked data set to allow for training and error calculation
    ds = SupervisedDataSet(1, 1)
    for i in range(0, len(xTrain)):
        ds.appendLinked(xTrain[i], yTrain[i])
    if verbose: print 'complete.'

    if verbose: print 'Building network...'
    rnn = buildNetwork(1,
                       3,
                       3,
                       3,
                       3,
                       3,
                       3,
                       3,
                       3,
                       1,
                       bias=True,
                       recurrent=True,
                       hiddenclass=TanhLayer)
    if verbose: print 'complete'

    if verbose: print 'Training network...'
    trainer = BackpropTrainer(rnn, ds, learningrate=0.01)
    totalTime, averageTimePerEpoch, trainerErrorValues, epochTimes = trainNetwork(
        trainer, epochs, verbose)
    if verbose: print 'Training network 100.0% complete.'

    if verbose: print 'Predicting...'
    # prime the network
    for i in xTrain:
        rnn.activate(i)

    # make predictions with network on the training data to show general shape of approximated function
    for i in xTrain:
        trainingPredictions.append(rnn.activate(i))
    # make predictions with the network on the testing data to validate the accuracy of the network
    for i in xTest:
        futurePredictions.append(rnn.activate(i))

    # predict tomorrow's price
    tomorrowPrice = rnn.activate(xTest[len(xTest) - 1] + 1) * priceScaleFactor
    if verbose: print 'complete.'

    if verbose: print 'Generating graphs...'
    # denormalize
    xTrain, yTrain, xTest, yTest, futurePredictions, trainingPredictions = denormalize(
        xTrain, yTrain, xTest, yTest, futurePredictions, trainingPredictions,
        priceScaleFactor, timeScaleFactor)

    # calculate percent error
    for i in range(0, len(yTest)):
        percentError.append((abs(
            (yTest[i] - futurePredictions[i]) / yTest[i]) * 100))

    # calculates statistics on the analysis of the network
    sumPercentError = sum(percentError)
    averagePercentError = sumPercentError / len(percentError)
    numDataPoints = len(xTrain) + len(xTest)
    minPercentError = min(percentError)

    # generate the graphs and save them to the working directory
    graphOutput(xTrain, yTrain, xTest, yTest, futurePredictions,
                trainingPredictions, ticker)
    if verbose: print 'complete.'

    # returns
    return tomorrowPrice, numDataPoints, totalTime, averageTimePerEpoch, averagePercentError, minPercentError
Example #40
def create_binary_dataset(n_input, n_output, codecs):
    ds = SupervisedDataSet(n_input, n_output)
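    # each encoded character contributes 11 values; an input window of k characters predicts the (k+1)-th character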
    if n_input == 11:
        for i in range(0, len(codecs) - n_input % 10, 1):
            ds.appendLinked(ret_Characters(codecs[i]),
                            ret_Characters(codecs[i + 1]))
    elif n_input == 22:
        for i in range(0, len(codecs) - n_input % 10, 1):
            ds.appendLinked(
                ret_Characters(codecs[i]) + ret_Characters(codecs[i + 1]),
                ret_Characters(codecs[i + 2]))

    elif n_input == 33:
        for i in range(0, len(codecs) - n_input % 10, 1):
            ds.appendLinked(
                ret_Characters(codecs[i]) + ret_Characters(codecs[i + 1]) +
                ret_Characters(codecs[i + 2]), ret_Characters(codecs[i + 3]))

    elif n_input == 44:
        for i in range(0, len(codecs) - n_input % 10, 1):
            ds.appendLinked(
                ret_Characters(codecs[i]) + ret_Characters(codecs[i + 1]) +
                ret_Characters(codecs[i + 2]) + ret_Characters(codecs[i + 3]),
                ret_Characters(codecs[i + 4]))
    elif n_input == 55:
        for i in range(0, len(codecs) - n_input % 10, 1):
            ds.appendLinked(
                ret_Characters(codecs[i]) + ret_Characters(codecs[i + 1]) +
                ret_Characters(codecs[i + 2]) + ret_Characters(codecs[i + 3]) +
                ret_Characters(codecs[i + 4]), ret_Characters(codecs[i + 5]))
    elif n_input == 66:
        for i in range(0, len(codecs) - n_input % 10, 1):
            ds.appendLinked(
                ret_Characters(codecs[i]) + ret_Characters(codecs[i + 1]) +
                ret_Characters(codecs[i + 2]) + ret_Characters(codecs[i + 3]) +
                ret_Characters(codecs[i + 4]) + ret_Characters(codecs[i + 5]),
                ret_Characters(codecs[i + 6]))
    elif n_input == 77:
        for i in range(0, len(codecs) - n_input % 10, 1):
            ds.appendLinked(
                ret_Characters(codecs[i]) + ret_Characters(codecs[i + 1]) +
                ret_Characters(codecs[i + 2]) + ret_Characters(codecs[i + 3]) +
                ret_Characters(codecs[i + 4]) + ret_Characters(codecs[i + 5]) +
                ret_Characters(codecs[i + 6]), ret_Characters(codecs[i + 7]))

    elif n_input == 88:
        for i in range(0, len(codecs) - n_input % 10, 1):
            ds.appendLinked(
                ret_Characters(codecs[i]) + ret_Characters(codecs[i + 1]) +
                ret_Characters(codecs[i + 2]) + ret_Characters(codecs[i + 3]) +
                ret_Characters(codecs[i + 4]) + ret_Characters(codecs[i + 5]) +
                ret_Characters(codecs[i + 6]) + ret_Characters(codecs[i + 7]),
                ret_Characters(codecs[i + 8]))
    elif n_input == 99:
        for i in range(0, len(codecs) - n_input % 10, 1):
            ds.appendLinked(
                ret_Characters(codecs[i]) + ret_Characters(codecs[i + 1]) +
                ret_Characters(codecs[i + 2]) + ret_Characters(codecs[i + 3]) +
                ret_Characters(codecs[i + 4]) + ret_Characters(codecs[i + 5]) +
                ret_Characters(codecs[i + 6]) + ret_Characters(codecs[i + 7]) +
                ret_Characters(codecs[i + 8]), ret_Characters(codecs[i + 9]))
    return ds
Example #41
    window[7] = normalize(window[7], max_volume, min_volume)
    window[9] = normalize(window[9], max_volume, min_volume)
    output = n.activate(window)

    for j in range(0, 5):
        prediction = denormalize(output[j], max_price, min_price)
        print prediction, ticks_future[j][0]
        writer.writerow([ticks_future[j][2], prediction, ticks_future[j][0]])

last_five = []

for day in range(0, 100):
    ticks = map(lambda x: data.next(), range(0, 5))
    last_five = map(lambda x: data.next(), range(0, 5))

    DS.appendLinked(*ticks_to_inputs_outputs(ticks, last_five))

with open('predictions.csv', 'wb') as output_file:
    writer = csv.writer(output_file, delimiter=',', quotechar='\"', quoting=csv.QUOTE_MINIMAL)
    
    for i in range(0, 18):
        trainer.trainUntilConvergence(validationProportion=0.55, maxEpochs=1000, verbose=False)

        ticks = map(lambda x: data.next(), range(0, 5))

        predict_next_five(last_five, ticks, writer)

        DS.appendLinked(*ticks_to_inputs_outputs(last_five, ticks))

        last_five = ticks
Example #42
class NeuralNetworkRegression(algorithmbase):
    def ExtraParams(self, hiddenlayerscount, hiddenlayernodescount):

        self.hiddenlayerscount = hiddenlayerscount
        self.hiddenlayernodescount = hiddenlayernodescount
        return self

    def PreProcessTrainData(self):
        self.traindata = preprocess_apply(self.traindata,
                                          self.missingvaluemethod,
                                          self.preprocessingmethods)

    def PrepareModel(self, savedmodel=None):

        if savedmodel is not None:
            self.trainer = savedmodel
        else:
            attributescount = len(self.traindata[0])
            self.ds = SupervisedDataSet(attributescount, 1)
            for i in range(len(self.traindata)):
                self.ds.appendLinked(self.traindata[i], self.trainlabel[i])

            self.net = FeedForwardNetwork()
            inLayer = LinearLayer(len(self.traindata[0]))
            self.net.addInputModule(inLayer)
            hiddenLayers = []
            for i in range(self.hiddenlayerscount):
                hiddenLayer = SigmoidLayer(self.hiddenlayernodescount)
                hiddenLayers.append(hiddenLayer)
                self.net.addModule(hiddenLayer)
            outLayer = LinearLayer(1)
            self.net.addOutputModule(outLayer)

            layers_connections = []
            layers_connections.append(FullConnection(inLayer, hiddenLayers[0]))
            for i in range(self.hiddenlayerscount - 1):
                layers_connections.append(
                    FullConnection(hiddenLayers[i], hiddenLayers[i + 1]))  # connect consecutive hidden layers
            layers_connections.append(
                FullConnection(hiddenLayers[-1], outLayer))

            for layers_connection in layers_connections:
                self.net.addConnection(layers_connection)
            self.net.sortModules()

            # train the network
            self.trainer = BackpropTrainer(self.net, self.ds)
            self.trainer.train()

    def PreProcessTestDate(self):
        self.testdata = preprocess_apply(self.testdata,
                                         self.missingvaluemethod,
                                         self.preprocessingmethods)

    def Predict(self):
        prediction = []
        for testrecord in self.testdata:
            prediction.append(self.net.activate(testrecord)[0])

        self.result = [self.testlabel, prediction]

    def GetModel(self):
        return self.trainer
Example #43
def train_network(trainer,
                  dataset=None,
                  k_fold=1,
                  bold_driver=False,
                  maxEpochs=1000):
    prev_err = 1000
    out_train = open("./errors/train_MSE.txt", "w")
    out_test = open("./errors/test_MSE.txt", "w")
    out_valid = open("./errors/valid_MSE.txt", "w")
    ptrain = open("./errors/train_progression.txt", "w")
    ptest = open("./errors/test_progression.txt", "w")

    # number of gradient descent passes in each training session
    test_cont = 0
    train_progression = []
    test_progression = []

    assert isinstance(trainer, myBackpropTrainer)
    net = trainer.module
    if dataset is None:
        dataset = trainer.ds

    assert isinstance(dataset, SupervisedDataSet)
    ds_dim = dataset.getLength()

    n = dataset.getLength() / k_fold
    base = 0
    for i in range(0, ds_dim - (ds_dim % k_fold), n):
        # create an empty dataset for computing the validation error
        ds_test = SupervisedDataSet(net.indim, net.outdim)
        ds_train = SupervisedDataSet(net.indim, net.outdim)

        print 'train ', (base / n) + 1, ' on ', ((ds_dim - (ds_dim % k_fold)) /
                                                 n)

        # build the train and test datasets for cross-validation
        for b in range(ds_dim - ds_dim % k_fold):
            if base <= b < (base + n):
                ds_test.appendLinked(*dataset.getLinked(b))
            else:
                ds_train.appendLinked(*dataset.getLinked(b))
        base += n

        # ds_test = dataset
        # ds_train = dataset

        tmp_train, tmp_test = trainer.trainUntilConvergence(
            datasetTrain=ds_train,
            datasetTest=ds_test,
            verbose=False,
            maxEpochs=maxEpochs,
            continueEpochs=maxEpochs / 2)

        # tmp_train, tmp_test = trainer.trainUntilConvergence(maxEpochs=1000, verbose=True,
        #                       continueEpochs=1000, validationProportion=0.30,
        #                       trainingData=ds_train, validationData=ds_test,
        #                       convergence_threshold=10)

        train_progression += tmp_train
        test_progression += tmp_test

        # bold driver: adjust the learning rate according to how the training error evolves
        if bold_driver:
            if (sum(tmp_train) / len(tmp_train)) < prev_err:
                prev_err = (sum(tmp_train) / len(tmp_train))
                trainer.descent.alpha += trainer.descent.alpha * 0.01  # alpha = learning rate
            else:
                prev_err = (sum(tmp_train) / len(tmp_train))
                trainer.descent.alpha -= trainer.descent.alpha * 0.01
                trainer.descent.alpha -= trainer.descent.alpha * 0.5  # alpha = learning rate
            print trainer.descent.alpha
            print sum(tmp_train) / len(tmp_train)

        # testOnData and Validator compute the same error (MSE) in two different ways;
        # implemented to check that the two functions behave consistently
        if net.indim % 11 == 0:
            val = evaluate_binary_error(net, ds_test, verbose=False)
        else:
            val = evaluate_int_error(net, ds_test, verbose=True)

        # write the errors to file so they can be plotted later
        out_train.write(str(sum(tmp_train) / len(tmp_train)) + '\n')
        out_valid.write(str(val) + '\n')
        out_test.write(str(sum(tmp_test) / len(tmp_test)) + '\n')

    for i in range(len(train_progression)):
        ptrain.write(str(train_progression[i]) + '\n')
    for i in range(len(test_progression)):
        ptest.write(str(test_progression[i]) + '\n')

    out_train.close()
    out_test.close()
    out_valid.close()
    ptest.close()
    ptrain.close()
Example #44
fo01 = open('out', 'w')
fo02 = open('dist', 'w')
fonet = open('net', 'w')

#load mat from matlab
mat = sio.loadmat('Features.mat')
#print(mat)
X = mat['X']
y = mat['y']
length = X.shape[0]

#set data
alldata = SupervisedDataSet(14, 7)
for n in arange(0, length):
    alldata.appendLinked(X[n], y[n])

#split data into test data and training data
tstdata, trndata = alldata.splitWithProportion(0.25)

#build network
fnn = buildNetwork(trndata.indim,
                   100,
                   trndata.outdim,
                   outclass=SigmoidLayer,
                   fast=True)
#print fnn

#build trainer
trainer = BackpropTrainer(fnn,
                          dataset=trndata,
Example #45
            temp_inputs.append(float(item))
            counter = counter + 1
        else:
            prediction_inputs.append(temp_inputs)
            prediction_outputs.append(float(item))
            temp_inputs = []
            counter = 0


    # for x in outputs:
    #     x = x + 30

    # DS = ClassificationDataSet(20, 1, nb_classes=60)
    DS = SupervisedDataSet(10,1)
    for x, y in zip(inputs, outputs):
        DS.appendLinked(x,y)
        # DS.addSample(x,y)

    # DS._convertToOneOfMany(bounds=[0,1])
    error2 = 0.0
    local_min_error = 100000
    for x in range(0,35):

        if count == 16 and local_min_error > 5.4:
            break

        error2 = error

        fnn = buildNetwork( DS.indim, 30, DS.outdim, hiddenclass = SigmoidLayer, outclass=SoftmaxLayer )
        target = [16,14,9,16,-7,-2,16,-1,-6,8,7,10,-6,-2,12,2,3]
Example #46
from pybrain.datasets import SupervisedDataSet
DS = SupervisedDataSet(3, 2)  # dimensions must match the sample appended below
DS.appendLinked([1, 2, 3], [4, 5])
len(DS)
1
Example #47
# xTest = xTrain
# yTest = yTrain

xTrain, yTrain, xTest, yTest, priceScaleFactor, timeScaleFactor = normalize(
    xTrain, yTrain, xTest, yTest)
print xTrain
print yTrain
print xTest
print yTest
print priceScaleFactor
print timeScaleFactor

# build data set from x training data and y training data
ds = SupervisedDataSet(1, 1)
for i in range(0, len(xTrain)):
    ds.appendLinked(xTrain[i], yTrain[i])

# number of runs
runs = 5

rnn = buildNetwork(1,
                   3,
                   3,
                   3,
                   3,
                   3,
                   3,
                   3,
                   3,
                   1,
                   bias=True,
Example #48
net = buildNetwork(16, 150, 26)

trainingset = open("fulltrainingset.txt", "r")
traininglines = trainingset.read().splitlines()
DS = SupervisedDataSet( 16, 26 )
for line in traininglines:
    line = line.replace("[", "")
    line = line.replace("]", "")
    splitline = line.split("!")
    entries = splitline[0].split(",")
    desired =  splitline[1].split(",")
    entries = list(map(int, entries))
    desired = list(map(int, desired))
    
    DS.appendLinked( entries, desired )
trainer = BackpropTrainer(net, DS, 0.01, momentum=0.02)
for i in range(1, 40):
    print(trainer.train())

testingset = open("fulltestingset.txt", "r")
testinglines = testingset.read().splitlines()
counter = 0
correctanswers = 0
for line in testinglines:
    line = line.replace("[", "")
    line = line.replace("]", "")
    values = line.split("!")
    entries = values[0].split(",")
    values = values[1].split(",")
    entries = list(map(int, entries))
Example #49
from deap import base, creator, tools
import random, time

creator.create("FitnessMax", base.Fitness, weights=(1.0, ))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
# Attribute generator
toolbox.register("attr_float", random.uniform, -2, 2)
# Structure initializers
toolbox.register("individual", tools.initRepeat, creator.Individual,
                 toolbox.attr_float, 4)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

DS = SupervisedDataSet(1, 1)
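# the training pairs below sample the target function f(x) = -x on [-1, 1]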
DS.appendLinked([1], [-1])
DS.appendLinked([-0.83], [0.83])
DS.appendLinked([-0.3], [0.3])
DS.appendLinked([0.3], [-0.3])
DS.appendLinked([0.028], [-0.028])
DS.appendLinked([-1], [1])


def calcError(x, y):
    return abs(x - y) / x


def testNetwork(eVar):
    global net  # So that we can look at the network after we're done training
    net = buildNetwork(1, 1, 1, bias=True, outputbias=True)
Example #50
from pybrain.supervised.trainers import BackpropTrainer

net = buildNetwork(16, 30, 26)

trainingset = open("sampple-trainingset", "r")
traininglines = trainingset.read().splitlines()
DS = SupervisedDataSet(16, 26)
for line in traininglines:
    line = line.replace("[", "")
    line = line.replace("]", "")
    values = line.split("!")
    entries = values[0].split(",")
    values = values[1].split(",")
    entries = list(map(int, entries))
    values = list(map(int, values))
    DS.appendLinked(entries, values)
trainer = BackpropTrainer(net, DS)
trainer.trainUntilConvergence()
#testingset = open("testingset.txt", "r")
#testinglines = testingset.read().splitlines()
#testing_data = []
#for line in testinglines:
#    line = line.replace("[", "")
#    line = line.replace("]", "")
#    values = line.split("!")
#    entries = values[0].split(",")
#    values = values[1].split(",")
#    entries = list(map(int, entries))
#    values = list(map(int, values))
#    testing_tuple = (entries, values)
#    testing_data.append(testing_tuple)
Example #51
DS = SupervisedDataSet(nFeatures, nOutput)
count = 0
for sample in data:
    #
    # Discard the label row
    #
    count = count +1
    if sample[0] == 'gas [m3]':
        continue

    label, x = sample[0], sample[1:]

    #
    # Insert the sample into the Dataset
    #
    DS.appendLinked(x, label)

#
# Divide the dataset into a training set and a test set. splitWithProportion
# here is a user-defined helper; the dataset's built-in method form is shown
# commented out below.
#
#tstdata, DS = DS.splitWithProportion( 0.25 )
trainData, tstdata = splitWithProportion(DS, 0.75)
print "Number of Dataset patterns: ", len(DS)
print "Number of training patterns: ", len(trainData)
print "Number of test patterns: ", len(tstdata)
print "Input and output dimensions: ", trainData.indim, trainData.outdim
print "number of units in hidden layer: ", nNeurons

#
# Build network with
#
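# The example is truncated here; a minimal sketch of the network-building
# step announced above, assuming the standard pybrain shortcut and the
# nNeurons hidden-layer size already printed:
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
net = buildNetwork(trainData.indim, nNeurons, trainData.outdim, bias=True)
trainer = BackpropTrainer(net, dataset=trainData)
trainer.trainUntilConvergence(maxEpochs=100)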
Beispiel #52
0
import numpy as np
from pybrain.datasets import SupervisedDataSet
from pybrain.structure import RecurrentNetwork, LinearLayer, TanhLayer, FullConnection

# Pull out all indicators into a single pandas dataframe, prefixing all
# rows with "LTC"
ltc = d.ltc.combine("LTC")

# take our ltc dataframe, and get targets (prices in next 10 minutes)
# in the form of compound return prices (other options are "PRICES",
# which are raw price movements)
dataset, tgt = dtools.gen_ds(ltc, 1, ltc_opts, "CRT")

# initialize a pybrain dataset
DS = SupervisedDataSet(len(dataset.values[0]), np.size(tgt.values[0]))

# fill it
for i in xrange(len(dataset)):
    DS.appendLinked(dataset.values[i], [tgt.values[i]])

# split 70% for training, 30% for testing
train_set, test_set = DS.splitWithProportion(.7)

# build our recurrent network with 10 hidden neurodes, one recurrent
# connection, using tanh activation functions
net = RecurrentNetwork()
hidden_neurodes = 10
net.addInputModule(LinearLayer(len(train_set["input"][0]), name="in"))
net.addModule(TanhLayer(hidden_neurodes, name="hidden1"))
net.addOutputModule(LinearLayer(len(train_set["target"][0]), name="out"))
net.addConnection(FullConnection(net["in"], net["hidden1"], name="c1"))
net.addConnection(FullConnection(net["hidden1"], net["out"], name="c2"))
net.addRecurrentConnection(
    FullConnection(net["out"], net["hidden1"], name="cout"))
Beispiel #53
0
#odom_dict = csv_to_dict('out/odom.csv')
#print odom_dict
#
#r1_rssi_dict = csv_to_dict('out/r1_rssi.csv')
#print r1_rssi_dict
#
#r1_odom_rssi_dict = csv_to_dict('out/r1_odom_rssi.csv')
#print r1_odom_rssi_dict


from pybrain.datasets import SupervisedDataSet

time, odom, rssi = csv_to_arrays('out/r1_time_odom_rssi.csv')


trndata = SupervisedDataSet(4, 1)
# Sanity-check the dataset with a dummy sample; note that it is never
# removed, so it remains part of the training data filled in below.
trndata.appendLinked([1, 2, 3, 4], [5])
print len(trndata)
print trndata['input']


#trndata.addSample((-15,-85,-25,-75), (0))
#trndata.addSample((-70,-70,-35,-35), (5))
#trndata.addSample((-85,-15,-75,-25), (50))

for i in range(len(odom)):
    trndata.addSample(rssi[i], odom[i])


print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
Beispiel #54
0
        # TRAIN is defined above the truncated start of this snippet.
        VALID = 100
        TEST = 100
        INPUT_SIZE = 30

        train = data[:TRAIN]
        valid = data[TRAIN - INPUT_SIZE:TRAIN + VALID]
        test = data[TRAIN - INPUT_SIZE + VALID:TRAIN + VALID + TEST]

        trainds = SupervisedDataSet(INPUT_SIZE, 1)
        testds = SupervisedDataSet(INPUT_SIZE, 1)
        validds = SupervisedDataSet(INPUT_SIZE, 1)

        for i in range(INPUT_SIZE, train.shape[0]):
            trainds.appendLinked(train[i - INPUT_SIZE:i], train[i])

        for i in range(INPUT_SIZE, test.shape[0]):
            testds.appendLinked(test[i - INPUT_SIZE:i], test[i])

        for i in range(INPUT_SIZE, valid.shape[0]):
            validds.appendLinked(valid[i - INPUT_SIZE:i], valid[i])

        THREADS = 4
        hidden_range = [4, 32]
        eta_range = [0.0001, 10.0]
        activation_func = [SigmoidLayer, TanhLayer]
        lamda_range = [1e-7, 1e-5]
        epochs_factor = 1

        besthparams = []
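
        # The snippet is truncated here. A minimal sketch of the random
        # hyperparameter search the ranges above suggest; the sampling
        # scheme and trial count are assumptions, not from the source.
        import math
        import random
        from pybrain.tools.shortcuts import buildNetwork
        from pybrain.supervised.trainers import BackpropTrainer

        best_err = float("inf")
        for trial in range(20):
            hidden = random.randint(hidden_range[0], hidden_range[1])
            eta = math.exp(random.uniform(math.log(eta_range[0]),
                                          math.log(eta_range[1])))
            act = random.choice(activation_func)
            lamda = random.uniform(lamda_range[0], lamda_range[1])
            net = buildNetwork(INPUT_SIZE, hidden, 1,
                               hiddenclass=act, bias=True)
            trainer = BackpropTrainer(net, trainds, learningrate=eta,
                                      weightdecay=lamda)
            trainer.trainEpochs(5 * epochs_factor)
            err = trainer.testOnData(validds)
            if err < best_err:
                best_err = err
                besthparams = [hidden, eta, act, lamda]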
Beispiel #55
0
import xml.etree.ElementTree as ET

import numpy as np
from pybrain.datasets import SupervisedDataSet
from pybrain.structure import FeedForwardNetwork, LinearLayer, SigmoidLayer, FullConnection
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.customxml import NetworkWriter, NetworkReader


class NET():
    def __init__(self, arg):
        self.inputsize = arg[0]
        self.outputsize = arg[-1]
        self.hiden = arg[1:-1]
        self.err = 1
        self.old_err = 1
        b = []
        b.append(self.inputsize)
        b += self.hiden
        b.append(self.outputsize)
        #print b#"%s, %s, %s, hiddenclass=TanhLayer"%(self.inputsize, self.hiden, self.outputsize)
        self.net = FeedForwardNetwork()
        self.inputlayer = LinearLayer(self.inputsize, "Input")
        self.net.addInputModule(self.inputlayer)
        self.outputlayer = LinearLayer(self.outputsize, "Output")
        self.net.addOutputModule(self.outputlayer)
        self.hidenlayers = []
        for i in xrange(len(self.hiden)):
            self.hidenlayers.append(SigmoidLayer(self.hiden[i], "hiden%s" % i))
            self.net.addModule(self.hidenlayers[-1])
        self.net.addConnection(
            FullConnection(self.inputlayer, self.outputlayer))
        for i in xrange(len(self.hidenlayers)):
            self.net.addConnection(
                FullConnection(self.inputlayer, self.hidenlayers[i]))
            self.net.addConnection(
                FullConnection(self.hidenlayers[i], self.outputlayer))
        for i in xrange(len(self.hidenlayers)):
            for j in xrange(i + 1, len(self.hidenlayers)):
                self.net.addConnection(
                    FullConnection(self.hidenlayers[i], self.hidenlayers[j]))
                #self.print_conections(self.net)
        self.net.sortModules()
        self.ds = SupervisedDataSet(self.inputsize, self.outputsize)

    def Update(self, hiden, h):
        self.net = FeedForwardNetwork()
        self.inputlayer = LinearLayer(self.inputsize, "Input")
        self.net.addInputModule(self.inputlayer)
        self.outputlayer = LinearLayer(self.outputsize, "Output")
        self.net.addOutputModule(self.outputlayer)
        self.hidenlayers = []
        for i in xrange(len(hiden)):
            self.hidenlayers.append(SigmoidLayer(hiden[i], "hiden%s" % i))
            self.net.addModule(self.hidenlayers[-1])
        self.net.addConnection(
            FullConnection(self.inputlayer, self.outputlayer))
        for i in xrange(len(self.hidenlayers)):
            self.net.addConnection(
                FullConnection(self.inputlayer, self.hidenlayers[i]))
            self.net.addConnection(
                FullConnection(self.hidenlayers[i], self.outputlayer))
        for i in xrange(len(self.hidenlayers)):
            for j in xrange(i + 1, len(self.hidenlayers)):
                if i < h:
                    self.net.addConnection(
                        FullConnection(self.hidenlayers[i],
                                       self.hidenlayers[j]))
                elif i == h:
                    self.net.addConnection(
                        FullConnection(self.hidenlayers[i],
                                       self.hidenlayers[j],
                                       inSliceTo=hiden[i] - 1))
                else:
                    self.net.addConnection(
                        FullConnection(self.hidenlayers[i],
                                       self.hidenlayers[j]))
                #self.print_conections(self.net)
        self.net.sortModules()
        self.hiden = hiden

    def print_conections(self, n):
        print("BEGIN")
        for mod in n.modules:
            print(mod)
            for conn in n.connections[mod]:
                print(conn)
                for cc in range(len(conn.params)):
                    print(conn.whichBuffers(cc), conn.params[cc])
        print("END")

    def AddData(self, datainput, dataoutput, learningrate):
        if len(dataoutput) != len(datainput):
            print("Data lengths are not equal", len(dataoutput), len(datainput))
            return 1
        self.ds = SupervisedDataSet(self.inputsize, self.outputsize)
        for i in xrange(len(dataoutput)):
            self.ds.appendLinked(datainput[i], dataoutput[i])
        self.trainer = BackpropTrainer(self.net,
                                       dataset=self.ds,
                                       learningrate=learningrate)
        return 0

    def TrainNet(self, epoch, error):

        if epoch <= 5:
            epoch = 5
        i = 0
        count = 0
        while i < epoch:
            if self.err <= error:  # stop once the target error is reached
                break
            self.err = self.trainer.train()
            if self.err == self.old_err:
                count += 1
            else:
                count = 0
            if count == 3:
                self.err = self.old_err
                return (self.err, 1)
            self.old_err = self.err
            i += 1
        #self.SaveNet('%s  %s_%s_%s.work'%(self.err, self.inputsize, self.hiden, self.outputsize))
        return [self.err, 0]

    def TrainNetOnce(self):

        self.err = self.trainer.train()

        return self.err

    def SaveNet(self, filename=None):
        if filename is None:
            NetworkWriter.writeToFile(
                self.net, '%s  %s_%s_%s.xml' %
                (self.err, self.inputsize, self.hiden, self.outputsize))
        else:
            NetworkWriter.writeToFile(self.net, filename)

    def LoadNet(self, fname):
        self.net = NetworkReader.readFrom(fname)
        tree = ET.parse(fname)
        x = tree.getroot()
        l = []
        for modules in x.findall('Network/Modules/SigmoidLayer/dim'):
            l.append(int(modules.get("val")))
        self.hiden = l[:]
        self.inputsize = self.net.indim
        self.outputsize = self.net.outdim

    def TestNet(self, inp):
        if len(inp) != self.inputsize:
            return 0
        return self.net.activate(inp[:])

    def UpdateWeights(self, f1, f2=None):
        n = NetworkReader.readFrom(f1)
        if f2 is not None:
            n2 = NetworkReader.readFrom(f2)

        def DictParams(n):
            l1 = []
            for mod in n.modules:
                l = []
                for conn in n.connections[mod]:
                    if conn.paramdim > 0:
                        l.append([conn.outmod.name, conn.params])
                d = dict(l)
                l1.append([mod.name, d])
            d1 = dict(l1)
            return d1

        d1 = DictParams(n)
        if f2 is not None:
            d2 = DictParams(n2)
        d3 = DictParams(self.net)

        params = np.array([])
        if f2 is not None:
            for i in d2:
                for j in d2[i]:
                    try:
                        b = d3[i][j][:]
                        b[:d2[i][j].size] = d2[i][j][:]
                        d3[i].update({j: b})
                    except (KeyError, ValueError):  # connection absent or size mismatch
                        pass
        for i in d1:
            for j in d1[i]:
                try:
                    b = d3[i][j][:]
                    b[:d1[i][j].size] = d1[i][j][:]
                    d3[i].update({j: b})
                except (KeyError, ValueError):  # connection absent or size mismatch
                    pass
        for i in d3["Input"]:
            params = np.hstack((params, d3["Input"][i]))
        for i in xrange(len(self.hiden)):
            for j in d3["hiden%s" % i]:
                params = np.hstack((params, d3["hiden%s" % i][j]))
        self.net._setParameters(params)
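

# A minimal usage sketch for the NET class above; the layer sizes and the
# XOR-style toy data are illustrative assumptions, not from the source:
if __name__ == "__main__":
    n = NET([2, 3, 1])  # 2 inputs, one 3-unit hidden layer, 1 output
    data_in = [[0, 0], [0, 1], [1, 0], [1, 1]]
    data_out = [[0], [1], [1], [0]]
    n.AddData(data_in, data_out, learningrate=0.01)
    print(n.TrainNet(epoch=100, error=0.01))
    print(n.TestNet([1, 0]))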
Beispiel #56
0
from pybrain.structure import RecurrentNetwork, LinearLayer, FullConnection, SigmoidLayer, TanhLayer
from pybrain.datasets import SupervisedDataSet
from pybrain.tools.shortcuts import buildNetwork

import time

top = 1000
features = []
for i in range(0, top):
    features.append(i)

labels = []
for i in range(0, top):
    labels.append((i / 2.0))

ds = SupervisedDataSet(1, 1)
for i in range(0, top):
    ds.appendLinked(features[i], labels[i])

#TrainDS, TestDS = ds.splitWithProportion(0.8)

# for input, target in ds:
#    print input, target

# net = buildNetwork(1, 3, 1, bias=True, hiddenclass=TanhLayer)

# rnn = RecurrentNetwork()
rnn = buildNetwork(1, 25, 1, bias=True, recurrent=False, hiddenclass=TanhLayer)
# rnn.addInputModule(LinearLayer(1, 'in'));
# rnn.addModule(SigmoidLayer(25,'hidden'));
# rnn.addOutputModule(LinearLayer(1,'out'));
# rnn.addConnection(FullConnection(rnn['in'],rnn['hidden'],'feed'));
# rnn.addConnection(FullConnection(rnn['hidden'],rnn['out'],'give'));
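
# A minimal training sketch for the network built above; the inputs are
# scaled into [0, 1] first, since a tanh hidden layer saturates on raw
# 0..999 values (the scaling choice is an assumption):
from pybrain.supervised.trainers import BackpropTrainer
scaled = SupervisedDataSet(1, 1)
for inp, tgt in ds:
    scaled.appendLinked(inp / float(top), tgt / float(top))
trainer = BackpropTrainer(rnn, dataset=scaled, learningrate=0.01)
for epoch in range(10):
    print(trainer.train())
print(rnn.activate([500 / float(top)])[0] * top)  # should approach 250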
Beispiel #57
0
from pybrain.datasets import SupervisedDataSet

# Initialize the training dataset
numInput = 2  # Number of input features
ds = SupervisedDataSet(numInput, 1)

# Load training data from text file (comma separated)
trainingDataFile = 'regrData.txt'
with open(trainingDataFile, 'r') as tf:
    for line in tf:
        # Split the values on the current line, and convert to float
        tfData = [float(x) for x in line.strip().split(',') if x != '']
        inData = tuple(tfData[:numInput])   # Grab the first numInput values
        outData = tuple(tfData[numInput:])  # Grab the rest

        # Add the data to the dataset
        ds.appendLinked(inData, outData)

# -------
# Build a feed forward neural network (that can have large output)
# -------
from pybrain.structure import SigmoidLayer, LinearLayer
from pybrain.tools.shortcuts import buildNetwork
numHidden = 100
net = buildNetwork(
    ds.indim,  # Number of input units
    numHidden,  # Number of hidden units
    ds.outdim,  # Number of output units
    bias=True,
    hiddenclass=SigmoidLayer,
    outclass=LinearLayer  # Allows for a large output
)
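
# A minimal sketch of training the regression network above and querying
# it; the epoch count is an assumption:
from pybrain.supervised.trainers import BackpropTrainer
trainer = BackpropTrainer(net, dataset=ds, learningrate=0.01, momentum=0.1)
for epoch in range(50):
    trainer.train()
print(net.activate(ds['input'][0]))  # prediction for the first sample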
Beispiel #58
0
    def build_dataset(self):

        if os.path.isfile(self.dataset_file):
            with open(self.dataset_file, "rb") as f:
                dataset = cPickle.load(f)
        else:
            dataset = SupervisedDataSet(len(features), 1)

        if os.path.isfile(self.done_articles_file):
            with open(self.done_articles_file, "rb") as f:
                done_articles = cPickle.load(f)
        else:
            done_articles = {}

        value = -1
        decision = "y"

        for file_name in os.listdir(self.articles_dir):
            print "\n\n"
            print "---" * 10
            decision = raw_input("Do another article? [y/n] ")
            if decision[:1].lower() != "y":
                break

            with open("articles/" + file_name) as article:
                text = ""
                first = True
                for line in article.readlines()[1:]:
                    text += line
                sentences = tokenize(text, "sentence", return_spans=False)

                article_position = done_articles.get(file_name, 0)
                if article_position >= len(sentences):
                    continue

                print "Looking at:", file_name, "from position", article_position

                for sentence in sentences[article_position:]:
                    extractor = FeatureExtractor(sentence)
                    vectors = extractor.get_feature_vectors(
                        features, "sentence")[0]
                    print sentence

                    value = -1
                    while value == -1:
                        rating = raw_input("nothing=OK, space=bad, q=quit: ")
                        if rating == "":
                            value = [0]
                        elif rating[:1].lower() == "q":
                            value = None
                        elif rating[:1] == " ":
                            value = [1]

                    # quit on q
                    if value is None:
                        break

                    dataset.appendLinked(vectors, value)
                    done_articles[file_name] = done_articles.get(file_name,
                                                                 0) + 1

        with open(self.dataset_file, "wb") as f:
            cPickle.dump(dataset, f)
        with open(self.done_articles_file, "wb") as f:
            cPickle.dump(done_articles, f)
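
    # A minimal sketch of how the pickled dataset might be used afterwards;
    # the method name and network shape are assumptions, not from the source:
    def train_network(self):
        from pybrain.tools.shortcuts import buildNetwork
        from pybrain.supervised.trainers import BackpropTrainer

        with open(self.dataset_file, "rb") as f:
            dataset = cPickle.load(f)
        net = buildNetwork(dataset.indim, 10, dataset.outdim, bias=True)
        trainer = BackpropTrainer(net, dataset=dataset, learningrate=0.01)
        for i in xrange(20):
            print trainer.train()
        return net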
Beispiel #59
0
from numpy import random, arange, array
import matplotlib.pyplot as plt
from pybrain.structure import FeedForwardNetwork, LinearLayer, TanhLayer, BiasUnit, FullConnection
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer

n = FeedForwardNetwork()
n.addInputModule(LinearLayer(1, name='in'))
n.addInputModule(BiasUnit(name='bias'))
n.addModule(TanhLayer(3, name='gotan'))
n.addOutputModule(LinearLayer(1, name='out'))
n.addConnection(FullConnection(n['bias'], n['gotan']))
n.addConnection(FullConnection(n['in'], n['gotan']))
n.addConnection(FullConnection(n['gotan'], n['out']))
n.sortModules()

# initialize the backprop trainer and train
t = BackpropTrainer(n, learningrate=0.1, momentum=0.0, verbose=True)
# Dataset: noisy samples of y = x**3, with inputs and outputs scaled into [0, 1]

DS = SupervisedDataSet(1, 1)
X = random.rand(100, 1) * 100
Y = X**3 + random.rand(100, 1) * 5
maxy = float(max(Y))
maxx = 100.0

for r in range(X.shape[0]):
    DS.appendLinked((X[r] / maxx), (Y[r] / maxy))

t.trainOnDataset(DS, 200)

plt.plot(X, Y, '.b')
X = [[i] for i in arange(0, 100, 0.1)]
Y = list(map(lambda x: n.activate(array(x) / maxx) * maxy, X))
plt.plot(X, Y, '-g')