Example #1
    def handle(self, *args, **options):
        better_thans = BetterThan.objects.all() #.filter(pk__lte=50)

        ds = SupervisedDataSet(204960, 1)
        for better_than in better_thans:
            bt = imread(better_than.better_than.image.file)
            wt = imread(better_than.worse_than.image.file)
            better_than.better_than.image.file.close()
            better_than.worse_than.image.file.close()

            # bt = filters.sobel(bt)
            # wt = filters.sobel(wt)

            bt_input_array = np.reshape(bt, (bt.shape[0] * bt.shape[1]))
            wt_input_array = np.reshape(wt, (wt.shape[0] * wt.shape[1]))
            input_1 = np.append(bt_input_array, wt_input_array)
            input_2 = np.append(wt_input_array, bt_input_array)
            ds.addSample(input_1, [-1])
            ds.addSample(input_2, [1])
        
        net = buildNetwork(204960, 2, 1)

        train_ds, test_ds = ds.splitWithProportion(options['train_test_split'])

        trainer = BackpropTrainer(net, train_ds)

        # sanity check on the last image pair from the loop, before training:
        print 'Looking for -1: {0}'.format(net.activate(np.append(bt_input_array, wt_input_array)))
        print 'Looking for 1: {0}'.format(net.activate(np.append(wt_input_array, bt_input_array)))

        trainer.train()

        print 'Looking for -1: {0}'.format(net.activate(np.append(bt_input_array, wt_input_array)))
        print 'Looking for 1: {0}'.format(net.activate(np.append(wt_input_array, bt_input_array)))
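splitWithProportion(p) returns two datasets: the first holds roughly p of the samples, the second the remainder. A minimal demonstration of the return order (a sketch, separate from the command above):

from pybrain.datasets import SupervisedDataSet

demo = SupervisedDataSet(2, 1)
for i in range(10):
    demo.addSample([i, i], [i])
first, second = demo.splitWithProportion(0.7)
print len(first), len(second)  # roughly 7 and 3 samples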
Example #2
def fnn3(frame,string):
    df=frame.dropna(axis=0,how='any')
    name=string
    nm1=name+"tst1.png"
    nm2=name+"cov1.png"
    x=df.drop(columns=['gvkey','date','price'])
    x=np.array(x)
    x=normalize(x, axis=0, norm='max')
    y=np.array(df['ret'])
    x=np.delete(x,1,axis=1)
    xdim=x.shape[1]
    ydim=1
    DS=SupervisedDataSet(xdim,ydim)
    for i in range(len(x)):
        DS.addSample(x[i],y[i])
    dataTrain, dataTest = DS.splitWithProportion(0.8)
    dataPlot, datadrop =DS.splitWithProportion(0.002)
    xTrain, yTrain = dataTrain['input'],dataTrain['target']
    xTest, yTest = dataTest['input'], dataTest['target']
    xPlot, yPlot= dataPlot['input'], dataPlot['target']
    fnn=buildNetwork(xdim,xdim+1,xdim+2,int(0.5*(xdim+1)),ydim,hiddenclass=TanhLayer,outclass=LinearLayer)
    trainer=BackpropTrainer(fnn,dataTrain,learningrate=0.000000001,verbose=True)
    err_train, err_valid =trainer.trainUntilConvergence(maxEpochs=100)
    
    # testOnClassData/percentError are classification metrics; on this
    # regression net they only give a rough sanity check
    tstresult = percentError( trainer.testOnClassData(), dataTest['target'] )
    print("epoch: %4d" % trainer.totalepochs, " test error: %5.2f%%" % tstresult)
    
    predict_result = []
    for i in np.arange(len(xPlot)):
        predict_result.append(fnn.activate(xPlot[i])[0])
    print(predict_result)
    
    #yTest2 = yTest[0:len(yTest):12]
    #pred2 = predict_result[0:len(predict_result):12]
    
    plt.figure(figsize=(30,6), dpi=600)
    plt.xlabel("Test Timeline")
    plt.ylabel("Result")
    plt.plot(np.arange(0,len(xPlot)), yPlot,'ko-', label='true number')
    plt.plot(np.arange(0,len(xPlot)), predict_result,'ro--', label='predict number')
    lgnd1=plt.legend()
    plt.savefig(nm1, dpi=600, bbox_extra_artists=(lgnd1,))
    
    plt.figure(figsize=(9,9), dpi=600)
    plt.plot(err_train,'b',label='train_err')
    plt.plot(err_valid,'r',label='valid_err')
    plt.xlabel("Training Times")
    plt.ylabel("Total Error")
    lgnd2=plt.legend()
    plt.savefig(nm2, dpi=600, bbox_extra_artists=(lgnd2,))
    plt.show()
    
    return

fnn3(Apro,"Apro")
fnn3(Aval,"Aval")
fnn3(Amom,"Amom")
fnn3(Atra,"Atra")
fnn3(Afd,"Afd")
fnn3(Atec,"Atec")
Example #3
def buildDataset(inpts, targets):
    ds = SupervisedDataSet(12, 1)
    for inpt, target in zip(inpts, targets):
        ds.addSample(inpt, target)
    return ds.splitWithProportion(0.75)
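A hypothetical call to buildDataset above, with toy 12-feature rows (the real inpts and targets come from the surrounding project):

inpts = [[0.1] * 12 for _ in range(100)]
targets = [[1.0] for _ in range(100)]
train_ds, test_ds = buildDataset(inpts, targets)
print len(train_ds), len(test_ds)  # about 75 and 25 samples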
Example #4
    def classicNeuralNetwork(self, features, labels, autoencoder=False):
        dataSet = SupervisedDataSet(features.shape[1], 1)
        dataSet.setField('input', features)
        if autoencoder: labels = features
        dataSet.setField('target', labels)
        tstdata, trndata = dataSet.splitWithProportion(0.25)
        print features.shape
        simpleNeuralNetwork = _buildNetwork(\
                                    (LinearLayer(features.shape[1],'in'),),\
                                    (SigmoidLayer(20,'hidden0'),),\
                                    (LinearLayer(labels.shape[1],'out'),),\
                                    bias=True)
        trainer = BackpropTrainer(simpleNeuralNetwork,
                                  dataset=trndata,
                                  verbose=True)  #, momentum=0.1)
        trainer.trainUntilConvergence(maxEpochs=15)

        # testOnData returns a mean-squared-error scalar, so percentError here
        # is only a rough sanity figure rather than a true classification error
        trnresult = percentError(trainer.testOnData(dataset=trndata),
                                 trndata['target'])
        tstresult = percentError(trainer.testOnData(dataset=tstdata),
                                 tstdata['target'])

        print "epoch: %4d" % trainer.totalepochs, \
          "  train error: %5.2f%%" % trnresult, \
          "  test error: %5.2f%%" % tstresult

        self.neuralNetwork = simpleNeuralNetwork
Example #5
    def _buildDataset(self, inpts, targets):
        ds = SupervisedDataSet(len(inpts[0]), len(targets[0]))
        for inpt, target in zip(inpts, targets):
            ds.addSample(inpt, target)
        # a proportion of 1 puts every sample in the first returned set
        return ds.splitWithProportion(1)
Example #6
def dsBuild(data):
    ds = SupervisedDataSet(6, 1)
    for ele in data:
        # first six columns are inputs, the seventh is the target
        ds.addSample((ele[0], ele[1], ele[2], ele[3], ele[4], ele[5]),
                     (ele[6],))
    dsTrain, dsTest = ds.splitWithProportion(0.8)
    return dsTrain, dsTest
Example #7
def makeNet(learning_rate):
    ds = SupervisedDataSet(20, 20)
    with open('data/misspellingssmall.csv', 'rbU') as f:
        reader = csv.reader(f)
        for row in reader:
            ds.addSample(convert(row[0]), convert(row[1]))

    testds, trainds = ds.splitWithProportion(0.2)

    net = buildNetwork(20, 20, 20)
    trainer = BackpropTrainer(net, dataset=trainds, learningrate=learning_rate)

    myscore = float("inf")
    i = 0
    while myscore > 5:
        i += 1

        trainer.train()
        #trainer.trainEpochs(5)
        #trainer.trainUntilConvergence(verbose=True)

        myscore = score(net, testds)
        print "Epoch #" + str(i) + ": " + str(myscore) + " (" + unconvert(
            net.activate(convert("ecceptable"))) + ")"

    global lastNet
    lastNet = net

    print "Network done with score " + str(myscore)

    return myscore
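The convert, unconvert, and score helpers used above are defined elsewhere in the project. A plausible sketch of the encoding pair, assuming words map to 20 character ordinals padded with zeros (an assumption, not the project's actual encoding):

def convert(word, size=20):
    # fixed-length vector of character codes, zero-padded
    codes = [float(ord(c)) for c in word[:size]]
    return codes + [0.0] * (size - len(codes))

def unconvert(vector):
    # map (possibly noisy) character codes back to a string
    return ''.join(chr(int(round(v))) for v in vector if 0 < round(v) < 256)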
Example #9
    def get_portion(self, portion=1.00):
        num_portion = int(self.tot_size * portion)
        data_portion = SupervisedDataSet(self.in_dim, self.out_dim)
        for ind in xrange(num_portion):
            data_portion.addSample(copy.deepcopy(self.all_data['input'][ind]),
                                   copy.deepcopy(self.all_data['target'][ind]))

        tst_portion, trn_portion = data_portion.splitWithProportion(
            self.split_proportion)

        self.portion["training"] = trn_portion
        self.portion["test"] = tst_portion
Example #10
def fnn_datasets(data_x, label_y, train_test_rate):
    input_dimension = np.shape(data_x)[1]
    target_dimension = np.shape(label_y)[1]
    print input_dimension, target_dimension
    # define the dataset format: input and target dimensions taken from the data
    DS = SupervisedDataSet(input_dimension, target_dimension)
    for i in range(np.shape(data_x)[0]):
        DS.addSample(data_x[i], label_y[i])
    dataTrain, dataTest = DS.splitWithProportion(train_test_rate)
    #xTrain, yTrain = dataTrain['input'], dataTrain['target']
    #xTest, yTest = dataTest['input'], dataTest['target']
    return dataTrain, dataTest
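Hypothetical usage with toy arrays (the shapes are assumptions):

import numpy as np

x = np.random.rand(100, 3)
y = np.random.rand(100, 1)
dataTrain, dataTest = fnn_datasets(x, y, 0.8)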
Example #11
    def __init__(self,
                 in_dim,
                 out_dim,
                 size=1000,
                 means=None,
                 covas=None,
                 split_proportion=0.25):
        if means is None or covas is None:
            means = []
            covas = []

            for i in xrange(in_dim):

                ### randomMeans
                sign_value = random.choice([-1, 1])
                means.append(random.random() * 10 * sign_value)

                ### randomCovas
                size_value = 3
                covas.append(random.random() * size_value)

        self.in_dim = in_dim
        self.out_dim = out_dim

        means = tuple(means)
        covas = diag(covas)
        entro = math.log(det(covas))

        self.means = means
        self.covas = covas
        self.entro = entro

        # TODO: investigate what the output function actually looks like
        all_data = SupervisedDataSet(in_dim, out_dim)
        for n in xrange(size):
            in_datum = multivariate_normal(means, covas)
            out_datum = []
            for z in xrange(out_dim):
                start_ind = z * (in_dim / out_dim)
                end_ind = (z + 1) * (in_dim / out_dim) + 1
                val = math.sin(sum(in_datum[start_ind:end_ind]))
                out_datum.append(val)
            all_data.addSample(in_datum, out_datum)

        tst_data, trn_data = all_data.splitWithProportion(split_proportion)

        self.tot_size = size
        self.all_data = all_data
        self.tst_data = tst_data
        self.trn_data = trn_data
        self.portion = {"training": None, "test": None}

        self.split_proportion = split_proportion
Example #12
def buildDS(tag_num, data):
    print "Building data set..."
    # feature: 3
    ds = SupervisedDataSet(tag_num * 3, tag_num)
    
    for ele in data:
        #ds.addSample((ele[4], ele[5], ele[6], ele[7], ele[8], ele[9], ele[10], ele[11], ele[12], ele[13], ele[14], ele[15]), (ele[0], ele[1], ele[2], ele[3]))  # 4-tag_num * 3: prob of each feature (in), 0-3: tag info (out)
        ds.addSample(ele[tag_num:], ele[:tag_num])
    
    # split into training and test sets
    dsTrain, dsTest = ds.splitWithProportion(0.8)
    return dsTrain, dsTest
Example #13
class NNet(object):
    def __init__(self):
        self.net = buildNetwork(2, 4, 2, bias=True)
        self.net.randomize()
        print self.net
        self.ds = SupervisedDataSet(2,2)
        self.trainer = BackpropTrainer(self.net, self.ds, learningrate = 0.1, momentum=0.99)
    def addTrainDS(self, data1, data2, max):
        norm1 = self.normalize(data1, max)
        norm2 = self.normalize(data2, max)
        for x in range(len(norm1)):
            self.ds.addSample(norm1[x], norm2[x])
    def train(self):
        print "Training"
        # print self.trainer.train()
        trndata, tstdata = self.ds.splitWithProportion(.25)
        self.trainer.trainUntilConvergence(verbose=True,
                                           trainingData=trndata,
                                           validationData=tstdata,
                                           validationProportion=.3,
                                           maxEpochs=500)
        # self.trainer.trainOnDataset(trndata,500)
        self.trainer.testOnData(tstdata, verbose= True)

    def activate(self, data):
        for x in data:
            self.net.activate(x)

    def normalize(self, data, max):
        normData = np.zeros((len(data), 2))
        for x in [0,1]:
            for y in range(len(data)):
                val = data[y][x]
                normData[y][x] = (val)/(max[x])
        # print normData
        return normData

    def denormalize(self, data, max):
        deNorm = np.zeros((len(data), 2))
        for x in [0,1]:
            for y in range(len(data)):
                val = data[y][x]
                deNorm[y][x] = val*max[x]
        return deNorm

    def getOutput(self, mat, max):
        norm = self.normalize(mat, max)
        out = []
        for val in norm:
            out.append(self.net.activate(val))
        return self.denormalize(out, max)
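A hypothetical driver for the NNet class above (the sample values and the max scaling vector are assumptions; the PyBrain imports from the other examples are assumed in scope):

import numpy as np

nn = NNet()
data_in = np.random.rand(40, 2) * 10.0
data_out = data_in * 0.5  # toy mapping for the net to learn
nn.addTrainDS(data_in, data_out, [10.0, 10.0])
nn.train()
print nn.getOutput(np.array([[4.0, 8.0]]), [10.0, 10.0])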
Example #14
def getDataSet():
    X, Y = getFeatures()
    features = len(X[0])
    cases = len(X)

    DS = SupervisedDataSet(features, 1)

    for i in range(cases):
        DS.addSample(X[i], Y[i])

    TrainDS, TestDS = DS.splitWithProportion(0.7)

    return TrainDS, TestDS
Example #15
def learn(input, output):
    """
    Learn nn from data.
    """
    nn = RecurrentNeuralNetwork(13, 4)
    dataset = SupervisedDataSet(13, 4)
    for ins, out in zip(input, output):
        dataset.addSample(ins, out)

    learning, validating = dataset.splitWithProportion(0.8)
    nn.set_learning_data(learning)
    nn.train(75)

    result = nn.calculate(validating)

    return result, validating['target']
Example #16
def get_train_data():
    # define the dataset with two inputs and one output
    DS = SupervisedDataSet(2, 1)

    u1, u2, y = _generate_data()
    # add data element to the dataset
    for i in np.arange(199):
        DS.addSample([u1[i], u2[i]], [y[i + 1]])

    # you can get your input/output this way
    # X = DS['input']
    # Y = DS['target']

    # split the dataset into train dataset and test dataset
    dataTrain, dataTest = DS.splitWithProportion(0.8)

    return dataTrain, dataTest
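_generate_data is defined elsewhere in the project; the loop above assumes it returns at least 200 aligned samples. A toy stand-in consistent with that shape (an assumption, not the original generator):

import numpy as np

def _generate_data(n=200):
    u1 = np.random.uniform(-1, 1, n)
    u2 = np.random.uniform(-1, 1, n)
    y = np.zeros(n)
    for k in range(n - 1):
        # toy first-order dynamics: the next output depends on the current state and inputs
        y[k + 1] = 0.5 * y[k] + 0.3 * u1[k] + 0.2 * u2[k]
    return u1, u2, y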
Example #17
def neural_network_converg(data, target, network):
    # note: the network argument is unused; a fresh net is built below
    DS = SupervisedDataSet(len(data[0]), 1)
    nn = buildNetwork(len(data[0]), 7, 1, bias = True, hiddenclass = SigmoidLayer, outclass = LinearLayer)
    for d, t in zip(data, target):
         DS.addSample(d,t)
    Train, Test = DS.splitWithProportion(0.9)
    #data_train = Train['input']
    data_test = Test['input']
    #target_train = Train['target']
    target_test = Test['target']
    bpTrain = BackpropTrainer(nn,Train, verbose = True)
    #bpTrain.train()
    bpTrain.trainUntilConvergence(maxEpochs = 10)
    p = []
    for d_test in data_test:
        p.append(nn.activate(d_test))
        
    rmse_nn = sqrt(np.mean((p - target_test)**2)) 
    print(rmse_nn) 
Example #18
    def fit(self, x, y, params):
        from pybrain.datasets import SupervisedDataSet
        from pybrain.tools.shortcuts import buildNetwork
        from pybrain.supervised.trainers import BackpropTrainer
        from pybrain.structure import TanhLayer, LinearLayer
        from sklearn.preprocessing import MinMaxScaler
        '''
        Normalize the features X and the labels y onto the same scale.
        '''
        scale_x = MinMaxScaler().fit(x.values.reshape(x.shape[0], -1))
        x_min_max = scale_x.transform(x.values.reshape(x.shape[0], -1))
        scale_y = MinMaxScaler().fit(y.values.reshape(y.shape[0], -1))
        y_min_max = scale_y.transform(y.values.reshape(y.shape[0], -1))

        num = x.shape[0]
        x_dim = x.shape[1]
        try:
            y_dim = y.shape[1]
        except IndexError:
            y_dim = 1
        model = buildNetwork(x_dim,
                             4,
                             16,
                             y_dim,
                             bias=True,
                             hiddenclass=TanhLayer,
                             outclass=LinearLayer)
        data_set = SupervisedDataSet(x_dim, y_dim)
        for i in range(num):
            data_set.addSample(x_min_max[i], y_min_max[i])
        train, test = data_set.splitWithProportion(0.99)
        trainer = BackpropTrainer(model,
                                  dataset=train,
                                  learningrate=0.02,
                                  lrdecay=1.0,
                                  momentum=0,
                                  verbose=True)
        trainingErrors, validationErrors = trainer.trainUntilConvergence(
            maxEpochs=15)
        model.dims = (x_dim, y_dim)
        model.scale_x = scale_x
        model.scale_y = scale_y
        return model
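A sketch of the matching predict step, mirroring the scale_x/scale_y scalers stored on the returned model (an assumed companion, not part of the original class):

import numpy as np

def predict_with_model(model, x_new):
    # scale inputs with the training-time scaler, activate row by row,
    # then map the outputs back to the original label range
    x_scaled = model.scale_x.transform(np.asarray(x_new).reshape(len(x_new), -1))
    y_scaled = np.array([model.activate(row) for row in x_scaled])
    return model.scale_y.inverse_transform(y_scaled.reshape(len(y_scaled), -1))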
Example #19
File: nn.py Project: dmand/ann
    def build_pybrain_dataset(self):
        field_count = len(dataset.fields)

        diagnoses_count = len(self.diagnoses)

        supervised_dataset = SupervisedDataSet(field_count, diagnoses_count)
        # supervised_dataset = ClassificationDataSet(field_count,
        #                                            diagnoses_count,
        #                                            nb_classes=diagnoses_count,
        #                                            class_labels=self.diagnoses)

        for sample in self.data:
            input = self.make_input(sample)
            diagnosis = sample['Диагноз']
            target = self.make_target(diagnosis)
            supervised_dataset.addSample(input, target)

        self.supervised_dataset = supervised_dataset
        # self.training_dataset = supervised_dataset
        # self.testing_dataset = supervised_dataset
        self.training_dataset, self.testing_dataset = supervised_dataset.splitWithProportion(0.7)
Example #20
def vali():
    from pybrain.tools.validation import ModuleValidator
    from pybrain.tools.validation import CrossValidator
    with open('new_data1.txt') as data_file:
        data = json.load(data_file)
    m = [d[0] for d in data]
    case = [min([a for a, s, d in m]), float(max([a for a, s, d in m])-min([a for a, s, d in m]))]
    week = [min([s for a, s, d in m]), float(max([s for a, s, d in m])-min([s for a, s, d in m]))]
    grid = [min([d for a, s, d in m]), float(max([d for a, s, d in m])-min([d for a, s, d in m]))]
    ds = SupervisedDataSet(3, 1)
    import random
    random.shuffle(data)
    print len(data)
    for i in xrange(0, len(data)):
        # print "Adding {}th data sample".format(i),
        x1 = float(data[i][0][0] - case[0])/case[1]
        x2 = float(data[i][0][1] - week[0])/week[1]
        x3 = float(data[i][0][2] - grid[0])/grid[1]
        input = (x1, x2, x3)
        output = data[i][1]
        ds.addSample(input, output)
        # print ":: Done"

    print "Train"
    net = buildNetwork(3, 3, 1, bias=True)
    tstdata, trndata = ds.splitWithProportion( 0.33 )
    trainer = BackpropTrainer(net, trndata)
    mse = []
    modval = ModuleValidator()
    for i in range(100):
        # note: each iteration trains twice (trainEpochs plus trainOnDataset)
        trainer.trainEpochs(1)
        trainer.trainOnDataset(dataset=trndata)
        cv = CrossValidator(trainer, trndata, n_folds=10, valfunc=modval.MSE)
        mse_val = cv.validate()
        print "MSE %f @ %i" % (mse_val, i)
        mse.append(mse_val)

    with open('cross_validation.json', 'w') as outfile:
        json.dump(mse, outfile, indent=4)
Example #22
class training:
    network_name = None
    network = None  # a network instance

    # input data
    descriptors = None
    target = None
    data_set = None
    data_setNormed = None
    tstdata = None
    trndata = None

    # these are for PCA
    scalar_X = None

    # important outputs
    r_squared = None
    median_error = None
    predicted_value = None

    def __init__(self,X_filename,Y_filename=None):
        '''some important factors'''

        '''loading data'''
        self._load_training_data(X_filename, Y_filename)

    def _load_training_data(self,X_filename,Y_filename):
        '''
        X and Y values will be loaded here
        '''
        self.descriptors=X_filename
        print "Descriptors loaded!"
        if Y_filename is not None:
            self.target=Y_filename
            print "Target for training loaded!"

    def do_pca(self,user_input=0,threshold_input=0):
        '''PCA will be done here'''

    def set_dataset(self,splitProportion = 0.15,featureNorm = True, Y_log = True):
        '''put training data into pybrain object'''

        '''Feature Selection doing here'''
#         self.descriptors = self.featureReduction(self.descriptors, threshold_input = 10)

        num_row=self.descriptors.shape[0]
        num_col=self.descriptors.shape[1]
        '''Pack data '''
        self.data_set = SupervisedDataSet(num_col , 1)
        for num_data in range(num_row):
            inputs=self.descriptors[num_data,:]
            outputs=self.target[num_data]
            self.data_set.addSample(inputs, outputs)
        print self.data_set.indim

        if featureNorm:
            '''split data'''
            self.tstdata, self.trndata = self.split_data(self.data_set,splitProportion)

            '''get the scalar for the trndata'''
            '''and normalize the tstdata with this scalar'''
            trn_scalar = self._getScalar(self.trndata['input'])

            self.trndata = self.featureNorm(self.trndata,trn_scalar,Y_log = True)
            self.tstdata = self.featureNorm(self.tstdata, trn_scalar, Y_log = True)
            self.data_setNormed = self.featureNorm(self.data_set, trn_scalar, Y_log = True)
            print 'Feature Normed'
        else:
            self.tstdata, self.trndata = self.split_data(self.data_set,splitProportion)
            print 'Feature not Normed'
        raw_input("Pybrain data object has been set up.")

    def featureReduction(self, data,threshold_input = 0.99):
        '''
        feature reduction that only keep variables that the variance
        is greater than threshold.
        '''
        selector = VarianceThreshold(threshold = threshold_input)
        data = selector.fit_transform(data)
        print 'Feature Selected with threshold ', threshold_input, data.shape
        return data

    def _getScalar(self, data):
        '''For Normalization '''
        '''get the scalar of the input data and return  '''
        thisScalar = preprocessing.StandardScaler().fit(data)
        return thisScalar

    def featureNorm(self,data,scalar,Y_log = True):
        '''
        feature Normalization, deal with self.data_set, return self.data_setNormed
        '''
        descs = data['input']
        target = data['target']
        num_col = descs.shape[1]
        data_setNormed = SupervisedDataSet(num_col,1)
        data_setNormed.setField('input', scalar.transform(descs))
        '''feature norm for Y'''
        if Y_log:
            print 'Using log value of target'
            data_setNormed.setField('target',np.log(target))
        else:
            print 'Using the original value of target'
            data_setNormed.setField('target',target)
        return data_setNormed

    def split_data(self,dataset,proportion = 0.15):
        '''
        split the data to self.tstdata and self.trndata.
        '''
        tstdata,trndata = dataset.splitWithProportion(proportion)
        return tstdata, trndata

    def train_net(self,training_times_input=100,num_neuron=200,learning_rate_input=0.1,weight_decay=0.1,momentum_in = 0,verbose_input=True):
        '''
        The main function to train the network
        '''
        print self.trndata['input'].shape
        raw_input()
        self.network=buildNetwork(self.trndata.indim,
                                  num_neuron,self.trndata.outdim,
                                  bias=True,
                                  hiddenclass=SigmoidLayer,
                                  outclass = LinearLayer)
        self.trainer=BackpropTrainer(self.network,
                                     dataset=self.trndata,
                                     learningrate=learning_rate_input,
                                     momentum=momentum_in,
                                     verbose=True,
                                     weightdecay=weight_decay )

        for iter in range(training_times_input):
            print "Training", iter+1,"times"
            self.trainer.trainEpochs(1)
            trn_error = self._net_performance(self.network, self.trndata)
            tst_error = self._net_performance(self.network, self.tstdata)
            print "the trn error is: ", trn_error
            print "the test error is: ",tst_error

        '''prediction on all data:'''
#         self.predicted_value = self.predict(self.network,self.data_setNormed['input'])

    def train_best_converge(self,training_times_input=5,num_neuron=120,learning_rate_input=0.1,weightdecay_input = 0.01,maxEpochs_input=1200,verbose_input=True):

        '''pass values'''
        self.training_time=training_times_input
        self.learning_rate=learning_rate_input
        self.maxEpo=maxEpochs_input
        self.verbose=verbose_input
        self.r_squared=np.empty([self.training_time])
        self.median_error=np.empty([self.training_time])
        test_data,training_data=self.data_set.splitWithProportion(0.15)

        # train the network self.training_time times
        for iter in range(self.training_time):
            print "Training", iter+1,"times"

            '''randomly hold out 10% of the training data for validation'''
            valid_data_this,train_data_this=training_data.splitWithProportion(0.1)

            net=buildNetwork(self.data_set.indim,num_neuron,self.data_set.outdim,bias=True,outputbias=True,hiddenclass=SigmoidLayer)
            t=BackpropTrainer(net,train_data_this,learningrate=self.learning_rate,weightdecay=weightdecay_input,momentum=0.,verbose=self.verbose)
            t.trainUntilConvergence(train_data_this,maxEpochs=self.maxEpo, validationProportion=0.1,verbose=self.verbose)

            '''validate the model with validation dataset'''
            self.r_squared[iter],self.median_error[iter]=self.do_regression(net, valid_data_this.getField("input"),valid_data_this.getField("target")[:,0])

            # CPython quirk: stash each round's model and data in the locals() dict
            locals()['net'+str(iter)]=net
            locals()['train_data' + str(iter)]=train_data_this
            locals()['valid_data' + str(iter)]=valid_data_this
            locals()['train' + str(iter)]=t

            print "Training",iter+1,"has done!"

        r_max = np.amax(self.r_squared)
        max_index=self.r_squared.argmax()

        print "Model ", max_index+1, "has been selected"
        self.network=locals()['net'+str(max_index)]
        self.train_best=locals()['train_data' + str(max_index)]
        self.valid_best=locals()['valid_data' + str(max_index)]

        '''run the best network on the test data'''
        print "The performance on test data........."
        descriptors_test=test_data.getField("input")
        Y_test=test_data.getField("target")[:,0]

        r2_all=self.do_regression(self.network, descriptors_test, Y_test)

        raw_input("Paused!")

        '''run the best network on the all data'''
        print "The performance on all data........."
        self.predicted_value=self.predict(self.network,self.descriptors_pca)
        r2_test=self.do_regression(self.network, self.descriptors_pca, self.target)

    def train_CV(self,n_folds=5,num_neuron = 50,learning_rate_input=0.01,decay=0.01,maxEpochs_input=1200,verbose_input=True):
        '''call the class in model validators'''
        '''and do cross validation'''
        '''pass values'''
        dataset = self.data_set
        l = dataset.getLength()
        indim = dataset.indim
        outdim = dataset.outdim
        inp = dataset.getField("input")
        out = dataset.getField("target")
        perms = np.array_split(permutation(l), n_folds)
        perf = 0
        for i in range(n_folds):
            train_perms_idxs = list(range(n_folds))
            train_perms_idxs.pop(i)
            temp_list = []
            for train_perms_idx in train_perms_idxs:
                temp_list.append(perms[ train_perms_idx ])
            train_idxs = np.concatenate(temp_list)
            #this is the test set:
            test_idxs = perms[i]
            #train:
            print "Training on part: ", i
            train_ds = SupervisedDataSet(indim,outdim)
            train_ds.setField("input", inp[train_idxs])
            train_ds.setField("target",out[train_idxs])
            net_this = buildNetwork(indim,num_neuron,outdim,bias=True,hiddenclass = SigmoidLayer)
            t_this = BackpropTrainer(net_this,train_ds,learningrate = learning_rate_input,weightdecay=decay,
                                     momentum=0.,verbose=verbose_input)
            #train asked times:
            t_this.trainEpochs(maxEpochs_input)
            #test on testset.
            test_ds = SupervisedDataSet(indim,outdim)
            test_ds.setField("input", inp[test_idxs])
            test_ds.setField("target",out[test_idxs])
            perf_this = self._net_performance(net_this, test_ds)
            perf = perf + perf_this
        perf /=n_folds
        print perf
        return perf

    def do_CV(self,):
        '''
        call CV
        '''
        data_set_this = self.data_set
        perf_all=[]
        for num_neuron in np.arange(20,200,5):
            print "Training with number of neuron :", num_neuron
            perf_this = self.train_CV(n_folds=5, num_neuron=num_neuron, learning_rate_input=0.001, maxEpochs_input=50, verbose_input=False)
            perf_all.append(perf_this)
        print "All of the performance: ", perf_all
        output=open("CV_results_20to200.csv",'wb')
        filewriter=csv.writer(output)
        filewriter.writerow(perf_all)

    def _net_performance(self,net,test_data):
        """
        calculate the median relatively error (mre)
        """
        input = test_data.getField("input")
        target = test_data.getField("target")
        outputs = self.predict(net, input)
        abs_error = np.absolute(outputs - target)
        rel_error = np.divide(abs_error,np.absolute(target))
        mre = np.median(rel_error)
        return mre

    def predict(self,net,X):
        '''
        run the prediction of the given data (descriptors) on the given network.
        '''
        num_row=X.shape[0]
        num_col=X.shape[1]
        results=np.empty([num_row])
        for line in range(num_row):
            results[line]=net.activate(X[line])[0]
#         if self.scalar_Y is not None:
#             results = self.scalar_Y.inverse_transform(results)
        return results

    def do_regression(self,net,X_pca,Y):
        '''
        run the network prediction on descriptor X
        do regression on Y
        return R_squred value
        '''
        test_result=self.predict(net, X_pca)
        slope,intercept,r_value,p_value,std_err =stats.linregress(Y, test_result)
        median_error_this=self.calc_Diff(Y, test_result)
        print "The R squared of this time is: ",r_value**2
        print "The median relatively error of this time is:", median_error_this
        return r_value**2,median_error_this

    def calc_Diff(self,real_value,predicted_value):
        '''
        this function calculate the median and average absolute error and relatively error
        between the real_value and the predicted value
        '''
        diff_between_abs=np.absolute(predicted_value-real_value)
        diff_between_abs_relatively=diff_between_abs/real_value
        mean_rel_error=np.mean(diff_between_abs_relatively)
        median_rel_error=np.median(diff_between_abs_relatively)
        mean_abs_error=np.mean(diff_between_abs)
        median_abs_error=np.median(diff_between_abs)

        return median_rel_error

    def plot_diff(self,real_value,predicted_value,xlab,ylab,title):
        '''
        plot the line of real value and estimated value and plot the difference bar on the same graph
        '''
        num_row=real_value.shape[0] #this is the length of x axis
        data_all=np.array((real_value,predicted_value))
        data_all=np.transpose(data_all)
        data_all_sorted=data_all[data_all[:,0].argsort()]
        diff=data_all_sorted[:,1]-data_all_sorted[:,0]
        y_value=np.arange(num_row)

        fig=plt.figure()
        ax=fig.gca()
        ax.plot(y_value,data_all_sorted[:,1],label="Estimated Values")
        ax.plot(y_value,data_all_sorted[:,0],label="Reported Values")
        plt.xlabel(xlab, fontsize = 16)
        plt.ylabel(ylab, fontsize = 16)
        plt.title(title)
        ax.legend(loc = 2)

        ax.bar(y_value,diff)
        plt.show()

    def save_toFile(self,filename,pred):
        '''save the NumPy array of prediction results to a csv file'''
        np.savetxt(filename, pred, delimiter=',')

    def save_network(self,name_of_the_net):
        print "Saving the trained network to file"

        if self.network is None:
            print "Network has not been trained!!"
        else:
            NetworkWriter.writeToFile(self.network, name_of_the_net)
            print "Saving Finished"

    def load_network(self,name_of_the_net):
        print "load existing trained network"
        self.network=NetworkReader.readFrom(name_of_the_net)
        print "Succeed!"
Example #23
def run_data1():
    with open('new_data1.txt') as data_file:
        data = json.load(data_file)
    output = set([i[2] for i in [d[0] for d in data if d[1] == 1]])
    print output
    m = [d[0] for d in data]
    print (max([d for a, s, d in m]), min([d for a, s, d in m]), float(max([d for a, s, d in m])-min([d for a,s,d in m])))
    case = [min([a for a, s, d in m]), float(max([a for a, s, d in m])-min([a for a,s,d in m]))]
    week = [min([s for a, s, d in m]), float(max([s for a, s, d in m])-min([s for a,s,d in m]))]
    grid = [min([d for a, s, d in m]), float(max([d for a, s, d in m])-min([d for a,s,d in m]))]
    ds = SupervisedDataSet(3, 1)
    import random
    random.shuffle(data)
    print len(data)
    for i in xrange(0, len(data)):
        # print "Adding {}th data sample".format(i),
        x1 = float(data[i][0][0] - case[0])/case[1]
        x2 = float(data[i][0][1] - week[0])/week[1]
        x3 = float(data[i][0][2] - grid[0])/grid[1]
        input = (x1, x2, x3)
        output = data[i][1]
        ds.addSample(input, output)
        # print ":: Done"

    print "Train"
    # net = buildNetwork(3, 3, 1, bias=True)\
    net = NetworkReader.readFrom('dengue_network.xml')
    tstdata, trndata = ds.splitWithProportion( 0.33 )
    trainer = BackpropTrainer(net, trndata)
    # terrors = trainer.trainUntilConvergence(verbose = True, validationProportion = 0.33, maxEpochs = 100, continueEpochs = 10 )


    # mse = [0]
    # acceptable_error = .00001
    # for i in xrange(0,1000):
    #     print i," ",
    #     mse_c = trainer.train()
    #     if (mse_c < acceptable_error):
    #         break
    #     mse.append(mse_c)
    #     print mse_c

    threshold = [0.25, 0.30]
    for t in threshold:
        print "Testing threshold :", t
        true_positive = 0.0
        true_negative = 0.0
        false_positive = 0.0
        false_negative = 0.0

        data_to_write = []
        data_to_write_input = []
        for input, expectedOutput in tstdata:
            o = net.activate(input)
            output = 1.0 if o[0] > t else 0.0
            data_to_write.append((int((input[0]*case[1]) + case[0]), int((input[1]*week[1]) + week[0]),int((input[2]*grid[1]) + grid[0]), output))
            if (output == expectedOutput):
                if output == 1.0:
                    true_positive += 1.0
                else:
                    true_negative += 1.0
            else:
                if output == 1.0:
                    false_positive += 1.0
                else:
                    false_negative += 1.0
        # NetworkWriter.writeToFile(net, 'dengue_network1.xml')
        precision = true_positive / (true_positive + false_positive)
        recall = true_positive / (true_positive + false_negative)
        f = (2 * precision * recall)/(precision + recall)
        accuracy = (true_positive + true_negative) / (true_positive + true_negative + false_positive + false_negative)

        def getKey(item):
            return item[1]
        data_to_write = sorted(data_to_write,  key=getKey)
        counts = {
            # "MSE" : mse,
            # "DATA": data_to_write,
            "Threshold": t,
            "Precision": precision,
            "Recall": recall,
            "F-Measure": f,
            "Accuracy": accuracy,
            "Values": {
                "True Positive": true_positive,
                "True Negative": true_negative,
                "False Positive": false_positive,
                "False Negative": false_negative
            }
        }
        print "Accuracy :", accuracy
        print "Precision :", precision
        print "Recall :", recall
        print "F-Measure :", f
        print counts
        # errors = {
        #     "terrors" : terrors
        # }
        # with open('data8.json', 'w') as outfile:
        #     json.dump(counts, outfile, indent=4)
    exit()
Example #24
Ciclos = int(sys.argv[2])
Learning = float(sys.argv[3])
Momentum = float(sys.argv[4])
camada1 = int(sys.argv[5])
camada2 = int(sys.argv[6])

k = 0
size = 70
for line in inputFile.readlines():
    data = [float(x) for x in line.strip().split() if x != '']
    indata = tuple(data[:7])
    outdata = tuple(data[7:])
    ds.addSample(indata, outdata)
    k += 1
    if (k == size):
        testdata, traindata = ds.splitWithProportion(PorcDivTest)
        ds.clear()
        k = 0
        for inp, targ in testdata:
            testSet.appendLinked(inp, targ - 1)
        for inp, targ in traindata:
            trainSet.appendLinked(inp, targ - 1)

trainSet._convertToOneOfMany(bounds=[0, 1])
testSet._convertToOneOfMany(bounds=[0, 1])

if (camada2 == 0):
    net = buildNetwork(trainSet.indim, camada1, trainSet.outdim, recurrent=True)
else:
    net = buildNetwork(trainSet.indim, camada1, camada2, trainSet.outdim, recurrent=True)
trainer = BackpropTrainer(net, dataset=trainSet, learningrate=Learning, momentum=Momentum, verbose=True)
Example #25
def calc():
    filePath = 'asc_gyro_l.skl'
    f = open(filePath, 'r')
    headers = f.readline().split()
    indices = [2]
    numOfFeatures = len(indices)#len(ancestorMap)
    ds = SupervisedDataSet(numOfFeatures, 1)
    press0 = []
    press1 = []
    for line in f:
        splited = line.split()
        output = [float(splited[2]) - 32920.0]#, splited[3]]
        press0.append(float(output[0]))
        #press1.append(float(output[1]))
        input = np.array(splited)
        input = input[indices].astype(float)  # getAnccestorRelativePos(splited) # splited[7:]
        ds.appendLinked(input, output)
    tstdata, trndata = ds.splitWithProportion( 0.25 )
    
    #for n in range(5):
    numOfHidden = 1#15*n + 1
    net = buildNetwork(numOfFeatures, numOfHidden, 1, bias=True)
    #net = FeedForwardNetwork()
    """
    inLayer = LinearLayer(numOfFeatures)
    hiddenLayer0 = SigmoidLayer(numOfHidden)
    #hiddenLayer1 = SigmoidLayer(numOfHidden)
    #hiddenLayer2 = SigmoidLayer(numOfHidden)
    outLayer = LinearLayer(1)
    
    net.addInputModule(inLayer)
    net.addModule(hiddenLayer0)
    #net.addModule(hiddenLayer1)
    #net.addModule(hiddenLayer2)
    net.addOutputModule(outLayer)
    
    in_to_hidden = FullConnection(inLayer, hiddenLayer0)
    #zero2one = FullConnection(hiddenLayer0, hiddenLayer1)
    #one2two = FullConnection(hiddenLayer1, hiddenLayer2)
    hidden_to_out = FullConnection(hiddenLayer0, outLayer)
    
    
    net.addConnection(in_to_hidden)
    #net.addConnection(zero2one)
    #net.addConnection(one2two)
    net.addConnection(hidden_to_out)
    net.sortModules()
    """
    trainer = BackpropTrainer(net, trndata)  # train on the larger split
    print 'numOfHidden: ' + str(numOfHidden)
    #res = trainer.trainUntilConvergence()
    for i in range(100):
        res = trainer.train()
    evaluatedData = tstdata
    press0 = []
    press1 = []
    expectedPress0 = []
    expectedPress1 = []
    for input, expectedOutput in evaluatedData:
        output = net.activate(input)
        press0.append(output)
        #press1.append(output[1])
        expectedPress0.append(expectedOutput)
        #expectedPress1.append(expectedOutput[1])
        #errorSum0+=abs(output[0]-expectedOutput[0])
        #errorSum1+=abs(output[1]-expectedOutput[1])
    
    #print errorSum0/len(evaluatedData)
    #print errorSum1/len(evaluatedData)
    print mean_squared_error(press0, expectedPress0)
    print np.mean(expectedPress0)
    #print mean_squared_error(press1, expectedPress1)
    
    """
    arr = np.array(press0)
    print np.std(arr, axis=0)
    arr = np.array(press1)
    print np.std(arr, axis=0)
    print 'end'
    """

#calc()
Example #26
class NaturalLanguageNetwork:
    def __init__(self,file):
        self.file = file
        self.ios = 30
        self.hns = 25
        self.epochs = 300
        self.ds = SupervisedDataSet(self.ios,self.ios)
        self.nn = None
    
    def get_concepts(self,inp_pos):
        inp = []
        ret = []
        for (word,pos) in inp_pos:
            inp.append(get_pos_tag(pos))
        inp = self.pad(inp,self.ios,-1)
        res = self.nn.activate(inp)
        print res
        cur_str = ''
        for i in range(len(res)):
            if round(res[i]) == 1:
                print 'Matched a node'
                if i < len(inp_pos):
                    print 'Appending ' + str(inp_pos[i][0])
                    cur_str += inp_pos[i][0] + ' '
            elif cur_str != '':
                ret.append(cur_str.strip())
                cur_str = ''
        return ret       
    
    def parse_and_train(self):
        f = open(self.file,'r')
        learn_lines = []
        for line in f:
            if line.strip() != '':
                learn_lines.append(line)
        i = 0
        f.close()
        while i + 1 < len(learn_lines):
            ins, outs = self.convert_to_tuple(learn_lines[i],learn_lines[i+1])
            i += 2
            self.ds.addSample(ins,outs)
        self.nn = buildNetwork(self.ios,self.hns,self.ios)
        self.train_dat, self.test_dat = self.ds.splitWithProportion(0.75)
        trnr = BackpropTrainer(self.nn,dataset=self.train_dat,momentum=0.1,verbose=False,weightdecay=0.01)
        i = 150
        trnr.trainEpochs(150)
        while i < self.epochs:
            trnr.trainEpochs(50)
            i += 50
            print 'For epoch ' + str(i)
            print 'For train:'
            self.print_current_error()
            print 'For test:'
            self.print_validation()
        self.nn.sortModules()
        #trnr.trainEpochs(self.epochs)
        #trnr.trainUntilConvergence()
        #for i in range(self.epochs):
        #    trnr.trainEpochs(1)
        
    def print_validation(self):
        res = self.nn.activateOnDataset(self.test_dat)
        ttl_misses = 0
        for i in range(len(res)):
            resul = []
            for v in res[i]:
                resul.append(round(v))
            print 'Misses: ' + str(self.num_misses(resul,self.test_dat['target'][i]))
            ttl_misses += self.num_misses(resul,self.test_dat['target'][i])
        print 'Average Misses: ' + str(float(ttl_misses)/float(len(self.test_dat['target'])))

    def print_current_error(self):
        res = self.nn.activateOnDataset(self.train_dat)
        ttl_misses = 0
        for i in range(len(res)):
            resul = []
            for v in res[i]:
                resul.append(round(v))
            #print 'Misses: ' + str(self.num_misses(resul,self.train_dat['target'][i]))
            ttl_misses += self.num_misses(resul,self.train_dat['target'][i])
        print 'Average Misses: ' + str(float(ttl_misses)/float(len(self.train_dat['target'])))
        
    def num_misses(self,first,second):
        num = 0
        for i in range(len(first)):
            if first[i] != 1 and second[i] == 1:
                num += 1
            elif first[i] == 1 and second[i] != 1:
                num += 1
        return num
    
    def convert_to_tuple(self,poses,incls):
        #I'm chopping off the last thing here because the strings being parsed 
        #are ; terminated, resulting in an empty entry
        pos_list = poses.split(';')[:-1]
        incl_list = incls.split(';')[:-1]
        pos_vals = []
        incl_vals = []
        for p in pos_list:
            pos_vals.append(get_pos_tag(p))
        for i in incl_list:
            incl_vals.append(int(i))
        pos_vals = self.pad(pos_vals,self.ios,-1)
        incl_vals = self.pad(incl_vals,self.ios,0)
        return (tuple(pos_vals), tuple(incl_vals))
        
        
    def pad(self,ls,sz,pd_val):
        while len(ls) < sz:
            ls.append(pd_val)
        return ls
        
Example #27
alldata = SupervisedDataSet(len(attributelist) * 2, 2)

#home, away
for gameid, game in games.iteritems():
    inputs = [
        Normalize(metadata['maxmins'][attr], game[attr][0])
        for attr in attributelist
    ]
    inputs.extend([
        Normalize(metadata['maxmins'][attr], game[attr][1])
        for attr in attributelist
    ])

    if game['points'][0] > game['points'][1]:
        outputs = [1, 0]

    else:
        outputs = [0, 1]
    print outputs
    alldata.addSample(inputs, outputs)

testdata, traindata = alldata.splitWithProportion(0.70)

print "IMPORTANT ", traindata.outdim
n = buildNetwork(traindata.indim, 5, traindata.outdim, outclass=SoftmaxLayer)
print "Number of training patterns: ", len(traindata)
trainer = BackpropTrainer(n,
                          dataset=traindata,
                          momentum=0.1,
                          verbose=True,
                          weightdecay=0.01)
trainer.trainEpochs(200)
# trainer.trainUntilConvergence()

totalcount = 0
rightcount = 0
sumerrors = 0.0
Example #28
inp1_vec = np.zeros((1, num_words))
inp2_vec = np.zeros((1, num_words))
out_vec = np.zeros((1, num_words))
for temp_list in sorted_list:
    inp1 = word_list.index(temp_list[0])
    inp2 = word_list.index(temp_list[1])
    out = word_list.index(temp_list[2])
    inp1_vec = np.concatenate((inp1_vec, [inp[inp1, :]]), axis=0)
    inp2_vec = np.concatenate((inp2_vec, [inp[inp2, :]]), axis=0)
    out_vec = np.concatenate((out_vec, [inp[out, :]]), axis=0)
inp_vec = np.concatenate((inp1_vec, inp2_vec), axis=1)

#building the dataset
dataset = SupervisedDataSet(2 * num_words, num_words)
for i in range(1, len(sorted_list) + 1):  # skip row 0, the all-zeros initializer
    dataset.addSample(inp_vec[i, :], out_vec[i, :])
tstdata, trndata = dataset.splitWithProportion(0.25)

#building the network
net = FeedForwardNetwork()
input_layer = LinearLayer(2 * num_words, name='input_layer')
hidden_layer = TanhLayer(num_words, name='hidden')
output_layer = SigmoidLayer(num_words, name='output_layer')
net.addInputModule(input_layer)
net.addModule(hidden_layer)
net.addOutputModule(output_layer)
net.addConnection(FullConnection(input_layer,
                                 hidden_layer,
                                 name='in_to_hidden'))
net.addConnection(FullConnection(hidden_layer,
                                 output_layer,
                                 name='hidden_to_out'))
Example #29
# create a dataset object, make output Y a softmax matrix
allData = SupervisedDataSet(n, numLabels)
Y2 = convertToOneOfMany(Y)

# add data samples to dataset object, both ways are correct
'''for i in range(m):
    inData = X[i,:]
    outData = Y2[i, :]
    allData.addSample(inData, outData)
'''
allData.setField('input', X)
allData.setField('target', Y2)

#separate training and testing data
dataTrain, dataTest = allData.splitWithProportion(.9)

# create object for training
train = BackpropTrainer(net, dataset=dataTrain, learningrate=0.03, momentum=0.3)
#train.trainUntilConvergence(dataset=dataTrain)

# evaluate correct output for trainer
trueTrain = dataTrain['target'].argmax(axis=1)
trueTest = dataTest['target'].argmax(axis=1)

# train step by step
EPOCHS = 60

for i in range(EPOCHS):
    train.trainEpochs(1)
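convertToOneOfMany here is a project helper rather than the ClassificationDataSet method of the same name; a plausible sketch, assuming Y holds integer class labels:

import numpy as np

def convertToOneOfMany(Y, num_labels=None):
    # one-hot encode integer labels into a (samples, num_labels) matrix
    labels = np.asarray(Y, dtype=int).ravel()
    if num_labels is None:
        num_labels = labels.max() + 1
    out = np.zeros((len(labels), num_labels))
    out[np.arange(len(labels)), labels] = 1.0
    return out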
Example #30
targetData.pop() #Pop off last element because it should be the next day's closing price.
#print targetData

myNet = buildNetwork(2, 2, 1, hiddenclass=SoftmaxLayer) # Build network with 2 input neurons, 2 hidden neurons, and 1 output neuron
print myNet['in'], myNet['hidden0'], myNet['out'] # Debug message to confirm network setup

myDS = SupervisedDataSet(2, 1) # Create dataset with two dimensional input, one dimensional target.

while myList: #Pop off each element, then add it to the dataset
	myDataSetAdder = myDS.appendLinked(myList.pop(), targetData.pop())

for inpt, target in myDS: #display dataset structure
	print inpt, target

myTrainer = BackpropTrainer(myNet, myDS, verbose=True)
print """This may take awhile..."""
myTrainData, myTestData = myDS.splitWithProportion(0.25)
print "Number of training patterns: ", len(myTestData)
print "Input and output dimensions: ", myTestData.indim, myTrainData.outdim
print "First sample (input, target):"
print myTrainData['input'][0], myTrainData['target'][0]
#import pdb; pdb.set_trace() # Debugger, uncomment to run python debugger

for i in range(20):
	myTrainer.trainEpochs( 5 ) # Run the network for 5 epochs...
	trnresult = myTrainer.testOnData(myTrainData) # mean squared error on the training split
	print "epoch: %4d" % myTrainer.totalepochs, \
		"  train error: %5.2f" % trnresult
	# "  test error: %5.2f%%" % tstresult
Example #31
def brescia_nn(train, test, max_epochs=None, verbose=False):
    trainval_ds = SupervisedDataSet(5, 1)
    test_ds = SupervisedDataSet(5, 1)

    for datum in train:
        trainval_ds.addSample(datum[:5], (datum[5], ))

    for datum in test:
        test_ds.addSample(datum[:5], (datum[5], ))

    train_ds, val_ds = trainval_ds.splitWithProportion(0.75)

    if verbose:
        print "Train, validation, test:", len(train_ds), len(val_ds), len(
            test_ds)

    ns = {}
    min_error = -1
    min_h = -1

    # use validation to form 4-layer network with two hidden layers,
    # with (2n + 1) nodes in the first hidden layer and somewhere from
    # 1 to (n - 1) in the second hidden layer
    for h2 in range(1, 5):
        if verbose:
            start = time.time()
            print "h2 nodes:", h2

        # create the network
        if verbose:
            print "building network"

        n = FeedForwardNetwork()
        inLayer = LinearLayer(5)
        hiddenLayer1 = SigmoidLayer(11)
        hiddenLayer2 = SigmoidLayer(h2)
        outLayer = LinearLayer(1)

        n.addInputModule(inLayer)
        n.addModule(hiddenLayer1)
        n.addModule(hiddenLayer2)
        n.addOutputModule(outLayer)

        in_to_hidden = FullConnection(inLayer, hiddenLayer1)
        hidden_to_hidden = FullConnection(hiddenLayer1, hiddenLayer2)
        hidden_to_out = FullConnection(hiddenLayer2, outLayer)

        n.addConnection(in_to_hidden)
        n.addConnection(hidden_to_hidden)
        n.addConnection(hidden_to_out)

        n.sortModules()

        # training
        if verbose:
            print "beginning training"
        trainer = BackpropTrainer(n, train_ds)
        trainer.trainUntilConvergence(maxEpochs=max_epochs)

        ns[h2] = n

        # validation
        if verbose:
            print "beginning validation"

        out = n.activateOnDataset(val_ds)
        actual = val_ds['target']
        error = np.sqrt(np.sum((out - actual)**2) / len(val_ds))
        if verbose:
            print "RMSE:", error

        if min_error == -1 or error < min_error:
            min_error = error
            min_h = h2

        if verbose:
            stop = time.time()
            print "Time:", stop - start

    # iterate through
    if verbose:
        print "best number of h2 nodes:", min_h
    out_test = ns[min_h].activateOnDataset(test_ds)

    return ns[min_h], out_test
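A toy invocation of brescia_nn (the six-column layout, five features plus a target, follows the slicing above; the random data is an assumption):

import numpy as np

train_rows = np.random.rand(60, 6)
test_rows = np.random.rand(15, 6)
best_net, test_predictions = brescia_nn(train_rows, test_rows, max_epochs=5, verbose=True)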
Example #32
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SigmoidLayer
from pybrain.tools.shortcuts import buildNetwork
from random import shuffle

print('hello world')
with open('latencias_normalizado.txt') as f:
    lines = f.readlines()

ds = SupervisedDataSet(48, 1)
for input in lines:
    input = input.split(",")
    input = [float(i) for i in input if i != '']
    ds.addSample(input[1:], input[0])

train, test = ds.splitWithProportion(0.25)

nn = buildNetwork(48, 10, 1, bias=True, outclass=SigmoidLayer)
nn.reset()

trainer = BackpropTrainer(nn, train, momentum=0.7)

for i in xrange(3000):
    print("%s %s" % (trainer.train(), i))

trainer.testOnData(test, verbose=True)
# for inp, targ in test:
#     mytarg = nn.activate(inp)
#     print(mytarg)
#     print(targ)
Example #33
from os import listdir
from os.path import isfile, join

mypath = "transcripts"
files = ["transcripts/" + f for f in listdir(mypath) if isfile(join(mypath, f))]

net = buildNetwork(775, 2, 1, bias=True, hiddenclass=SoftmaxLayer,)

ds = SupervisedDataSet(775, 1)

f = open('dataset')
for line in f.readlines():
    case = json.loads(line)
    print case
    if case["output"] == "petitioner":
        output = 1
    else:
        output = 0
    ds.addSample(case["inputs"], output)


test_data, training_data = ds.splitWithProportion(0.25)

trainer = BackpropTrainer(net, training_data)

print trainer.trainUntilConvergence(verbose=True)

NetworkWriter.writeToFile(net, "saved_network.xml")

for data in test_data:
    print "Network says: ", net.activate(data[0])
    print "Actual answer: ", data[1]
Example #34
def load():
    print "Loading dataset..."
    with open('data/misspellings.csv', 'rbU') as f:
        reader = csv.reader(f)
        for row in reader:
            ds.addSample(convert(row[0]), convert(row[1]))

    print len(ds), "items in dataset."
    print "Load of dataset finished."


load()
timeit("Loading the data")

testds, trainds = ds.splitWithProportion(0.2)

#trainds._convertToOneOfMany()
#testds._convertToOneOfMany()

net = buildNetwork(20, 20, 20, 20, 20)
trainer = BackpropTrainer(net, trainds)
#trainer.train()
trainer.trainEpochs(10)
timeit("Training")
#trnresult = percentError( trainer.testOnClassData(),
#                              trainds['target'] )
#
#tstresult = percentError( trainer.testOnClassData(
#           dataset=testds ), testds['target'] )
Example #35
def ffn(nodesNum, trainingTime):
    """Build the neural network."""
    n = FeedForwardNetwork()

    inLayer = LinearLayer(6)  # build the network's layers
    hiddenLayer1 = SigmoidLayer(nodesNum)
    hiddenLayer2 = SigmoidLayer(nodesNum)
    hiddenLayer3 = SigmoidLayer(nodesNum)
    hiddenLayer4 = SigmoidLayer(nodesNum)
    hiddenLayer5 = SigmoidLayer(nodesNum)
    hiddenLayer6 = SigmoidLayer(nodesNum)
    hiddenLayer7 = SigmoidLayer(nodesNum)
    hiddenLayer8 = SigmoidLayer(nodesNum)
    hiddenLayer9 = SigmoidLayer(nodesNum)
    hiddenLayer10 = SigmoidLayer(nodesNum)
    outLayer = LinearLayer(1)

    n.addInputModule(inLayer)  # add the layers to the network
    n.addModule(hiddenLayer1)
    n.addModule(hiddenLayer2)
    n.addModule(hiddenLayer3)
    n.addModule(hiddenLayer4)
    n.addModule(hiddenLayer5)
    n.addModule(hiddenLayer6)
    n.addModule(hiddenLayer7)
    n.addModule(hiddenLayer8)
    n.addModule(hiddenLayer9)
    n.addModule(hiddenLayer10)
    n.addOutputModule(outLayer)

    in_to_hidden = FullConnection(inLayer, hiddenLayer1)  # set up the connection pattern
    hidden_to_hidden1 = FullConnection(hiddenLayer1, hiddenLayer2)
    hidden_to_hidden2 = FullConnection(hiddenLayer2, hiddenLayer3)
    hidden_to_hidden3 = FullConnection(hiddenLayer3, hiddenLayer4)
    hidden_to_hidden4 = FullConnection(hiddenLayer4, hiddenLayer5)
    hidden_to_hidden5 = FullConnection(hiddenLayer5, hiddenLayer6)
    hidden_to_hidden6 = FullConnection(hiddenLayer6, hiddenLayer7)
    hidden_to_hidden7 = FullConnection(hiddenLayer7, hiddenLayer8)
    hidden_to_hidden8 = FullConnection(hiddenLayer8, hiddenLayer9)
    hidden_to_hidden9 = FullConnection(hiddenLayer9, hiddenLayer10)
    hidden_to_out = FullConnection(hiddenLayer10, outLayer)

    n.addConnection(in_to_hidden)  #将连接加入网络
    n.addConnection(hidden_to_hidden1)
    n.addConnection(hidden_to_hidden2)
    n.addConnection(hidden_to_hidden3)
    n.addConnection(hidden_to_hidden4)
    n.addConnection(hidden_to_hidden5)
    n.addConnection(hidden_to_hidden6)
    n.addConnection(hidden_to_hidden7)
    n.addConnection(hidden_to_hidden8)
    n.addConnection(hidden_to_hidden9)
    n.addConnection(hidden_to_out)

    n.sortModules()  #使网络可用
    print(n)
    """建立数据集"""
    ds = SupervisedDataSet(6, 1)  #六个输入,一个输出

    #表查询语句
    cur1 = conn.cursor()
    cur1.execute(
        'select * from szzs_rise_and_fall_rate limit 2,9999999999999999;')
    result1 = cur1.fetchall()
    fv = []  #特征
    for res in result1:
        a = []
        a.append(float(list(res)[1]))
        a.append(float(list(res)[2]))
        a.append(float(list(res)[3]))
        a.append(float(list(res)[4]))
        a.append(float(list(res)[5]))
        a.append(float(list(res)[6]))
        fv.append(a)

    cur2 = conn.cursor()
    cur2.execute(
        'select rise_fall_rate_next from szzs_rise_and_fall_rate limit 2,9999999999999999;'
    )
    result2 = cur2.fetchall()
    cla = []  #分类
    for res in result2:
        cla.append(float(list(res)[0]))

    cur3 = conn.cursor()
    cur3.execute('select * from szzs_rise_and_fall_rate order by date desc;')
    result3 = cur3.fetchmany(1)
    test = []  #测试数据
    for res in result3:
        test.append(float(list(res)[1]))
        test.append(float(list(res)[2]))
        test.append(float(list(res)[3]))
        test.append(float(list(res)[4]))
        test.append(float(list(res)[5]))
        test.append(float(list(res)[6]))

    for i in range(0, len(fv)):
        ds.addSample(fv[i], cla[i])

    dataTrain, dataTest = ds.splitWithProportion(
        0.8)  #百分之八十的数据用于训练,百分之二十的数据用于测试
    """训练神经网络"""
    trainer = BackpropTrainer(n, dataset=dataTrain)  #神经网络和数据集
    trainer.trainEpochs(trainingTime)  #训练次数
    return n.activate(test)
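
# A usage sketch (assumes the MySQL connection `conn` used above is open):
# print(ffn(nodesNum=10, trainingTime=50))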
Ejemplo n.º 36
0
    Y = preprocessing.scale(y_np)
    ######################################
    #setup the dataset (supervised classification training) for neural network
    ######################################
    from pybrain.utilities import percentError
    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.supervised.trainers import BackpropTrainer
    from pybrain.structure.modules import SoftmaxLayer
    from pybrain.datasets.classification import ClassificationDataSet
    from pybrain.datasets import SupervisedDataSet

    ds = SupervisedDataSet(4, 1)
    for i in range(len(X)):
        ds.addSample(X[i], Y[i])
    # #split the dataset
    trainData, testData = ds.splitWithProportion(0.60)

    # ###################################
    # #Creating a Neural Network
    # ###################################
    # # build nerual net with 4 inputs, 5 hidden neuron and 1 output neuron
    net = buildNetwork(4, 5, 1, bias=True)
    trainer = BackpropTrainer(net, trainData)
    # trainUntilConvergence returns (training errors, validation errors)
    train_errors, val_errors = trainer.trainUntilConvergence(dataset=trainData,
                                                             maxEpochs=50)

    # #evaluate the error rate on training data
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import mean_squared_error
    train_out = net.activateOnDataset(trainData)  # network outputs on the training set
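    # a sketch of the evaluation step the imports above prepare for:
    train_mse = mean_squared_error(trainData['target'], train_out)
    print("train MSE: %f" % train_mse)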
Ejemplo n.º 37
0
alldata = SupervisedDataSet(len(attributelist)*2, 2)

#home, away
for gameid, game in games.iteritems():
    inputs = [Normalize(metadata['maxmins'][attr], game[attr][0]) for attr in attributelist]
    inputs.extend( [Normalize(metadata['maxmins'][attr], game[attr][1]) for attr in attributelist] )

    if game['points'][0] > game['points'][1]:
        outputs = [1, 0]

    else:
        outputs = [0, 1]
    print outputs
    alldata.addSample(inputs, outputs)

traindata, testdata = alldata.splitWithProportion(0.70)  # 70% training, 30% testing


print "IMPORTANT ", traindata.outdim
n = buildNetwork(traindata.indim, 5, traindata.outdim, outclass=SoftmaxLayer)
print "Number of training patterns: ", len(traindata)
trainer = BackpropTrainer( n, dataset=traindata, momentum=0.1, verbose=True, weightdecay=0.01)
trainer.trainEpochs(200)
# trainer.trainUntilConvergence()

totalcount = 0
rightcount = 0
sumerrors = 0.0
for data in testdata:
    inputvalues = []
    for attr in attributelist:
Ejemplo n.º 38
0
result_r   = rev_map(result)

# buying       vhigh, high, med, low
# maint        vhigh, high, med, low
# doors        2, 3, 4, 5more
# persons      2, 4, more
# lug_boot     small, med, big
# safety       low, med, high

with open(fname, "r") as f:
    reader = csv.reader(f)
    for row in reader:
        sample = (price[row[0]], price[row[1]], doors[row[2]], persons[row[3]], lug_boot[row[4]], safety[row[5]])
        ds.addSample(sample, result[row[6]])

tst_ds, trn_ds = ds.splitWithProportion(0.2)

# print "train data"
# for inpt, target in trn_ds:
#     print inpt, target

# print "test data"
# for inpt, target in tst_ds:
#     print inpt, target

# More information about trainers: http://pybrain.org/docs/api/supervised/trainers.html
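
# The trainer used below is assumed to have been built earlier; a minimal
# sketch (hypothetical layer sizes -- six encoded inputs, one target output):
net = buildNetwork(6, 4, 1, bias=True)
trainer = BackpropTrainer(net)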

print "Training started"

trainer.trainOnDataset(trn_ds, 10)
Ejemplo n.º 39
0
print 'reducing data'
sample = np.random.randint(len(data), size=5000)
data = data.ix[sample]

X = data[[0, 1, 2, 3, 4]].values
y = data[['outcome']].values

print 'building net'
net = buildNetwork(5, 20, 1, recurrent=False)

dataset = SupervisedDataSet(5, 1)
dataset.setField('input', X)
dataset.setField('target', y)

trainset, testset = dataset.splitWithProportion(0.75)
trainer = BackpropTrainer(net, dataset=trainset, learningrate=0.1, momentum=0.5, verbose=False)

epochs = np.array([])
train_errors = np.array([])
test_errors = np.array([])

for i in range(10):
    print 'round', i + 1
    trainer.trainEpochs(5)

    train_error = trainer.testOnData(trainset)
    test_error = trainer.testOnData(testset)

    epochs = np.append(epochs, trainer.totalepochs)
    train_errors = np.append(train_errors, train_error)
Ejemplo n.º 40
0
def brescia_nn(train, test, max_epochs=None, verbose=False):
    trainval_ds = SupervisedDataSet(5, 1)
    test_ds = SupervisedDataSet(5, 1)
    
    for datum in train:
        trainval_ds.addSample(datum[:5], (datum[5],))

    for datum in test:
        test_ds.addSample(datum[:5], (datum[5],))
    
    train_ds, val_ds = trainval_ds.splitWithProportion(0.75)
    
    if verbose:
        print "Train, validation, test:", len(train_ds), len(val_ds), len(test_ds)
    
    ns = {}
    min_error = -1
    min_h = -1
    
    # use validation to form 4-layer network with two hidden layers,
    # with (2n + 1) nodes in the first hidden layer and somewhere from
    # 1 to (n - 1) in the second hidden layer
    for h2 in range(1, 5):
        if verbose:
            start = time.time()
            print "h2 nodes:", h2
    
        # create the network
        if verbose:
            print "building network"

        n = FeedForwardNetwork()
        inLayer = LinearLayer(5)
        hiddenLayer1 = SigmoidLayer(11)
        hiddenLayer2 = SigmoidLayer(h2)
        outLayer = LinearLayer(1)
    
        n.addInputModule(inLayer)
        n.addModule(hiddenLayer1)
        n.addModule(hiddenLayer2)
        n.addOutputModule(outLayer)
    
        in_to_hidden = FullConnection(inLayer, hiddenLayer1)
        hidden_to_hidden = FullConnection(hiddenLayer1, hiddenLayer2)
        hidden_to_out = FullConnection(hiddenLayer2, outLayer)
    
        n.addConnection(in_to_hidden)
        n.addConnection(hidden_to_hidden)
        n.addConnection(hidden_to_out)
    
        n.sortModules()
    
        # training
        if verbose:
            print "beginning training"
        trainer = BackpropTrainer(n, train_ds)
        trainer.trainUntilConvergence(maxEpochs=max_epochs)

        ns[h2] = n
    
        # validation
        if verbose:
            print "beginning validation"

        out = n.activateOnDataset(val_ds)
        actual = val_ds['target']
        error = np.sqrt(np.sum((out - actual)**2) / len(val_ds))
        if verbose:
            print "RMSE:", error
    
        if min_error == -1 or error < min_error:
            min_error = error
            min_h = h2
    
        if verbose:
            stop = time.time()
            print "Time:", stop - start
    
    # report the winning configuration and evaluate it on the test set
    if verbose:
        print "best number of h2 nodes:", min_h
    out_test = ns[min_h].activateOnDataset(test_ds)

    return ns[min_h], out_test
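
# A usage sketch (hypothetical arrays whose rows hold five features plus a target):
# best_net, test_predictions = brescia_nn(train_rows, test_rows, max_epochs=100, verbose=True)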
Ejemplo n.º 41
0
TD.gen()
print(TD.X.shape)
print(TD.y.shape)
X_train = TD.X
y_train = TD.y
X_test = TD.X_test
y_test = TD.y_test
ds = SupervisedDataSet(TD.indim, 2)
for i in range(TD.X.shape[0]):
    inp, outp = TD.X.iloc[i], TD.y.iloc[i]
    if outp == 1:
        outp = [1, 0]
    else:
        outp = [0, 1]
    ds.addSample(inp, outp)
training_dataset, testing_dataset = ds.splitWithProportion(0.9)
print(X_train.shape)
print(X_test.shape)

mmm = MMModel(name="model_test", n_pca=20, C_svr=1.0)  # SVC classification

fnn = buildNetwork(HISTORY,
                   200,
                   100,
                   50,
                   2,
                   bias=True,
                   hiddenclass=TanhLayer,
                   outclass=SoftmaxLayer)
trainer = make_trainer(fnn, training_dataset, 0.005)
s_time = time.time()
Ejemplo n.º 42
0
inp2_vec = np.zeros((1, num_words))
out_vec = np.zeros((1, num_words))
for temp_list in sorted_list:
    inp1 = word_list.index(temp_list[0])
    inp2 = word_list.index(temp_list[1])
    out = word_list.index(temp_list[2])
    inp1_vec = np.concatenate((inp1_vec, [inp[inp1, :]]), axis=0)
    inp2_vec = np.concatenate((inp2_vec, [inp[inp2, :]]), axis=0)
    out_vec = np.concatenate((out_vec, [inp[out, :]]), axis=0)
inp_vec = np.concatenate((inp1_vec, inp2_vec), axis=1)

#building the dataset (skip row 0, which is only the zero seed row used above)
dataset = SupervisedDataSet(2 * num_words, num_words)
for i in range(1, len(sorted_list) + 1):
    dataset.addSample(inp_vec[i, :], out_vec[i, :])
tstdata, trndata = dataset.splitWithProportion(0.25)

#building the network
net = FeedForwardNetwork()
input_layer = LinearLayer(2 * num_words, name='input_layer')
hidden_layer = TanhLayer(num_words, name='hidden')
output_layer = SigmoidLayer(num_words, name='output_layer')
net.addInputModule(input_layer)
net.addModule(hidden_layer)
net.addOutputModule(output_layer)
net.addConnection(
    FullConnection(input_layer, hidden_layer, name='in_to_hidden'))
net.addConnection(
    FullConnection(hidden_layer, output_layer, name='hidden_to_out'))
net.sortModules()
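
# a minimal training sketch for the network above (assumes BackpropTrainer is imported):
trainer = BackpropTrainer(net, trndata, learningrate=0.01, verbose=True)
trainer.trainEpochs(5)
print("test error: %f" % trainer.testOnData(tstdata))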
Ejemplo n.º 43
0
# create a dataset object, make output Y a softmax matrix
allData = SupervisedDataSet(n, numLabels)
Y2 = convertToOneOfMany(Y)

# add data samples to dataset object, both ways are correct
'''for i in range(m):
    inData = X[i,:]
    outData = Y2[i, :]
    allData.addSample(inData, outData)
'''
allData.setField('input', X)
allData.setField('target', Y2)

#separate training and testing data
dataTrain, dataTest = allData.splitWithProportion(.9)

# create object for training
train = BackpropTrainer(net,
                        dataset=dataTrain,
                        learningrate=0.03,
                        momentum=0.3)
#train.trainUntilConvergence(dataset=dataTrain)

# evaluate correct output for trainer
trueTrain = dataTrain['target'].argmax(axis=1)
trueTest = dataTest['target'].argmax(axis=1)

# train step by step
EPOCHS = 60
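
# a sketch of the epoch loop the comment above announces (percentError from
# pybrain.utilities; `net` is the network built earlier in this example):
for epoch in range(EPOCHS):
    train.trainEpochs(1)
    outTrain = net.activateOnDataset(dataTrain).argmax(axis=1)
    outTest = net.activateOnDataset(dataTest).argmax(axis=1)
    print("epoch %4d  train %5.2f%%  test %5.2f%%"
          % (epoch, percentError(outTrain, trueTrain),
             percentError(outTest, trueTest)))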
Ejemplo n.º 44
0
# make neural network come into effect
fnn.sortModules()

# define the dataset as one input, one output
DS = SupervisedDataSet(1, 1)

# add data element to the dataset
for i in np.arange(199):
    DS.addSample(u[i], y[i])

# you can get your input/output this way
X = DS['input']
Y = DS['target']

# split the dataset into train dataset and test dataset
dataTrain, dataTest = DS.splitWithProportion(0.8)
xTrain, yTrain = dataTrain['input'], dataTrain['target']
xTest, yTest = dataTest['input'], dataTest['target']

# train the NN with the BP algorithm
# verbose=True prints the total error
trainer = BackpropTrainer(fnn, dataTrain, verbose=True, learningrate=0.0001)
# set the epoch times to make the NN  fit
trainer.trainUntilConvergence(maxEpochs=10000)

# prediction = fnn.activate(xTest[1])
# print("the prediction number is :",prediction," the real number is:  ",yTest[1])
predict_result = []
for i in np.arange(len(xTest)):
    predict_result.append(fnn.activate(xTest[i])[0])
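
# a quick error summary for the predictions above (pure Python, no extra imports):
mse = sum((p - t[0]) ** 2 for p, t in zip(predict_result, yTest)) / len(yTest)
print("test MSE: %f" % mse)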
Ejemplo n.º 45
0
            # The left-most bit of player_slot marks a dire player; slots
            # below 128 are radiant, so shift their hero index into the
            # second block of features
            player_slot = player['player_slot']
            if player_slot < 128:
                hero_id += NUM_HEROES

            x[hero_id] = 1.0

        ds.addSample(x, y)
        pbar.update(i)

    pbar.finish()
    print "Dataset built"
    print "Radiant {0}; Dire {1}".format(r, d)

    train_ds, test_ds = ds.splitWithProportion(1 - VALIDATION_RATIO - TEST_RATIO)
    valid_ds, test_ds = test_ds.splitWithProportion(VALIDATION_RATIO / (VALIDATION_RATIO + TEST_RATIO))
    test_ds.saveToFile(TEST_FILE_NAME)
    valid_ds.saveToFile(VALIDATION_FILE_NAME)
    train_ds.saveToFile(TRAIN_FILE_NAME)
    print "Training, validation and test dataset built"

# Network manipulation
if isfile(NETWORK_TEMP_FILE_NAME) and isfile(NETWORK_VAL_FILE_NAME):
    net = NetworkReader.readFrom(NETWORK_TEMP_FILE_NAME)
    trainer = BackpropTrainer(net, train_ds, learningrate = 0.05)
    with open(NETWORK_VAL_FILE_NAME, "rb") as f:
        epoch, additional_left, best = load(f)
    print "Network loaded with best averge validation error {0}".format(best)
else:
    net = buildNetwork(NUM_FEATURES, NUM_FEATURES + 100, 100, 20, 10, 1, \
Ejemplo n.º 46
0
def run():
    import scipy
    from scipy import linalg

    f = open("modelfitDatabase1.dat", "rb")
    import pickle

    dd = pickle.load(f)
    node = dd.children[13]

    rfs = node.children[0].data["ReversCorrelationRFs"]

    pred_act = numpy.array(node.children[0].data["ReversCorrelationPredictedActivities"])
    pred_val_act = numpy.array(node.children[0].data["ReversCorrelationPredictedValidationActivities"])

    training_set = node.data["training_set"]
    validation_set = node.data["validation_set"]
    training_inputs = node.data["training_inputs"]
    validation_inputs = node.data["validation_inputs"]

    ofs = contrib.modelfit.fit_sigmoids_to_of(numpy.mat(training_set), numpy.mat(pred_act))
    pred_act_t = contrib.modelfit.apply_sigmoid_output_function(numpy.mat(pred_act), ofs)
    pred_val_act_t = contrib.modelfit.apply_sigmoid_output_function(numpy.mat(pred_val_act), ofs)

    (sx, sy) = numpy.shape(rfs[0])
    print sx, sy
    n = FeedForwardNetwork()

    inLayer = LinearLayer(sx * sy)
    hiddenLayer = SigmoidLayer(4)
    outputLayer = SigmoidLayer(1)

    n.addInputModule(inLayer)
    n.addModule(hiddenLayer)
    n.addOutputModule(outputLayer)

    in_to_hidden = RBFConnection(sx, sy, inLayer, hiddenLayer)
    # in_to_hidden = FullConnection(inLayer, hiddenLayer)
    hidden_to_out = FullConnection(hiddenLayer, outputLayer)

    n.addConnection(in_to_hidden)
    n.addConnection(hidden_to_out)
    n.sortModules()
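    # NOTE: gradientCheck and the early return below short-circuit everything
    # after them; remove these two lines to actually run the training code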
    gradientCheck(n)
    return

    from pybrain.datasets import SupervisedDataSet

    ds = SupervisedDataSet(sx * sy, 1)
    val = SupervisedDataSet(sx * sy, 1)

    for i in xrange(0, len(training_inputs)):
        ds.addSample(training_inputs[i], training_set[i, 0])

    for i in xrange(0, len(validation_inputs)):
        val.addSample(validation_inputs[i], validation_set[i, 0])

    tstdata, trndata = ds.splitWithProportion(0.1)

    from pybrain.supervised.trainers import BackpropTrainer

    trainer = BackpropTrainer(n, trndata, momentum=0.1, verbose=True, learningrate=0.002)

    training_set = numpy.array(numpy.mat(training_set)[:, 0])
    validation_set = numpy.array(numpy.mat(validation_set)[:, 0])
    pred_val_act_t = numpy.array(numpy.mat(pred_val_act_t)[:, 0])

    out = n.activateOnDataset(val)
    (ranks, correct, pred) = contrib.modelfit.performIdentification(validation_set, out)
    print "Correct:", correct, "Mean rank:", numpy.mean(ranks), "MSE", numpy.mean(numpy.power(validation_set - out, 2))

    print "Start training"
    for i in range(50):
        trnresult = percentError(trainer.testOnData(), trndata)
        tstresult = percentError(trainer.testOnData(dataset=tstdata), tstdata)

        print "epoch: %4d" % trainer.totalepochs, "  train error: %5.2f%%" % trnresult, "  test error: %5.2f%%" % tstresult
        trainer.trainEpochs(1)

        out = n.activateOnDataset(val)
        (ranks, correct, pred) = contrib.modelfit.performIdentification(validation_set, out)
        print "Correct:", correct, "Mean rank:", numpy.mean(ranks), "MSE", numpy.mean(
            numpy.power(validation_set - out, 2)
        )

    out = n.activateOnDataset(val)

    print numpy.shape(out)
    print numpy.shape(validation_set)

    (ranks, correct, pred) = contrib.modelfit.performIdentification(validation_set, out)
    print "Correct:", correct, "Mean rank:", numpy.mean(ranks), "MSE", numpy.mean(numpy.power(validation_set - out, 2))

    (ranks, correct, pred) = contrib.modelfit.performIdentification(validation_set, pred_val_act_t)
    print "Correct:", correct, "Mean rank:", numpy.mean(ranks), "MSE", numpy.mean(
        numpy.power(validation_set - pred_val_act_t, 2)
    )

    return n
Ejemplo n.º 47
0
data_file = fitsio.FITS('dr7qso.fit')[1].read()

alldata = SupervisedDataSet(5, 1)
length = len(data_file['UMAG'])

#for i in range(NUM_DATA):
for i in range(length):
    umag = data_file['UMAG'][i]
    gmag = data_file['GMAG'][i]
    rmag = data_file['RMAG'][i]
    imag = data_file['IMAG'][i]
    zmag = data_file['ZMAG'][i]
    redshift = data_file['z'][i]
    alldata.addSample((umag, gmag, rmag, imag, zmag), (redshift, ))

trainval_ds, test_ds = alldata.splitWithProportion(0.8)
train_ds, val_ds = trainval_ds.splitWithProportion(0.75)

print "Train, validation, test:", len(train_ds), len(val_ds), len(test_ds)

ns = {}
min_error = -1
min_h = -1

# use validation to form 4-layer network with two hidden layers,
# with (2n + 1) nodes in the first
if not CACHED:
    for h2 in range(1, 5):
        start = time.time()
        print "h2 nodes:", h2
Ejemplo n.º 48
0
fonet = open('net', 'w')

#load mat from matlab
mat = sio.loadmat('Features.mat')
#print(mat)
X = mat['X']
y = mat['y']
length = X.shape[0]

#set data
alldata = SupervisedDataSet(14, 7)
for n in arange(0, length):
    alldata.appendLinked(X[n], y[n])

#split data into test data and training data
tstdata, trndata = alldata.splitWithProportion(0.25)

#build network
fnn = buildNetwork(trndata.indim,
                   100,
                   trndata.outdim,
                   outclass=SigmoidLayer,
                   fast=True)
#print fnn

#build trainer
trainer = BackpropTrainer(fnn,
                          dataset=trndata,
                          momentum=0.0,
                          verbose=True,
                          weightdecay=0.0)
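
# a minimal continuation sketch: run a few epochs and check the held-out error
trainer.trainEpochs(10)
print "test error:", trainer.testOnData(tstdata)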
Ejemplo n.º 49
0
        trainIn = []
        for x in row[:numberOfInputs]:
            trainIn.append(x)

        trainOut = []
        for x in row[numberOfInputs:]:
            trainOut.append(x)

        d.appendLinked(trainIn, trainOut)

    # build a neural network; the second parameter is the number of hidden units
    n = buildNetwork(d.indim, 3, d.outdim, recurrent=True)

    # configure the trainer
    t = BackpropTrainer(n, learningrate=0.01, momentum=0.99, verbose=True)

    # split the data randomly into 75% training - 25% testing
    train, test = d.splitWithProportion(0.75)
    print "{} - {}".format(len(train), len(test))

    # train the data with n number of epochs
    t.trainOnDataset(train, 10)

    # test the data with the remaining data
    t.testOnData(test, verbose=True)

    # try the same test but with a different method
    net = buildNetwork(d.indim, 3, d.outdim, bias=True, hiddenclass=TanhLayer)
    trainer = BackpropTrainer(net, d)
    trainer.trainUntilConvergence(verbose=True)
Ejemplo n.º 50
0
x2=[(x-min(x2))/(max(x2)-min(x2)) for x in x2]
x3=[(x-min(x3))/(max(x3)-min(x3)) for x in x3]
y=[(x-min(y))/(max(y)-min(y)) for x in y]

# convert x and y to array format
x=np.array([x1,x2,x3]).T
y=np.array(y)
xdim=x.shape[1]  # input dimension
ydim=1  # output dimension

# create the supervised dataset
DS=SupervisedDataSet(xdim,ydim)
for i in range(num):
    DS.addSample(x[i],y[i])

train,test=DS.splitWithProportion(0.75)
# DS['input']      value of input x
# DS['target']    value of output y
# DS.clear()     clear data

# create the neural net
ann=buildNetwork(xdim,10,5,ydim,hiddenclass=TanhLayer,outclass=LinearLayer)
# train with backpropagation
trainer=BackpropTrainer(ann,dataset=train,learningrate=0.1,momentum=0.1,verbose=True)
# trainer.trainEpochs(epochs=20)  # fixed number of training epochs
trainer.trainUntilConvergence(maxEpochs=50)  # cap on training epochs

# forecast on the test set and draw the results
output=ann.activateOnDataset(test)
# ann.activate(sample) evaluates a single sample instead
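
# a sketch of the drawing step, assuming matplotlib is available:
import matplotlib.pyplot as plt
plt.plot(test['target'], 'k-', label='actual')
plt.plot(output, 'r--', label='forecast')
plt.legend()
plt.show()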
Ejemplo n.º 51
0
from pybrain.structure import RecurrentNetwork
from pybrain.structure import LinearLayer, LSTMLayer, SoftmaxLayer, SigmoidLayer
from pybrain.structure import FullConnection
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.datasets import SupervisedDataSet
from pybrain.tools.xml.networkwriter import NetworkWriter
from pybrain.utilities import percentError
import data_parsing
from scipy import array,where

training_data = data_parsing.conversion_to_one_hot_representation()
ds = data_parsing.conversion_to_pybrain_dataset_format(training_data)
test, train = ds.splitWithProportion( 0.25 )


n = RecurrentNetwork()

input1 = LinearLayer(28)
hidden1 = LSTMLayer(512)
hidden2 = LSTMLayer(512)
hidden3 = LSTMLayer(128)
output1 = SigmoidLayer(39)
output2 = LinearLayer(39)

n.addInputModule(input1)
n.addModule(hidden1)
n.addModule(hidden2)
n.addModule(hidden3)
n.addModule(output1)
Ejemplo n.º 52
0
ltc = d.ltc.combine("LTC")

# take our ltc dataframe, and get targets (prices in next 10 minutes)
# in the form of compound return prices (other options are "PRICES",
# which are raw price movements)
dataset, tgt = dtools.gen_ds(ltc, 1, ltc_opts, "CRT")

# initialize a pybrain dataset
DS = SupervisedDataSet(len(dataset.values[0]), np.size(tgt.values[0]))

# fill it
for i in xrange(len(dataset)):
    DS.appendLinked(dataset.values[i], [tgt.values[i]])

# split 70% for training, 30% for testing
train_set, test_set = DS.splitWithProportion(0.7)

# build our recurrent network with 10 hidden neurodes, one recurrent
# connection, using tanh activation functions
net = RecurrentNetwork()
hidden_neurodes = 10
net.addInputModule(LinearLayer(len(train_set["input"][0]), name="in"))
net.addModule(TanhLayer(hidden_neurodes, name="hidden1"))
net.addOutputModule(LinearLayer(len(train_set["target"][0]), name="out"))
net.addConnection(FullConnection(net["in"], net["hidden1"], name="c1"))
net.addConnection(FullConnection(net["hidden1"], net["out"], name="c2"))
net.addRecurrentConnection(FullConnection(net["out"], net["hidden1"], name="cout"))
net.sortModules()
net.randomize()

# train for 30 epochs (overkill) using the rprop- training algorithm
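# a sketch of that step (RPropMinusTrainer is PyBrain's rprop- trainer):
from pybrain.supervised.trainers import RPropMinusTrainer
trainer = RPropMinusTrainer(net, dataset=train_set, verbose=True)
trainer.trainEpochs(30)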
Ejemplo n.º 53
0
num_trials, num_features = X_successful.shape
alldata = SupervisedDataSet(num_features,1) 

stimalldata = SupervisedDataSet(num_features,1)

# add the features and class labels into the dataset
for xnum in xrange(num_trials): 
    alldata.addSample(X_successful[xnum,:],y_successful[xnum])

# add the features and dummy class labels into the stim dataset
for xnum in xrange(len(ind_successful_stress_stim)):
    stimalldata.addSample(X_successful_stim[xnum,:],y_successful_stim[xnum])

# split the data into testing and training data
tstdata_temp, trndata_temp = alldata.splitWithProportion(0.15)

# small bug with _convertToOneOfMany function.  This fixes that
tstdata = ClassificationDataSet(num_features,1,nb_classes=2)
for n in xrange(0, tstdata_temp.getLength()):
    tstdata.addSample(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1])

trndata = ClassificationDataSet(num_features,1,nb_classes=2)
for n in xrange(0,trndata_temp.getLength()):
    trndata.addSample(trndata_temp.getSample(n)[0],trndata_temp.getSample(n)[1])

valdata = ClassificationDataSet(num_features,1,nb_classes=2)
for n in xrange(0,stimalldata.getLength()):
    valdata.addSample(stimalldata.getSample(n)[0],stimalldata.getSample(n)[1])
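
# the usual follow-up for the rebuilt ClassificationDataSets (a sketch):
tstdata._convertToOneOfMany()
trndata._convertToOneOfMany()
valdata._convertToOneOfMany()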

Ejemplo n.º 55
0
# buying       vhigh, high, med, low
# maint        vhigh, high, med, low
# doors        2, 3, 4, 5more
# persons      2, 4, more
# lug_boot     small, med, big
# safety       low, med, high

with open(fname, "r") as f:
    reader = csv.reader(f)
    for row in reader:
        sample = (price[row[0]], price[row[1]], doors[row[2]], persons[row[3]],
                  lug_boot[row[4]], safety[row[5]])
        ds.addSample(sample, result[row[6]])

tst_ds, trn_ds = ds.splitWithProportion(0.2)

# print "train data"
# for inpt, target in trn_ds:
#     print inpt, target

# print "test data"
# for inpt, target in tst_ds:
#     print inpt, target

# More information about trainers: http://pybrain.org/docs/api/supervised/trainers.html

print "Training started"

trainer.trainOnDataset(trn_ds, 10)
Ejemplo n.º 56
0
# center the data around the origin to
# make poses in different locations look the same
sittingData = centrePose(sittingData)
uprightData = centrePose(uprightData)

dataSet = SupervisedDataSet(18,1)

# 0 target is sitting
# 1 target is upright
for i in xrange(sittingData.shape[2]):
	dataSet.addSample((sittingData[:,:,i].flatten()),(0,))
for i in xrange(uprightData.shape[2]):
	dataSet.addSample((uprightData[:,:,i].flatten()),(1,))

testSet, trainingSet = dataSet.splitWithProportion(0.25)
testSet.saveToFile(rootPath+"/data/testSet")
trainingSet.saveToFile(rootPath+"/data/trainingSet")


for i in xrange(2,19):
	print "training network with " + str(i) + " neurons"
	network = buildNetwork(18,i,1)
	trainer = BackpropTrainer(network,dataset=trainingSet, 
		momentum=0.1, verbose=True, weightdecay=0.01)
	trainer.trainUntilConvergence(dataset=None,maxEpochs = 40,
		verbose = True, continueEpochs=10,validationProportion=0.25)

	# save the network
	networkOutFile = open(rootPath+"/networks/network"+str(i)+".pkl","w")
	pickle.dump(network, networkOutFile)
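	networkOutFile.close()  # flush each pickled network to disk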
Ejemplo n.º 57
0
#To have a nice dataset for visualization, we produce a set of
# points in 2D belonging to three different classes. You could
# also read in your data from a file, e.g. using pylab.load().
# means = [(-1,0),(2,4),(3,1)]
# cov = [diag([1,1]), diag([0.5,1.2]), diag([1.5,0.7])]
# alldata = ClassificationDataSet(2, 1, nb_classes=3)
# for n in xrange(400):
#     for klass in range(3):
#         input = multivariate_normal(means[klass],cov[klass])
#         alldata.addSample(input, [klass])


#Randomly split the dataset into 75% training and 25% test
# data sets. Of course, we could also have created two different
# datasets to begin with.
tstdata, trndata = alldata.splitWithProportion( 0.25 )

#For neural network classification, it is highly advisable
# to encode classes with one output neuron per class.
# Note that this operation duplicates the original targets
# and stores them in an (integer) field named 'class'
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

#Test our dataset by printing a little information about it.
print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]
print alldata
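
# the canonical next step in this tutorial pattern (a sketch, assuming the
# usual buildNetwork/BackpropTrainer/SoftmaxLayer imports):
fnn = buildNetwork(trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True,
                          weightdecay=0.01)
trainer.trainEpochs(5)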
Ejemplo n.º 58
0
from pybrain.structure import FullConnection

n.addInputModule(SigmoidLayer(4, name='in'))
n.addModule(SigmoidLayer(3, name='hidden'))
n.addOutputModule(LinearLayer(1, name='out'))
n.addConnection(FullConnection(n['in'], n['hidden'], name='c1'))
n.addConnection(FullConnection(n['hidden'], n['out'], name='c2'))

n.sortModules() #initialisation


## ----------------------- Trainer ---------------------------- ##

from pybrain.supervised.trainers import BackpropTrainer

tstdata, trndata = DS.splitWithProportion(0.25)

# print len(tstdata)
# print len(trndata)

trainer = BackpropTrainer(n, trndata, learningrate=0.1, momentum=0.5, weightdecay=0.0001)  # train on the training split, not the full DS
trainer.trainUntilConvergence(verbose=True, maxEpochs=100)

# print trainer.trainUntilConvergence()
# trainer.trainOnDataset(trndata, 100)

#print n.activate((2, 1, 3, 0))
#print n.activate((2, 1, 3, 90))

## ----------------------- Results & Performance measurements ---------------------------- ##
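
# a sketch of the measurements the heading announces:
print "train error:", trainer.testOnData(trndata)
print "test error:", trainer.testOnData(tstdata)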
Ejemplo n.º 59
0
            continue
        purchase_Y = []
        for j in range(oo):
            purchase_Y.append(Y1[i + j])
        X = [0 for k in range(11)]
        for j in range(7):
            X[j] = Y1[i - j - 1]
    
        for j in range(4):
            X[7 + j] = Y1[i - j*7 - 14]
        DS.addSample(X,purchase_Y)

    X = DS['input']
    Y = DS['target']

    dataTrain,dataTest = DS.splitWithProportion(0.8)
    xTrain, yTrain = dataTrain['input'],dataTrain['target']
    xTest, yTest = dataTest['input'],dataTest['target']


    trainer = BackpropTrainer(fnn,dataTrain,verbose = True,learningrate = 0.01)
    trnerr,valerr = trainer.trainUntilConvergence(maxEpochs = 100)
    out = fnn.activateOnDataset(dataTest)
    i = 0
 
    out = SupervisedDataSet(11,oo)
    temp = [0 for j in range(oo)]
    d = len(purchase)
    test = [0 for j in range(11)]    
    for j in range(5):
        test[j] = Y1[d - j - 1]