Example #1
def test():
    DS = loadPybrainData()
    train, test = DS.splitWithProportion(0.1)
    fnn = joblib.load(PKL)
    # Predict on the test set
    output = fnn.activateOnDataset(test)
    # ann.activate(onedata) can also be used to predict a single sample
    outputs = []
    target = []
    count = 0
    for out in output:
        outs = out.argmax()
        outputs.append(outs)
    for tar in test['target']:
        ta = tar.argmax()
        target.append(ta)
    for i in range(0, len(target)):
        if outputs[i] == target[i]:
            count += 1

    right = count / len(target)  # per-character accuracy
    rate = (right**4)
    print("Classification accuracy: %.4f%%" % (rate * 100))
    v = Validator()
    print(u'Mean squared error:', v.MSE(output, test['target']))  # MSE between the predictions and the test targets; both must have the same shape
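The `rate = right ** 4` step reads as an assumption that each sample is a four-character code whose characters are predicted independently, so whole-sample accuracy is the per-character accuracy raised to the fourth power (this interpretation is not stated in the snippet). A quick check of the arithmetic:

right = 0.95            # per-character accuracy
rate = right ** 4       # probability that all four characters are correct
print("%.4f" % rate)    # 0.8145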
Example #2
    def dataset_eval(dataset):
        """Return dataset hit rate and MSE"""
        # Transform output values to bit vectors, similar to the targets
        predicted = bit_array_transform(ff_network.activate(x)
                                        for x in dataset['input'])
        target = dataset['target']

        # Lists of positions holding predicted and target classes to compare
        predicted_pos = [list(x).index(1) for x in predicted]
        target_pos = [list(x).index(1) for x in target]

        hits = Validator.classificationPerformance(predicted_pos, target_pos)
        mse = Validator.MSE(predicted, target)
        return hits, mse
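`bit_array_transform` is defined elsewhere in that project; a plausible stand-in (an assumption, not the project's code) maps each raw output vector to a one-of-many bit vector with a 1 at the argmax position, which is what the `index(1)` lookups above expect:

from numpy import argmax, zeros_like

def bit_array_transform(outputs):
    # assumed helper: one-of-many encoding of each output vector
    transformed = []
    for out in outputs:
        bits = zeros_like(out)
        bits[argmax(out)] = 1
        transformed.append(bits)
    return transformed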
Example #3
def nn():
    DS = ClassificationDataSet(28, 1, nb_classes=4)
    train = pickle.load(open('train_extracted_df.pkl', 'r'))
    y = train["median_relevance"]
    kfold_train_test = pickle.load(open('kfold_train_test.pkl', 'r'))
    features = ['query_tokens_in_title', 'query_tokens_in_description', 'percent_query_tokens_in_description', 'percent_query_tokens_in_title', 'query_length', 'description_length', 'title_length', 'two_grams_in_q_and_t', 'two_grams_in_q_and_d', 'q_mean_of_training_relevance', 'q_median_of_training_relevance', 'avg_relevance_variance', 'average_title_1gram_similarity_1', 'average_title_2gram_similarity_1', 'average_title_1gram_similarity_2', 'average_title_2gram_similarity_2', 'average_title_1gram_similarity_3', 'average_title_2gram_similarity_3', 'average_title_1gram_similarity_4', 'average_title_2gram_similarity_4', 'average_description_1gram_similarity_1', 'average_description_2gram_similarity_1', 'average_description_2gram_similarity_2', 'average_description_1gram_similarity_2', 'average_description_1gram_similarity_3', 'average_description_2gram_similarity_3', 'average_description_1gram_similarity_4', 'average_description_2gram_similarity_4']
    train = train[features]
    for i in range(len(y)):
        DS.addSample(train.values[i],y[i])
    X = DS['input']
    Y = DS['target']
    dataTrain, dataTest = DS.splitWithProportion(0.8)
    xTrain, yTrain = dataTrain['input'], dataTrain['target']
    xTest, yTest = dataTest['input'], dataTest['target']
    #fnn = RecurrentNetwork()
    fnn = FeedForwardNetwork()
    #fnn=buildNetwork(1,40,1,hiddenclass=TanhLayer, bias=True, outclass=SoftmaxLayer)
    #fnn=buildNetwork(1,40,1,hiddenclass=LSTMLayer, bias=True, outclass=SoftmaxLayer)
    inLayer = LinearLayer(28, name='inLayer')
    hiddenLayer = SigmoidLayer(40, name='hiddenLayer0')
    outLayer =LinearLayer(4, name='outLayer')

    fnn.addInputModule(inLayer)
    fnn.addModule(hiddenLayer)
    fnn.addOutputModule(outLayer)

    in_to_hidden = FullConnection(inLayer, hiddenLayer)
    hidden_to_out = FullConnection(hiddenLayer, outLayer)

    fnn.addConnection(in_to_hidden)
    fnn.addConnection(hidden_to_out)
    fnn.sortModules()

    trainer = BackpropTrainer(fnn, DS, verbose = True, learningrate=0.01)
    #trainer.trainUntilConvergence(maxEpochs=1000)
    trainer.trainEpochs(epochs=5)
    prediction = fnn.activateOnDataset(dataTest)
    out=[]
    total_score = 0
    for i, pred in enumerate(prediction):
        class_index = max(xrange(len(pred)), key=pred.__getitem__)
        out.append(class_index+1)
        # relative difference between the predicted class and the true target for sample i
        print str((class_index+1-yTest[i])/yTest[i])
    df=pd.DataFrame(out,columns=['predict'])
    df['real']=dataTest['target']
    coun = 0
    for i,row in df.iterrows():
        if  row[0]== row[1]:
            coun+=1
    print coun
    print "df['real']", df['real'],type(df['real'][0])
    print "df['predict']",df['predict'],type(df['predict'][0])
    print df

    v=Validator()
    #v.MSE(out,dataTest['target'])
    print "out",out
    print "dataTest['target']",dataTest['target']
Example #4
def evalRNNOnSeqClassificationDataset(net, testing_dataset, verbose = False, silent = False):
    """
    Fetch targets and calculate the module's output on the dataset.
    Output and target are in one-of-many format. The class for each sequence is
    determined by first summing the probabilities of each individual sample over
    the sequence, and then taking the argmax of that sum.
    """
    target = testing_dataset.getField("target")
    
    outputs = []
    
    # print net
    
    for seq in testing_dataset._provideSequences():
        net.reset()
        # print 'seq:'
        # print seq
        for i in xrange(len(seq)):
            output = net.activate(seq[i][0])
            outputs.append(output.copy())
    outputs = array(outputs)
    # determine last indices of the sequences inside dataset
    ends = SequenceHelper.getSequenceEnds(testing_dataset)
    ##format = "%d"*len(ends)
    summed_output = zeros(testing_dataset.outdim)
    # class_output and class_target will store class labels instead of
    # one-of-many values
    class_output = []
    class_target = []
    for j in xrange(len(outputs)):
        # sum up the output values of one sequence
        # print outputs[j]
        summed_output += outputs[j]
#            print j, output[j], " --> ", summed_output
        # if we reached the end of the sequence
        if j in ends:
            # print '------------------------------------------'
            # convert summed_output and target to class labels
            class_output.append(argmax(summed_output))
            class_target.append(argmax(target[j]))

            # reset the summed_output to zeros
            summed_output = zeros(testing_dataset.outdim)

    ##print format % tuple(class_output)
    ##print format % tuple(class_target)

    class_output = array(class_output)
    class_target = array(class_target)
#    print class_target
#    print class_output
    accuracy =  Validator.classificationPerformance(class_output, class_target)
    return (class_output, class_target, accuracy)
Example #5
def testOnSequenceData(module, dataset):
    """
    Fetch targets and calculate the modules output on dataset.
    Output and target are in one-of-many format. The class for each sequence is
    determined by argmax OF THE LAST ITEM IN THE SEQUENCE.
    """
    target = dataset.getField("target")
    output = ModuleValidator.calculateModuleOutput(module, dataset)

    # determine last indices of the sequences inside dataset
    ends = SequenceHelper.getSequenceEnds(dataset)

    class_output = array([argmax(output[end]) for end in ends])
    class_target = array([argmax(target[end]) for end in ends])

    return Validator.classificationPerformance(class_output, class_target)
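A minimal usage sketch (the names `net` and `seq_ds` are assumptions, not part of the snippet): given a trained recurrent module and a sequential classification dataset whose targets are already in one-of-many format, the helper returns the fraction of sequences classified correctly.

# hypothetical objects: a trained recurrent module `net` and a
# SequenceClassificationDataSet `seq_ds` with one-of-many targets
accuracy = testOnSequenceData(net, seq_ds)
print("sequence accuracy: %.2f%%" % (accuracy * 100))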
예제 #6
0
    def __init__(self, evolino_network, dataset, **kwargs):
        """ @param evolino_network: an instance of NetworkWrapper()
            @param dataset: The evaluation dataset
            @param evalfunc: Compares output to target values and returns a scalar, denoting the fitness.
                             Defaults to -mse(output, target).
            @param wtRatio: Float array of two values denoting the ratio between washout and training length.
                            Defaults to [1,2]
            @param verbosity: Verbosity level. Defaults to 0
        """
        Filter.__init__(self)
        ap = KWArgsProcessor(self, kwargs)

        ap.add( 'verbosity', default=0 )
        ap.add( 'evalfunc',  default=lambda output, target: -Validator.MSE(output, target) )
        ap.add( 'wtRatio',   default=array([1,2], float) )

        self.network = evolino_network
        self.dataset = dataset
        self.max_fitness = -Infinity
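The default fitness simply negates the mean squared error, so higher (less negative) values are better and a perfect fit scores 0.0. A minimal sketch of that default evaluation on hand-made arrays (the arrays are assumptions, not from the snippet):

from numpy import array
from pybrain.tools.validation import Validator

output = array([[0.9], [0.1]])
target = array([[1.0], [0.0]])
fitness = -Validator.MSE(output, target)   # small negative value for a near-perfect fit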
예제 #7
File: neuralnets.py  Project: HKou/pybrain
    def runTraining(self, convergence=0, **kwargs):
        """ Trains the network on the stored dataset. If convergence is >0, check after that many epoch increments
        whether test error is going down again, and stop training accordingly. """
        assert isinstance(self.Trainer, Trainer)
        if self.Graph is not None:
            self.Graph.setLabels(x='epoch', y='% classification error')
            self.Graph.setLegend(['training','test'],loc='lower right')
        epoch = 0
        inc = self.epoinc
        best_error = 100.0
        best_epoch = 0
        learncurve_x = [0]
        learncurve_y = [0.0]
        valcurve_y = [0.0]
        converged = False
        convtest = 0
        if convergence>0:
            logging.info("Convergence criterion: %d batches of %d epochs w/o improvement" % (convergence, inc))
        while epoch<=self.maxepochs and not converged:
            self.Trainer.trainEpochs(inc)
            epoch+=inc
            learncurve_x.append(epoch)
            # calculate errors on TRAINING data
            if isinstance(self.DS, SequentialDataSet):
                r_trn = 100. * (1.0-testOnSequenceData(self.Trainer.module, self.DS))
            else:
                # FIXME: messy - validation does not belong into the Trainer...
                out, trueclass = self.Trainer.testOnClassData(return_targets=True)
                r_trn = 100. * (1.0-Validator.classificationPerformance(out, trueclass))
            learncurve_y.append(r_trn)
            if self.TDS is None:
                logging.info("epoch: %6d,  err_trn: %5.2f%%" % (epoch, r_trn))
            else:
                # calculate errors on TEST data
                if isinstance(self.DS, SequentialDataSet):
                    r_tst = 100. * (1.0-testOnSequenceData(self.Trainer.module, self.TDS))
                else:
                    # FIXME: messy - validation does not belong into the Trainer...
                    out, trueclass = self.Trainer.testOnClassData(return_targets=True, dataset=self.TDS)
                    r_tst = 100. * (1.0-Validator.classificationPerformance(out, trueclass))
                valcurve_y.append(r_tst)
                if r_tst < best_error:
                    best_epoch = epoch
                    best_error = r_tst
                    bestweights = self.Trainer.module.params.copy()
                    convtest = 0
                else:
                    convtest += 1
                logging.info("epoch: %6d,  err_trn: %5.2f%%,  err_tst: %5.2f%%,  best_tst: %5.2f%%" % (epoch, r_trn, r_tst, best_error))
                if self.Graph is not None:
                    self.Graph.addData(1, epoch, r_tst)
                    
                # check if the convergence criterion is fulfilled (no improvement after N epoch increments)
                if convtest >= convergence:
                    converged = True
                    
            if self.Graph is not None:
                self.Graph.addData(0, epoch, r_trn)
                self.Graph.update()

        logging.info("Best epoch: %6d, with error: %5.2f%%" % (best_epoch, best_error))
        if self.VDS is not None:
            # calculate errors on VALIDATION data
            self.Trainer.module.params[:] = bestweights.copy()
            if isinstance(self.DS, SequentialDataSet):
                r_val = 100. * (1.0-testOnSequenceData(self.Trainer.module, self.VDS))
            else:
                out, trueclass = self.Trainer.testOnClassData(return_targets=True, dataset=self.VDS)
                r_val = 100. * (1.0-Validator.classificationPerformance(out, trueclass))
            logging.info("Result on evaluation data: %5.2f%%" % r_val)
            
        self.trainCurve = (learncurve_x, learncurve_y, valcurve_y)
Example #8
        pylab.plot(tsts['input'], ctsts, c='g')

    pylab.xlabel('x')
    pylab.ylabel('y')
    pylab.title('Neuron Number:' + str(nneuron))
    pylab.grid(True)
    plotname = os.path.join(plotdir, ('jpq2layers_plot' + str(iter)))
    pylab.savefig(plotname)


# set-up the neural network
nneuron = 5
mom = 0.98
netname = "LSL-" + str(nneuron) + "-" + str(mom)
mv = ModuleValidator()
v = Validator()

#create the test DataSet
x = numpy.arange(0.0, 1.0 + 0.01, 0.01)
s = 0.5 + 0.4 * numpy.sin(2 * numpy.pi * x)
tsts = SupervisedDataSet(1, 1)
tsts.setField('input', x.reshape(len(x), 1))
tsts.setField('target', s.reshape(len(s), 1))
#read the train DataSet from file
trndata = SupervisedDataSet.loadFromFile(os.path.join(os.getcwd(), 'trndata'))

myneuralnet = os.path.join(os.getcwd(), 'myneuralnet.xml')
if os.path.isfile(myneuralnet):
    n = NetworkReader.readFrom(myneuralnet, name=netname)
    #calculate the test DataSet based on the trained Neural Network
    ctsts = mv.calculateModuleOutput(n, tsts)
Example #9
        pylab.plot(tsts['input'], ctsts, c='g')

    pylab.xlabel('x')
    pylab.ylabel('y')
    pylab.title('Neuron Number:' + str(nneuron))
    pylab.grid(True)
    plotname = os.path.join(plotdir, ('jpq2layers_plot' + str(iter)))
    pylab.savefig(plotname)


# set-up the neural network
nneuron = 5
mom = 0.98
netname = "LSL-" + str(nneuron) + "-" + str(mom)
mv = ModuleValidator()
v = Validator()
n = FeedForwardNetwork(name=netname)
inLayer = LinearLayer(1, name='in')
hiddenLayer = SigmoidLayer(nneuron, name='hidden0')
outLayer = LinearLayer(1, name='out')
biasinUnit = BiasUnit(name="bhidden0")
biasoutUnit = BiasUnit(name="bout")
n.addInputModule(inLayer)
n.addModule(hiddenLayer)
n.addModule(biasinUnit)
n.addModule(biasoutUnit)
n.addOutputModule(outLayer)
in_to_hidden = FullConnection(inLayer, hiddenLayer)
bias_to_hidden = FullConnection(biasinUnit, hiddenLayer)
bias_to_out = FullConnection(biasoutUnit, outLayer)
hidden_to_out = FullConnection(hiddenLayer, outLayer)
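The extract ends before the connections are registered with the network; continuing the construction pattern shown in the earlier feed-forward example, the remaining wiring would typically be (a sketch, not part of the original snippet):

n.addConnection(in_to_hidden)
n.addConnection(bias_to_hidden)
n.addConnection(bias_to_out)
n.addConnection(hidden_to_out)
n.sortModules()   # finalize the topology before training or activation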