def test():
    """Score the pickled network on a held-out split and print the results.

    The dataset comes from ``loadPybrainData()`` and the network from
    ``joblib.load(PKL)``.  The second part returned by
    ``splitWithProportion(0.1)`` is used as the evaluation set.  Two lines
    are printed: the single-item hit rate raised to the fourth power (as a
    percentage), and the MSE that ``Validator`` reports between the raw
    network outputs and the targets.
    """
    dataset = loadPybrainData()
    _unused_part, held_out = dataset.splitWithProportion(0.1)
    network = joblib.load(PKL)

    # Predict the whole evaluation set at once
    # (network.activate(sample) could be used to predict a single sample).
    activations = network.activateOnDataset(held_out)

    # Reduce every output / target vector to the index of its largest entry.
    predicted_classes = [row.argmax() for row in activations]
    expected_classes = [row.argmax() for row in held_out['target']]

    hits = sum(
        1
        for position, expected in enumerate(expected_classes)
        if predicted_classes[position] == expected
    )

    # Hit rate for a single character.
    per_item_rate = hits / len(expected_classes)
    # The reported figure is that rate to the fourth power
    # (presumably four characters per item -- TODO confirm).
    combined_rate = per_item_rate ** 4
    print("分类正确率是:%.4f%%" % (combined_rate * 100))

    validator = Validator()
    # MSE between the predicted values and the original targets of the
    # evaluation set; both arguments must have the same layout.
    print(u'均方和差为:', validator.MSE(activations, held_out['target']))
def dataset_eval(dataset):
    """Return the hit rate and the MSE of ``ff_network`` on *dataset*.

    Every input sample is activated through the module-level ``ff_network``
    and the activations are passed through ``bit_array_transform`` so that
    they can be compared with the target vectors.  The result is the tuple
    ``(hits, mse)`` as computed by ``Validator``.
    """
    # Network outputs turned into bit vectors, similar to the targets.
    activations = (ff_network.activate(sample) for sample in dataset['input'])
    predicted_bits = bit_array_transform(activations)
    expected_bits = dataset['target']

    def position_of_one(bits):
        # Index of the first entry equal to 1 (raises ValueError if absent).
        return list(bits).index(1)

    # Class positions for predictions and targets, compared pairwise below.
    predicted_classes = [position_of_one(bits) for bits in predicted_bits]
    expected_classes = [position_of_one(bits) for bits in expected_bits]

    hit_rate = Validator.classificationPerformance(predicted_classes,
                                                   expected_classes)
    mean_squared_error = Validator.MSE(predicted_bits, expected_bits)
    return hit_rate, mean_squared_error
def nn():
    """Train a 28-40-4 feed-forward network on the extracted features and print a report.

    Steps:
      1. Load the pickled training frame ``train_extracted_df.pkl`` and take
         ``median_relevance`` as the label column.
      2. Fill a ``ClassificationDataSet(28, 1, nb_classes=4)`` with the 28
         feature columns and split it with ``splitWithProportion(0.8)``.
      3. Build a network with a 28-unit linear input layer, a 40-unit sigmoid
         hidden layer and a 4-unit linear output layer, and train it for
         5 epochs with ``BackpropTrainer`` (learning rate 0.01).
      4. Activate the network on the test split and print, per sample, the
         relative error of the predicted class, followed by the number of
         exact matches and the prediction/target tables.

    Returns nothing; all results are printed.  The print calls use the
    single-argument form so the function parses on Python 2 and Python 3
    with the same output.
    """
    features = [
        'query_tokens_in_title',
        'query_tokens_in_description',
        'percent_query_tokens_in_description',
        'percent_query_tokens_in_title',
        'query_length',
        'description_length',
        'title_length',
        'two_grams_in_q_and_t',
        'two_grams_in_q_and_d',
        'q_mean_of_training_relevance',
        'q_median_of_training_relevance',
        'avg_relevance_variance',
        'average_title_1gram_similarity_1',
        'average_title_2gram_similarity_1',
        'average_title_1gram_similarity_2',
        'average_title_2gram_similarity_2',
        'average_title_1gram_similarity_3',
        'average_title_2gram_similarity_3',
        'average_title_1gram_similarity_4',
        'average_title_2gram_similarity_4',
        'average_description_1gram_similarity_1',
        'average_description_2gram_similarity_1',
        'average_description_2gram_similarity_2',
        'average_description_1gram_similarity_2',
        'average_description_1gram_similarity_3',
        'average_description_2gram_similarity_3',
        'average_description_1gram_similarity_4',
        'average_description_2gram_similarity_4',
    ]

    # Pickles are binary data: read them in 'rb' mode and close the handle
    # as soon as loading is done.
    # NOTE(review): pickle.load executes code embedded in the file; only load
    # files from a trusted source.
    with open('train_extracted_df.pkl', 'rb') as handle:
        train = pickle.load(handle)

    y = train["median_relevance"]
    # Materialise the feature matrix once instead of once per sample.
    feature_rows = train[features].values

    DS = ClassificationDataSet(28, 1, nb_classes=4)
    for row, label in zip(feature_rows, y.values):
        DS.addSample(row, label)

    dataTrain, dataTest = DS.splitWithProportion(0.8)
    yTest = dataTest['target']

    # 28 linear inputs -> 40 sigmoid hidden units -> 4 linear outputs.
    fnn = FeedForwardNetwork()
    inLayer = LinearLayer(28, name='inLayer')
    hiddenLayer = SigmoidLayer(40, name='hiddenLayer0')
    outLayer = LinearLayer(4, name='outLayer')
    fnn.addInputModule(inLayer)
    fnn.addModule(hiddenLayer)
    fnn.addOutputModule(outLayer)
    fnn.addConnection(FullConnection(inLayer, hiddenLayer))
    fnn.addConnection(FullConnection(hiddenLayer, outLayer))
    fnn.sortModules()

    # NOTE(review): the trainer is fitted on the full DS, which still contains
    # the samples of dataTest, so the figures printed below are not a clean
    # hold-out estimate -- confirm whether dataTrain was intended.
    # NOTE(review): the output layer has 4 units while the dataset target has
    # a single column -- confirm whether a one-of-many conversion is missing.
    trainer = BackpropTrainer(fnn, DS, verbose=True, learningrate=0.01)
    trainer.trainEpochs(epochs=5)

    prediction = fnn.activateOnDataset(dataTest)
    out = []
    for sample_idx, activation in enumerate(prediction):
        # Classes are 1-based: position of the strongest output plus one.
        predicted_class = int(activation.argmax()) + 1
        out.append(predicted_class)
        # Relative error against the target of the SAME sample (the target
        # must be looked up by sample position, not by predicted class).
        actual = yTest[sample_idx]
        print(str((predicted_class - actual) / actual))

    df = pd.DataFrame(out, columns=['predict'])
    df['real'] = dataTest['target']

    # Number of samples whose predicted class equals the real class.
    coun = int((df['predict'] == df['real']).sum())
    print(coun)

    print("df['real'] %s %s" % (df['real'], type(df['real'][0])))
    print("df['predict'] %s %s" % (df['predict'], type(df['predict'][0])))
    print(df)
    print("out %s" % (out,))
    print("dataTest['target'] %s" % (dataTest['target'],))
def evalRNNOnSeqClassificationDataset(net, testing_dataset, verbose = False, silent = False):
    """Classify every sequence of *testing_dataset* with *net* and score the result.

    Outputs and targets are in one-of-many format.  The class of a sequence
    is found by summing the network outputs of all samples in the sequence
    and taking the position of the maximum; the target class is the argmax
    of the target at the last sample of the sequence.

    @param net: network offering ``reset()`` and ``activate(input)``
    @param testing_dataset: dataset offering ``getField``, ``outdim`` and
                            ``_provideSequences()``
    @param verbose: accepted for interface compatibility; not used
    @param silent: accepted for interface compatibility; not used
    @return: tuple ``(class_output, class_target, accuracy)`` where the first
             two are arrays of class labels and ``accuracy`` is the value of
             ``Validator.classificationPerformance`` for them
    """
    target = testing_dataset.getField("target")

    # Activate the network sample by sample, resetting it before each sequence.
    outputs = []
    for seq in testing_dataset._provideSequences():
        net.reset()
        for sample in seq:
            # sample[0] is the input part of the sample; the output is copied
            # so a later activation cannot alter the stored value.
            outputs.append(net.activate(sample[0]).copy())
    outputs = array(outputs)

    # Last indices of the sequences inside the dataset, kept in a set so the
    # per-timestep membership test below is O(1) instead of a scan.
    ends = set(array(SequenceHelper.getSequenceEnds(testing_dataset)).ravel().tolist())

    # class_output and class_target store class labels instead of
    # one-of-many values.
    class_output = []
    class_target = []
    summed_output = zeros(testing_dataset.outdim)
    for j, step_output in enumerate(outputs):
        # Accumulate the outputs belonging to the current sequence.
        summed_output += step_output
        if j in ends:
            # End of a sequence: convert the sum and the target to labels,
            # then start a fresh accumulator for the next sequence.
            class_output.append(argmax(summed_output))
            class_target.append(argmax(target[j]))
            summed_output = zeros(testing_dataset.outdim)

    class_output = array(class_output)
    class_target = array(class_target)
    accuracy = Validator.classificationPerformance(class_output, class_target)
    return (class_output, class_target, accuracy)
def testOnSequenceData(module, dataset):
    """ Compute the classification performance of module on a sequential dataset.

    Targets and module outputs are in one-of-many format. Only the LAST ITEM
    of each sequence is looked at: its argmax is the class of the sequence,
    both for the module output and for the target. The two label arrays are
    handed to Validator.classificationPerformance and its result is returned.
    """
    targets = dataset.getField("target")
    outputs = ModuleValidator.calculateModuleOutput(module, dataset)

    # indices of the final sample of every sequence in the dataset
    last_steps = SequenceHelper.getSequenceEnds(dataset)

    def classes_at_ends(rows):
        # class label (argmax) of each row sitting at a sequence end
        return array([argmax(rows[step]) for step in last_steps])

    predicted_classes = classes_at_ends(outputs)
    expected_classes = classes_at_ends(targets)
    return Validator.classificationPerformance(predicted_classes, expected_classes)
def __init__(self, evolino_network, dataset, **kwargs):
    """ Store the network and dataset and register the optional settings.

        @param evolino_network: an instance of NetworkWrapper()
        @param dataset: The evaluation dataset
        @param evalfunc: Callable taking (output, target) and returning a
                         scalar fitness. Defaults to -mse(output, target).
        @param wtRatio: Float array of two values denoting the ratio between
                        washout and training length. Defaults to [1,2]
        @param verbosity: Verbosity level. Defaults to 0
    """
    Filter.__init__(self)

    def negative_mse(output, target):
        # default fitness: the negated mean squared error
        return -Validator.MSE(output, target)

    # (option name, default value), registered in this order
    option_defaults = (
        ('verbosity', 0),
        ('evalfunc', negative_mse),
        ('wtRatio', array([1, 2], float)),
    )
    processor = KWArgsProcessor(self, kwargs)
    for option, fallback in option_defaults:
        processor.add(option, default=fallback)

    self.network = evolino_network
    self.dataset = dataset
    # lowest possible starting value for the best fitness seen
    self.max_fitness = -Infinity
def runTraining(self, convergence=0, **kwargs):
    """ Trains the network on the stored dataset in batches of self.epoinc epochs.

    After every batch the classification error (in percent) is computed on
    the training data and, if self.TDS is set, on the test data. The weights
    with the lowest test error seen so far are remembered. If self.VDS is
    set, the error on it is logged at the end, using the remembered best
    weights when there are any and the current weights otherwise.

    @param convergence: If >0, stop training once that many consecutive
                        batches brought no improvement of the test error.
                        With 0 (the default) training runs until
                        self.maxepochs is exceeded.
    @param kwargs: accepted for interface compatibility; not used.

    The curves are stored in self.trainCurve as
    (learncurve_x, learncurve_y, valcurve_y). Nothing is returned.
    """
    assert isinstance(self.Trainer, Trainer)

    def error_percent(dataset=None):
        # Percent classification error on `dataset`; None means the training data.
        if isinstance(self.DS, SequentialDataSet):
            data = self.DS if dataset is None else dataset
            return 100. * (1.0 - testOnSequenceData(self.Trainer.module, data))
        # FIXME: messy - validation does not belong into the Trainer...
        if dataset is None:
            out, trueclass = self.Trainer.testOnClassData(return_targets=True)
        else:
            out, trueclass = self.Trainer.testOnClassData(return_targets=True, dataset=dataset)
        return 100. * (1.0 - Validator.classificationPerformance(out, trueclass))

    if self.Graph is not None:
        self.Graph.setLabels(x='epoch', y='% classification error')
        self.Graph.setLegend(['training','test'],loc='lower right')

    epoch = 0
    inc = self.epoinc
    best_error = 100.0
    best_epoch = 0
    # No snapshot yet; stays None when there is no test set or the test
    # error never drops below the initial best_error.
    bestweights = None
    learncurve_x = [0]
    learncurve_y = [0.0]
    valcurve_y = [0.0]
    converged = False
    convtest = 0

    if convergence > 0:
        logging.info("Convergence criterion: %d batches of %d epochs w/o improvement" % (convergence, inc))

    while epoch <= self.maxepochs and not converged:
        self.Trainer.trainEpochs(inc)
        epoch += inc
        learncurve_x.append(epoch)

        # calculate errors on TRAINING data
        r_trn = error_percent()
        learncurve_y.append(r_trn)

        if self.TDS is None:
            logging.info("epoch: %6d, err_trn: %5.2f%%" % (epoch, r_trn))
        else:
            # calculate errors on TEST data
            r_tst = error_percent(self.TDS)
            valcurve_y.append(r_tst)
            if r_tst < best_error:
                best_epoch = epoch
                best_error = r_tst
                bestweights = self.Trainer.module.params.copy()
                convtest = 0
            else:
                convtest += 1
            logging.info("epoch: %6d, err_trn: %5.2f%%, err_tst: %5.2f%%, best_tst: %5.2f%%" % (epoch, r_trn, r_tst, best_error))
            if self.Graph is not None:
                self.Graph.addData(1, epoch, r_tst)

            # check if convergence criterion is fulfilled (no improvement
            # after N epoincs); only active when a criterion was requested,
            # otherwise 0 >= 0 would end training after the first batch
            if convergence > 0 and convtest >= convergence:
                converged = True

        if self.Graph is not None:
            self.Graph.addData(0, epoch, r_trn)
            self.Graph.update()

    logging.info("Best epoch: %6d, with error: %5.2f%%" % (best_epoch, best_error))

    if self.VDS is not None:
        # calculate errors on VALIDATION data, with the best weights if a
        # snapshot exists (slice assignment copies the values)
        if bestweights is not None:
            self.Trainer.module.params[:] = bestweights
        r_val = error_percent(self.VDS)
        logging.info("Result on evaluation data: %5.2f%%" % r_val)

    self.trainCurve = (learncurve_x, learncurve_y, valcurve_y)
# NOTE(review): the plotting statements below read tsts, ctsts, nneuron,
# plotdir and iter before any definition visible in this excerpt; they look
# like the tail of a definition that starts outside it -- confirm their
# intended indentation and enclosing scope.
# Draw the computed test output against the test inputs in green.
pylab.plot(tsts['input'], ctsts, c='g')
pylab.xlabel('x')
pylab.ylabel('y')
pylab.title('Neuron Number:' + str(nneuron))
pylab.grid(True)
# The figure is saved under plotdir with a name ending in str(iter).
plotname = os.path.join(plotdir, ('jpq2layers_plot' + str(iter)))
pylab.savefig(plotname)

# set-up the neural network
nneuron = 5
mom = 0.98  # presumably the momentum setting -- TODO confirm, it is only used in the name here
# The network name is built from nneuron and mom.
netname = "LSL-" + str(nneuron) + "-" + str(mom)
mv = ModuleValidator()
v = Validator()

# create the test DataSet: x from 0.0 in steps of 0.01 (stop value 1.01),
# target s = 0.5 + 0.4 * sin(2 * pi * x); both stored as single-column arrays
x = numpy.arange(0.0, 1.0 + 0.01, 0.01)
s = 0.5 + 0.4 * numpy.sin(2 * numpy.pi * x)
tsts = SupervisedDataSet(1, 1)
tsts.setField('input', x.reshape(len(x), 1))
tsts.setField('target', s.reshape(len(s), 1))

# read the train DataSet from the file 'trndata' in the current working directory
trndata = SupervisedDataSet.loadFromFile(os.path.join(os.getcwd(), 'trndata'))
myneuralnet = os.path.join(os.getcwd(), 'myneuralnet.xml')
# Only when a saved network file exists is it read back under the name netname.
if os.path.isfile(myneuralnet):
    n = NetworkReader.readFrom(myneuralnet, name=netname)
    # calculate the test DataSet based on the trained Neural Network
    # NOTE(review): placed inside the if because n is only bound here -- confirm.
    ctsts = mv.calculateModuleOutput(n, tsts)
# NOTE(review): the plotting statements below read tsts, ctsts, nneuron,
# plotdir and iter before any definition visible in this excerpt; they look
# like the tail of a definition that starts outside it -- confirm their
# intended indentation and enclosing scope.
# Draw the computed test output against the test inputs in green.
pylab.plot(tsts['input'], ctsts, c='g')
pylab.xlabel('x')
pylab.ylabel('y')
pylab.title('Neuron Number:' + str(nneuron))
pylab.grid(True)
# The figure is saved under plotdir with a name ending in str(iter).
plotname = os.path.join(plotdir, ('jpq2layers_plot' + str(iter)))
pylab.savefig(plotname)

# set-up the neural network
nneuron = 5
mom = 0.98  # presumably the momentum setting -- TODO confirm, it is only used in the name here
# The network name is built from nneuron and mom.
netname = "LSL-" + str(nneuron) + "-" + str(mom)
mv = ModuleValidator()
v = Validator()

# Feed-forward network: 1 linear input unit, nneuron sigmoid hidden units,
# 1 linear output unit, plus one bias unit for the hidden layer and one for
# the output layer.
n = FeedForwardNetwork(name=netname)
inLayer = LinearLayer(1, name='in')
hiddenLayer = SigmoidLayer(nneuron, name='hidden0')
outLayer = LinearLayer(1, name='out')
biasinUnit = BiasUnit(name="bhidden0")
biasoutUnit = BiasUnit(name="bout")
n.addInputModule(inLayer)
n.addModule(hiddenLayer)
n.addModule(biasinUnit)
n.addModule(biasoutUnit)
n.addOutputModule(outLayer)

# Full connections between the layers and from each bias unit to its layer.
# They are only created here; adding them to the network is not part of this
# excerpt.
in_to_hidden = FullConnection(inLayer, hiddenLayer)
bias_to_hidden = FullConnection(biasinUnit, hiddenLayer)
bias_to_out = FullConnection(biasoutUnit, outLayer)
hidden_to_out = FullConnection(hiddenLayer, outLayer)