def train(self, xmlfile_list):
    """
    Train one AdaBoost model per label key from a list of XML files.

    Every file is parsed with an ``etree`` parser created with
    ``recover=True``; the first child of the document root is handed to
    ``self.getAllFeature``, which yields a ``(feats, labels)`` pair.
    Files that yield no features are skipped.  The remaining features
    are passed through ``self.formatFeature`` and stacked vertically,
    and the labels are mapped through ``LABEL`` and concatenated.

    For each key in ``self.label_set`` an int8 target vector is built
    (1 where the sample's label equals ``LABEL[key]``, 0 elsewhere) and
    given to ``adaboost.train`` with ``balance_factor=15.0``.

    Progress messages and the stacked feature/label counts are printed
    to stdout.

    Returns a dict mapping each key of ``self.label_set`` to the value
    returned by ``adaboost.train`` for that key.
    """
    xml_parser = etree.XMLParser(recover=True)
    feature_blocks, label_blocks = [], []

    print("loading")
    for path in xmlfile_list:
        tree_root = etree.parse(path, xml_parser).getroot()
        feats, labels = self.getAllFeature(tree_root[0])
        if len(feats) == 0:
            # Nothing extracted from this document; leave it out.
            continue
        feature_blocks.append(self.formatFeature(feats))
        label_blocks.append(array([LABEL[name] for name in labels]))

    # NOTE(review): if no file yielded features, vstack/hstack receive an
    # empty list here -- confirm that failing at this point is intended.
    all_feats = vstack(feature_blocks)
    all_labels = hstack(label_blocks)
    print(len(all_feats))
    print(len(all_labels))
    print("load complete")

    models = {}
    for key in self.label_set:
        print("training", key)
        # One-vs-rest target: 1 for samples carrying this label, else 0.
        is_key = numpy.int8(all_labels == LABEL[key])
        models[key] = adaboost.train(all_feats, is_key, balance_factor=15.0)
    return models
# Experiment script: train an AdaBoost classifier to separate MNIST digit 0
# from digit 1 and print how many test samples are classified correctly.

import numpy as np

# Wildcard import kept only because code outside this section may still rely
# on names it provides.  Nothing below depends on it: SciPy deprecated
# re-exporting NumPy functions from its top-level namespace, so array helpers
# are taken from NumPy directly.
from scipy import *
from scipy import io

from zhiqiang import adaboost

# Minimal usage example of the adaboost module, kept for reference:
#   tset = np.array([[1, 2, 3], [0, 5, 1]])
#   lset = np.array([1, 0])
#   param = adaboost.train(tset, lset)
#   testset = np.array([[9, 10, -1], [-1, 2, 3]])
#   pls = adaboost.predict(testset, param)

dataset = io.loadmat('G:/dataset/mnist/mnist_all.mat')

# Training matrix: all 'train0' rows followed by all 'train1' rows.
trainset = np.concatenate((dataset['train0'], dataset['train1']))
trainset = trainset.astype('float')

(n1, m) = dataset['train1'].shape
(n0, m) = dataset['train0'].shape

# Labels follow the row order above: -1 for the leading n0 rows, +1 after.
labels = np.ones(n1 + n0)
labels[:n0] = -1

param = adaboost.train(trainset, labels, 2)

# Test matrix is assembled in the same 0-then-1 order as the training data.
testset = np.concatenate((dataset['test0'], dataset['test1']))
testset = testset.astype('float')
pls = adaboost.predict(testset, param)

# Ground-truth test labels, built the same way as the training labels.
tn0 = len(dataset['test0'])
tn = len(testset)
tls = np.ones(tn)
tls[:tn0] = -1

# Number of test samples whose predicted label equals the true label.
print(np.sum(tls == pls))