def SelectAtt(examples):
    """Choose the attribute to split ``examples`` on, ranked by ``IG``.

    Every column index from 1 up to ``len(examples[0]) - 1`` is scored with
    ``IG(examples, index)``; column 0 is never scored (presumably it holds
    the class label -- confirm against the data loader).

    Returns:
        The 1-based column index whose score sits at the first position
        reported by ``ut.getMaxPos`` (assumed to return the position(s) of
        the largest value).
    """
    n_columns = len(examples[0])
    scores = []
    for column in range(1, n_columns):
        scores.append(IG(examples, column))
    best_positions = ut.getMaxPos(scores)
    # scores[0] belongs to column 1, hence the shift by one.
    return best_positions[0] + 1
def SelectAtt(examples, featureSet=None):
    """Choose the attribute to split ``examples`` on, ranked by ``IG``.

    Args:
        examples: Sequence of examples; ``len(examples[0]) - 1`` is taken as
            the number of candidate features when no ``featureSet`` is given.
        featureSet: Optional collection of 0-based feature indices to
            consider. ``None`` or an empty collection means "all features".

    Returns:
        The 1-based column index (feature index + 1) of the candidate with
        the best ``IG`` score, i.e. the same index that was passed to ``IG``.

    Note:
        The previous implementation returned the *position inside*
        ``featureSet`` plus one, which only coincides with the real column
        index when ``featureSet`` is the default ``0 .. n-1`` range. The
        position is now mapped back through ``featureSet``.
    """
    if featureSet is None or len(featureSet) == 0:
        featureSet = range(len(examples[0]) - 1)
    # Materialise once so the winning position can be mapped back to the
    # feature index even for non-indexable collections such as sets.
    candidates = list(featureSet)
    scores = [IG(examples, feature + 1) for feature in candidates]
    best = ut.getMaxPos(scores)[0]
    #print scores[best]
    return candidates[best] + 1
def getThreshold(egs, att):
    """Pick the `best` split threshold on attribute ``att`` for a binary tree.

    ``N`` evenly spaced candidate thresholds are generated, starting at the
    smallest observed value of the attribute and stepping by
    ``(max - min) / N`` (so the maximum itself is never a candidate). Each
    candidate is scored with ``ig(egs, att, threshold)`` and the candidate at
    the first position reported by ``ut.getMaxPos`` is returned.

    Args:
        egs: Sequence of examples, each indexable by ``att``.
        att: Index of the attribute to threshold.

    Returns:
        ``0`` when ``egs`` is empty, otherwise the selected threshold.
    """
    # A length check also covers empty tuples/arrays, which `egs == []`
    # would let through to min() on an empty list.
    if len(egs) == 0:
        return 0
    values = [eg[att] for eg in egs]
    lowest = min(values)
    highest = max(values)
    # float() keeps the step fractional under Python 2 when the attribute
    # values are integers; plain `/` would truncate it (often to 0, making
    # every candidate equal to the minimum).
    step = float(highest - lowest) / N
    scores = [ig(egs, att, i * step + lowest) for i in range(N)]
    return ut.getMaxPos(scores)[0] * step + lowest
def cross_validation(crossData, K=3):
    """Return the misclassification rate accumulated over all folds.

    For each fold ``crossData[i]``, a classifier is built with
    ``leastSquare(crossData[i][0])`` and evaluated on every sample of
    ``crossData[i][1]``. A sample ``x`` is passed as ``x[1:]`` and its label
    ``x[0]`` is 1-based: it counts as correct when ``x[0] - 1`` equals the
    first position reported by ``ut.getMaxPos`` on the classifier output.

    Args:
        crossData: Indexable collection of ``(train_part, test_part)`` folds.
        K: Unused by this function; kept for interface compatibility.

    Returns:
        Number of misclassified test samples divided by the total number of
        test samples, as a float.

    Raises:
        ZeroDivisionError: if the folds contain no test samples at all.
    """
    mistakes = 0
    seen = 0
    for fold_index in range(len(crossData)):
        classifier = leastSquare(crossData[fold_index][0])
        for sample in crossData[fold_index][1]:
            seen += 1
            outputs = classifier(sample[1:]).tolist()
            predicted = ut.getMaxPos(outputs)[0]
            if predicted != sample[0] - 1:
                mistakes += 1
    # "+ 0." forces float division regardless of interpreter version.
    return (mistakes + 0.) / seen
def training(self, trainData, mode = 'diag', subSpaceDim = 2):
    """Learn a discriminant projection and train a Gaussian model on it.

    Steps:
      1. Compute per-class means ``mk`` and the overall mean ``m``.
      2. Build the between-class scatter ``S_B`` over *all* classes
         ``1 .. K`` and the within-class scatter ``S_W``.
      3. Take the eigenvectors of ``inv(S_W) . S_B`` selected by
         ``ut.getMaxPos(eigenvalues, subSpaceDim)`` as projection ``W``.
      4. Project every training sample with ``W`` and fit
         ``GaussianGM.GaussianGM`` on the projected data.

    Args:
        trainData: Container indexed by class label ``1 .. K`` (``K`` is
            ``len(trainData)``), each entry a sequence of samples.
        mode: ``'diag'`` uses the ``D x D`` identity as ``S_W``; any other
            value computes ``S_W`` from the data.
        subSpaceDim: Forwarded to ``ut.getMaxPos`` (presumably the number of
            leading eigen-directions to keep -- confirm in ``ut``).

    Side effects:
        ``trainData`` is overwritten in place with the projected samples;
        ``self.projectFunc`` and ``self.func`` are set.

    Fix: ``S_B`` used to be summed over ``range(1, K - 1)``, silently
    dropping the last two classes (and failing outright for ``K == 2``);
    it now covers ``range(1, K + 1)`` like every other per-class loop here.
    """
    # Builtin on Python 2, only available from functools on Python 3.
    from functools import reduce

    # num of classes
    K = len(trainData)
    # num of features
    D = len(trainData[1][0])
    labels = range(1, K + 1)

    # per-class means
    mk = {}
    for k in labels:
        mk[k] = ut.getMean(trainData[k], 'array')

    # overall mean of all samples pooled together
    m = ut.getMean(reduce(ut.add, [trainData[k] for k in labels]))

    # between-class scatter: sum_k N_k (m_k - m)(m_k - m)^T
    def between(k):
        return len(trainData[k]) * np.dot(mk[k] - m, (mk[k] - m).transpose())

    S_B = reduce(ut.add, [between(k) for k in labels])

    # construct S_W
    if mode == 'diag':
        S_W = np.diag([1] * D)
    else:
        def outer(x, y):
            return np.dot((x - y), (x - y).transpose())

        per_class = [
            reduce(ut.add, [outer(ut.ls2Vec(x), mk[k]) for x in trainData[k]])
            for k in labels
        ]
        S_W = reduce(ut.add, per_class)

    # NOTE(review): eigh assumes a symmetric matrix; inv(S_W).S_B is only
    # guaranteed symmetric in 'diag' mode -- confirm for other modes.
    [egs, vecs] = np.linalg.eigh(np.dot(np.linalg.inv(S_W), S_B))
    pos = ut.getMaxPos(egs, subSpaceDim)
    W = ut.arrayExtract(vecs, pos)

    # now we will construct trainingData set in the
    # sub-feature space, then run Gaussian generative
    # method to train the new trainData set to get a
    # a classifier
    def projectFunc(v):
        tmp = np.dot(ut.ls2Vec(v).transpose(), W).tolist()
        return tmp[0]

    for k in labels:
        # A real list (not a lazy map object) so it can be re-iterated.
        trainData[k] = [projectFunc(v) for v in trainData[k]]

    self.projectFunc = projectFunc
    self.func = GaussianGM.GaussianGM(trainData)
def f(w, w0, x):
    """Return the 1-based index of the best-scoring (weight, bias) pair for ``x``.

    ``w`` and ``w0`` are walked in lockstep. For each pair the score is the
    ``[0, 0]`` entry of ``weight.transpose() . ut.ls2Vec(x)`` plus the bias.
    The result is the first position reported by ``ut.getMaxPos`` on those
    scores, shifted by one.
    """
    scores = []
    for weight, bias in zip(w, w0):
        product = np.dot(weight.transpose(), ut.ls2Vec(x))
        # The product is a 1x1 matrix; pull out its single entry.
        scores.append(product[0, 0] + bias)
    winners = ut.getMaxPos(scores)
    return winners[0] + 1