Beispiel #1
0
def SelectAtt(examples):
    """Pick the attribute column with the highest IG score.

    Columns 1 .. len(examples[0]) - 1 are each scored with
    IG(examples, column); column 0 is never scored (presumably it
    holds the label -- confirm against the data layout).

    Returns the 1-based column number of the winner, i.e. the first
    position reported by ut.getMaxPos shifted by one.
    """
    numColumns = len(examples[0])
    gains = []
    for column in range(1, numColumns):
        gains.append(IG(examples, column))
    best = ut.getMaxPos(gains)
    return best[0] + 1
Beispiel #2
0
def SelectAtt(examples, featureSet = None):
    """Pick the best attribute among featureSet according to IG.

    examples   -- sequence of rows; examples[0] fixes the row width.
    featureSet -- optional iterable of 0-based feature indices to
                  consider.  None or an empty collection means every
                  feature, i.e. 0 .. len(examples[0]) - 2.

    Each candidate i is scored with IG(examples, i + 1).  The return
    value is the winning feature's own index plus one (the same column
    number that was handed to IG), not its position inside featureSet.
    For the default feature set both coincide.
    """
    if featureSet is None or len(featureSet) == 0:
        featureSet = range(len(examples[0]) - 1)
    # Freeze the candidates in a list so the position reported by
    # ut.getMaxPos can be mapped back to the actual feature index.
    candidates = list(featureSet)
    p = [IG(examples, i + 1) for i in candidates]
    ans = ut.getMaxPos(p)
    return candidates[ans[0]] + 1
Beispiel #3
0
def getThreshold(egs, att):
    """Choose a split threshold for attribute `att` over examples `egs`.

    The value range of the attribute is cut into N equally wide steps
    (N is a module-level constant).  Each candidate threshold
    lowest + i * step is scored with ig(egs, att, threshold), and the
    candidate at the first position reported by ut.getMaxPos is
    returned.  An empty example list yields 0.
    """
    if egs == []:
        return 0
    values = [row[att] for row in egs]
    lowest = min(values)
    highest = max(values)
    step = (highest - lowest) / N
    scores = []
    for i in range(N):
        scores.append(ig(egs, att, i * step + lowest))
    bestIndex = ut.getMaxPos(scores)[0]
    return bestIndex * step + lowest
Beispiel #4
0
def cross_validation(crossData, K = 3):
    """Return the misclassification rate over a set of folds.

    crossData -- sequence of folds; fold[0] is passed to leastSquare
                 to build a predictor and fold[1] is the list of
                 samples to evaluate.  Each sample x carries its label
                 in x[0] and its features in x[1:].
    K         -- accepted for interface compatibility; not used here.

    A sample counts as an error when the first position reported by
    ut.getMaxPos on the predictor output differs from x[0] - 1.
    Raises ZeroDivisionError when there are no samples to evaluate.
    """
    mistakes = 0
    evaluated = 0
    for fold in crossData[:len(crossData)]:
        predictor = leastSquare(fold[0])
        for sample in fold[1]:
            evaluated += 1
            predicted = ut.getMaxPos(predictor(sample[1:]).tolist())[0]
            if predicted != sample[0] - 1:
                mistakes += 1
    return float(mistakes) / evaluated
Beispiel #5
0
    def training(self, trainData, mode = 'diag', subSpaceDim = 2):
        """Fit a discriminant projection and a Gaussian model on top of it.

        trainData   -- container indexed by class label 1 .. K, where
                       K = len(trainData); trainData[k] is the list of
                       samples of class k (presumably a dict keyed by
                       label -- confirm against the caller).
        mode        -- 'diag' uses the identity matrix as within-class
                       scatter; any other value accumulates the scatter
                       from the samples.
        subSpaceDim -- number of leading eigen-directions kept for the
                       projection.

        Side effects: trainData is overwritten in place with the
        projected samples; self.projectFunc and self.func are set.
        """
        # num of classes
        K = len(trainData)
        # num of features
        D = len(trainData[1][0])
        classes = range(1, K + 1)

        # per-class means
        mk = {}
        for k in classes:
            mk[k] = ut.getMean(trainData[k], 'array')
        # mean over all samples of all classes
        m = ut.getMean(reduce(ut.add, [trainData[k] for k in classes]))

        # Between-class scatter: one weighted term per class.  Every
        # class has to contribute; the earlier bound range(1, K - 1)
        # dropped the last two classes and left nothing to reduce for
        # K == 2.
        def betweenScatter(k):
            diff = mk[k] - m
            return len(trainData[k]) * np.dot(diff, diff.transpose())
        S_B = reduce(ut.add, [betweenScatter(k) for k in classes])

        # construct S_W
        if mode == 'diag':
            S_W = np.diag([1] * D)
        else:
            def sampleScatter(x, y):
                diff = x - y
                return np.dot(diff, diff.transpose())
            Sk = [reduce(ut.add, [sampleScatter(ut.ls2Vec(x), mk[k])
                                  for x in trainData[k]])
                  for k in classes]
            S_W = reduce(ut.add, Sk)

        # NOTE(review): eigh assumes a symmetric matrix, which
        # inv(S_W) * S_B is not in general when mode != 'diag' --
        # confirm whether np.linalg.eig is what is wanted here.
        [egs, vecs] = np.linalg.eigh(np.dot(np.linalg.inv(S_W), S_B))

        pos = ut.getMaxPos(egs, subSpaceDim)
        W = ut.arrayExtract(vecs, pos)

        # Project every training sample into the sub-feature space and
        # hand the projected set to the Gaussian generative model to
        # obtain the classifier.
        def projectFunc(v):
            tmp = np.dot(ut.ls2Vec(v).transpose(), W).tolist()
            return tmp[0]
        for k in classes:
            trainData[k] = [projectFunc(v) for v in trainData[k]]
        self.projectFunc = projectFunc
        self.func = GaussianGM.GaussianGM(trainData)
Beispiel #6
0
 def f(w, w0, x):
     """Return the 1-based index of the highest-scoring linear model.

     For every pair (ww, ww0) taken from w and w0 the score is
     ww^T * ut.ls2Vec(x) + ww0; the first position reported by
     ut.getMaxPos over those scores is shifted by one and returned.
     """
     scores = []
     for weights, bias in zip(w, w0):
         projection = np.dot(weights.transpose(), ut.ls2Vec(x))
         scores.append(projection[0, 0] + bias)
     winner = ut.getMaxPos(scores)
     return winner[0] + 1