コード例 #1
0
def test_GeometricCorner(display=0):
    """Build a tree on points labelled by a rectangular corner region.

    Draws ``n`` uniform points on the square [0, 3) x [0, 3) and labels a
    point positive when ``x0 > 2 and x1 > 1``. A ``BranchNode`` is then given
    a random half of the rows (sampled without replacement) and the value
    returned by ``constructTreeFromNode()`` is printed.

    Args:
        display: When truthy, show a scatter plot of the points coloured by
            label before the tree is built.
    """
    # Create test data: defined on a 3x3 square comprising (0,3) and (0,3)
    n = 1000
    x = np.random.random_sample((n, 2)) * 3
    # The positive class is the corner region x0 > 2 and x1 > 1
    labels = np.logical_and(x[:, 0] > 2, x[:, 1] > 1)

    # Columns 0-1 hold the coordinates, column 2 the label (bool -> float)
    dataSet = np.hstack((x, np.reshape(labels, (n, 1))))
    dataLength = len(dataSet)

    if display:
        plt.scatter(x[:, 0], x[:, 1], c=labels)
        plt.show()

    # NOTE(review): column types [0, 0, 1] presumably mean two continuous
    # features and one discrete target -- confirm against BranchNode.
    DT = (BranchNode()
          .set_maxDepth(2)
          .set_regressorIndex(2)
          .set_columnTypes([0, 0, 1]))

    # Single trial; widen the range to repeat with fresh random halves.
    for _ in range(1):
        # The sample size must be an int: a float size (dataLength * .5)
        # raises TypeError on current NumPy releases.
        sampleRows = np.random.choice(dataLength,
                                      size=dataLength // 2,
                                      replace=False)
        DT = DT.set_dataSet(dataSet[sampleRows])
        print(DT.constructTreeFromNode())
コード例 #2
0
def test_FindMinimumSplit_DiscreteContinuous():
    """Check the split chosen for a discrete feature and continuous target.

    Repeats 20 trials. In each, the first half of the rows has feature value
    2 with targets in [10, 10.5), and the second half has feature value 0 or
    1 with targets in [20, 22). The test passes when the most frequently
    reported ``decisionPoint`` across the trials equals 2.0. The last fitted
    node and the pass/fail line are printed.
    """
    n = 100
    # Sizes handed to NumPy must be ints; n * .5 is a float and raises
    # TypeError on current NumPy releases.
    half = n // 2
    xValues = [0, 1, 2]
    xProbs1 = [0, 0, 1]  # first half: always value 2
    xProbs2 = [.5, .5, 0]  # second half: value 0 or 1, never 2
    splits = []
    for _ in range(20):
        x = np.concatenate((np.random.choice(xValues, size=half, p=xProbs1),
                            np.random.choice(xValues, size=half, p=xProbs2)))
        y = np.concatenate((
            # Average 10.25 (range 0.5) <- lower variance should be taken
            np.random.random(half) * .5 + 10,
            # Average 21.00 (range 2)
            np.random.random(half) * 2 + 20))

        DT = BranchNode()
        # One row per sample: column 0 is the feature, column 1 the target
        dataSet = np.vstack((x, y)).T
        # NOTE(review): the trailing (1, 0) presumably flags a discrete
        # feature and a continuous target -- confirm against BranchNode.
        DT = DT.findMinimumSplit(dataSet, 0, 1, 1, 0)
        splits.append(DT.decisionPoint)

    print(DT)
    if collections.Counter(splits).most_common(1)[0][0] == 2.0:
        print('*\t\tDiscrete Continuous Test passed')
    else:
        print('**\t\tDiscrete Continuous Test failed')
コード例 #3
0
def test_GiniImpurity():
    """Check that ``impurityFunction`` orders three inputs as expected.

    Scores an all-distinct sequence, a constant sequence and a constant
    sequence with a single differing element. The test passes when the
    constant input scores exactly 0 and the three scores are strictly
    ordered constant < single-outlier < all-distinct.

    NOTE(review): the node is configured with impurity type 'i'; whether
    that selects Gini impurity is not visible here -- confirm.
    """
    sampleSize = 100
    node = BranchNode().set_impurityType('i')

    # All values distinct: expected to give the largest score
    scoreDistinct = node.impurityFunction(np.arange(1, sampleSize))

    # All values equal: expected to give a score of exactly zero
    scoreConstant = node.impurityFunction(np.ones(sampleSize))

    # One differing element among zeros: expected to land in between
    nearlyConstant = np.concatenate((np.zeros(sampleSize - 1), np.ones(1)))
    scoreMixed = node.impurityFunction(nearlyConstant)

    passed = scoreConstant == 0 and (scoreConstant < scoreMixed < scoreDistinct)
    message = ('*\t\tGini Impurity Test passed' if passed
               else '**\t\tGini Impurity Test failed')
    print(message)
コード例 #4
0
def test_FindMinimumSplit_DiscreteDiscrete():
    """Check the split chosen for a discrete feature and discrete target.

    The first 50 rows have feature 'a' with targets drawn from {0, 1, 2};
    the last 50 rows have feature 'b' and target 1. Stacking the string
    feature with the integer target yields a string array, which is why the
    expected ``trueVal`` is the string '1'. The test passes when the split
    reports ``trueVal == '1'``, ``decisionPoint == 'b'`` and
    ``trueValProportions == 1.0``.
    """
    groupSize = 50
    targetValues = [0, 1, 2]
    featureValues = ['a', 'b', 'c']
    draw = np.random.choice

    # Targets are drawn before features, first group before second group
    mixedTargets = draw(targetValues, size=groupSize, p=[.3, .3, .4])
    pureTargets = draw(targetValues, size=groupSize, p=[0, 1, 0])
    firstFeatures = draw(featureValues, size=groupSize, p=[1, 0, 0])
    secondFeatures = draw(featureValues, size=groupSize, p=[0, 1, 0])

    y = np.concatenate((mixedTargets, pureTargets))
    x = np.concatenate((firstFeatures, secondFeatures))

    # One row per sample: column 0 is the feature, column 1 the target
    samples = np.vstack((x, y)).T
    split = BranchNode().findMinimumSplit(samples, 0, 1, 1, 1)

    passed = (split.trueVal == '1'
              and split.decisionPoint == 'b'
              and split.trueValProportions == 1.0)
    if not passed:
        print('**\t\tDiscrete Discrete Test failed')
    else:
        print('*\t\tDiscrete Discrete Test passed')
コード例 #5
0
def test_FindMinimumSplit_ContinuousContinuous_1():
    """Check the split chosen for a continuous feature and continuous target.

    Repeats 100 trials. In each, the target is 0..n-1 and the feature is
    drawn from [10, 12) for the first half of the rows and from [12, 14) for
    the second half. The test passes when the mean reported
    ``decisionPoint`` over all trials lies strictly between 11.5 and 12.5.
    The last fitted node, the pass/fail line, and the mean and standard
    deviation of the decision points are printed.
    """
    n = 100
    # Sizes handed to NumPy must be ints; n * .5 is a float and raises
    # TypeError on current NumPy releases.
    half = n // 2
    splits = []
    for _ in range(100):
        y = np.arange(0, n)
        x = np.concatenate((
            np.random.random(half) * 2 + 10,  # average 11 (max 12)
            np.random.random(half) * 2 + 12))  # average 13 (min 12)

        DT = BranchNode()
        # One row per sample: column 0 is the feature, column 1 the target
        dataSet = np.vstack((x, y)).T
        DT = DT.findMinimumSplit(dataSet, 0, 1, 0, 0)
        splits.append(DT.decisionPoint)

    print(DT)
    decisionPoint = np.mean(splits)
    if 11.5 < decisionPoint < 12.5:
        print('*\t\tContinuous Continuous Test 1 passed')
    else:
        print('**\t\tContinuous Continuous Test 1 failed')
    print(decisionPoint)
    print(np.std(splits))
コード例 #6
0
def test_FindMinimumSplit_ContinuousDiscrete():
    """Check the split chosen for a continuous feature and discrete target.

    The first 50 rows have a feature in [10, 20) and a target of 0 or 1; the
    last 50 rows have a feature in [20, 30) and a target that is mostly 2.
    The fitted node is printed, and the test passes when its
    ``decisionPoint`` lies strictly between 20 and 21.
    """
    groupSize = 50
    targetValues = [0, 1, 2]

    # Features are drawn before targets, low group before high group
    lowFeatures = np.random.random(groupSize) * 10 + 10  # average 15 (max 20)
    highFeatures = np.random.random(groupSize) * 10 + 20  # average 25 (min 20)
    lowTargets = np.random.choice(targetValues, size=groupSize, p=[.5, .5, 0])
    highTargets = np.random.choice(targetValues, size=groupSize, p=[0, .1, .9])

    x = np.concatenate((lowFeatures, highFeatures))
    y = np.concatenate((lowTargets, highTargets))

    node = BranchNode().set_depth(5).set_impurityType('g')
    # One row per sample: column 0 is the feature, column 1 the target
    samples = np.vstack((x, y)).T
    node = node.findMinimumSplit(samples, 0, 1, 0, 1)
    print(node)

    splitAt = node.decisionPoint
    if 20 < splitAt < 21:
        print('*\t\tContinuous Discrete passed')
    else:
        print('**\t\tContinuous Discrete failed')